From b5eec4550d7972f43ebee4eb5c4bbba14859d7ef Mon Sep 17 00:00:00 2001 From: Daniel Tomlinson Date: Sun, 26 Jun 2022 17:27:37 +0100 Subject: [PATCH] chore: update flags spider --- .../wikipedia_country_scraper/pipelines.py | 2 +- .../wikipedia_country_scraper/spiders/flags.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py index f381daf..e4025d0 100644 --- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py +++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py @@ -47,5 +47,5 @@ class FlagDownloadFilesPipeline(FilesPipeline): flag_filename = re.search(r"([^\/]*)$", request.url) if isinstance(flag_filename, re.Match): - if filename := flag_filename[1].endswith(".svg"): + if (filename := flag_filename[1]).endswith(".svg"): return f"files/flags/{filename}" diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/flags.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/flags.py index 2bc8005..9653934 100644 --- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/flags.py +++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/flags.py @@ -36,7 +36,7 @@ class FlagsSpider(scrapy.Spider): def get_country_page(self, response: TextResponse, country_name: str): flag_data = response.xpath( - "//table[contains(@class, 'infobox')]/tbody/tr/td/div/div[1]/div/a[not(contains(@href, 'cite_note'))]/@href" + "//table[contains(@class, 'infobox')]/tbody/tr/td/div/div/div/a[not(contains(@href, 'cite_note'))]/@href" ).getall() flag_image_url = flag_data[0] @@ -70,6 +70,6 @@ class FlagsSpider(scrapy.Spider): flags_item = FlagsItem() flags_item["country_name"] = country_name - flags_item["file_urls"] = [f"https://{flag_image_result}"] + flags_item["file_urls"] = [f"https:{flag_image_result}"] yield flags_item