chore: update flags spider
This commit is contained in:
@@ -47,5 +47,5 @@ class FlagDownloadFilesPipeline(FilesPipeline):
|
|||||||
flag_filename = re.search(r"([^\/]*)$", request.url)
|
flag_filename = re.search(r"([^\/]*)$", request.url)
|
||||||
|
|
||||||
if isinstance(flag_filename, re.Match):
|
if isinstance(flag_filename, re.Match):
|
||||||
if filename := flag_filename[1].endswith(".svg"):
|
if (filename := flag_filename[1]).endswith(".svg"):
|
||||||
return f"files/flags/{filename}"
|
return f"files/flags/{filename}"
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ class FlagsSpider(scrapy.Spider):
|
|||||||
|
|
||||||
def get_country_page(self, response: TextResponse, country_name: str):
|
def get_country_page(self, response: TextResponse, country_name: str):
|
||||||
flag_data = response.xpath(
|
flag_data = response.xpath(
|
||||||
"//table[contains(@class, 'infobox')]/tbody/tr/td/div/div[1]/div/a[not(contains(@href, 'cite_note'))]/@href"
|
"//table[contains(@class, 'infobox')]/tbody/tr/td/div/div/div/a[not(contains(@href, 'cite_note'))]/@href"
|
||||||
).getall()
|
).getall()
|
||||||
|
|
||||||
flag_image_url = flag_data[0]
|
flag_image_url = flag_data[0]
|
||||||
@@ -70,6 +70,6 @@ class FlagsSpider(scrapy.Spider):
|
|||||||
|
|
||||||
flags_item = FlagsItem()
|
flags_item = FlagsItem()
|
||||||
flags_item["country_name"] = country_name
|
flags_item["country_name"] = country_name
|
||||||
flags_item["file_urls"] = [f"https://{flag_image_result}"]
|
flags_item["file_urls"] = [f"https:{flag_image_result}"]
|
||||||
|
|
||||||
yield flags_item
|
yield flags_item
|
||||||
|
|||||||
Reference in New Issue
Block a user