chore: add flag image url to saved item

This commit is contained in:
2022-06-24 22:43:15 +01:00
parent 17b0462da5
commit fa26c99ba5
3 changed files with 220 additions and 27 deletions

View File

@@ -8,6 +8,7 @@ import scrapy
class WikipediaCountryScraperItem(scrapy.Item):
country_url = scrapy.Field()
flag_image_url = scrapy.Field()
short_country_name = scrapy.Field()
country_html = scrapy.Field()
flag_html = scrapy.Field()

View File

@@ -49,9 +49,15 @@ class CountrydownloaderSpider(scrapy.Spider):
flag_image_url = response.xpath(
"//table[contains(@class, 'infobox')]/tbody/tr[2]/td/div/div[1]/div[1]/a/@href"
).get()
flag_description_url = response.xpath(
"//table[contains(@class, 'infobox')]/tbody/tr[2]/td/div/div[1]/div/a/@href"
).getall()[-1]
try:
flag_description_url = response.xpath(
"//table[contains(@class, 'infobox')]/tbody/tr[2]/td/div/div[1]/div/a/@href"
).getall()[-1]
except IndexError:
flag_description_url = response.xpath(
"//table[contains(@class, 'infobox')]/tbody/tr[2]/td/div/div[1]/div/a/@href"
).get()
country_item = {
**country_item,
@@ -97,6 +103,7 @@ class CountrydownloaderSpider(scrapy.Spider):
# yield the country item containing scraped data
country_scrapy_item = WikipediaCountryScraperItem()
country_scrapy_item["country_url"] = country_item["country_url"]
country_scrapy_item["flag_image_url"] = urls["flag_image_url"]
country_scrapy_item["short_country_name"] = country_item["short_country_name"]
country_scrapy_item["country_html"] = country_item["country_html"]
country_scrapy_item["flag_html"] = country_item["flag_html"]