chore: add flag image url to saved item
This commit is contained in:
@@ -8,6 +8,7 @@ import scrapy
|
||||
|
||||
class WikipediaCountryScraperItem(scrapy.Item):
|
||||
country_url = scrapy.Field()
|
||||
flag_image_url = scrapy.Field()
|
||||
short_country_name = scrapy.Field()
|
||||
country_html = scrapy.Field()
|
||||
flag_html = scrapy.Field()
|
||||
|
||||
@@ -49,9 +49,15 @@ class CountrydownloaderSpider(scrapy.Spider):
|
||||
flag_image_url = response.xpath(
|
||||
"//table[contains(@class, 'infobox')]/tbody/tr[2]/td/div/div[1]/div[1]/a/@href"
|
||||
).get()
|
||||
flag_description_url = response.xpath(
|
||||
"//table[contains(@class, 'infobox')]/tbody/tr[2]/td/div/div[1]/div/a/@href"
|
||||
).getall()[-1]
|
||||
|
||||
try:
|
||||
flag_description_url = response.xpath(
|
||||
"//table[contains(@class, 'infobox')]/tbody/tr[2]/td/div/div[1]/div/a/@href"
|
||||
).getall()[-1]
|
||||
except IndexError:
|
||||
flag_description_url = response.xpath(
|
||||
"//table[contains(@class, 'infobox')]/tbody/tr[2]/td/div/div[1]/div/a/@href"
|
||||
).get()
|
||||
|
||||
country_item = {
|
||||
**country_item,
|
||||
@@ -97,6 +103,7 @@ class CountrydownloaderSpider(scrapy.Spider):
|
||||
# yield the country item containing scraped data
|
||||
country_scrapy_item = WikipediaCountryScraperItem()
|
||||
country_scrapy_item["country_url"] = country_item["country_url"]
|
||||
country_scrapy_item["flag_image_url"] = urls["flag_image_url"]
|
||||
country_scrapy_item["short_country_name"] = country_item["short_country_name"]
|
||||
country_scrapy_item["country_html"] = country_item["country_html"]
|
||||
country_scrapy_item["flag_html"] = country_item["flag_html"]
|
||||
|
||||
Reference in New Issue
Block a user