chore: change XPath for flag description URL
This commit is contained in:
@@ -8,7 +8,7 @@ import scrapy
|
||||
|
||||
class WikipediaCountryScraperItem(scrapy.Item):
|
||||
country_url = scrapy.Field()
|
||||
flag_image_url = scrapy.Field()
|
||||
flag_description_url = scrapy.Field()
|
||||
short_country_name = scrapy.Field()
|
||||
country_html = scrapy.Field()
|
||||
flag_html = scrapy.Field()
|
||||
|
||||
@@ -50,14 +50,9 @@ class CountrydownloaderSpider(scrapy.Spider):
|
||||
"//table[contains(@class, 'infobox')]/tbody/tr[2]/td/div/div[1]/div[1]/a/@href"
|
||||
).get()
|
||||
|
||||
try:
|
||||
flag_description_url = response.xpath(
|
||||
"//table[contains(@class, 'infobox')]/tbody/tr[2]/td/div/div[1]/div/a/@href"
|
||||
).getall()[-1]
|
||||
except IndexError:
|
||||
flag_description_url = response.xpath(
|
||||
"//table[contains(@class, 'infobox')]/tbody/tr[2]/td/div/div[1]/div/a/@href"
|
||||
).get()
|
||||
flag_description_url = response.xpath(
|
||||
"//table[contains(@class, 'infobox')]/tbody/tr[2]/td/div/div[1]/div/a[not(contains(@href, 'cite_note'))]/@href"
|
||||
).getall()[-1]
|
||||
|
||||
country_item = {
|
||||
**country_item,
|
||||
@@ -71,6 +66,7 @@ class CountrydownloaderSpider(scrapy.Spider):
|
||||
"country_item": country_item,
|
||||
"urls": {
|
||||
"flag_image_url": f"https://en.wikipedia.org{flag_image_url}",
|
||||
"flag_description_url": f"https://en.wikipedia.org{flag_description_url}",
|
||||
},
|
||||
},
|
||||
)
|
||||
@@ -103,7 +99,7 @@ class CountrydownloaderSpider(scrapy.Spider):
|
||||
# yield the country item containing scraped data
|
||||
country_scrapy_item = WikipediaCountryScraperItem()
|
||||
country_scrapy_item["country_url"] = country_item["country_url"]
|
||||
country_scrapy_item["flag_image_url"] = urls["flag_image_url"]
|
||||
country_scrapy_item["flag_description_url"] = urls["flag_description_url"]
|
||||
country_scrapy_item["short_country_name"] = country_item["short_country_name"]
|
||||
country_scrapy_item["country_html"] = country_item["country_html"]
|
||||
country_scrapy_item["flag_html"] = country_item["flag_html"]
|
||||
|
||||
Reference in New Issue
Block a user