chore: change anthem to store html
This commit is contained in:
@@ -11,8 +11,7 @@ class WikipediaCountryScraperItem(scrapy.Item):
|
||||
short_country_name = scrapy.Field()
|
||||
country = scrapy.Field()
|
||||
flag_description = scrapy.Field()
|
||||
anthem_native_title = scrapy.Field()
|
||||
anthem_english_title = scrapy.Field()
|
||||
anthem = scrapy.Field()
|
||||
|
||||
file_urls = scrapy.Field()
|
||||
files = scrapy.Field()
|
||||
|
||||
@@ -52,17 +52,14 @@ class CountrydownloaderSpider(scrapy.Spider):
|
||||
anthem_file_url = response.xpath(
|
||||
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]//source[@data-title='MP3']/@src"
|
||||
).get()
|
||||
anthem_native_title = response.xpath(
|
||||
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]/a/@title"
|
||||
).get()
|
||||
anthem_english_title = response.xpath(
|
||||
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]/a/text()"
|
||||
anthem_item = response.xpath(
|
||||
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]"
|
||||
).get()
|
||||
|
||||
country_item = {
|
||||
**country_item,
|
||||
"country": country_information_xpath,
|
||||
"anthem": {"anthem_native_title": anthem_native_title, "anthem_english_title": anthem_english_title},
|
||||
"anthem": anthem_item,
|
||||
}
|
||||
|
||||
yield scrapy.Request(
|
||||
@@ -97,13 +94,13 @@ class CountrydownloaderSpider(scrapy.Spider):
|
||||
country_item = {**country_item, "flag_image_url": f"https:{flag_image_xpath}"}
|
||||
|
||||
country_scrapy_item = WikipediaCountryScraperItem()
|
||||
print(f"ANTHEM: {country_item['anthem']}")
|
||||
|
||||
country_scrapy_item["country_url"] = country_item["country_url"]
|
||||
country_scrapy_item["short_country_name"] = country_item["short_country_name"]
|
||||
# country_scrapy_item["country"] = country_item["country"]
|
||||
country_scrapy_item["flag_description"] = country_item["flag_description"]
|
||||
country_scrapy_item["anthem_native_title"] = country_item["anthem"]["anthem_native_title"]
|
||||
country_scrapy_item["anthem_english_title"] = country_item["anthem"]["anthem_english_title"]
|
||||
country_scrapy_item["anthem"] = country_item["anthem"]
|
||||
|
||||
country_scrapy_item["file_urls"] = [country_item["flag_image_url"], urls["anthem_file_url"]]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user