chore: change anthem to store html

This commit is contained in:
2022-06-22 21:47:04 +01:00
parent 97be860627
commit 3cb4b4ba46
2 changed files with 6 additions and 10 deletions

View File

@@ -11,8 +11,7 @@ class WikipediaCountryScraperItem(scrapy.Item):
short_country_name = scrapy.Field() short_country_name = scrapy.Field()
country = scrapy.Field() country = scrapy.Field()
flag_description = scrapy.Field() flag_description = scrapy.Field()
anthem_native_title = scrapy.Field() anthem = scrapy.Field()
anthem_english_title = scrapy.Field()
file_urls = scrapy.Field() file_urls = scrapy.Field()
files = scrapy.Field() files = scrapy.Field()

View File

@@ -52,17 +52,14 @@ class CountrydownloaderSpider(scrapy.Spider):
anthem_file_url = response.xpath( anthem_file_url = response.xpath(
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]//source[@data-title='MP3']/@src" "//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]//source[@data-title='MP3']/@src"
).get() ).get()
anthem_native_title = response.xpath( anthem_item = response.xpath(
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]/a/@title" "//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]"
).get()
anthem_english_title = response.xpath(
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]/a/text()"
).get() ).get()
country_item = { country_item = {
**country_item, **country_item,
"country": country_information_xpath, "country": country_information_xpath,
"anthem": {"anthem_native_title": anthem_native_title, "anthem_english_title": anthem_english_title}, "anthem": anthem_item,
} }
yield scrapy.Request( yield scrapy.Request(
@@ -97,13 +94,13 @@ class CountrydownloaderSpider(scrapy.Spider):
country_item = {**country_item, "flag_image_url": f"https:{flag_image_xpath}"} country_item = {**country_item, "flag_image_url": f"https:{flag_image_xpath}"}
country_scrapy_item = WikipediaCountryScraperItem() country_scrapy_item = WikipediaCountryScraperItem()
print(f"ANTHEM: {country_item['anthem']}")
country_scrapy_item["country_url"] = country_item["country_url"] country_scrapy_item["country_url"] = country_item["country_url"]
country_scrapy_item["short_country_name"] = country_item["short_country_name"] country_scrapy_item["short_country_name"] = country_item["short_country_name"]
# country_scrapy_item["country"] = country_item["country"] # country_scrapy_item["country"] = country_item["country"]
country_scrapy_item["flag_description"] = country_item["flag_description"] country_scrapy_item["flag_description"] = country_item["flag_description"]
country_scrapy_item["anthem_native_title"] = country_item["anthem"]["anthem_native_title"] country_scrapy_item["anthem"] = country_item["anthem"]
country_scrapy_item["anthem_english_title"] = country_item["anthem"]["anthem_english_title"]
country_scrapy_item["file_urls"] = [country_item["flag_image_url"], urls["anthem_file_url"]] country_scrapy_item["file_urls"] = [country_item["flag_image_url"], urls["anthem_file_url"]]