chore: change anthem to store html
This commit is contained in:
@@ -11,8 +11,7 @@ class WikipediaCountryScraperItem(scrapy.Item):
|
|||||||
short_country_name = scrapy.Field()
|
short_country_name = scrapy.Field()
|
||||||
country = scrapy.Field()
|
country = scrapy.Field()
|
||||||
flag_description = scrapy.Field()
|
flag_description = scrapy.Field()
|
||||||
anthem_native_title = scrapy.Field()
|
anthem = scrapy.Field()
|
||||||
anthem_english_title = scrapy.Field()
|
|
||||||
|
|
||||||
file_urls = scrapy.Field()
|
file_urls = scrapy.Field()
|
||||||
files = scrapy.Field()
|
files = scrapy.Field()
|
||||||
|
|||||||
@@ -52,17 +52,14 @@ class CountrydownloaderSpider(scrapy.Spider):
|
|||||||
anthem_file_url = response.xpath(
|
anthem_file_url = response.xpath(
|
||||||
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]//source[@data-title='MP3']/@src"
|
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]//source[@data-title='MP3']/@src"
|
||||||
).get()
|
).get()
|
||||||
anthem_native_title = response.xpath(
|
anthem_item = response.xpath(
|
||||||
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]/a/@title"
|
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]"
|
||||||
).get()
|
|
||||||
anthem_english_title = response.xpath(
|
|
||||||
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]/a/text()"
|
|
||||||
).get()
|
).get()
|
||||||
|
|
||||||
country_item = {
|
country_item = {
|
||||||
**country_item,
|
**country_item,
|
||||||
"country": country_information_xpath,
|
"country": country_information_xpath,
|
||||||
"anthem": {"anthem_native_title": anthem_native_title, "anthem_english_title": anthem_english_title},
|
"anthem": anthem_item,
|
||||||
}
|
}
|
||||||
|
|
||||||
yield scrapy.Request(
|
yield scrapy.Request(
|
||||||
@@ -97,13 +94,13 @@ class CountrydownloaderSpider(scrapy.Spider):
|
|||||||
country_item = {**country_item, "flag_image_url": f"https:{flag_image_xpath}"}
|
country_item = {**country_item, "flag_image_url": f"https:{flag_image_xpath}"}
|
||||||
|
|
||||||
country_scrapy_item = WikipediaCountryScraperItem()
|
country_scrapy_item = WikipediaCountryScraperItem()
|
||||||
|
print(f"ANTHEM: {country_item['anthem']}")
|
||||||
|
|
||||||
country_scrapy_item["country_url"] = country_item["country_url"]
|
country_scrapy_item["country_url"] = country_item["country_url"]
|
||||||
country_scrapy_item["short_country_name"] = country_item["short_country_name"]
|
country_scrapy_item["short_country_name"] = country_item["short_country_name"]
|
||||||
# country_scrapy_item["country"] = country_item["country"]
|
# country_scrapy_item["country"] = country_item["country"]
|
||||||
country_scrapy_item["flag_description"] = country_item["flag_description"]
|
country_scrapy_item["flag_description"] = country_item["flag_description"]
|
||||||
country_scrapy_item["anthem_native_title"] = country_item["anthem"]["anthem_native_title"]
|
country_scrapy_item["anthem"] = country_item["anthem"]
|
||||||
country_scrapy_item["anthem_english_title"] = country_item["anthem"]["anthem_english_title"]
|
|
||||||
|
|
||||||
country_scrapy_item["file_urls"] = [country_item["flag_image_url"], urls["anthem_file_url"]]
|
country_scrapy_item["file_urls"] = [country_item["flag_image_url"], urls["anthem_file_url"]]
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user