chore: change anthem download to .ogg instead of .mp3

This commit is contained in:
2022-06-22 23:19:49 +01:00
parent c781e337b8
commit 5581762c39
6 changed files with 5500 additions and 3 deletions

View File

@@ -1,2 +1,3 @@
#!/bin/bash
# Launch the CountrydownloaderSpider crawl from inside the local virtualenv.
# The virtualenv path is relative, so run this from the directory that
# contains `.venv`.

# Abort if the virtualenv cannot be activated; otherwise the crawl would run
# with whatever `scrapy` (if any) happens to be on PATH.
source .venv/bin/activate || {
    echo "error: failed to activate .venv/bin/activate (is .venv in the current directory?)" >&2
    exit 1
}

# Last command: its exit status becomes the script's exit status.
scrapy crawl CountrydownloaderSpider

View File

@@ -24,5 +24,5 @@ class WikipediaCountryScraperFilesPipeline(FilesPipeline):
if isinstance(flag_filename, re.Match):
if (filename := flag_filename[1]).endswith(".svg"):
return f"files/flags/{filename}"
elif filename.endswith(".ogg.mp3"):
elif filename.endswith(".ogg"):
return f"files/anthems/{filename}"

View File

@@ -50,7 +50,7 @@ class CountrydownloaderSpider(scrapy.Spider):
).get()
anthem_file_url = response.xpath(
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]//source[@data-title='MP3']/@src"
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]//source[contains(@type, 'audio/ogg')]/@src"
).get()
anthem_item = response.xpath(
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]"
@@ -94,7 +94,6 @@ class CountrydownloaderSpider(scrapy.Spider):
country_item = {**country_item, "flag_image_url": f"https:{flag_image_xpath}"}
country_scrapy_item = WikipediaCountryScraperItem()
print(f"ANTHEM: {country_item['anthem']}")
country_scrapy_item["country_url"] = country_item["country_url"]
country_scrapy_item["short_country_name"] = country_item["short_country_name"]