chore: change anthem download to .ogg instead of .mp3
This commit is contained in:
@@ -1,2 +1,3 @@
|
||||
#!/bin/bash
|
||||
source .venv/bin/activate
|
||||
scrapy crawl CountrydownloaderSpider
|
||||
|
||||
@@ -24,5 +24,5 @@ class WikipediaCountryScraperFilesPipeline(FilesPipeline):
|
||||
if isinstance(flag_filename, re.Match):
|
||||
if (filename := flag_filename[1]).endswith(".svg"):
|
||||
return f"files/flags/{filename}"
|
||||
- elif filename.endswith(".ogg.mp3"):
|
||||
+ elif filename.endswith(".ogg"):
|
||||
return f"files/anthems/{filename}"
|
||||
|
||||
@@ -50,7 +50,7 @@ class CountrydownloaderSpider(scrapy.Spider):
|
||||
).get()
|
||||
|
||||
anthem_file_url = response.xpath(
|
||||
- "//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]//source[@data-title='MP3']/@src"
|
||||
+ "//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]//source[contains(@type, 'audio/ogg')]/@src"
|
||||
).get()
|
||||
anthem_item = response.xpath(
|
||||
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]"
|
||||
@@ -94,7 +94,6 @@ class CountrydownloaderSpider(scrapy.Spider):
|
||||
country_item = {**country_item, "flag_image_url": f"https:{flag_image_xpath}"}
|
||||
|
||||
country_scrapy_item = WikipediaCountryScraperItem()
|
||||
- print(f"ANTHEM: {country_item['anthem']}")
|
||||
|
||||
country_scrapy_item["country_url"] = country_item["country_url"]
|
||||
country_scrapy_item["short_country_name"] = country_item["short_country_name"]
|
||||
|
||||
playground/downloaded_data_inspection/inspection.ipynb
Normal file (3431 changed lines)
File diff suppressed because it is too large
playground/downloaded_data_inspection/poetry.lock
generated, Normal file (2047 changed lines)
File diff suppressed because it is too large
playground/downloaded_data_inspection/pyproject.toml
Normal file (19 lines added)
@@ -0,0 +1,19 @@
|
||||
[tool.poetry]
|
||||
name = "downloaded_data_inspection"
|
||||
version = "0.1.0"
|
||||
description = ""
|
||||
authors = ["Daniel Tomlinson <dtomlinson@panaetius.co.uk>"]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.8"
|
||||
notebook = "^6.4.12"
|
||||
pandas = "^1.4.2"
|
||||
jupyterthemes = "^0.20.0"
|
||||
jupyter-contrib-nbextensions = "^0.5.1"
|
||||
jupyter-resource-usage = "^0.6.1"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
Reference in New Issue
Block a user