chore: download anthems as .ogg instead of .mp3
This commit is contained in:
@@ -1,2 +1,3 @@
|
|||||||
|
#!/bin/bash
|
||||||
source .venv/bin/activate
|
source .venv/bin/activate
|
||||||
scrapy crawl CountrydownloaderSpider
|
scrapy crawl CountrydownloaderSpider
|
||||||
|
|||||||
@@ -24,5 +24,5 @@ class WikipediaCountryScraperFilesPipeline(FilesPipeline):
|
|||||||
if isinstance(flag_filename, re.Match):
|
if isinstance(flag_filename, re.Match):
|
||||||
if (filename := flag_filename[1]).endswith(".svg"):
|
if (filename := flag_filename[1]).endswith(".svg"):
|
||||||
return f"files/flags/{filename}"
|
return f"files/flags/{filename}"
|
||||||
elif filename.endswith(".ogg.mp3"):
|
elif filename.endswith(".ogg"):
|
||||||
return f"files/anthems/{filename}"
|
return f"files/anthems/{filename}"
|
||||||
|
|||||||
@@ -50,7 +50,7 @@ class CountrydownloaderSpider(scrapy.Spider):
|
|||||||
).get()
|
).get()
|
||||||
|
|
||||||
anthem_file_url = response.xpath(
|
anthem_file_url = response.xpath(
|
||||||
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]//source[@data-title='MP3']/@src"
|
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]//source[contains(@type, 'audio/ogg')]/@src"
|
||||||
).get()
|
).get()
|
||||||
anthem_item = response.xpath(
|
anthem_item = response.xpath(
|
||||||
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]"
|
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]"
|
||||||
@@ -94,7 +94,6 @@ class CountrydownloaderSpider(scrapy.Spider):
|
|||||||
country_item = {**country_item, "flag_image_url": f"https:{flag_image_xpath}"}
|
country_item = {**country_item, "flag_image_url": f"https:{flag_image_xpath}"}
|
||||||
|
|
||||||
country_scrapy_item = WikipediaCountryScraperItem()
|
country_scrapy_item = WikipediaCountryScraperItem()
|
||||||
print(f"ANTHEM: {country_item['anthem']}")
|
|
||||||
|
|
||||||
country_scrapy_item["country_url"] = country_item["country_url"]
|
country_scrapy_item["country_url"] = country_item["country_url"]
|
||||||
country_scrapy_item["short_country_name"] = country_item["short_country_name"]
|
country_scrapy_item["short_country_name"] = country_item["short_country_name"]
|
||||||
|
|||||||
3431
playground/downloaded_data_inspection/inspection.ipynb
Normal file
3431
playground/downloaded_data_inspection/inspection.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
2047
playground/downloaded_data_inspection/poetry.lock
generated
Normal file
2047
playground/downloaded_data_inspection/poetry.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
19
playground/downloaded_data_inspection/pyproject.toml
Normal file
19
playground/downloaded_data_inspection/pyproject.toml
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
[tool.poetry]
|
||||||
|
name = "downloaded_data_inspection"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = ""
|
||||||
|
authors = ["Daniel Tomlinson <dtomlinson@panaetius.co.uk>"]
|
||||||
|
|
||||||
|
[tool.poetry.dependencies]
|
||||||
|
python = "^3.8"
|
||||||
|
notebook = "^6.4.12"
|
||||||
|
pandas = "^1.4.2"
|
||||||
|
jupyterthemes = "^0.20.0"
|
||||||
|
jupyter-contrib-nbextensions = "^0.5.1"
|
||||||
|
jupyter-resource-usage = "^0.6.1"
|
||||||
|
|
||||||
|
[tool.poetry.dev-dependencies]
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["poetry-core>=1.0.0"]
|
||||||
|
build-backend = "poetry.core.masonry.api"
|
||||||
Reference in New Issue
Block a user