diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/countrydownloader.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/countrydownloader.py index b67bf63..d24e56f 100644 --- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/countrydownloader.py +++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/countrydownloader.py @@ -100,7 +100,14 @@ class CountrydownloaderSpider(scrapy.Spider): def extract_anthem_file(self, response: TextResponse, country_item: dict, urls: dict): anthem_text = response.xpath("//div[@id='mw-content-text']/div/p").get() - anthem_file_url = response.xpath("//tr[contains(@class, 'haudio')]//a/@href").get() + _anthem_file_url = response.xpath("//tr[contains(@class, 'haudio')]//a/@href").getall() + + for file in _anthem_file_url: + if file.endswith(".ogg") or file.endswith(".oga"): + anthem_file_url = file + break + else: + anthem_file_url = None country_scrapy_item = WikipediaCountryScraperItem() country_scrapy_item["country_url"] = country_item["country_url"]