diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/items.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/items.py index 29d9e7c..1d49cdb 100644 --- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/items.py +++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/items.py @@ -11,8 +11,7 @@ class WikipediaCountryScraperItem(scrapy.Item): short_country_name = scrapy.Field() country = scrapy.Field() flag_description = scrapy.Field() - anthem_native_title = scrapy.Field() - anthem_english_title = scrapy.Field() + anthem = scrapy.Field() file_urls = scrapy.Field() files = scrapy.Field() diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/countrydownloader.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/countrydownloader.py index 6c877ed..e795175 100644 --- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/countrydownloader.py +++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/countrydownloader.py @@ -52,17 +52,14 @@ class CountrydownloaderSpider(scrapy.Spider): anthem_file_url = response.xpath( "//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]//source[@data-title='MP3']/@src" ).get() - anthem_native_title = response.xpath( - "//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]/a/@title" - ).get() - anthem_english_title = response.xpath( - "//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]/a/text()" + anthem_item = response.xpath( + "//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]" ).get() country_item = { **country_item, "country": country_information_xpath, - "anthem": {"anthem_native_title": anthem_native_title, "anthem_english_title": anthem_english_title}, + "anthem": anthem_item, } yield scrapy.Request( @@ -97,13 +94,13 @@ class CountrydownloaderSpider(scrapy.Spider): country_item = {**country_item, "flag_image_url": f"https:{flag_image_xpath}"} country_scrapy_item = WikipediaCountryScraperItem() + print(f"ANTHEM: {country_item['anthem']}") country_scrapy_item["country_url"] = country_item["country_url"] country_scrapy_item["short_country_name"] = country_item["short_country_name"] # country_scrapy_item["country"] = country_item["country"] country_scrapy_item["flag_description"] = country_item["flag_description"] - country_scrapy_item["anthem_native_title"] = country_item["anthem"]["anthem_native_title"] - country_scrapy_item["anthem_english_title"] = country_item["anthem"]["anthem_english_title"] + country_scrapy_item["anthem"] = country_item["anthem"] country_scrapy_item["file_urls"] = [country_item["flag_image_url"], urls["anthem_file_url"]]