chore: update itemloader and exclusion filter for flags

This commit is contained in:
2022-06-26 22:19:50 +01:00
parent ceb7aa5b08
commit 6fcae89c5d
2 changed files with 3 additions and 3 deletions

View File

@@ -9,4 +9,4 @@ class FlagsItemLoader(ItemLoader):
country_name_in = MapCompose(lambda country: country.replace("_", " "))
# flag_description - remove HTML tags
flag_description = MapCompose(remove_tags)
flag_description_in = MapCompose(remove_tags)

View File

@@ -6,7 +6,7 @@ from wikipedia_country_scraper.items import FlagsItem
from wikipedia_country_scraper.itemloaders import FlagsItemLoader
EXCLUDED = ["Réunion", "Svalbard", "Mayotte", "Guadeloupe", "French_Guiana"]
EXCLUDED = ["réunion", "svalbard", "mayotte", "guadeloupe", "french guiana"]
class FlagsSpider(scrapy.Spider):
@@ -34,7 +34,7 @@ class FlagsSpider(scrapy.Spider):
]
for country_name, country_url in zip(country_names, country_urls):
if country_name not in EXCLUDED:
if country_name.lower() not in EXCLUDED:
yield scrapy.Request(
url=country_url, callback=self.get_country_page, cb_kwargs={"country_name": country_name}
)