chore: update itemloader and exclusion filter for flags
This commit is contained in:
@@ -9,4 +9,4 @@ class FlagsItemLoader(ItemLoader):
|
||||
country_name_in = MapCompose(lambda country: country.replace("_", " "))
|
||||
|
||||
# flag_description - remove html tags
|
||||
flag_description = MapCompose(remove_tags)
|
||||
flag_description_in = MapCompose(remove_tags)
|
||||
|
||||
@@ -6,7 +6,7 @@ from wikipedia_country_scraper.items import FlagsItem
|
||||
from wikipedia_country_scraper.itemloaders import FlagsItemLoader
|
||||
|
||||
|
||||
EXCLUDED = ["Réunion", "Svalbard", "Mayotte", "Guadeloupe", "French_Guiana"]
|
||||
EXCLUDED = ["réunion", "svalbard", "mayotte", "guadeloupe", "french guiana"]
|
||||
|
||||
|
||||
class FlagsSpider(scrapy.Spider):
|
||||
@@ -34,7 +34,7 @@ class FlagsSpider(scrapy.Spider):
|
||||
]
|
||||
|
||||
for country_name, country_url in zip(country_names, country_urls):
|
||||
if country_name not in EXCLUDED:
|
||||
if country_name.lower() not in EXCLUDED:
|
||||
yield scrapy.Request(
|
||||
url=country_url, callback=self.get_country_page, cb_kwargs={"country_name": country_name}
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user