chore: add remove citations to item loader
This commit is contained in:
@@ -1,12 +1,15 @@
|
||||
import re
|
||||
|
||||
from itemloaders.processors import TakeFirst, MapCompose
|
||||
from scrapy.loader import ItemLoader
|
||||
from w3lib.html import remove_tags
|
||||
|
||||
|
||||
class FlagsItemLoader(ItemLoader):
    """Item loader that cleans scraped flag fields before they are stored.

    Input processors run on every value extracted for a field; the default
    output processor then collapses the collected values with ``TakeFirst``.
    """

    default_output_processor = TakeFirst()

    # country_name - convert "_" to " "
    country_name_in = MapCompose(lambda country: country.replace("_", " "))

    # flag_description - remove html tags, then remove numeric citation
    # markers such as "[1]". Only one definition is kept: an earlier
    # tags-only assignment would be silently overwritten by this one.
    flag_description_in = MapCompose(
        remove_tags,
        lambda line: re.sub(r"(\[\d+\])", "", line),
    )
|
||||
|
||||
Reference in New Issue
Block a user