29 lines
940 B
Python
29 lines
940 B
Python
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
|
|
|
|
|
import logging
import re

# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
from scrapy.pipelines.files import FilesPipeline
|
|
|
|
|
|
class WikipediaCountryScraperPipeline:
    """Pass-through item pipeline: every item is returned unchanged."""

    def process_item(self, item, spider):
        """Return *item* untouched.

        Args:
            item: The scraped item handed to the pipeline.
            spider: The spider that produced the item (unused here).

        Returns:
            The same ``item`` object that was passed in.
        """
        return item
|
|
|
|
|
|
class WikipediaCountryScraperFilesPipeline(FilesPipeline):
    """Files pipeline that picks the storage folder from the file extension.

    ``.svg`` downloads go to ``files/flags/`` and ``.ogg.mp3`` downloads go
    to ``files/anthems/``, each keeping the file name taken from the URL.
    """

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the storage path for the file fetched by *request*.

        Args:
            request: The download request; only ``request.url`` is read here.
            response: Passed through to the base implementation on fallback.
            info: Passed through to the base implementation on fallback.
            item: Passed through to the base implementation on fallback.

        Returns:
            ``files/flags/<name>`` for URLs ending in ``.svg``,
            ``files/anthems/<name>`` for URLs ending in ``.ogg.mp3``, and
            otherwise whatever ``FilesPipeline.file_path`` returns.
        """
        logging.getLogger(__name__).debug("request URLs: %s", request.url)

        # Everything after the last "/" in the URL; the whole URL if it has
        # no "/" at all.
        filename = request.url.rpartition("/")[2]

        if filename.endswith(".svg"):
            return f"files/flags/{filename}"
        if filename.endswith(".ogg.mp3"):
            return f"files/anthems/{filename}"

        # Any other file type: defer to the stock naming scheme instead of
        # implicitly returning None, which is not a usable path.
        return super().file_path(request, response=response, info=info, item=item)
|