chore: add filter middleware
This commit is contained in:
@@ -101,3 +101,9 @@ class WikipediaCountryScraperDownloaderMiddleware:
|
||||
|
||||
def spider_opened(self, spider):
    """Log, at INFO level, that ``spider`` has been opened.

    Args:
        spider: Object exposing a ``name`` attribute and a ``logger``
            with an ``info`` method.
    """
    # Hand the argument to the logger rather than pre-formatting with ``%``,
    # so the message is only built if the record is actually emitted.
    spider.logger.info('Spider opened: %s', spider.name)
|
||||
|
||||
|
||||
class WikipediaCountryScraperDecodeURLMiddleware:
    """Downloader middleware that restores literal parentheses in request URLs.

    The percent-encoded sequences ``%28`` and ``%29`` in a request's URL are
    replaced by ``(`` and ``)`` respectively.
    """

    def process_request(self, request, spider):
        """Rewrite the URL of ``request`` in place with parentheses decoded.

        Args:
            request: Object whose ``url`` is read and whose ``_url`` attribute
                is overwritten with the decoded value.
            spider: Spider issuing the request (unused).

        Returns:
            None.
        """
        # Chain both substitutions on a single value so the result does not
        # rely on ``request.url`` reflecting an earlier write to ``_url``.
        # NOTE(review): this assigns the private ``_url`` attribute directly,
        # presumably to bypass URL re-escaping on the request - confirm.
        request._url = request.url.replace("%28", "(").replace("%29", ")")
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
|
||||
|
||||
import urllib
|
||||
import re
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
|
||||
@@ -23,6 +23,9 @@ class FlagsSpider(scrapy.Spider):
|
||||
/ "raw_country_data"
|
||||
/ "flags.json": {"format": "json", "encoding": "utf8", "store_empty": False, "indent": 2}
|
||||
},
|
||||
"DOWNLOADER_MIDDLEWARES": {
|
||||
"wikipedia_country_scraper.middlewares.WikipediaCountryScraperDecodeURLMiddleware": 900
|
||||
},
|
||||
}
|
||||
|
||||
def parse(self, response: TextResponse):
|
||||
|
||||
Reference in New Issue
Block a user