chore: add URL-decoding downloader middleware
This commit is contained in:
@@ -101,3 +101,9 @@ class WikipediaCountryScraperDownloaderMiddleware:
|
|||||||
|
|
||||||
def spider_opened(self, spider):
|
def spider_opened(self, spider):
|
||||||
spider.logger.info('Spider opened: %s' % spider.name)
|
spider.logger.info('Spider opened: %s' % spider.name)
|
||||||
|
|
||||||
|
|
||||||
|
class WikipediaCountryScraperDecodeURLMiddleware:
|
||||||
|
def process_request(self, request, spider):
|
||||||
|
request._url = request.url.replace("%28", "(")
|
||||||
|
request._url = request.url.replace("%29", ")")
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||||
|
|
||||||
|
import urllib
|
||||||
import re
|
import re
|
||||||
|
|
||||||
# useful for handling different item types with a single interface
|
# useful for handling different item types with a single interface
|
||||||
|
|||||||
@@ -23,6 +23,9 @@ class FlagsSpider(scrapy.Spider):
|
|||||||
/ "raw_country_data"
|
/ "raw_country_data"
|
||||||
/ "flags.json": {"format": "json", "encoding": "utf8", "store_empty": False, "indent": 2}
|
/ "flags.json": {"format": "json", "encoding": "utf8", "store_empty": False, "indent": 2}
|
||||||
},
|
},
|
||||||
|
"DOWNLOADER_MIDDLEWARES": {
|
||||||
|
"wikipedia_country_scraper.middlewares.WikipediaCountryScraperDecodeURLMiddleware": 900
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def parse(self, response: TextResponse):
|
def parse(self, response: TextResponse):
|
||||||
|
|||||||
Reference in New Issue
Block a user