chore: add filter middleware

This commit is contained in:
2022-06-26 23:45:53 +01:00
parent 55e4ff7722
commit a07bb6f5bd
3 changed files with 10 additions and 1 deletions

View File

@@ -101,3 +101,9 @@ class WikipediaCountryScraperDownloaderMiddleware:
def spider_opened(self, spider):
    """Log that *spider* has been opened.

    Args:
        spider: The spider being opened; its ``logger`` and ``name``
            attributes are used to emit the message.
    """
    # Pass the name as a logging argument so interpolation is left to the
    # logger and only happens when the record is actually emitted.
    spider.logger.info('Spider opened: %s', spider.name)
class WikipediaCountryScraperDecodeURLMiddleware:
    """Downloader middleware that restores literal parentheses in request URLs.

    Percent-encoded ``%28`` and ``%29`` are rewritten to ``(`` and ``)``
    before the request is downloaded.
    """

    def process_request(self, request, spider):
        """Decode percent-encoded parentheses in ``request``'s URL in place.

        Args:
            request: The outgoing request; its ``url`` is read and its
                ``_url`` attribute is overwritten with the decoded value.
            spider: The spider that issued the request (unused).

        Returns:
            None. Per the Scrapy downloader-middleware contract this lets
            the request continue through the remaining middlewares.
        """
        # Read the URL once and apply both replacements to that value, so the
        # result does not depend on ``request.url`` reflecting an intermediate
        # write to ``request._url``.
        decoded_url = request.url.replace("%28", "(").replace("%29", ")")

        # NOTE(review): this writes the private ``_url`` attribute directly,
        # presumably because the public ``url`` is read-only -- confirm
        # against the Scrapy version in use.
        request._url = decoded_url
        return None

View File

@@ -3,7 +3,7 @@
# Don't forget to add your pipeline to the ITEM_PIPELINES setting # Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
import urllib
import re import re
# useful for handling different item types with a single interface # useful for handling different item types with a single interface

View File

@@ -23,6 +23,9 @@ class FlagsSpider(scrapy.Spider):
/ "raw_country_data" / "raw_country_data"
/ "flags.json": {"format": "json", "encoding": "utf8", "store_empty": False, "indent": 2} / "flags.json": {"format": "json", "encoding": "utf8", "store_empty": False, "indent": 2}
}, },
"DOWNLOADER_MIDDLEWARES": {
"wikipedia_country_scraper.middlewares.WikipediaCountryScraperDecodeURLMiddleware": 900
},
} }
def parse(self, response: TextResponse): def parse(self, response: TextResponse):