From a07bb6f5bd642cef8db676712b4afc3251e29ff5 Mon Sep 17 00:00:00 2001 From: Daniel Tomlinson Date: Sun, 26 Jun 2022 23:45:53 +0100 Subject: [PATCH] chore: add filter middleware --- .../wikipedia_country_scraper/middlewares.py | 6 ++++++ .../wikipedia_country_scraper/pipelines.py | 2 +- .../wikipedia_country_scraper/spiders/flags.py | 3 +++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/middlewares.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/middlewares.py index 0f6b595..c962064 100644 --- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/middlewares.py +++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/middlewares.py @@ -101,3 +101,9 @@ class WikipediaCountryScraperDownloaderMiddleware: def spider_opened(self, spider): spider.logger.info('Spider opened: %s' % spider.name) + + +class WikipediaCountryScraperDecodeURLMiddleware: + def process_request(self, request, spider): + request._url = request.url.replace("%28", "(") + request._url = request.url.replace("%29", ")") diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py index 680fbaa..9057f6a 100644 --- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py +++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py @@ -3,7 +3,7 @@ # Don't forget to add your pipeline to the ITEM_PIPELINES setting # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html - +import urllib import re # useful for handling different item types with a single interface diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/flags.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/flags.py index 046812f..a03e8e4 100644 --- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/flags.py +++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/flags.py @@ -23,6 +23,9 @@ class FlagsSpider(scrapy.Spider): / "raw_country_data" / "flags.json": {"format": "json", "encoding": "utf8", "store_empty": False, "indent": 2} }, + "DOWNLOADER_MIDDLEWARES": { + "wikipedia_country_scraper.middlewares.WikipediaCountryScraperDecodeURLMiddleware": 900 + }, } def parse(self, response: TextResponse):