From 747c0e8d79693a09e0677c6c20635e26db229a64 Mon Sep 17 00:00:00 2001
From: Daniel Tomlinson
Date: Mon, 27 Jun 2022 00:05:43 +0100
Subject: [PATCH] chore: move filter into pipeline

---
 .../wikipedia_country_scraper/middlewares.py | 8 ++++----
 .../wikipedia_country_scraper/pipelines.py | 7 +++++++
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/middlewares.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/middlewares.py
index 5ae9a46..bc42aa8 100644
--- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/middlewares.py
+++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/middlewares.py
@@ -109,7 +109,7 @@ class WikipediaCountryScraperDecodeURLMiddleware:
         request._url = request.url.replace("%27", "'")
         request._url = request.url.replace("%28", "(")
         request._url = request.url.replace("%29", ")")
-        request._url = request.url.replace("%C3%A7", "ç")
-        request._url = request.url.replace("%C3%B1", "ñ")
-        request._url = request.url.replace("%C3%B4", "ô")
-        request._url = request.url.replace("%C3%85", "Å")
+        # request._url = request.url.replace("%C3%A7", "ç")
+        # request._url = request.url.replace("%C3%B1", "ñ")
+        # request._url = request.url.replace("%C3%B4", "ô")
+        # request._url = request.url.replace("%C3%85", "Å")
diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py
index e306cd0..4ca09e0 100644
--- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py
+++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py
@@ -42,4 +42,11 @@ class FlagDownloadFilesPipeline(FilesPipeline):

         if isinstance(flag_filename, re.Match):
             if (filename := flag_filename[1]).endswith(("svg", "png")):
+                # filename = filename.replace("%27", "'")
+                # filename = filename.replace("%28", "(")
+                # filename = filename.replace("%29", ")")
+                filename = filename.replace("%C3%A7", "ç")
+                filename = filename.replace("%C3%B1", "ñ")
+                filename = filename.replace("%C3%B4", "ô")
+                filename = filename.replace("%C3%85", "Å")
                 return f"files/flags/{filename}"