chore: update filter middleware

This commit is contained in:
2022-06-26 23:57:07 +01:00
parent a07bb6f5bd
commit 5cc9297a01
2 changed files with 6 additions and 1 deletions

View File

@@ -105,5 +105,11 @@ class WikipediaCountryScraperDownloaderMiddleware:
class WikipediaCountryScraperDecodeURLMiddleware:
    """Rewrite selected percent-encoded sequences in a request URL.

    Only the sequences listed in ``_PERCENT_DECODINGS`` are touched; every
    other escape in the URL is left exactly as it was.
    """

    # Ordered (encoded, literal) pairs, applied one after another.
    # Reference: https://www.w3schools.com/tags/ref_urlencode.ASP
    _PERCENT_DECODINGS = (
        ("%27", "'"),
        ("%28", "("),
        ("%29", ")"),
        ("%C3%A7", "ç"),
        ("%C3%B1", "ñ"),
        ("%C3%B4", "ô"),
        ("%C3%85", "Å"),
    )

    def process_request(self, request, spider):
        """Substitute each known escape sequence in ``request``'s URL.

        The URL is read through ``request.url`` and the result is written
        straight to ``request._url`` on every step.
        NOTE(review): presumably the private attribute is assigned directly
        so the literal characters are not re-encoded by the request's own
        URL handling -- confirm against the Request class in use.

        Args:
            request: Object exposing ``url`` (read) and ``_url`` (written).
            spider: Unused.

        Returns:
            None (implicitly).
        """
        for encoded, literal in self._PERCENT_DECODINGS:
            # Re-read request.url each time so every step sees the value
            # currently exposed by the request object.
            request._url = request.url.replace(encoded, literal)

View File

@@ -3,7 +3,6 @@
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
import urllib
import re
# useful for handling different item types with a single interface