chore: update filter middleware
This commit is contained in:
@@ -105,5 +105,11 @@ class WikipediaCountryScraperDownloaderMiddleware:
|
|||||||
|
|
||||||
class WikipediaCountryScraperDecodeURLMiddleware:
|
class WikipediaCountryScraperDecodeURLMiddleware:
|
||||||
def process_request(self, request, spider):
|
def process_request(self, request, spider):
|
||||||
|
# https://www.w3schools.com/tags/ref_urlencode.ASP
|
||||||
|
request._url = request.url.replace("%27", "'")
|
||||||
request._url = request.url.replace("%28", "(")
|
request._url = request.url.replace("%28", "(")
|
||||||
request._url = request.url.replace("%29", ")")
|
request._url = request.url.replace("%29", ")")
|
||||||
|
request._url = request.url.replace("%C3%A7", "ç")
|
||||||
|
request._url = request.url.replace("%C3%B1", "ñ")
|
||||||
|
request._url = request.url.replace("%C3%B4", "ô")
|
||||||
|
request._url = request.url.replace("%C3%85", "Å")
|
||||||
|
|||||||
@@ -3,7 +3,6 @@
|
|||||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||||
|
|
||||||
import urllib
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
# useful for handling different item types with a single interface
|
# useful for handling different item types with a single interface
|
||||||
|
|||||||
Reference in New Issue
Block a user