From 97be860627dca73077b090408403322411ecbc3c Mon Sep 17 00:00:00 2001
From: Daniel Tomlinson
Date: Wed, 22 Jun 2022 21:46:33 +0100
Subject: [PATCH] chore: add json feeds output

Move the JSON export target from the `-O` command-line flag into the
FEEDS setting. The feed sets "overwrite": True so that re-running the
crawl replaces countries.json, as `-O` did, instead of appending a
second JSON document to the existing file.

---
 01_scrapy/wikipedia_country_scraper/download_countries.sh |  2 +-
 .../wikipedia_country_scraper/settings.py                 | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/01_scrapy/wikipedia_country_scraper/download_countries.sh b/01_scrapy/wikipedia_country_scraper/download_countries.sh
index fdde2e8..b83a550 100755
--- a/01_scrapy/wikipedia_country_scraper/download_countries.sh
+++ b/01_scrapy/wikipedia_country_scraper/download_countries.sh
@@ -1,2 +1,2 @@
 source .venv/bin/activate
-scrapy crawl CountrydownloaderSpider -O ../../data/scrapy/raw_country_data/countries.json
+scrapy crawl CountrydownloaderSpider
diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/settings.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/settings.py
index 53444b5..26fa7f7 100644
--- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/settings.py
+++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/settings.py
@@ -91,3 +91,21 @@ FILES_STORE = str(pathlib.Path(__file__).resolve().parents[3] / "data" / "scrapy
 # HTTPCACHE_DIR = 'httpcache'
 # HTTPCACHE_IGNORE_HTTP_CODES = []
 # HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
+
+# Export scraped items to data/scrapy/raw_country_data/countries.json under
+# parents[3] of this file (the same location the old `-O` flag pointed at).
+FEEDS = {
+    pathlib.Path(__file__).resolve().parents[3]
+    / "data"
+    / "scrapy"
+    / "raw_country_data"
+    / "countries.json": {
+        "format": "json",
+        "encoding": "utf8",
+        "store_empty": True,
+        "indent": 2,
+        # Local-file feeds append by default, and appending to an existing
+        # JSON export yields invalid JSON; overwrite like the old `-O` flag did.
+        "overwrite": True,
+    }
+}