diff --git a/01_scrapy/wikipedia_country_scraper/download_countries.sh b/01_scrapy/wikipedia_country_scraper/download_countries.sh
index fdde2e8..b83a550 100755
--- a/01_scrapy/wikipedia_country_scraper/download_countries.sh
+++ b/01_scrapy/wikipedia_country_scraper/download_countries.sh
@@ -1,2 +1,2 @@
 source .venv/bin/activate
-scrapy crawl CountrydownloaderSpider -O ../../data/scrapy/raw_country_data/countries.json
+scrapy crawl CountrydownloaderSpider
diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/settings.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/settings.py
index 53444b5..26fa7f7 100644
--- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/settings.py
+++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/settings.py
@@ -91,3 +91,20 @@ FILES_STORE = str(pathlib.Path(__file__).resolve().parents[3] / "data" / "scrapy
 # HTTPCACHE_DIR = 'httpcache'
 # HTTPCACHE_IGNORE_HTTP_CODES = []
 # HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
+
+# Export scraped items to <repo root>/data/scrapy/raw_country_data/countries.json.
+# "overwrite": True keeps the behaviour of the `-O` flag that download_countries.sh
+# used to pass; without it the local-filesystem feed storage appends on every run,
+# which leaves several concatenated JSON arrays (i.e. invalid JSON) in the file.
+FEEDS = {
+    pathlib.Path(__file__).resolve().parents[3]
+    / "data"
+    / "scrapy"
+    / "raw_country_data"
+    / "countries.json": {
+        "format": "json",
+        "encoding": "utf8",
+        "store_empty": True,
+        "indent": 2,
+        "overwrite": True,
+    }
+}