chore: try new xpath for anthem

This commit is contained in:
2022-06-24 02:14:45 +01:00
parent 4b305f757c
commit 0bd759a002
3 changed files with 78 additions and 110 deletions

View File

@@ -97,5 +97,5 @@ FEEDS = {
/ "data" / "data"
/ "scrapy" / "scrapy"
/ "raw_country_data" / "raw_country_data"
/ "countries.json": {"format": "json", "encoding": "utf8", "store_empty": True} / "countries.json": {"format": "json", "encoding": "utf8", "store_empty": False}
} }

View File

@@ -50,7 +50,7 @@ class CountrydownloaderSpider(scrapy.Spider):
).get() ).get()
anthem_page_url = response.xpath( anthem_page_url = response.xpath(
"//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]/a/@href" "//table[contains(@class, 'infobox')]/tbody/tr/td[contains(@class, 'anthem')]//span[contains(@class, 'audio')]/a/@href"
).get() ).get()
country_item = { country_item = {

View File

@@ -58,15 +58,15 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 24,
"id": "36149580-91d9-431d-99c3-51feee829e79", "id": "36149580-91d9-431d-99c3-51feee829e79",
"metadata": { "metadata": {
"execution": { "execution": {
"iopub.execute_input": "2022-06-23T23:01:32.912901Z", "iopub.execute_input": "2022-06-24T00:47:50.283172Z",
"iopub.status.busy": "2022-06-23T23:01:32.912446Z", "iopub.status.busy": "2022-06-24T00:47:50.282750Z",
"iopub.status.idle": "2022-06-23T23:01:32.917561Z", "iopub.status.idle": "2022-06-24T00:47:50.301549Z",
"shell.execute_reply": "2022-06-23T23:01:32.916608Z", "shell.execute_reply": "2022-06-24T00:47:50.300747Z",
"shell.execute_reply.started": "2022-06-23T23:01:32.912867Z" "shell.execute_reply.started": "2022-06-24T00:47:50.283143Z"
}, },
"tags": [] "tags": []
}, },
@@ -79,88 +79,69 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 23, "execution_count": 35,
"id": "d03be94e-8642-4916-8a43-1711e0c21b36", "id": "d03be94e-8642-4916-8a43-1711e0c21b36",
"metadata": { "metadata": {
"execution": { "execution": {
"iopub.execute_input": "2022-06-24T00:14:02.657283Z", "iopub.execute_input": "2022-06-24T01:09:19.590298Z",
"iopub.status.busy": "2022-06-24T00:14:02.656916Z", "iopub.status.busy": "2022-06-24T01:09:19.589666Z",
"iopub.status.idle": "2022-06-24T00:14:02.728545Z", "iopub.status.idle": "2022-06-24T01:09:19.676856Z",
"shell.execute_reply": "2022-06-24T00:14:02.726698Z", "shell.execute_reply": "2022-06-24T01:09:19.674877Z",
"shell.execute_reply.started": "2022-06-24T00:14:02.657254Z" "shell.execute_reply.started": "2022-06-24T01:09:19.590267Z"
}, },
"tags": [] "tags": []
}, },
"outputs": [ "outputs": [
{ {
"ename": "JSONDecodeError", "ename": "JSONDecodeError",
"evalue": "Extra data: line 83 column 2 (char 2276290)", "evalue": "Extra data: line 83 column 2 (char 2294639)",
"output_type": "error", "output_type": "error",
"traceback": [ "traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mJSONDecodeError\u001b[0m Traceback (most recent call last)", "\u001b[0;31mJSONDecodeError\u001b[0m Traceback (most recent call last)",
"Input \u001b[0;32mIn [23]\u001b[0m, in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m countries_file \u001b[38;5;241m=\u001b[39m data_directory \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcountries.json\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 2\u001b[0m countries \u001b[38;5;241m=\u001b[39m \u001b[43mjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloads\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcountries_file\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_text\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m countries\n", "Input \u001b[0;32mIn [35]\u001b[0m, in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m countries_file \u001b[38;5;241m=\u001b[39m data_directory \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcountries.json\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 2\u001b[0m countries \u001b[38;5;241m=\u001b[39m \u001b[43mjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloads\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcountries_file\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_text\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/.pyenv/versions/3.8.12/lib/python3.8/json/__init__.py:357\u001b[0m, in \u001b[0;36mloads\u001b[0;34m(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[0m\n\u001b[1;32m 352\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m kw[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mencoding\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 354\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m 355\u001b[0m parse_int \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m parse_float \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m 356\u001b[0m parse_constant \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_pairs_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kw):\n\u001b[0;32m--> 357\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_default_decoder\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 358\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 359\u001b[0m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;241m=\u001b[39m JSONDecoder\n", "File \u001b[0;32m~/.pyenv/versions/3.8.12/lib/python3.8/json/__init__.py:357\u001b[0m, in \u001b[0;36mloads\u001b[0;34m(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[0m\n\u001b[1;32m 352\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m kw[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mencoding\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 354\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m 355\u001b[0m parse_int \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m parse_float \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m 356\u001b[0m parse_constant \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_pairs_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kw):\n\u001b[0;32m--> 357\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_default_decoder\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 358\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 359\u001b[0m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;241m=\u001b[39m JSONDecoder\n",
"File \u001b[0;32m~/.pyenv/versions/3.8.12/lib/python3.8/json/decoder.py:340\u001b[0m, in \u001b[0;36mJSONDecoder.decode\u001b[0;34m(self, s, _w)\u001b[0m\n\u001b[1;32m 338\u001b[0m end \u001b[38;5;241m=\u001b[39m _w(s, end)\u001b[38;5;241m.\u001b[39mend()\n\u001b[1;32m 339\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m end \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(s):\n\u001b[0;32m--> 340\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m JSONDecodeError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExtra data\u001b[39m\u001b[38;5;124m\"\u001b[39m, s, end)\n\u001b[1;32m 341\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\n", "File \u001b[0;32m~/.pyenv/versions/3.8.12/lib/python3.8/json/decoder.py:340\u001b[0m, in \u001b[0;36mJSONDecoder.decode\u001b[0;34m(self, s, _w)\u001b[0m\n\u001b[1;32m 338\u001b[0m end \u001b[38;5;241m=\u001b[39m _w(s, end)\u001b[38;5;241m.\u001b[39mend()\n\u001b[1;32m 339\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m end \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(s):\n\u001b[0;32m--> 340\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m JSONDecodeError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExtra data\u001b[39m\u001b[38;5;124m\"\u001b[39m, s, end)\n\u001b[1;32m 341\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\n",
"\u001b[0;31mJSONDecodeError\u001b[0m: Extra data: line 83 column 2 (char 2276290)" "\u001b[0;31mJSONDecodeError\u001b[0m: Extra data: line 83 column 2 (char 2294639)"
] ]
} }
], ],
"source": [ "source": [
"countries_file = data_directory / \"countries.json\"\n", "countries_file = data_directory / \"countries.json\"\n",
"countries = json.loads(countries_file.read_text())\n", "countries = json.loads(countries_file.read_text())\n",
"countries" "# countries"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 21, "execution_count": 32,
"id": "29cca9ea-16d3-4534-8c9e-49fde37f8cdd", "id": "29cca9ea-16d3-4534-8c9e-49fde37f8cdd",
"metadata": { "metadata": {
"execution": { "execution": {
"iopub.execute_input": "2022-06-24T00:13:20.594318Z", "iopub.execute_input": "2022-06-24T00:48:48.927613Z",
"iopub.status.busy": "2022-06-24T00:13:20.593642Z", "iopub.status.busy": "2022-06-24T00:48:48.926883Z",
"iopub.status.idle": "2022-06-24T00:13:20.718607Z", "iopub.status.idle": "2022-06-24T00:48:49.010610Z",
"shell.execute_reply": "2022-06-24T00:13:20.717214Z", "shell.execute_reply": "2022-06-24T00:48:49.008078Z",
"shell.execute_reply.started": "2022-06-24T00:13:20.594287Z" "shell.execute_reply.started": "2022-06-24T00:48:48.927549Z"
}, },
"tags": [] "tags": []
}, },
"outputs": [ "outputs": [],
{
"ename": "ValueError",
"evalue": "Trailing data",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"Input \u001b[0;32mIn [21]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_json\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcountries_file\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/git-repos/geography-anki/playground/downloaded_data_inspection/.venv/lib/python3.8/site-packages/pandas/util/_decorators.py:207\u001b[0m, in \u001b[0;36mdeprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 206\u001b[0m kwargs[new_arg_name] \u001b[38;5;241m=\u001b[39m new_arg_value\n\u001b[0;32m--> 207\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/git-repos/geography-anki/playground/downloaded_data_inspection/.venv/lib/python3.8/site-packages/pandas/util/_decorators.py:311\u001b[0m, in \u001b[0;36mdeprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 305\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m>\u001b[39m num_allow_args:\n\u001b[1;32m 306\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 307\u001b[0m msg\u001b[38;5;241m.\u001b[39mformat(arguments\u001b[38;5;241m=\u001b[39marguments),\n\u001b[1;32m 308\u001b[0m \u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[1;32m 309\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39mstacklevel,\n\u001b[1;32m 310\u001b[0m )\n\u001b[0;32m--> 311\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/git-repos/geography-anki/playground/downloaded_data_inspection/.venv/lib/python3.8/site-packages/pandas/io/json/_json.py:612\u001b[0m, in \u001b[0;36mread_json\u001b[0;34m(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit, encoding, encoding_errors, lines, chunksize, compression, nrows, storage_options)\u001b[0m\n\u001b[1;32m 609\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m json_reader\n\u001b[1;32m 611\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m json_reader:\n\u001b[0;32m--> 612\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mjson_reader\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/git-repos/geography-anki/playground/downloaded_data_inspection/.venv/lib/python3.8/site-packages/pandas/io/json/_json.py:746\u001b[0m, in \u001b[0;36mJsonReader.read\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 744\u001b[0m obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_object_parser(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_combine_lines(data_lines))\n\u001b[1;32m 745\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 746\u001b[0m obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_object_parser\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 747\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclose()\n\u001b[1;32m 748\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\n",
"File \u001b[0;32m~/git-repos/geography-anki/playground/downloaded_data_inspection/.venv/lib/python3.8/site-packages/pandas/io/json/_json.py:768\u001b[0m, in \u001b[0;36mJsonReader._get_object_parser\u001b[0;34m(self, json)\u001b[0m\n\u001b[1;32m 766\u001b[0m obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 767\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m typ \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mframe\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 768\u001b[0m obj \u001b[38;5;241m=\u001b[39m \u001b[43mFrameParser\u001b[49m\u001b[43m(\u001b[49m\u001b[43mjson\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 770\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m typ \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mseries\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m obj \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 771\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(dtype, \u001b[38;5;28mbool\u001b[39m):\n",
"File \u001b[0;32m~/git-repos/geography-anki/playground/downloaded_data_inspection/.venv/lib/python3.8/site-packages/pandas/io/json/_json.py:880\u001b[0m, in \u001b[0;36mParser.parse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 878\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_parse_numpy()\n\u001b[1;32m 879\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 880\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_parse_no_numpy\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 882\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 883\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
"File \u001b[0;32m~/git-repos/geography-anki/playground/downloaded_data_inspection/.venv/lib/python3.8/site-packages/pandas/io/json/_json.py:1133\u001b[0m, in \u001b[0;36mFrameParser._parse_no_numpy\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1129\u001b[0m orient \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39morient\n\u001b[1;32m 1131\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m orient \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 1132\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj \u001b[38;5;241m=\u001b[39m DataFrame(\n\u001b[0;32m-> 1133\u001b[0m \u001b[43mloads\u001b[49m\u001b[43m(\u001b[49m\u001b[43mjson\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprecise_float\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprecise_float\u001b[49m\u001b[43m)\u001b[49m, dtype\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1134\u001b[0m )\n\u001b[1;32m 1135\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m orient \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msplit\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 1136\u001b[0m decoded \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 1137\u001b[0m \u001b[38;5;28mstr\u001b[39m(k): v\n\u001b[1;32m 1138\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m loads(json, precise_float\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprecise_float)\u001b[38;5;241m.\u001b[39mitems()\n\u001b[1;32m 1139\u001b[0m }\n",
"\u001b[0;31mValueError\u001b[0m: Trailing data"
]
}
],
"source": [ "source": [
"df = pd.read_json(countries_file)" "df = pd.read_json(countries_file)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 33,
"id": "ef8bc3ce-08dd-4260-807c-2616b2e1c1ba", "id": "ef8bc3ce-08dd-4260-807c-2616b2e1c1ba",
"metadata": { "metadata": {
"execution": { "execution": {
"iopub.execute_input": "2022-06-23T23:05:33.776164Z", "iopub.execute_input": "2022-06-24T00:48:51.018167Z",
"iopub.status.busy": "2022-06-23T23:05:33.775761Z", "iopub.status.busy": "2022-06-24T00:48:51.017745Z",
"iopub.status.idle": "2022-06-23T23:05:33.782482Z", "iopub.status.idle": "2022-06-24T00:48:51.023756Z",
"shell.execute_reply": "2022-06-23T23:05:33.781523Z", "shell.execute_reply": "2022-06-24T00:48:51.022902Z",
"shell.execute_reply.started": "2022-06-23T23:05:33.776132Z" "shell.execute_reply.started": "2022-06-24T00:48:51.018137Z"
}, },
"tags": [] "tags": []
}, },
@@ -169,7 +150,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"(206,)\n", "(81,)\n",
"[False]\n", "[False]\n",
"[False]\n" "[False]\n"
] ]
@@ -184,23 +165,24 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 34,
"id": "48db8f93-659b-45a4-8477-a7cec139bebc", "id": "48db8f93-659b-45a4-8477-a7cec139bebc",
"metadata": { "metadata": {
"execution": { "execution": {
"iopub.execute_input": "2022-06-23T23:05:34.455157Z", "iopub.execute_input": "2022-06-24T00:48:52.316175Z",
"iopub.status.busy": "2022-06-23T23:05:34.454626Z", "iopub.status.busy": "2022-06-24T00:48:52.315575Z",
"iopub.status.idle": "2022-06-23T23:05:34.464728Z", "iopub.status.idle": "2022-06-24T00:48:52.323965Z",
"shell.execute_reply": "2022-06-23T23:05:34.463338Z", "shell.execute_reply": "2022-06-24T00:48:52.323184Z",
"shell.execute_reply.started": "2022-06-23T23:05:34.455117Z" "shell.execute_reply.started": "2022-06-24T00:48:52.316146Z"
} },
"tags": []
}, },
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"(206,)\n", "(81,)\n",
"[False]\n", "[False]\n",
"[False]\n" "[False]\n"
] ]
@@ -215,23 +197,24 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 29,
"id": "a52f6aa2-5bbd-46e4-9b2f-cdbd7269cb6e", "id": "a52f6aa2-5bbd-46e4-9b2f-cdbd7269cb6e",
"metadata": { "metadata": {
"execution": { "execution": {
"iopub.execute_input": "2022-06-23T23:05:35.088529Z", "iopub.execute_input": "2022-06-24T00:47:57.991196Z",
"iopub.status.busy": "2022-06-23T23:05:35.088020Z", "iopub.status.busy": "2022-06-24T00:47:57.990582Z",
"iopub.status.idle": "2022-06-23T23:05:35.096880Z", "iopub.status.idle": "2022-06-24T00:47:58.001189Z",
"shell.execute_reply": "2022-06-23T23:05:35.095870Z", "shell.execute_reply": "2022-06-24T00:47:57.999654Z",
"shell.execute_reply.started": "2022-06-23T23:05:35.088490Z" "shell.execute_reply.started": "2022-06-24T00:47:57.991142Z"
} },
"tags": []
}, },
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"(206,)\n", "(81,)\n",
"[False]\n", "[False]\n",
"[False]\n" "[False]\n"
] ]
@@ -246,23 +229,24 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 30,
"id": "643e6512-1e5b-4eb2-9f0a-6b680ada787b", "id": "643e6512-1e5b-4eb2-9f0a-6b680ada787b",
"metadata": { "metadata": {
"execution": { "execution": {
"iopub.execute_input": "2022-06-23T23:05:35.979818Z", "iopub.execute_input": "2022-06-24T00:47:59.412325Z",
"iopub.status.busy": "2022-06-23T23:05:35.979064Z", "iopub.status.busy": "2022-06-24T00:47:59.411973Z",
"iopub.status.idle": "2022-06-23T23:05:35.993588Z", "iopub.status.idle": "2022-06-24T00:47:59.420681Z",
"shell.execute_reply": "2022-06-23T23:05:35.992386Z", "shell.execute_reply": "2022-06-24T00:47:59.419781Z",
"shell.execute_reply.started": "2022-06-23T23:05:35.979769Z" "shell.execute_reply.started": "2022-06-24T00:47:59.412296Z"
} },
"tags": []
}, },
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"11\n" "7\n"
] ]
}, },
{ {
@@ -272,49 +256,33 @@
" 'path': 'files/flags/Flag_of_the_Taliban.svg',\n", " 'path': 'files/flags/Flag_of_the_Taliban.svg',\n",
" 'checksum': '153b7b9dc8133d542e744f5ff6102710',\n", " 'checksum': '153b7b9dc8133d542e744f5ff6102710',\n",
" 'status': 'downloaded'}],\n", " 'status': 'downloaded'}],\n",
" [{'url': 'https://upload.wikimedia.org/wikipedia/commons/5/50/Flag_of_Burundi.svg',\n", " [{'url': 'https://upload.wikimedia.org/wikipedia/commons/0/01/Flag_of_Niue.svg',\n",
" 'path': 'files/flags/Flag_of_Burundi.svg',\n", " 'path': 'files/flags/Flag_of_Niue.svg',\n",
" 'checksum': '08f6719ece3f3a45661e629e8a7a3dc4',\n", " 'checksum': 'ce971e9afe79c9a63fd706a617b34ce2',\n",
" 'status': 'downloaded'}],\n", " 'status': 'downloaded'}],\n",
" [{'url': 'https://upload.wikimedia.org/wikipedia/commons/b/bf/Flag_of_Bosnia_and_Herzegovina.svg',\n", " [{'url': 'https://upload.wikimedia.org/wikipedia/commons/4/4e/Flag_of_Uganda.svg',\n",
" 'path': 'files/flags/Flag_of_Bosnia_and_Herzegovina.svg',\n", " 'path': 'files/flags/Flag_of_Uganda.svg',\n",
" 'checksum': '38a248a0da355ec7d6591e67489ed08b',\n", " 'checksum': 'f8bb736e5832232610b5b65dd3c0a121',\n",
" 'status': 'downloaded'}],\n", " 'status': 'downloaded'}],\n",
" [{'url': 'https://upload.wikimedia.org/wikipedia/commons/b/bc/Flag_of_Transnistria_%28state%29.svg',\n", " [{'url': 'https://upload.wikimedia.org/wikipedia/commons/e/e4/Flag_of_the_Federated_States_of_Micronesia.svg',\n",
" 'path': 'files/flags/Flag_of_Transnistria_%28state%29.svg',\n", " 'path': 'files/flags/Flag_of_the_Federated_States_of_Micronesia.svg',\n",
" 'checksum': 'b94340e38c45f716216d05968158ee9c',\n", " 'checksum': 'cfc3756759f4002983b49217456fc8e4',\n",
" 'status': 'downloaded'}],\n", " 'status': 'downloaded'}],\n",
" [{'url': 'https://upload.wikimedia.org/wikipedia/commons/3/38/Flag_of_Tuvalu.svg',\n", " [{'url': 'https://upload.wikimedia.org/wikipedia/commons/4/49/Flag_of_Kenya.svg',\n",
" 'path': 'files/flags/Flag_of_Tuvalu.svg',\n", " 'path': 'files/flags/Flag_of_Kenya.svg',\n",
" 'checksum': '096e1716f1863eb3fddc1291434907f7',\n", " 'checksum': 'aa572e0e7ad47c23e37633f1b370da8d',\n",
" 'status': 'downloaded'}],\n", " 'status': 'downloaded'}],\n",
" [{'url': 'https://upload.wikimedia.org/wikipedia/commons/f/f4/Flag_of_Niger.svg',\n", " [{'url': 'https://upload.wikimedia.org/wikipedia/commons/9/91/Flag_of_Bhutan.svg',\n",
" 'path': 'files/flags/Flag_of_Niger.svg',\n", " 'path': 'files/flags/Flag_of_Bhutan.svg',\n",
" 'checksum': '0c4322271e2aecc54aec87a8935228f0',\n", " 'checksum': 'ce4684f240e15637d2c67eb222d63fe5',\n",
" 'status': 'downloaded'}],\n", " 'status': 'downloaded'}],\n",
" [{'url': 'https://upload.wikimedia.org/wikipedia/commons/3/3e/Flag_of_New_Zealand.svg',\n", " [{'url': 'https://upload.wikimedia.org/wikipedia/commons/8/85/Flag_of_Belarus.svg',\n",
" 'path': 'files/flags/Flag_of_New_Zealand.svg',\n", " 'path': 'files/flags/Flag_of_Belarus.svg',\n",
" 'checksum': '7d1ea3d5c1fee2c14d81152169da672b',\n", " 'checksum': '22ec6af94d36453ca6e7c0830000a6c1',\n",
" 'status': 'downloaded'}],\n",
" [{'url': 'https://upload.wikimedia.org/wikipedia/commons/3/30/Flag_of_Nauru.svg',\n",
" 'path': 'files/flags/Flag_of_Nauru.svg',\n",
" 'checksum': '2526a443e0f48c0a8ee7c3314bf15f85',\n",
" 'status': 'downloaded'}],\n",
" [{'url': 'https://upload.wikimedia.org/wikipedia/commons/5/51/Flag_of_North_Korea.svg',\n",
" 'path': 'files/flags/Flag_of_North_Korea.svg',\n",
" 'checksum': 'a9f988e5c3b8644f1555b6830600581c',\n",
" 'status': 'downloaded'}],\n",
" [{'url': 'https://upload.wikimedia.org/wikipedia/commons/c/c1/Flag_of_Hungary.svg',\n",
" 'path': 'files/flags/Flag_of_Hungary.svg',\n",
" 'checksum': '164af2ef3ec9e4ddd560d6eb0fe982a2',\n",
" 'status': 'downloaded'}],\n",
" [{'url': 'https://upload.wikimedia.org/wikipedia/commons/f/f6/Flag_of_Denmark_%28state%29.svg',\n",
" 'path': 'files/flags/Flag_of_Denmark_%28state%29.svg',\n",
" 'checksum': 'c39cfa20d6a4bb16e0dadc83c0214013',\n",
" 'status': 'downloaded'}]]" " 'status': 'downloaded'}]]"
] ]
}, },
"execution_count": 17, "execution_count": 30,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }