From 115697682302f453f9cde1e59a1dacf413f1ff73 Mon Sep 17 00:00:00 2001 From: Daniel Tomlinson Date: Sat, 25 Jun 2022 17:01:53 +0100 Subject: [PATCH] chore: add anthems spider --- .../download_anthems.sh | 3 + .../wikipedia_country_scraper/items.py | 9 + .../wikipedia_country_scraper/pipelines.py | 9 + .../wikipedia_country_scraper/settings.py | 20 +- .../spiders/anthems.py | 65 ++++ .../spiders/countrydownloader.py | 11 + docs/scraping.md | 4 + .../Untitled.ipynb | 285 +++++++++--------- 8 files changed, 260 insertions(+), 146 deletions(-) create mode 100755 01_scrapy/wikipedia_country_scraper/download_anthems.sh create mode 100644 01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/anthems.py diff --git a/01_scrapy/wikipedia_country_scraper/download_anthems.sh b/01_scrapy/wikipedia_country_scraper/download_anthems.sh new file mode 100755 index 0000000..0067468 --- /dev/null +++ b/01_scrapy/wikipedia_country_scraper/download_anthems.sh @@ -0,0 +1,3 @@ +#!/bin/bash +source .venv/bin/activate +scrapy crawl AnthemsSpider diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/items.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/items.py index 8560523..c578484 100644 --- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/items.py +++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/items.py @@ -15,3 +15,12 @@ class WikipediaCountryScraperItem(scrapy.Item): file_urls = scrapy.Field() files = scrapy.Field() + + +class AnthemsItem(scrapy.Item): + country_name = scrapy.Field() + native_anthem_title = scrapy.Field() + english_title = scrapy.Field() + + file_urls = scrapy.Field() + files = scrapy.Field() diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py index c4e4160..4bcaef9 100644 --- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py +++ 
b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py @@ -26,3 +26,12 @@ class WikipediaCountryScraperFilesPipeline(FilesPipeline): return f"files/flags/{filename}" elif filename.endswith(".ogg") or filename.endswith("oga"): return f"files/anthems/{filename}" + + +class AnthemDownloadFilesPipeline(FilesPipeline): + def file_path(self, request, response=None, info=None, *, item=None): + flag_filename = re.search(r"([^\/]*)$", request.url) + + if isinstance(flag_filename, re.Match): + if (filename := flag_filename[1]).endswith("ogg") or filename.endswith("oga"): + return f"files/anthems/{filename}" diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/settings.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/settings.py index 6193f0f..3b99f44 100644 --- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/settings.py +++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/settings.py @@ -65,10 +65,10 @@ DOWNLOADER_MIDDLEWARES = { # Configure item pipelines # See https://docs.scrapy.org/en/latest/topics/item-pipeline.html -ITEM_PIPELINES = { - "wikipedia_country_scraper.pipelines.WikipediaCountryScraperFilesPipeline": 300, - # "scrapy.pipelines.files.FilesPipeline": 1 -} +# ITEM_PIPELINES = { +# "wikipedia_country_scraper.pipelines.WikipediaCountryScraperFilesPipeline": 300, +# # "scrapy.pipelines.files.FilesPipeline": 1 +# } FILES_STORE = str(pathlib.Path(__file__).resolve().parents[3] / "data" / "scrapy" / "raw_country_data") # Enable and configure the AutoThrottle extension (disabled by default) @@ -92,10 +92,10 @@ FILES_STORE = str(pathlib.Path(__file__).resolve().parents[3] / "data" / "scrapy # HTTPCACHE_IGNORE_HTTP_CODES = [] # HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' -FEEDS = { - pathlib.Path(__file__).resolve().parents[3] - / "data" - / "scrapy" - / "raw_country_data" - / "countries.json": {"format": "json", "encoding": "utf8", "store_empty": 
False, "indent": 2} +# FEEDS = { +# pathlib.Path(__file__).resolve().parents[3] +# / "data" +# / "scrapy" +# / "raw_country_data" +# / "countries.json": {"format": "json", "encoding": "utf8", "store_empty": False, "indent": 2} -} +# } diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/anthems.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/anthems.py new file mode 100644 index 0000000..30710a9 --- /dev/null +++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/anthems.py @@ -0,0 +1,65 @@ +import pathlib +import re + +import scrapy +from scrapy.http import TextResponse + +from wikipedia_country_scraper.items import AnthemsItem + + +class AnthemsSpider(scrapy.Spider): + name = "AnthemsSpider" # must match "scrapy crawl AnthemsSpider" in download_anthems.sh + start_urls = ["https://en.wikipedia.org/wiki/List_of_national_anthems"] + custom_settings = { + "ITEM_PIPELINES": {"wikipedia_country_scraper.pipelines.AnthemDownloadFilesPipeline": 100}, + "FEEDS": { + pathlib.Path(__file__).resolve().parents[4] + / "data" + / "scrapy" + / "raw_country_data" + / "anthems.json": {"format": "json", "encoding": "utf8", "store_empty": False, "indent": 2} + }, + } + + def parse(self, response: TextResponse): + country_names = [] + native_anthem_titles = [] + english_titles = [] + anthem_urls = [] + + _country_names = response.xpath("//table[contains(@class, 'wikitable')]/tbody/tr/th[1]") + for link in _country_names: + if (country_name := link.xpath("a/text()").get()) is not None: + country_names.append(country_name) + + _native_anthem_titles = response.xpath("//table[contains(@class, 'wikitable')]/tbody/tr/td[1]") + for link in _native_anthem_titles: + titles = link.xpath("a/text()").getall() + native_anthem_title = "\n".join(titles) # "" for no links, the lone title for one, newline-joined otherwise + native_anthem_titles.append(native_anthem_title) + + for link in _native_anthem_titles: + if (english_title := link.xpath("small/text()").get()) is not None: +
english_titles.append(re.search(r"(?:[\W]*)(?P<title>[^\"]*)", english_title)["title"]) + else: + english_titles.append(None) + + _country_names = response.xpath("//table[contains(@class, 'wikitable')]/tbody") + for index, link in enumerate(_country_names): + if index == 0: + recognised_countries = link.xpath("tr/td[5]") + anthem_urls.extend(anthem_url.xpath("a/@href").get() for anthem_url in recognised_countries) + elif index == 1: + partially_recognised_countries = link.xpath("tr/td[6]") + anthem_urls.extend(anthem_url.xpath("a/@href").get() for anthem_url in partially_recognised_countries) + + for country_name, native_anthem_title, english_title, anthem_url in zip( + country_names, native_anthem_titles, english_titles, anthem_urls + ): + anthem_item = AnthemsItem() + anthem_item["country_name"] = country_name + anthem_item["native_anthem_title"] = native_anthem_title + anthem_item["english_title"] = english_title + anthem_item["file_urls"] = [f"https://en.wikipedia.org{anthem_url}"] if anthem_url is not None else [] # no audio link: nothing to download + + yield anthem_item diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/countrydownloader.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/countrydownloader.py index e42b339..211cde2 100644 --- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/countrydownloader.py +++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/countrydownloader.py @@ -1,5 +1,6 @@ from __future__ import annotations +import pathlib import re import scrapy @@ -10,6 +11,16 @@ from wikipedia_country_scraper.items import WikipediaCountryScraperItem class CountrydownloaderSpider(scrapy.Spider): name = "CountrydownloaderSpider" + custom_settings = { + "ITEM_PIPELINES": {"wikipedia_country_scraper.pipelines.WikipediaCountryScraperFilesPipeline": 100}, + "FEEDS": { + pathlib.Path(__file__).resolve().parents[4] + / "data" + / "scrapy" + / "raw_country_data" + / "countries.json": {"format":
"json", "encoding": "utf8", "store_empty": False, "indent": 2} + }, + } def start_requests(self): return [ diff --git a/docs/scraping.md b/docs/scraping.md index 7d3fb87..9f16bb5 100644 --- a/docs/scraping.md +++ b/docs/scraping.md @@ -23,9 +23,13 @@ Using selectors: Download files/images: <https://docs.scrapy.org/en/latest/topics/media-pipeline.html> +Setting pipelines per spider: +<https://stackoverflow.com/a/34647090> Exporting JSON: <https://docs.scrapy.org/en/latest/topics/feed-exports.html#std-setting-FEEDS> +Setting exports per spider: +<https://stackoverflow.com/a/53322959> ### new project diff --git a/playground/downloaded_data_inspection_lab/Untitled.ipynb b/playground/downloaded_data_inspection_lab/Untitled.ipynb index 5f91a00..896acc4 100644 --- a/playground/downloaded_data_inspection_lab/Untitled.ipynb +++ b/playground/downloaded_data_inspection_lab/Untitled.ipynb @@ -2,15 +2,15 @@ "cells": [ { "cell_type": "code", - "execution_count": 38, + "execution_count": 1, "id": "d8185790-0793-4881-99e8-6730f95a8006", "metadata": { "execution": { - "iopub.execute_input": "2022-06-24T21:31:57.297266Z", - "iopub.status.busy": "2022-06-24T21:31:57.284090Z", - "iopub.status.idle": "2022-06-24T21:31:57.366471Z", - "shell.execute_reply": "2022-06-24T21:31:57.365193Z", - "shell.execute_reply.started": "2022-06-24T21:31:57.293844Z" + "iopub.execute_input": "2022-06-24T22:04:54.386982Z", + "iopub.status.busy": "2022-06-24T22:04:54.386313Z", + "iopub.status.idle": "2022-06-24T22:04:54.854521Z", + "shell.execute_reply": "2022-06-24T22:04:54.853581Z", + "shell.execute_reply.started": "2022-06-24T22:04:54.386910Z" }, "tags": [] }, @@ -24,15 +24,15 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 2, "id": "ea2b3e33-d58e-4e30-a0cc-8218a1f252c9", "metadata": { "execution": { - "iopub.execute_input": "2022-06-24T21:31:58.025200Z", - "iopub.status.busy": "2022-06-24T21:31:58.024201Z", - "iopub.status.idle": "2022-06-24T21:31:58.108904Z", - 
"shell.execute_reply": "2022-06-24T21:31:58.107402Z", - "shell.execute_reply.started": "2022-06-24T21:31:58.025121Z" + "iopub.execute_input": "2022-06-24T22:04:55.458615Z", + "iopub.status.busy": "2022-06-24T22:04:55.457695Z", + "iopub.status.idle": "2022-06-24T22:04:55.475878Z", + "shell.execute_reply": "2022-06-24T22:04:55.474706Z", + "shell.execute_reply.started": "2022-06-24T22:04:55.458548Z" }, "tags": [] }, @@ -43,7 +43,7 @@ "[None]" ] }, - "execution_count": 39, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -58,15 +58,15 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 3, "id": "36149580-91d9-431d-99c3-51feee829e79", "metadata": { "execution": { - "iopub.execute_input": "2022-06-24T21:31:58.650508Z", - "iopub.status.busy": "2022-06-24T21:31:58.650001Z", - "iopub.status.idle": "2022-06-24T21:31:58.670264Z", - "shell.execute_reply": "2022-06-24T21:31:58.669296Z", - "shell.execute_reply.started": "2022-06-24T21:31:58.650473Z" + "iopub.execute_input": "2022-06-24T22:04:56.134416Z", + "iopub.status.busy": "2022-06-24T22:04:56.133745Z", + "iopub.status.idle": "2022-06-24T22:04:56.140326Z", + "shell.execute_reply": "2022-06-24T22:04:56.138507Z", + "shell.execute_reply.started": "2022-06-24T22:04:56.134371Z" }, "tags": [] }, @@ -79,15 +79,15 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 4, "id": "d03be94e-8642-4916-8a43-1711e0c21b36", "metadata": { "execution": { - "iopub.execute_input": "2022-06-24T21:52:35.726961Z", - "iopub.status.busy": "2022-06-24T21:52:35.726356Z", - "iopub.status.idle": "2022-06-24T21:52:35.823456Z", - "shell.execute_reply": "2022-06-24T21:52:35.822464Z", - "shell.execute_reply.started": "2022-06-24T21:52:35.726932Z" + "iopub.execute_input": "2022-06-24T22:04:56.621163Z", + "iopub.status.busy": "2022-06-24T22:04:56.620692Z", + "iopub.status.idle": "2022-06-24T22:04:56.731001Z", + "shell.execute_reply": "2022-06-24T22:04:56.728392Z", + 
"shell.execute_reply.started": "2022-06-24T22:04:56.621128Z" }, "tags": [] }, @@ -100,34 +100,48 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 5, "id": "29cca9ea-16d3-4534-8c9e-49fde37f8cdd", "metadata": { "execution": { - "iopub.execute_input": "2022-06-24T21:52:36.453655Z", - "iopub.status.busy": "2022-06-24T21:52:36.452202Z", - "iopub.status.idle": "2022-06-24T21:52:36.555461Z", - "shell.execute_reply": "2022-06-24T21:52:36.554096Z", - "shell.execute_reply.started": "2022-06-24T21:52:36.453559Z" + "iopub.execute_input": "2022-06-24T22:04:57.257218Z", + "iopub.status.busy": "2022-06-24T22:04:57.256573Z", + "iopub.status.idle": "2022-06-24T22:04:57.333032Z", + "shell.execute_reply": "2022-06-24T22:04:57.332120Z", + "shell.execute_reply.started": "2022-06-24T22:04:57.257174Z" }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['country_url', 'flag_description_url', 'short_country_name',\n", + " 'country_html', 'flag_html', 'file_urls', 'files'],\n", + " dtype='object')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "df = pd.read_json(countries_file)" + "df = pd.read_json(countries_file)\n", + "df.columns" ] }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 6, "id": "ef8bc3ce-08dd-4260-807c-2616b2e1c1ba", "metadata": { "execution": { - "iopub.execute_input": "2022-06-24T21:52:36.950435Z", - "iopub.status.busy": "2022-06-24T21:52:36.949946Z", - "iopub.status.idle": "2022-06-24T21:52:36.959935Z", - "shell.execute_reply": "2022-06-24T21:52:36.958581Z", - "shell.execute_reply.started": "2022-06-24T21:52:36.950398Z" + "iopub.execute_input": "2022-06-24T22:04:59.223608Z", + "iopub.status.busy": "2022-06-24T22:04:59.222961Z", + "iopub.status.idle": "2022-06-24T22:04:59.229384Z", + "shell.execute_reply": "2022-06-24T22:04:59.228618Z", + "shell.execute_reply.started": "2022-06-24T22:04:59.223578Z" }, "tags": [] }, @@ 
-151,15 +165,15 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 7, "id": "48db8f93-659b-45a4-8477-a7cec139bebc", "metadata": { "execution": { - "iopub.execute_input": "2022-06-24T21:52:37.635010Z", - "iopub.status.busy": "2022-06-24T21:52:37.634417Z", - "iopub.status.idle": "2022-06-24T21:52:37.645162Z", - "shell.execute_reply": "2022-06-24T21:52:37.643796Z", - "shell.execute_reply.started": "2022-06-24T21:52:37.634953Z" + "iopub.execute_input": "2022-06-24T22:04:59.710467Z", + "iopub.status.busy": "2022-06-24T22:04:59.709874Z", + "iopub.status.idle": "2022-06-24T22:04:59.720517Z", + "shell.execute_reply": "2022-06-24T22:04:59.717623Z", + "shell.execute_reply.started": "2022-06-24T22:04:59.710431Z" }, "tags": [] }, @@ -183,15 +197,15 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 8, "id": "a52f6aa2-5bbd-46e4-9b2f-cdbd7269cb6e", "metadata": { "execution": { - "iopub.execute_input": "2022-06-24T21:52:49.070882Z", - "iopub.status.busy": "2022-06-24T21:52:49.070107Z", - "iopub.status.idle": "2022-06-24T21:52:49.076033Z", - "shell.execute_reply": "2022-06-24T21:52:49.075243Z", - "shell.execute_reply.started": "2022-06-24T21:52:49.070853Z" + "iopub.execute_input": "2022-06-24T22:04:59.950051Z", + "iopub.status.busy": "2022-06-24T22:04:59.949622Z", + "iopub.status.idle": "2022-06-24T22:04:59.956484Z", + "shell.execute_reply": "2022-06-24T22:04:59.955471Z", + "shell.execute_reply.started": "2022-06-24T22:04:59.950016Z" }, "tags": [] }, @@ -201,8 +215,8 @@ "output_type": "stream", "text": [ "(206,)\n", - "[False True]\n", - "[False True]\n" + "[False]\n", + "[False]\n" ] } ], @@ -215,15 +229,15 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 9, "id": "643e6512-1e5b-4eb2-9f0a-6b680ada787b", "metadata": { "execution": { - "iopub.execute_input": "2022-06-24T21:53:15.797108Z", - "iopub.status.busy": "2022-06-24T21:53:15.796761Z", - "iopub.status.idle": "2022-06-24T21:53:15.809904Z", - 
"shell.execute_reply": "2022-06-24T21:53:15.809157Z", - "shell.execute_reply.started": "2022-06-24T21:53:15.797079Z" + "iopub.execute_input": "2022-06-24T22:05:00.166633Z", + "iopub.status.busy": "2022-06-24T22:05:00.166278Z", + "iopub.status.idle": "2022-06-24T22:05:00.178277Z", + "shell.execute_reply": "2022-06-24T22:05:00.177378Z", + "shell.execute_reply.started": "2022-06-24T22:05:00.166609Z" }, "tags": [] }, @@ -250,7 +264,7 @@ " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>country_url</th>\n", - " <th>flag_image_url</th>\n", + " <th>flag_description_url</th>\n", " <th>short_country_name</th>\n", " <th>country_html</th>\n", " <th>flag_html</th>\n", @@ -259,38 +273,17 @@ " </tr>\n", " </thead>\n", " <tbody>\n", - " <tr>\n", - " <th>84</th>\n", - " <td>https://en.wikipedia.org/wiki/Paraguay</td>\n", - " <td>https://en.wikipedia.org/wiki/File:Flag_of_Par...</td>\n", - " <td>Paraguay</td>\n", - " <td>[<tr><th colspan=\"2\" class=\"infobox-above adr\"...</td>\n", - " <td>None</td>\n", - " <td>[https:////upload.wikimedia.org/wikipedia/comm...</td>\n", - " <td>[{'url': 'https://upload.wikimedia.org/wikiped...</td>\n", - " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ - " country_url \\\n", - "84 https://en.wikipedia.org/wiki/Paraguay \n", - "\n", - " flag_image_url short_country_name \\\n", - "84 https://en.wikipedia.org/wiki/File:Flag_of_Par... Paraguay \n", - "\n", - " country_html flag_html \\\n", - "84 [<tr><th colspan=\"2\" class=\"infobox-above adr\"... None \n", - "\n", - " file_urls \\\n", - "84 [https:////upload.wikimedia.org/wikipedia/comm... \n", - "\n", - " files \n", - "84 [{'url': 'https://upload.wikimedia.org/wikiped... 
" + "Empty DataFrame\n", + "Columns: [country_url, flag_description_url, short_country_name, country_html, flag_html, file_urls, files]\n", + "Index: []" ] }, - "execution_count": 84, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -301,16 +294,17 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 10, "id": "5e21e98a-56ba-4e55-b5d4-89dab2232c29", "metadata": { "execution": { - "iopub.execute_input": "2022-06-24T21:53:31.685434Z", - "iopub.status.busy": "2022-06-24T21:53:31.684830Z", - "iopub.status.idle": "2022-06-24T21:53:31.697841Z", - "shell.execute_reply": "2022-06-24T21:53:31.697000Z", - "shell.execute_reply.started": "2022-06-24T21:53:31.685404Z" - } + "iopub.execute_input": "2022-06-24T22:05:00.714817Z", + "iopub.status.busy": "2022-06-24T22:05:00.714232Z", + "iopub.status.idle": "2022-06-24T22:05:00.728680Z", + "shell.execute_reply": "2022-06-24T22:05:00.727307Z", + "shell.execute_reply.started": "2022-06-24T22:05:00.714774Z" + }, + "tags": [] }, "outputs": [ { @@ -335,7 +329,7 @@ " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>country_url</th>\n", - " <th>flag_image_url</th>\n", + " <th>flag_description_url</th>\n", " <th>short_country_name</th>\n", " <th>country_html</th>\n", " <th>flag_html</th>\n", @@ -344,38 +338,17 @@ " </tr>\n", " </thead>\n", " <tbody>\n", - " <tr>\n", - " <th>84</th>\n", - " <td>https://en.wikipedia.org/wiki/Paraguay</td>\n", - " <td>https://en.wikipedia.org/wiki/File:Flag_of_Par...</td>\n", - " <td>Paraguay</td>\n", - " <td>[<tr><th colspan=\"2\" class=\"infobox-above adr\"...</td>\n", - " <td>None</td>\n", - " <td>[https:////upload.wikimedia.org/wikipedia/comm...</td>\n", - " <td>[{'url': 'https://upload.wikimedia.org/wikiped...</td>\n", - " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ - " country_url \\\n", - "84 https://en.wikipedia.org/wiki/Paraguay \n", - "\n", - " flag_image_url short_country_name \\\n", - "84 
https://en.wikipedia.org/wiki/File:Flag_of_Par... Paraguay \n", - "\n", - " country_html flag_html \\\n", - "84 [<tr><th colspan=\"2\" class=\"infobox-above adr\"... None \n", - "\n", - " file_urls \\\n", - "84 [https:////upload.wikimedia.org/wikipedia/comm... \n", - "\n", - " files \n", - "84 [{'url': 'https://upload.wikimedia.org/wikiped... " + "Empty DataFrame\n", + "Columns: [country_url, flag_description_url, short_country_name, country_html, flag_html, file_urls, files]\n", + "Index: []" ] }, - "execution_count": 85, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -386,15 +359,15 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 18, "id": "227b0c76-9e45-4849-849e-36355976cba9", "metadata": { "execution": { - "iopub.execute_input": "2022-06-24T21:54:13.588753Z", - "iopub.status.busy": "2022-06-24T21:54:13.588402Z", - "iopub.status.idle": "2022-06-24T21:54:13.594182Z", - "shell.execute_reply": "2022-06-24T21:54:13.593418Z", - "shell.execute_reply.started": "2022-06-24T21:54:13.588723Z" + "iopub.execute_input": "2022-06-24T22:13:34.716780Z", + "iopub.status.busy": "2022-06-24T22:13:34.716226Z", + "iopub.status.idle": "2022-06-24T22:13:34.734266Z", + "shell.execute_reply": "2022-06-24T22:13:34.733297Z", + "shell.execute_reply.started": "2022-06-24T22:13:34.716742Z" }, "tags": [] }, @@ -402,30 +375,38 @@ { "data": { "text/plain": [ - "'https://en.wikipedia.org/wiki/File:Flag_of_Paraguay.svg'" + "country_url https://en.wikipedia.org/wiki/Paraguay\n", + "flag_description_url https://en.wikipedia.org/wiki/Flag_of_Paraguay\n", + "short_country_name Paraguay\n", + "country_html [<tr><th colspan=\"2\" class=\"infobox-above adr\"...\n", + "flag_html <p>The <b>flag of <a href=\"/wiki/Paraguay\" tit...\n", + "file_urls [https:////upload.wikimedia.org/wikipedia/comm...\n", + "files [{'url': 'https://upload.wikimedia.org/wikiped...\n", + "Name: 84, dtype: object" ] }, - "execution_count": 89, + "execution_count": 18, 
"metadata": {}, "output_type": "execute_result" } ], "source": [ - "df.iloc[84][\"flag_image_url\"]" + "df.iloc[84]" ] }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 12, "id": "f7712d7d-9074-4fc5-89f2-6e5f47c57d20", "metadata": { "execution": { - "iopub.execute_input": "2022-06-24T21:53:55.941453Z", - "iopub.status.busy": "2022-06-24T21:53:55.940811Z", - "iopub.status.idle": "2022-06-24T21:53:55.947972Z", - "shell.execute_reply": "2022-06-24T21:53:55.947077Z", - "shell.execute_reply.started": "2022-06-24T21:53:55.941423Z" - } + "iopub.execute_input": "2022-06-24T22:05:01.455249Z", + "iopub.status.busy": "2022-06-24T22:05:01.454414Z", + "iopub.status.idle": "2022-06-24T22:05:01.462954Z", + "shell.execute_reply": "2022-06-24T22:05:01.462044Z", + "shell.execute_reply.started": "2022-06-24T22:05:01.455210Z" + }, + "tags": [] }, "outputs": [ { @@ -441,7 +422,7 @@ "[]" ] }, - "execution_count": 87, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -453,8 +434,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "d7e60156-1ee5-4bf9-ab9a-d529ee988301", + "metadata": { + "execution": { + "iopub.execute_input": "2022-06-24T22:07:56.396461Z", + "iopub.status.busy": "2022-06-24T22:07:56.396043Z", + "iopub.status.idle": "2022-06-24T22:07:56.403177Z", + "shell.execute_reply": "2022-06-24T22:07:56.402329Z", + "shell.execute_reply.started": "2022-06-24T22:07:56.396433Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'<tr><th colspan=\"2\" class=\"infobox-above adr\"><div class=\"fn org country-name\">Islamic Emirate of Afghanistan</div><div class=\"ib-country-names\"><div class=\"plainlist\"><ul><li><span title=\"Pashto-language text\"><span lang=\"ps\" dir=\"rtl\" style=\"font-style: normal;\">د افغانستان اسلامي امارت</span></span>\\xa0<span class=\"languageicon\" style=\"font-size:100%; font-weight:normal\">(<a href=\"/wiki/Pashto_language\" class=\"mw-redirect\" 
title=\"Pashto language\">Pashto</a>)</span><br><span style=\"font-size:85%;\"><span title=\"Pashto-language romanization\"><i lang=\"ps-Latn\">Də Afġānistān Islāmī Imārat</i></span></span></li><li><span title=\"Dari-language text\"><span lang=\"prs\" dir=\"rtl\" style=\"font-style: normal;\">امارت اسلامی افغانستان</span></span>\\xa0<span class=\"languageicon\" style=\"font-size:100%; font-weight:normal\">(<a href=\"/wiki/Dari_language\" class=\"mw-redirect\" title=\"Dari language\">Dari</a>)</span><br><span style=\"font-size:85%;\"><span title=\"Dari-language romanization\"><i lang=\"prs-Latn\">Imārat-i Islāmī-yi Afghānistān</i></span></span></li></ul></div></div></th></tr><tr><td colspan=\"2\" class=\"infobox-image\"><div style=\"display:table; width:100%;\">\\n <div style=\"display:table-cell; vertical-align:middle; padding-left:5px;\">\\n <div style=\"padding-bottom:3px;\"><a href=\"/wiki/File:Flag_of_the_Taliban.svg\" class=\"image\" title=\"Flag of Afghanistan\"><img alt=\"Flag of Afghanistan\" src=\"//upload.wikimedia.org/wikipedia/commons/thumb/5/5c/Flag_of_the_Taliban.svg/125px-Flag_of_the_Taliban.svg.png\" decoding=\"async\" width=\"125\" height=\"63\" class=\"thumbborder\" srcset=\"//upload.wikimedia.org/wikipedia/commons/thumb/5/5c/Flag_of_the_Taliban.svg/188px-Flag_of_the_Taliban.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/5/5c/Flag_of_the_Taliban.svg/250px-Flag_of_the_Taliban.svg.png 2x\" data-file-width=\"1000\" data-file-height=\"500\"></a></div>\\n <div><a href=\"/wiki/Flag_of_Afghanistan\" title=\"Flag of Afghanistan\">Flag</a></div>\\n </div>\\n <div style=\"display:table-cell; vertical-align:middle; padding: 0px 5px;\">\\n <div style=\"padding-bottom:3px;\"><a href=\"/wiki/File:Emblem_of_the_Islamic_Emirate_of_Afghanistan.svg\" class=\"image\" title=\"Emblem of Afghanistan\"><img alt=\"Coat of Arms of the Islamic Emirate [1]\" 
src=\"//upload.wikimedia.org/wikipedia/en/thumb/8/84/Emblem_of_the_Islamic_Emirate_of_Afghanistan.svg/85px-Emblem_of_the_Islamic_Emirate_of_Afghanistan.svg.png\" decoding=\"async\" width=\"85\" height=\"86\" srcset=\"//upload.wikimedia.org/wikipedia/en/thumb/8/84/Emblem_of_the_Islamic_Emirate_of_Afghanistan.svg/128px-Emblem_of_the_Islamic_Emirate_of_Afghanistan.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/8/84/Emblem_of_the_Islamic_Emirate_of_Afghanistan.svg/170px-Emblem_of_the_Islamic_Emirate_of_Afghanistan.svg.png 2x\" data-file-width=\"315\" data-file-height=\"318\"></a></div>\\n <div><a href=\"/wiki/Emblem_of_Afghanistan\" title=\"Emblem of Afghanistan\">Emblem</a></div>\\n </div>\\n </div></td></tr><tr><td colspan=\"2\" class=\"infobox-full-data anthem\"><b>Anthem:</b>\\xa0<span title=\"Pashto-language text\"><span lang=\"ps\" dir=\"rtl\">دا د باتورانو کور</span></span><br><span title=\"Pashto-language romanization\"><i lang=\"ps-Latn\">Dā Də Bātorāno Kor</i></span><br>\"<a href=\"/wiki/This_is_the_Home_of_the_Brave\" class=\"mw-redirect\" title=\"This is the Home of the Brave\">This is the Home of the Brave</a>\"<sup id=\"cite_ref-Tharoor_2-0\" class=\"reference\"><a href=\"#cite_note-Tharoor-2\">[2]</a></sup></td></tr><tr><td colspan=\"2\" class=\"infobox-full-data\"><div class=\"switcher-container\"><div><a href=\"/wiki/File:Afghanistan_(orthographic_projection).svg\" class=\"image\"><img alt=\"Afghanistan (orthographic projection).svg\" src=\"//upload.wikimedia.org/wikipedia/commons/thumb/1/19/Afghanistan_%28orthographic_projection%29.svg/250px-Afghanistan_%28orthographic_projection%29.svg.png\" decoding=\"async\" width=\"250\" height=\"250\" srcset=\"//upload.wikimedia.org/wikipedia/commons/thumb/1/19/Afghanistan_%28orthographic_projection%29.svg/375px-Afghanistan_%28orthographic_projection%29.svg.png 1.5x, 
//upload.wikimedia.org/wikipedia/commons/thumb/1/19/Afghanistan_%28orthographic_projection%29.svg/500px-Afghanistan_%28orthographic_projection%29.svg.png 2x\" data-file-width=\"553\" data-file-height=\"553\"></a><span class=\"switcher-label\" style=\"display:none\">Afghanistan on the globe</span></div><div><a href=\"/wiki/File:Afghanistan_-_Location_Map_(2013)_-_AFG_-_UNOCHA.svg\" class=\"image\"><img alt=\"Afghanistan - Location Map (2013) - AFG - UNOCHA.svg\" src=\"//upload.wikimedia.org/wikipedia/commons/thumb/3/31/Afghanistan_-_Location_Map_%282013%29_-_AFG_-_UNOCHA.svg/250px-Afghanistan_-_Location_Map_%282013%29_-_AFG_-_UNOCHA.svg.png\" decoding=\"async\" width=\"250\" height=\"250\" srcset=\"//upload.wikimedia.org/wikipedia/commons/thumb/3/31/Afghanistan_-_Location_Map_%282013%29_-_AFG_-_UNOCHA.svg/375px-Afghanistan_-_Location_Map_%282013%29_-_AFG_-_UNOCHA.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/3/31/Afghanistan_-_Location_Map_%282013%29_-_AFG_-_UNOCHA.svg/500px-Afghanistan_-_Location_Map_%282013%29_-_AFG_-_UNOCHA.svg.png 2x\" data-file-width=\"254\" data-file-height=\"254\"></a><span class=\"switcher-label\" style=\"display:none\">Map of Afghanistan</span></div></div></td></tr><tr><th scope=\"row\" class=\"infobox-label\">Status</th><td class=\"infobox-data\"><a href=\"/wiki/UN_member_state\" class=\"mw-redirect\" title=\"UN member state\">UN member state</a> under an <a href=\"/wiki/Recognition_of_the_Islamic_Emirate_of_Afghanistan\" title=\"Recognition of the Islamic Emirate of Afghanistan\">unrecognized government</a><sup id=\"cite_ref-3\" class=\"reference\"><a href=\"#cite_note-3\">[3]</a></sup></td></tr><tr><th scope=\"row\" class=\"infobox-label\">Capital<div class=\"ib-country-largest\">and largest city</div></th><td class=\"infobox-data\"><a href=\"/wiki/Kabul\" title=\"Kabul\">Kabul</a><br><style data-mw-deduplicate=\"TemplateStyles:r1073938472\">.mw-parser-output .geo-default,.mw-parser-output .geo-dms,.mw-parser-output 
.geo-dec{display:inline}.mw-parser-output .geo-nondefault,.mw-parser-output .geo-multi-punct{display:none}.mw-parser-output .longitude,.mw-parser-output .latitude{white-space:nowrap}</style><span class=\"plainlinks nourlexpansion\"><a class=\"external text\" href=\"//geohack.toolforge.org/geohack.php?pagename=Afghanistan&params=34_31_N_69_11_E_region:AF_source:geonames_type:city\"><span class=\"geo-default\"><span class=\"geo-dms\" title=\"Maps, aerial photos, and other data for this location\"><span class=\"latitude\">34°31′N</span> <span class=\"longitude\">69°11′E</span></span></span><span class=\"geo-multi-punct\">\\ufeff / \\ufeff</span><span class=\"geo-nondefault\"><span class=\"geo-dec\" title=\"Maps, aerial photos, and other data for this location\">34.517°N 69.183°E</span><span style=\"display:none\">\\ufeff / <span class=\"geo\">34.517; 69.183</span></span></span></a></span><span style=\"font-size: small;\"><span id=\"coordinates\"><a href=\"/wiki/Geographic_coordinate_system\" title=\"Geographic coordinate system\">Coordinates</a>: <link rel=\"mw-deduplicated-inline-style\" href=\"mw-data:TemplateStyles:r1073938472\"><span class=\"plainlinks nourlexpansion\"><a class=\"external text\" href=\"//geohack.toolforge.org/geohack.php?pagename=Afghanistan&params=34_31_N_69_11_E_region:AF_source:geonames_type:city\"><span class=\"geo-default\"><span class=\"geo-dms\" title=\"Maps, aerial photos, and other data for this location\"><span class=\"latitude\">34°31′N</span> <span class=\"longitude\">69°11′E</span></span></span><span class=\"geo-multi-punct\">\\ufeff / \\ufeff</span><span class=\"geo-nondefault\"><span class=\"geo-dec\" title=\"Maps, aerial photos, and other data for this location\">34.517°N 69.183°E</span><span style=\"display:none\">\\ufeff / <span class=\"geo\">34.517; 69.183</span></span></span></a></span></span></span><sup id=\"cite_ref-4\" class=\"reference\"><a href=\"#cite_note-4\">[4]</a></sup></td></tr><tr><th scope=\"row\" 
class=\"infobox-label\"><a href=\"/wiki/Languages_of_Afghanistan\" title=\"Languages of Afghanistan\">Major languages</a></th><td class=\"infobox-data\"><div class=\"hlist hlist-separated\"><ul><li><a href=\"/wiki/Pashto\" title=\"Pashto\">Pashto</a></li><li><a href=\"/wiki/Dari\" title=\"Dari\">Dari</a></li></ul></div></td></tr><tr><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/Ethnic_group\" title=\"Ethnic group\">Ethnic\\xa0groups</a> <div class=\"ib-country-ethnic\"> (2019 unofficial estimates)<sup id=\"cite_ref-6\" class=\"reference\"><a href=\"#cite_note-6\">[a]</a></sup><sup id=\"cite_ref-7\" class=\"reference\"><a href=\"#cite_note-7\">[6]</a></sup><sup id=\"cite_ref-:2_8-0\" class=\"reference\"><a href=\"#cite_note-:2-8\">[7]</a></sup><sup id=\"cite_ref-9\" class=\"reference\"><a href=\"#cite_note-9\">[8]</a></sup><sup id=\"cite_ref-10\" class=\"reference\"><a href=\"#cite_note-10\">[9]</a></sup></div></th><td class=\"infobox-data\"><div class=\"plainlist\"><ul><li>42% <a href=\"/wiki/Pashtun\" class=\"mw-redirect\" title=\"Pashtun\">Pashtun</a></li><li>27% <a href=\"/wiki/Tajiks\" title=\"Tajiks\">Tajik</a></li><li><span class=\"nowrap\">\\u2007</span>9% <a href=\"/wiki/Hazaras\" title=\"Hazaras\">Hazara</a></li><li><span class=\"nowrap\">\\u2007</span>9% <a href=\"/wiki/Uzbeks\" title=\"Uzbeks\">Uzbek</a></li><li><span class=\"nowrap\">\\u2007</span>4% <a href=\"/wiki/Aimaq_people\" title=\"Aimaq people\">Aimaq</a></li><li><span class=\"nowrap\">\\u2007</span>3% <a href=\"/wiki/Turkmen_people\" class=\"mw-redirect\" title=\"Turkmen people\">Turkmen</a></li><li><span class=\"nowrap\">\\u2007</span>2% <a href=\"/wiki/Baloch_people\" title=\"Baloch people\">Baloch</a></li><li><span class=\"nowrap\">\\u2007</span>4% <a href=\"/wiki/Ethnic_groups_in_Afghanistan\" title=\"Ethnic groups in Afghanistan\">Others</a></li></ul></div></td></tr><tr><th scope=\"row\" class=\"infobox-label\">Religion <div class=\"ib-country-religion\"></div></th><td 
class=\"infobox-data\"><div class=\"plainlist\"><ul><li>99.7% <a href=\"/wiki/Islam_in_Afghanistan\" title=\"Islam in Afghanistan\">Islam</a> (<a href=\"/wiki/State_religion\" title=\"State religion\">official</a>)</li><li>0.3% <a href=\"/wiki/Demographics_of_Afghanistan#Religion\" title=\"Demographics of Afghanistan\">Others</a> (2009 estimate)<sup id=\"cite_ref-Factbook_11-0\" class=\"reference\"><a href=\"#cite_note-Factbook-11\">[10]</a></sup></li></ul></div></td></tr><tr><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/Demonym\" title=\"Demonym\">Demonym(s)</a></th><td class=\"infobox-data\"><a href=\"/wiki/Afghans\" title=\"Afghans\">Afghan</a><sup id=\"cite_ref-Demonym_14-0\" class=\"reference\"><a href=\"#cite_note-Demonym-14\">[b]</a></sup><sup id=\"cite_ref-Constitution_of_Afghanistan_15-0\" class=\"reference\"><a href=\"#cite_note-Constitution_of_Afghanistan-15\">[13]</a></sup><sup id=\"cite_ref-16\" class=\"reference\"><a href=\"#cite_note-16\">[14]</a></sup></td></tr><tr><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/Politics_of_Afghanistan\" title=\"Politics of Afghanistan\">Government</a></th><td class=\"infobox-data\"><a href=\"/wiki/Unitary_state\" title=\"Unitary state\">Unitary</a> <a href=\"/wiki/Provisional_government\" title=\"Provisional government\">provisional</a> <a href=\"/wiki/Theocratic\" class=\"mw-redirect\" title=\"Theocratic\">theocratic</a> <a href=\"/wiki/Islamic_state\" title=\"Islamic state\">Islamic</a> <a href=\"/wiki/Emirate\" title=\"Emirate\">emirate</a><sup id=\"cite_ref-17\" class=\"reference\"><a href=\"#cite_note-17\">[15]</a></sup><sup id=\"cite_ref-18\" class=\"reference\"><a href=\"#cite_note-18\">[16]</a></sup><sup id=\"cite_ref-CTC_Sentinel_19-0\" class=\"reference\"><a href=\"#cite_note-CTC_Sentinel-19\">[17]</a></sup></td></tr><tr class=\"mergedrow\"><td colspan=\"2\" class=\"infobox-full-data\"><link rel=\"mw-deduplicated-inline-style\" 
href=\"mw-data:TemplateStyles:r1066479718\"></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Leader_of_the_Islamic_Emirate_of_Afghanistan\" title=\"Leader of the Islamic Emirate of Afghanistan\">Leader</a> </div></th><td class=\"infobox-data\"><span class=\"nowrap\"><a href=\"/wiki/Hibatullah_Akhundzada\" title=\"Hibatullah Akhundzada\">Hibatullah Akhundzada</a></span></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Prime_Minister_of_Afghanistan\" title=\"Prime Minister of Afghanistan\">Prime Minister</a> </div></th><td class=\"infobox-data\"><a href=\"/wiki/Hasan_Akhund\" title=\"Hasan Akhund\">Hasan Akhund</a> (<a href=\"/wiki/Acting_prime_minister\" title=\"Acting prime minister\">acting</a>)</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Chief_Justice_of_Afghanistan\" title=\"Chief Justice of Afghanistan\">Chief Justice</a> </div></th><td class=\"infobox-data\"><a href=\"/wiki/Abdul_Hakim_Ishaqzai\" title=\"Abdul Hakim Ishaqzai\">Abdul Hakim Ishaqzai</a></td></tr><tr style=\"display:none\"><td colspan=\"2\">\\n</td></tr><tr><th scope=\"row\" class=\"infobox-label\">Legislature</th><td class=\"infobox-data\"><a href=\"/wiki/Leadership_Council_of_Afghanistan\" title=\"Leadership Council of Afghanistan\">Leadership Council</a> (consultative body)<sup id=\"cite_ref-20\" class=\"reference\"><a href=\"#cite_note-20\">[18]</a></sup></td></tr><tr class=\"mergedtoprow\"><th colspan=\"2\" class=\"infobox-header\"><a href=\"/wiki/History_of_Afghanistan\" title=\"History of Afghanistan\">Formation</a></th></tr><tr class=\"mergedrow\"><td colspan=\"2\" class=\"infobox-full-data\"><link 
rel=\"mw-deduplicated-inline-style\" href=\"mw-data:TemplateStyles:r1066479718\"></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Hotak_dynasty\" title=\"Hotak dynasty\">Hotak Empire</a> </div></th><td class=\"infobox-data\"><a href=\"/wiki/Mirwais_Hotak\" title=\"Mirwais Hotak\">1709</a>–<a href=\"/wiki/Siege_of_Kandahar\" title=\"Siege of Kandahar\">1738</a></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<span class=\"nowrap\"><a href=\"/wiki/Durrani_Empire\" title=\"Durrani Empire\">Durrani Empire</a></span> </div></th><td class=\"infobox-data\">1747–1823</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Emirate_of_Afghanistan\" title=\"Emirate of Afghanistan\">Emirate</a> </div></th><td class=\"infobox-data\">1823–1839</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Durrani_Empire\" title=\"Durrani Empire\">Restoration of the Durrani Kingdom</a> </div></th><td class=\"infobox-data\"><a href=\"/wiki/First_Anglo-Afghan_War\" title=\"First Anglo-Afghan War\">1839–1842</a></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Emirate_of_Afghanistan\" title=\"Emirate of Afghanistan\">Restoration of the Emirate</a> </div></th><td class=\"infobox-data\"><a href=\"/wiki/First_Anglo-Afghan_War\" title=\"First Anglo-Afghan War\">1842–1926</a></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a 
href=\"/wiki/Dost_Mohammad_Khan\" title=\"Dost Mohammad Khan\">Dost Mohammad unites Afghanistan</a> </div></th><td class=\"infobox-data\"><a href=\"/wiki/Herat_Campaign_of_1862-63\" class=\"mw-redirect\" title=\"Herat Campaign of 1862-63\">27 May 1863</a></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Treaty_of_Gandamak\" title=\"Treaty of Gandamak\">Anglo-Afghan Agreement</a> </div></th><td class=\"infobox-data\"><a href=\"/wiki/Second_Anglo-Afghan_War\" title=\"Second Anglo-Afghan War\">26 May 1879</a></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Third_Anglo-Afghan_War\" title=\"Third Anglo-Afghan War\">Independence</a> </div></th><td class=\"infobox-data\"><a href=\"/wiki/Afghan_Independence_Day\" title=\"Afghan Independence Day\">19 August 1919</a></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Kingdom_of_Afghanistan\" title=\"Kingdom of Afghanistan\">Kingdom</a> </div></th><td class=\"infobox-data\">9 June 1926</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Republic_of_Afghanistan_(1973%E2%80%931978)\" title=\"Republic of Afghanistan (1973–1978)\">Republic</a> </div></th><td class=\"infobox-data\">17 July 1973</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Democratic_Republic_of_Afghanistan\" title=\"Democratic Republic of Afghanistan\">Democratic Republic</a> </div></th><td class=\"infobox-data\"><a href=\"/wiki/Saur_Revolution\" title=\"Saur 
Revolution\">27–28 April 1978</a></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Islamic_State_of_Afghanistan\" title=\"Islamic State of Afghanistan\">Islamic State</a> </div></th><td class=\"infobox-data\">28 April 1992</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Islamic_Emirate_of_Afghanistan_(1996%E2%80%932001)\" title=\"Islamic Emirate of Afghanistan (1996–2001)\">Islamic Emirate</a> </div></th><td class=\"infobox-data\">27 September 1996</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<span class=\"nowrap\"><a href=\"/wiki/Islamic_Republic_of_Afghanistan\" title=\"Islamic Republic of Afghanistan\">Islamic Republic</a></span> </div></th><td class=\"infobox-data\">26 January 2004</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Fall_of_Kabul_(2021)\" title=\"Fall of Kabul (2021)\">Restoration of Islamic Emirate</a> </div></th><td class=\"infobox-data\">15 August 2021</td></tr><tr style=\"display:none\"><td colspan=\"2\">\\n</td></tr><tr class=\"mergedtoprow\"><th colspan=\"2\" class=\"infobox-header\"><a href=\"/wiki/Geography_of_Afghanistan\" title=\"Geography of Afghanistan\">Area </a></th></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div class=\"ib-country-fake-li\">•\\xa0Total</div></th><td class=\"infobox-data\">652,867<sup id=\"cite_ref-21\" class=\"reference\"><a href=\"#cite_note-21\">[19]</a></sup>\\xa0km<sup>2</sup> (252,073\\xa0sq\\xa0mi) (<a href=\"/wiki/List_of_countries_and_dependencies_by_area\" title=\"List of countries and dependencies by 
area\">40th</a>)</td></tr><tr class=\"mergedbottomrow\"><th scope=\"row\" class=\"infobox-label\"><div class=\"ib-country-fake-li\">•\\xa0Water\\xa0(%)</div></th><td class=\"infobox-data\">negligible</td></tr><tr class=\"mergedtoprow\"><th colspan=\"2\" class=\"infobox-header\"><a href=\"/wiki/Demographics_of_Afghanistan\" title=\"Demographics of Afghanistan\">Population</a></th></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div class=\"ib-country-fake-li\">•\\xa02021 estimate</div></th><td class=\"infobox-data\">40,218,234<sup id=\"cite_ref-:2_8-1\" class=\"reference\"><a href=\"#cite_note-:2-8\">[7]</a></sup> (<a href=\"/wiki/List_of_countries_and_dependencies_by_population\" title=\"List of countries and dependencies by population\">37th</a>)</td></tr><tr class=\"mergedbottomrow\"><th scope=\"row\" class=\"infobox-label\"><div class=\"ib-country-fake-li\">•\\xa0Density</div></th><td class=\"infobox-data\">48.08/km<sup>2</sup> (124.5/sq\\xa0mi) (<a href=\"/wiki/List_of_countries_and_dependencies_by_population_density\" title=\"List of countries and dependencies by population density\">174th</a>)</td></tr><tr class=\"mergedtoprow\"><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/Gross_domestic_product\" title=\"Gross domestic product\">GDP</a>\\xa0<style data-mw-deduplicate=\"TemplateStyles:r886047488\">.mw-parser-output .nobold{font-weight:normal}</style><span class=\"nobold\">(<a href=\"/wiki/Purchasing_power_parity\" title=\"Purchasing power parity\">PPP</a>)</span></th><td class=\"infobox-data\">2018\\xa0estimate</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div class=\"ib-country-fake-li\">•\\xa0Total</div></th><td class=\"infobox-data\">$72.911\\xa0billion<sup id=\"cite_ref-imf2_22-0\" class=\"reference\"><a href=\"#cite_note-imf2-22\">[20]</a></sup> (<a href=\"/wiki/List_of_countries_by_GDP_(PPP)\" title=\"List of countries by GDP (PPP)\">96th</a>)</td></tr><tr class=\"mergedbottomrow\"><th 
scope=\"row\" class=\"infobox-label\"><div class=\"ib-country-fake-li\">•\\xa0Per capita</div></th><td class=\"infobox-data\">$2,024<sup id=\"cite_ref-imf2_22-1\" class=\"reference\"><a href=\"#cite_note-imf2-22\">[20]</a></sup> (<a href=\"/wiki/List_of_countries_by_GDP_(PPP)_per_capita\" title=\"List of countries by GDP (PPP) per capita\">169th</a>)</td></tr><tr class=\"mergedtoprow\"><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/Gross_domestic_product\" title=\"Gross domestic product\">GDP</a>\\xa0<link rel=\"mw-deduplicated-inline-style\" href=\"mw-data:TemplateStyles:r886047488\"><span class=\"nobold\">(nominal)</span></th><td class=\"infobox-data\">2018\\xa0estimate</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div class=\"ib-country-fake-li\">•\\xa0Total</div></th><td class=\"infobox-data\">$21.657\\xa0billion<sup id=\"cite_ref-imf2_22-2\" class=\"reference\"><a href=\"#cite_note-imf2-22\">[20]</a></sup> (<a href=\"/wiki/List_of_countries_by_GDP_(nominal)\" title=\"List of countries by GDP (nominal)\">111st</a>)</td></tr><tr class=\"mergedbottomrow\"><th scope=\"row\" class=\"infobox-label\"><div class=\"ib-country-fake-li\">•\\xa0Per capita</div></th><td class=\"infobox-data\">$493<sup id=\"cite_ref-imf2_22-3\" class=\"reference\"><a href=\"#cite_note-imf2-22\">[20]</a></sup> (<a href=\"/wiki/List_of_countries_by_GDP_(nominal)_per_capita\" title=\"List of countries by GDP (nominal) per capita\">177th</a>)</td></tr><tr><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/Human_Development_Index\" title=\"Human Development Index\">HDI</a>\\xa0<link rel=\"mw-deduplicated-inline-style\" href=\"mw-data:TemplateStyles:r886047488\"><span class=\"nobold\">(2019)</span></th><td class=\"infobox-data\"><img alt=\"Increase\" src=\"//upload.wikimedia.org/wikipedia/commons/thumb/b/b0/Increase2.svg/11px-Increase2.svg.png\" decoding=\"async\" title=\"Increase\" width=\"11\" height=\"11\" 
srcset=\"//upload.wikimedia.org/wikipedia/commons/thumb/b/b0/Increase2.svg/17px-Increase2.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/b/b0/Increase2.svg/22px-Increase2.svg.png 2x\" data-file-width=\"300\" data-file-height=\"300\">\\xa00.511<sup id=\"cite_ref-UNHDR_23-0\" class=\"reference\"><a href=\"#cite_note-UNHDR-23\">[21]</a></sup><br><span class=\"nowrap\"><span style=\"color:red\">low</span></span>\\xa0·\\xa0<a href=\"/wiki/List_of_countries_by_Human_Development_Index\" title=\"List of countries by Human Development Index\">169th</a></td></tr><tr><th scope=\"row\" class=\"infobox-label\">Currency</th><td class=\"infobox-data\"><a href=\"/wiki/Afghan_afghani\" title=\"Afghan afghani\">Afghani</a> (<span title=\"Dari-language text\"><span lang=\"prs\" dir=\"rtl\">افغانی</span></span>) (<a href=\"/wiki/ISO_4217\" title=\"ISO 4217\">AFN</a>)</td></tr><tr><th scope=\"row\" class=\"infobox-label\">Time zone</th><td class=\"infobox-data\"><span class=\"nowrap\"><a href=\"/wiki/Coordinated_Universal_Time\" title=\"Coordinated Universal Time\">UTC</a>+4:30<br><a href=\"/wiki/Solar_Hijri_calendar\" title=\"Solar Hijri calendar\">Solar Calendar</a></span> (D†)</td></tr><tr><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/Left-_and_right-hand_traffic\" title=\"Left- and right-hand traffic\">Driving side</a></th><td class=\"infobox-data\">right</td></tr><tr><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/Telephone_numbers_in_Afghanistan\" title=\"Telephone numbers in Afghanistan\">Calling code</a></th><td class=\"infobox-data\"><a href=\"/wiki/Telephone_numbers_in_Afghanistan\" title=\"Telephone numbers in Afghanistan\">+93</a></td></tr><tr><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/ISO_3166\" title=\"ISO 3166\">ISO 3166 code</a></th><td class=\"infobox-data\"><a href=\"/wiki/ISO_3166-2:AF\" title=\"ISO 3166-2:AF\">AF</a></td></tr><tr><th scope=\"row\" class=\"infobox-label\"><a 
href=\"/wiki/Country_code_top-level_domain\" title=\"Country code top-level domain\">Internet TLD</a></th><td class=\"infobox-data\"><a href=\"/wiki/.af\" title=\".af\">.af</a><br><a href=\"/wiki/%D8%A7%D9%81%D8%BA%D8%A7%D9%86%D8%B3%D8%AA%D8%A7%D9%86.\" class=\"mw-redirect\" title=\"افغانستان.\">افغانستان.</a></td></tr>'" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df.iloc[0].country_html)\n", + "content = \"\".join(df.iloc[0].country_html)\n", + "content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97c1e41f-30f3-4116-aa11-5797e05b95ba", "metadata": {}, "outputs": [], "source": []