Files
geography-anki/playground/downloaded_data_inspection_lab/Untitled.ipynb

500 lines
39 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "d8185790-0793-4881-99e8-6730f95a8006",
"metadata": {
"execution": {
"iopub.execute_input": "2022-06-24T22:04:54.386982Z",
"iopub.status.busy": "2022-06-24T22:04:54.386313Z",
"iopub.status.idle": "2022-06-24T22:04:54.854521Z",
"shell.execute_reply": "2022-06-24T22:04:54.853581Z",
"shell.execute_reply.started": "2022-06-24T22:04:54.386910Z"
},
"tags": []
},
"outputs": [],
"source": [
"import json\n",
"import pathlib\n",
"\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "ea2b3e33-d58e-4e30-a0cc-8218a1f252c9",
"metadata": {
"execution": {
"iopub.execute_input": "2022-06-24T22:04:55.458615Z",
"iopub.status.busy": "2022-06-24T22:04:55.457695Z",
"iopub.status.idle": "2022-06-24T22:04:55.475878Z",
"shell.execute_reply": "2022-06-24T22:04:55.474706Z",
"shell.execute_reply.started": "2022-06-24T22:04:55.458548Z"
},
"tags": []
},
"outputs": [],
"source": [
"# Display options applied to pandas for the whole notebook.\n",
"pd_options = {\n",
"    \"display.max_rows\": None,\n",
"}\n",
"\n",
"# Use a plain loop: a list comprehension run only for its side effects builds a\n",
"# throwaway list and echoes `[None]` as the cell result.\n",
"for option, value in pd_options.items():\n",
"    pd.set_option(option, value)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "36149580-91d9-431d-99c3-51feee829e79",
"metadata": {
"execution": {
"iopub.execute_input": "2022-06-24T22:04:56.134416Z",
"iopub.status.busy": "2022-06-24T22:04:56.133745Z",
"iopub.status.idle": "2022-06-24T22:04:56.140326Z",
"shell.execute_reply": "2022-06-24T22:04:56.138507Z",
"shell.execute_reply.started": "2022-06-24T22:04:56.134371Z"
},
"tags": []
},
"outputs": [],
"source": [
"# The raw scrapy export sits two directory levels above the kernel's working directory.\n",
"project_root = pathlib.Path(\".\").resolve().parents[1]\n",
"data_directory = project_root.joinpath(\"data\", \"scrapy\", \"raw_country_data\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "d03be94e-8642-4916-8a43-1711e0c21b36",
"metadata": {
"execution": {
"iopub.execute_input": "2022-06-24T22:04:56.621163Z",
"iopub.status.busy": "2022-06-24T22:04:56.620692Z",
"iopub.status.idle": "2022-06-24T22:04:56.731001Z",
"shell.execute_reply": "2022-06-24T22:04:56.728392Z",
"shell.execute_reply.started": "2022-06-24T22:04:56.621128Z"
},
"tags": []
},
"outputs": [],
"source": [
"countries_file = data_directory / \"countries.json\"\n",
"# JSON text is UTF-8 and this export contains non-ASCII text, so decode it\n",
"# explicitly instead of relying on the platform's default encoding.\n",
"countries = json.loads(countries_file.read_text(encoding=\"utf-8\"))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "29cca9ea-16d3-4534-8c9e-49fde37f8cdd",
"metadata": {
"execution": {
"iopub.execute_input": "2022-06-24T22:04:57.257218Z",
"iopub.status.busy": "2022-06-24T22:04:57.256573Z",
"iopub.status.idle": "2022-06-24T22:04:57.333032Z",
"shell.execute_reply": "2022-06-24T22:04:57.332120Z",
"shell.execute_reply.started": "2022-06-24T22:04:57.257174Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['country_url', 'flag_description_url', 'short_country_name',\n",
" 'country_html', 'flag_html', 'file_urls', 'files'],\n",
" dtype='object')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Parse the countries export into a DataFrame and list its columns.\n",
"df = pd.read_json(path_or_buf=countries_file)\n",
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "ef8bc3ce-08dd-4260-807c-2616b2e1c1ba",
"metadata": {
"execution": {
"iopub.execute_input": "2022-06-24T22:04:59.223608Z",
"iopub.status.busy": "2022-06-24T22:04:59.222961Z",
"iopub.status.idle": "2022-06-24T22:04:59.229384Z",
"shell.execute_reply": "2022-06-24T22:04:59.228618Z",
"shell.execute_reply.started": "2022-06-24T22:04:59.223578Z"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(206,)\n",
"[False]\n"
]
}
],
"source": [
"# Sanity-check the `country_url` column: row count and whether any value is null.\n",
"country_url = df[\"country_url\"]\n",
"print(country_url.shape)\n",
"# `Series.isnull` is an alias of `Series.isna`, so one null check is sufficient.\n",
"print(country_url.isna().unique())"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "48db8f93-659b-45a4-8477-a7cec139bebc",
"metadata": {
"execution": {
"iopub.execute_input": "2022-06-24T22:04:59.710467Z",
"iopub.status.busy": "2022-06-24T22:04:59.709874Z",
"iopub.status.idle": "2022-06-24T22:04:59.720517Z",
"shell.execute_reply": "2022-06-24T22:04:59.717623Z",
"shell.execute_reply.started": "2022-06-24T22:04:59.710431Z"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(206,)\n",
"[False]\n"
]
}
],
"source": [
"# Sanity-check the `short_country_name` column: row count and whether any value is null.\n",
"short_country_name = df[\"short_country_name\"]\n",
"print(short_country_name.shape)\n",
"# `Series.isnull` is an alias of `Series.isna`, so one null check is sufficient.\n",
"print(short_country_name.isna().unique())"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a52f6aa2-5bbd-46e4-9b2f-cdbd7269cb6e",
"metadata": {
"execution": {
"iopub.execute_input": "2022-06-24T22:04:59.950051Z",
"iopub.status.busy": "2022-06-24T22:04:59.949622Z",
"iopub.status.idle": "2022-06-24T22:04:59.956484Z",
"shell.execute_reply": "2022-06-24T22:04:59.955471Z",
"shell.execute_reply.started": "2022-06-24T22:04:59.950016Z"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(206,)\n",
"[False]\n"
]
}
],
"source": [
"# Sanity-check the `flag_html` column: row count and whether any value is null.\n",
"flag_html = df[\"flag_html\"]\n",
"print(flag_html.shape)\n",
"# `Series.isnull` is an alias of `Series.isna`, so one null check is sufficient.\n",
"print(flag_html.isna().unique())"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "643e6512-1e5b-4eb2-9f0a-6b680ada787b",
"metadata": {
"execution": {
"iopub.execute_input": "2022-06-24T22:05:00.166633Z",
"iopub.status.busy": "2022-06-24T22:05:00.166278Z",
"iopub.status.idle": "2022-06-24T22:05:00.178277Z",
"shell.execute_reply": "2022-06-24T22:05:00.177378Z",
"shell.execute_reply.started": "2022-06-24T22:05:00.166609Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>country_url</th>\n",
" <th>flag_description_url</th>\n",
" <th>short_country_name</th>\n",
" <th>country_html</th>\n",
" <th>flag_html</th>\n",
" <th>file_urls</th>\n",
" <th>files</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [country_url, flag_description_url, short_country_name, country_html, flag_html, file_urls, files]\n",
"Index: []"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the rows whose `flag_html` is null; an empty frame means none are missing.\n",
"df.loc[df[\"flag_html\"].isnull()]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "5e21e98a-56ba-4e55-b5d4-89dab2232c29",
"metadata": {
"execution": {
"iopub.execute_input": "2022-06-24T22:05:00.714817Z",
"iopub.status.busy": "2022-06-24T22:05:00.714232Z",
"iopub.status.idle": "2022-06-24T22:05:00.728680Z",
"shell.execute_reply": "2022-06-24T22:05:00.727307Z",
"shell.execute_reply.started": "2022-06-24T22:05:00.714774Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>country_url</th>\n",
" <th>flag_description_url</th>\n",
" <th>short_country_name</th>\n",
" <th>country_html</th>\n",
" <th>flag_html</th>\n",
" <th>file_urls</th>\n",
" <th>files</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [country_url, flag_description_url, short_country_name, country_html, flag_html, file_urls, files]\n",
"Index: []"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Same null filter on `flag_html`, spelled with `isna` (an alias of `isnull`).\n",
"df.loc[df[\"flag_html\"].isna(), :]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "227b0c76-9e45-4849-849e-36355976cba9",
"metadata": {
"execution": {
"iopub.execute_input": "2022-06-24T22:13:34.716780Z",
"iopub.status.busy": "2022-06-24T22:13:34.716226Z",
"iopub.status.idle": "2022-06-24T22:13:34.734266Z",
"shell.execute_reply": "2022-06-24T22:13:34.733297Z",
"shell.execute_reply.started": "2022-06-24T22:13:34.716742Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"country_url https://en.wikipedia.org/wiki/Paraguay\n",
"flag_description_url https://en.wikipedia.org/wiki/Flag_of_Paraguay\n",
"short_country_name Paraguay\n",
"country_html [<tr><th colspan=\"2\" class=\"infobox-above adr\"...\n",
"flag_html <p>The <b>flag of <a href=\"/wiki/Paraguay\" tit...\n",
"file_urls [https:////upload.wikimedia.org/wikipedia/comm...\n",
"files [{'url': 'https://upload.wikimedia.org/wikiped...\n",
"Name: 84, dtype: object"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Spot-check a single scraped record by position; row 84 is the Paraguay entry.\n",
"# NOTE(review): its `file_urls` value renders as `https:////upload...` (four slashes)\n",
"# while the `files` entry's url has the usual two - possibly a URL-joining quirk in\n",
"# the scraper; confirm upstream.\n",
"df.iloc[84, :]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "f7712d7d-9074-4fc5-89f2-6e5f47c57d20",
"metadata": {
"execution": {
"iopub.execute_input": "2022-06-24T22:05:01.455249Z",
"iopub.status.busy": "2022-06-24T22:05:01.454414Z",
"iopub.status.idle": "2022-06-24T22:05:01.462954Z",
"shell.execute_reply": "2022-06-24T22:05:01.462044Z",
"shell.execute_reply.started": "2022-06-24T22:05:01.455210Z"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"206\n"
]
},
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Measure each record's `files` entry once, print how many are non-empty, then\n",
"# list the empty ones. Bracket access matches how the other cells select columns.\n",
"has_files = df[\"files\"].map(len) != 0\n",
"print(int(has_files.sum()))\n",
"df.loc[~has_files, \"files\"].tolist()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "d7e60156-1ee5-4bf9-ab9a-d529ee988301",
"metadata": {
"execution": {
"iopub.execute_input": "2022-06-24T22:07:56.396461Z",
"iopub.status.busy": "2022-06-24T22:07:56.396043Z",
"iopub.status.idle": "2022-06-24T22:07:56.403177Z",
"shell.execute_reply": "2022-06-24T22:07:56.402329Z",
"shell.execute_reply.started": "2022-06-24T22:07:56.396433Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'<tr><th colspan=\"2\" class=\"infobox-above adr\"><div class=\"fn org country-name\">Islamic Emirate of Afghanistan</div><div class=\"ib-country-names\"><div class=\"plainlist\"><ul><li><span title=\"Pashto-language text\"><span lang=\"ps\" dir=\"rtl\" style=\"font-style: normal;\">د افغانستان اسلامي امارت</span></span>\\xa0<span class=\"languageicon\" style=\"font-size:100%; font-weight:normal\">(<a href=\"/wiki/Pashto_language\" class=\"mw-redirect\" title=\"Pashto language\">Pashto</a>)</span><br><span style=\"font-size:85%;\"><span title=\"Pashto-language romanization\"><i lang=\"ps-Latn\">Də Afġānistān Islāmī Imārat</i></span></span></li><li><span title=\"Dari-language text\"><span lang=\"prs\" dir=\"rtl\" style=\"font-style: normal;\">امارت اسلامی افغانستان</span></span>\\xa0<span class=\"languageicon\" style=\"font-size:100%; font-weight:normal\">(<a href=\"/wiki/Dari_language\" class=\"mw-redirect\" title=\"Dari language\">Dari</a>)</span><br><span style=\"font-size:85%;\"><span title=\"Dari-language romanization\"><i lang=\"prs-Latn\">Imārat-i Islāmī-yi Afghānistān</i></span></span></li></ul></div></div></th></tr><tr><td colspan=\"2\" class=\"infobox-image\"><div style=\"display:table; width:100%;\">\\n <div style=\"display:table-cell; vertical-align:middle; padding-left:5px;\">\\n <div style=\"padding-bottom:3px;\"><a href=\"/wiki/File:Flag_of_the_Taliban.svg\" class=\"image\" title=\"Flag of Afghanistan\"><img alt=\"Flag of Afghanistan\" src=\"//upload.wikimedia.org/wikipedia/commons/thumb/5/5c/Flag_of_the_Taliban.svg/125px-Flag_of_the_Taliban.svg.png\" decoding=\"async\" width=\"125\" height=\"63\" class=\"thumbborder\" srcset=\"//upload.wikimedia.org/wikipedia/commons/thumb/5/5c/Flag_of_the_Taliban.svg/188px-Flag_of_the_Taliban.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/5/5c/Flag_of_the_Taliban.svg/250px-Flag_of_the_Taliban.svg.png 2x\" data-file-width=\"1000\" data-file-height=\"500\"></a></div>\\n <div><a 
href=\"/wiki/Flag_of_Afghanistan\" title=\"Flag of Afghanistan\">Flag</a></div>\\n </div>\\n <div style=\"display:table-cell; vertical-align:middle; padding: 0px 5px;\">\\n <div style=\"padding-bottom:3px;\"><a href=\"/wiki/File:Emblem_of_the_Islamic_Emirate_of_Afghanistan.svg\" class=\"image\" title=\"Emblem of Afghanistan\"><img alt=\"Coat of Arms of the Islamic Emirate [1]\" src=\"//upload.wikimedia.org/wikipedia/en/thumb/8/84/Emblem_of_the_Islamic_Emirate_of_Afghanistan.svg/85px-Emblem_of_the_Islamic_Emirate_of_Afghanistan.svg.png\" decoding=\"async\" width=\"85\" height=\"86\" srcset=\"//upload.wikimedia.org/wikipedia/en/thumb/8/84/Emblem_of_the_Islamic_Emirate_of_Afghanistan.svg/128px-Emblem_of_the_Islamic_Emirate_of_Afghanistan.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/8/84/Emblem_of_the_Islamic_Emirate_of_Afghanistan.svg/170px-Emblem_of_the_Islamic_Emirate_of_Afghanistan.svg.png 2x\" data-file-width=\"315\" data-file-height=\"318\"></a></div>\\n <div><a href=\"/wiki/Emblem_of_Afghanistan\" title=\"Emblem of Afghanistan\">Emblem</a></div>\\n </div>\\n </div></td></tr><tr><td colspan=\"2\" class=\"infobox-full-data anthem\"><b>Anthem:</b>\\xa0<span title=\"Pashto-language text\"><span lang=\"ps\" dir=\"rtl\">دا د باتورانو کور</span></span><br><span title=\"Pashto-language romanization\"><i lang=\"ps-Latn\">Dā Də Bātorāno Kor</i></span><br>\"<a href=\"/wiki/This_is_the_Home_of_the_Brave\" class=\"mw-redirect\" title=\"This is the Home of the Brave\">This is the Home of the Brave</a>\"<sup id=\"cite_ref-Tharoor_2-0\" class=\"reference\"><a href=\"#cite_note-Tharoor-2\">[2]</a></sup></td></tr><tr><td colspan=\"2\" class=\"infobox-full-data\"><div class=\"switcher-container\"><div><a href=\"/wiki/File:Afghanistan_(orthographic_projection).svg\" class=\"image\"><img alt=\"Afghanistan (orthographic projection).svg\" 
src=\"//upload.wikimedia.org/wikipedia/commons/thumb/1/19/Afghanistan_%28orthographic_projection%29.svg/250px-Afghanistan_%28orthographic_projection%29.svg.png\" decoding=\"async\" width=\"250\" height=\"250\" srcset=\"//upload.wikimedia.org/wikipedia/commons/thumb/1/19/Afghanistan_%28orthographic_projection%29.svg/375px-Afghanistan_%28orthographic_projection%29.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/1/19/Afghanistan_%28orthographic_projection%29.svg/500px-Afghanistan_%28orthographic_projection%29.svg.png 2x\" data-file-width=\"553\" data-file-height=\"553\"></a><span class=\"switcher-label\" style=\"display:none\">Afghanistan on the globe</span></div><div><a href=\"/wiki/File:Afghanistan_-_Location_Map_(2013)_-_AFG_-_UNOCHA.svg\" class=\"image\"><img alt=\"Afghanistan - Location Map (2013) - AFG - UNOCHA.svg\" src=\"//upload.wikimedia.org/wikipedia/commons/thumb/3/31/Afghanistan_-_Location_Map_%282013%29_-_AFG_-_UNOCHA.svg/250px-Afghanistan_-_Location_Map_%282013%29_-_AFG_-_UNOCHA.svg.png\" decoding=\"async\" width=\"250\" height=\"250\" srcset=\"//upload.wikimedia.org/wikipedia/commons/thumb/3/31/Afghanistan_-_Location_Map_%282013%29_-_AFG_-_UNOCHA.svg/375px-Afghanistan_-_Location_Map_%282013%29_-_AFG_-_UNOCHA.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/3/31/Afghanistan_-_Location_Map_%282013%29_-_AFG_-_UNOCHA.svg/500px-Afghanistan_-_Location_Map_%282013%29_-_AFG_-_UNOCHA.svg.png 2x\" data-file-width=\"254\" data-file-height=\"254\"></a><span class=\"switcher-label\" style=\"display:none\">Map of Afghanistan</span></div></div></td></tr><tr><th scope=\"row\" class=\"infobox-label\">Status</th><td class=\"infobox-data\"><a href=\"/wiki/UN_member_state\" class=\"mw-redirect\" title=\"UN member state\">UN member state</a> under an <a href=\"/wiki/Recognition_of_the_Islamic_Emirate_of_Afghanistan\" title=\"Recognition of the Islamic Emirate of Afghanistan\">unrecognized government</a><sup id=\"cite_ref-3\" class=\"reference\"><a 
href=\"#cite_note-3\">[3]</a></sup></td></tr><tr><th scope=\"row\" class=\"infobox-label\">Capital<div class=\"ib-country-largest\">and largest city</div></th><td class=\"infobox-data\"><a href=\"/wiki/Kabul\" title=\"Kabul\">Kabul</a><br><style data-mw-deduplicate=\"TemplateStyles:r1073938472\">.mw-parser-output .geo-default,.mw-parser-output .geo-dms,.mw-parser-output .geo-dec{display:inline}.mw-parser-output .geo-nondefault,.mw-parser-output .geo-multi-punct{display:none}.mw-parser-output .longitude,.mw-parser-output .latitude{white-space:nowrap}</style><span class=\"plainlinks nourlexpansion\"><a class=\"external text\" href=\"//geohack.toolforge.org/geohack.php?pagename=Afghanistan&amp;params=34_31_N_69_11_E_region:AF_source:geonames_type:city\"><span class=\"geo-default\"><span class=\"geo-dms\" title=\"Maps, aerial photos, and other data for this location\"><span class=\"latitude\">34°31N</span> <span class=\"longitude\">69°11E</span></span></span><span class=\"geo-multi-punct\">\\ufeff / \\ufeff</span><span class=\"geo-nondefault\"><span class=\"geo-dec\" title=\"Maps, aerial photos, and other data for this location\">34.517°N 69.183°E</span><span style=\"display:none\">\\ufeff / <span class=\"geo\">34.517; 69.183</span></span></span></a></span><span style=\"font-size: small;\"><span id=\"coordinates\"><a href=\"/wiki/Geographic_coordinate_system\" title=\"Geographic coordinate system\">Coordinates</a>: <link rel=\"mw-deduplicated-inline-style\" href=\"mw-data:TemplateStyles:r1073938472\"><span class=\"plainlinks nourlexpansion\"><a class=\"external text\" href=\"//geohack.toolforge.org/geohack.php?pagename=Afghanistan&amp;params=34_31_N_69_11_E_region:AF_source:geonames_type:city\"><span class=\"geo-default\"><span class=\"geo-dms\" title=\"Maps, aerial photos, and other data for this location\"><span class=\"latitude\">34°31N</span> <span class=\"longitude\">69°11E</span></span></span><span class=\"geo-multi-punct\">\\ufeff / \\ufeff</span><span 
class=\"geo-nondefault\"><span class=\"geo-dec\" title=\"Maps, aerial photos, and other data for this location\">34.517°N 69.183°E</span><span style=\"display:none\">\\ufeff / <span class=\"geo\">34.517; 69.183</span></span></span></a></span></span></span><sup id=\"cite_ref-4\" class=\"reference\"><a href=\"#cite_note-4\">[4]</a></sup></td></tr><tr><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/Languages_of_Afghanistan\" title=\"Languages of Afghanistan\">Major languages</a></th><td class=\"infobox-data\"><div class=\"hlist hlist-separated\"><ul><li><a href=\"/wiki/Pashto\" title=\"Pashto\">Pashto</a></li><li><a href=\"/wiki/Dari\" title=\"Dari\">Dari</a></li></ul></div></td></tr><tr><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/Ethnic_group\" title=\"Ethnic group\">Ethnic\\xa0groups</a> <div class=\"ib-country-ethnic\"> (2019 unofficial estimates)<sup id=\"cite_ref-6\" class=\"reference\"><a href=\"#cite_note-6\">[a]</a></sup><sup id=\"cite_ref-7\" class=\"reference\"><a href=\"#cite_note-7\">[6]</a></sup><sup id=\"cite_ref-:2_8-0\" class=\"reference\"><a href=\"#cite_note-:2-8\">[7]</a></sup><sup id=\"cite_ref-9\" class=\"reference\"><a href=\"#cite_note-9\">[8]</a></sup><sup id=\"cite_ref-10\" class=\"reference\"><a href=\"#cite_note-10\">[9]</a></sup></div></th><td class=\"infobox-data\"><div class=\"plainlist\"><ul><li>42% <a href=\"/wiki/Pashtun\" class=\"mw-redirect\" title=\"Pashtun\">Pashtun</a></li><li>27% <a href=\"/wiki/Tajiks\" title=\"Tajiks\">Tajik</a></li><li><span class=\"nowrap\">\\u2007</span>9% <a href=\"/wiki/Hazaras\" title=\"Hazaras\">Hazara</a></li><li><span class=\"nowrap\">\\u2007</span>9% <a href=\"/wiki/Uzbeks\" title=\"Uzbeks\">Uzbek</a></li><li><span class=\"nowrap\">\\u2007</span>4% <a href=\"/wiki/Aimaq_people\" title=\"Aimaq people\">Aimaq</a></li><li><span class=\"nowrap\">\\u2007</span>3% <a href=\"/wiki/Turkmen_people\" class=\"mw-redirect\" title=\"Turkmen people\">Turkmen</a></li><li><span 
class=\"nowrap\">\\u2007</span>2% <a href=\"/wiki/Baloch_people\" title=\"Baloch people\">Baloch</a></li><li><span class=\"nowrap\">\\u2007</span>4% <a href=\"/wiki/Ethnic_groups_in_Afghanistan\" title=\"Ethnic groups in Afghanistan\">Others</a></li></ul></div></td></tr><tr><th scope=\"row\" class=\"infobox-label\">Religion <div class=\"ib-country-religion\"></div></th><td class=\"infobox-data\"><div class=\"plainlist\"><ul><li>99.7% <a href=\"/wiki/Islam_in_Afghanistan\" title=\"Islam in Afghanistan\">Islam</a> (<a href=\"/wiki/State_religion\" title=\"State religion\">official</a>)</li><li>0.3% <a href=\"/wiki/Demographics_of_Afghanistan#Religion\" title=\"Demographics of Afghanistan\">Others</a> (2009 estimate)<sup id=\"cite_ref-Factbook_11-0\" class=\"reference\"><a href=\"#cite_note-Factbook-11\">[10]</a></sup></li></ul></div></td></tr><tr><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/Demonym\" title=\"Demonym\">Demonym(s)</a></th><td class=\"infobox-data\"><a href=\"/wiki/Afghans\" title=\"Afghans\">Afghan</a><sup id=\"cite_ref-Demonym_14-0\" class=\"reference\"><a href=\"#cite_note-Demonym-14\">[b]</a></sup><sup id=\"cite_ref-Constitution_of_Afghanistan_15-0\" class=\"reference\"><a href=\"#cite_note-Constitution_of_Afghanistan-15\">[13]</a></sup><sup id=\"cite_ref-16\" class=\"reference\"><a href=\"#cite_note-16\">[14]</a></sup></td></tr><tr><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/Politics_of_Afghanistan\" title=\"Politics of Afghanistan\">Government</a></th><td class=\"infobox-data\"><a href=\"/wiki/Unitary_state\" title=\"Unitary state\">Unitary</a> <a href=\"/wiki/Provisional_government\" title=\"Provisional government\">provisional</a> <a href=\"/wiki/Theocratic\" class=\"mw-redirect\" title=\"Theocratic\">theocratic</a> <a href=\"/wiki/Islamic_state\" title=\"Islamic state\">Islamic</a> <a href=\"/wiki/Emirate\" title=\"Emirate\">emirate</a><sup id=\"cite_ref-17\" class=\"reference\"><a 
href=\"#cite_note-17\">[15]</a></sup><sup id=\"cite_ref-18\" class=\"reference\"><a href=\"#cite_note-18\">[16]</a></sup><sup id=\"cite_ref-CTC_Sentinel_19-0\" class=\"reference\"><a href=\"#cite_note-CTC_Sentinel-19\">[17]</a></sup></td></tr><tr class=\"mergedrow\"><td colspan=\"2\" class=\"infobox-full-data\"><link rel=\"mw-deduplicated-inline-style\" href=\"mw-data:TemplateStyles:r1066479718\"></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Leader_of_the_Islamic_Emirate_of_Afghanistan\" title=\"Leader of the Islamic Emirate of Afghanistan\">Leader</a> </div></th><td class=\"infobox-data\"><span class=\"nowrap\"><a href=\"/wiki/Hibatullah_Akhundzada\" title=\"Hibatullah Akhundzada\">Hibatullah Akhundzada</a></span></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Prime_Minister_of_Afghanistan\" title=\"Prime Minister of Afghanistan\">Prime Minister</a> </div></th><td class=\"infobox-data\"><a href=\"/wiki/Hasan_Akhund\" title=\"Hasan Akhund\">Hasan Akhund</a> (<a href=\"/wiki/Acting_prime_minister\" title=\"Acting prime minister\">acting</a>)</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Chief_Justice_of_Afghanistan\" title=\"Chief Justice of Afghanistan\">Chief Justice</a> </div></th><td class=\"infobox-data\"><a href=\"/wiki/Abdul_Hakim_Ishaqzai\" title=\"Abdul Hakim Ishaqzai\">Abdul Hakim Ishaqzai</a></td></tr><tr style=\"display:none\"><td colspan=\"2\">\\n</td></tr><tr><th scope=\"row\" class=\"infobox-label\">Legislature</th><td class=\"infobox-data\"><a href=\"/wiki/Leadership_Council_of_Afghanistan\" title=\"Leadership Council of Afghanistan\">Leadership Council</a> (consultative body)<sup 
id=\"cite_ref-20\" class=\"reference\"><a href=\"#cite_note-20\">[18]</a></sup></td></tr><tr class=\"mergedtoprow\"><th colspan=\"2\" class=\"infobox-header\"><a href=\"/wiki/History_of_Afghanistan\" title=\"History of Afghanistan\">Formation</a></th></tr><tr class=\"mergedrow\"><td colspan=\"2\" class=\"infobox-full-data\"><link rel=\"mw-deduplicated-inline-style\" href=\"mw-data:TemplateStyles:r1066479718\"></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Hotak_dynasty\" title=\"Hotak dynasty\">Hotak Empire</a> </div></th><td class=\"infobox-data\"><a href=\"/wiki/Mirwais_Hotak\" title=\"Mirwais Hotak\">1709</a><a href=\"/wiki/Siege_of_Kandahar\" title=\"Siege of Kandahar\">1738</a></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<span class=\"nowrap\"><a href=\"/wiki/Durrani_Empire\" title=\"Durrani Empire\">Durrani Empire</a></span> </div></th><td class=\"infobox-data\">17471823</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Emirate_of_Afghanistan\" title=\"Emirate of Afghanistan\">Emirate</a> </div></th><td class=\"infobox-data\">18231839</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Durrani_Empire\" title=\"Durrani Empire\">Restoration of the Durrani Kingdom</a> </div></th><td class=\"infobox-data\"><a href=\"/wiki/First_Anglo-Afghan_War\" title=\"First Anglo-Afghan War\">18391842</a></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Emirate_of_Afghanistan\" title=\"Emirate of 
Afghanistan\">Restoration of the Emirate</a> </div></th><td class=\"infobox-data\"><a href=\"/wiki/First_Anglo-Afghan_War\" title=\"First Anglo-Afghan War\">18421926</a></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Dost_Mohammad_Khan\" title=\"Dost Mohammad Khan\">Dost Mohammad unites Afghanistan</a> </div></th><td class=\"infobox-data\"><a href=\"/wiki/Herat_Campaign_of_1862-63\" class=\"mw-redirect\" title=\"Herat Campaign of 1862-63\">27 May 1863</a></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Treaty_of_Gandamak\" title=\"Treaty of Gandamak\">Anglo-Afghan Agreement</a> </div></th><td class=\"infobox-data\"><a href=\"/wiki/Second_Anglo-Afghan_War\" title=\"Second Anglo-Afghan War\">26 May 1879</a></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Third_Anglo-Afghan_War\" title=\"Third Anglo-Afghan War\">Independence</a> </div></th><td class=\"infobox-data\"><a href=\"/wiki/Afghan_Independence_Day\" title=\"Afghan Independence Day\">19 August 1919</a></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Kingdom_of_Afghanistan\" title=\"Kingdom of Afghanistan\">Kingdom</a> </div></th><td class=\"infobox-data\">9 June 1926</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Republic_of_Afghanistan_(1973%E2%80%931978)\" title=\"Republic of Afghanistan (19731978)\">Republic</a> </div></th><td class=\"infobox-data\">17 July 1973</td></tr><tr class=\"mergedrow\"><th scope=\"row\" 
class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Democratic_Republic_of_Afghanistan\" title=\"Democratic Republic of Afghanistan\">Democratic Republic</a> </div></th><td class=\"infobox-data\"><a href=\"/wiki/Saur_Revolution\" title=\"Saur Revolution\">2728 April 1978</a></td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Islamic_State_of_Afghanistan\" title=\"Islamic State of Afghanistan\">Islamic State</a> </div></th><td class=\"infobox-data\">28 April 1992</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Islamic_Emirate_of_Afghanistan_(1996%E2%80%932001)\" title=\"Islamic Emirate of Afghanistan (19962001)\">Islamic Emirate</a> </div></th><td class=\"infobox-data\">27 September 1996</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<span class=\"nowrap\"><a href=\"/wiki/Islamic_Republic_of_Afghanistan\" title=\"Islamic Republic of Afghanistan\">Islamic Republic</a></span> </div></th><td class=\"infobox-data\">26 January 2004</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div style=\"text-indent:-0.9em;margin-left:1.2em;font-weight:normal;\">•\\xa0<a href=\"/wiki/Fall_of_Kabul_(2021)\" title=\"Fall of Kabul (2021)\">Restoration of Islamic Emirate</a> </div></th><td class=\"infobox-data\">15 August 2021</td></tr><tr style=\"display:none\"><td colspan=\"2\">\\n</td></tr><tr class=\"mergedtoprow\"><th colspan=\"2\" class=\"infobox-header\"><a href=\"/wiki/Geography_of_Afghanistan\" title=\"Geography of Afghanistan\">Area </a></th></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div 
class=\"ib-country-fake-li\">•\\xa0Total</div></th><td class=\"infobox-data\">652,867<sup id=\"cite_ref-21\" class=\"reference\"><a href=\"#cite_note-21\">[19]</a></sup>\\xa0km<sup>2</sup> (252,073\\xa0sq\\xa0mi) (<a href=\"/wiki/List_of_countries_and_dependencies_by_area\" title=\"List of countries and dependencies by area\">40th</a>)</td></tr><tr class=\"mergedbottomrow\"><th scope=\"row\" class=\"infobox-label\"><div class=\"ib-country-fake-li\">•\\xa0Water\\xa0(%)</div></th><td class=\"infobox-data\">negligible</td></tr><tr class=\"mergedtoprow\"><th colspan=\"2\" class=\"infobox-header\"><a href=\"/wiki/Demographics_of_Afghanistan\" title=\"Demographics of Afghanistan\">Population</a></th></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div class=\"ib-country-fake-li\">•\\xa02021 estimate</div></th><td class=\"infobox-data\">40,218,234<sup id=\"cite_ref-:2_8-1\" class=\"reference\"><a href=\"#cite_note-:2-8\">[7]</a></sup> (<a href=\"/wiki/List_of_countries_and_dependencies_by_population\" title=\"List of countries and dependencies by population\">37th</a>)</td></tr><tr class=\"mergedbottomrow\"><th scope=\"row\" class=\"infobox-label\"><div class=\"ib-country-fake-li\">•\\xa0Density</div></th><td class=\"infobox-data\">48.08/km<sup>2</sup> (124.5/sq\\xa0mi) (<a href=\"/wiki/List_of_countries_and_dependencies_by_population_density\" title=\"List of countries and dependencies by population density\">174th</a>)</td></tr><tr class=\"mergedtoprow\"><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/Gross_domestic_product\" title=\"Gross domestic product\">GDP</a>\\xa0<style data-mw-deduplicate=\"TemplateStyles:r886047488\">.mw-parser-output .nobold{font-weight:normal}</style><span class=\"nobold\">(<a href=\"/wiki/Purchasing_power_parity\" title=\"Purchasing power parity\">PPP</a>)</span></th><td class=\"infobox-data\">2018\\xa0estimate</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div 
class=\"ib-country-fake-li\">•\\xa0Total</div></th><td class=\"infobox-data\">$72.911\\xa0billion<sup id=\"cite_ref-imf2_22-0\" class=\"reference\"><a href=\"#cite_note-imf2-22\">[20]</a></sup> (<a href=\"/wiki/List_of_countries_by_GDP_(PPP)\" title=\"List of countries by GDP (PPP)\">96th</a>)</td></tr><tr class=\"mergedbottomrow\"><th scope=\"row\" class=\"infobox-label\"><div class=\"ib-country-fake-li\">•\\xa0Per capita</div></th><td class=\"infobox-data\">$2,024<sup id=\"cite_ref-imf2_22-1\" class=\"reference\"><a href=\"#cite_note-imf2-22\">[20]</a></sup> (<a href=\"/wiki/List_of_countries_by_GDP_(PPP)_per_capita\" title=\"List of countries by GDP (PPP) per capita\">169th</a>)</td></tr><tr class=\"mergedtoprow\"><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/Gross_domestic_product\" title=\"Gross domestic product\">GDP</a>\\xa0<link rel=\"mw-deduplicated-inline-style\" href=\"mw-data:TemplateStyles:r886047488\"><span class=\"nobold\">(nominal)</span></th><td class=\"infobox-data\">2018\\xa0estimate</td></tr><tr class=\"mergedrow\"><th scope=\"row\" class=\"infobox-label\"><div class=\"ib-country-fake-li\">•\\xa0Total</div></th><td class=\"infobox-data\">$21.657\\xa0billion<sup id=\"cite_ref-imf2_22-2\" class=\"reference\"><a href=\"#cite_note-imf2-22\">[20]</a></sup> (<a href=\"/wiki/List_of_countries_by_GDP_(nominal)\" title=\"List of countries by GDP (nominal)\">111st</a>)</td></tr><tr class=\"mergedbottomrow\"><th scope=\"row\" class=\"infobox-label\"><div class=\"ib-country-fake-li\">•\\xa0Per capita</div></th><td class=\"infobox-data\">$493<sup id=\"cite_ref-imf2_22-3\" class=\"reference\"><a href=\"#cite_note-imf2-22\">[20]</a></sup> (<a href=\"/wiki/List_of_countries_by_GDP_(nominal)_per_capita\" title=\"List of countries by GDP (nominal) per capita\">177th</a>)</td></tr><tr><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/Human_Development_Index\" title=\"Human Development Index\">HDI</a>\\xa0<link 
rel=\"mw-deduplicated-inline-style\" href=\"mw-data:TemplateStyles:r886047488\"><span class=\"nobold\">(2019)</span></th><td class=\"infobox-data\"><img alt=\"Increase\" src=\"//upload.wikimedia.org/wikipedia/commons/thumb/b/b0/Increase2.svg/11px-Increase2.svg.png\" decoding=\"async\" title=\"Increase\" width=\"11\" height=\"11\" srcset=\"//upload.wikimedia.org/wikipedia/commons/thumb/b/b0/Increase2.svg/17px-Increase2.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/b/b0/Increase2.svg/22px-Increase2.svg.png 2x\" data-file-width=\"300\" data-file-height=\"300\">\\xa00.511<sup id=\"cite_ref-UNHDR_23-0\" class=\"reference\"><a href=\"#cite_note-UNHDR-23\">[21]</a></sup><br><span class=\"nowrap\"><span style=\"color:red\">low</span></span>\\xa0·\\xa0<a href=\"/wiki/List_of_countries_by_Human_Development_Index\" title=\"List of countries by Human Development Index\">169th</a></td></tr><tr><th scope=\"row\" class=\"infobox-label\">Currency</th><td class=\"infobox-data\"><a href=\"/wiki/Afghan_afghani\" title=\"Afghan afghani\">Afghani</a> (<span title=\"Dari-language text\"><span lang=\"prs\" dir=\"rtl\">افغانی</span></span>) (<a href=\"/wiki/ISO_4217\" title=\"ISO 4217\">AFN</a>)</td></tr><tr><th scope=\"row\" class=\"infobox-label\">Time zone</th><td class=\"infobox-data\"><span class=\"nowrap\"><a href=\"/wiki/Coordinated_Universal_Time\" title=\"Coordinated Universal Time\">UTC</a>+4:30<br><a href=\"/wiki/Solar_Hijri_calendar\" title=\"Solar Hijri calendar\">Solar Calendar</a></span> (D†)</td></tr><tr><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/Left-_and_right-hand_traffic\" title=\"Left- and right-hand traffic\">Driving side</a></th><td class=\"infobox-data\">right</td></tr><tr><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/Telephone_numbers_in_Afghanistan\" title=\"Telephone numbers in Afghanistan\">Calling code</a></th><td class=\"infobox-data\"><a href=\"/wiki/Telephone_numbers_in_Afghanistan\" title=\"Telephone numbers in 
Afghanistan\">+93</a></td></tr><tr><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/ISO_3166\" title=\"ISO 3166\">ISO 3166 code</a></th><td class=\"infobox-data\"><a href=\"/wiki/ISO_3166-2:AF\" title=\"ISO 3166-2:AF\">AF</a></td></tr><tr><th scope=\"row\" class=\"infobox-label\"><a href=\"/wiki/Country_code_top-level_domain\" title=\"Country code top-level domain\">Internet TLD</a></th><td class=\"infobox-data\"><a href=\"/wiki/.af\" title=\".af\">.af</a><br><a href=\"/wiki/%D8%A7%D9%81%D8%BA%D8%A7%D9%86%D8%B3%D8%AA%D8%A7%D9%86.\" class=\"mw-redirect\" title=\"افغانستان.\">افغانستان.</a></td></tr>'"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(df.iloc[0].country_html)\n",
"content = \"\".join(df.iloc[0].country_html)\n",
"content"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "97c1e41f-30f3-4116-aa11-5797e05b95ba",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
},
"toc-autonumbering": true,
"toc-showcode": false
},
"nbformat": 4,
"nbformat_minor": 5
}