diff --git a/01_scrapy/wikipedia_country_scraper/download_anthems.sh b/01_scrapy/wikipedia_country_scraper/download_anthems.sh
new file mode 100755
index 0000000..0067468
--- /dev/null
+++ b/01_scrapy/wikipedia_country_scraper/download_anthems.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+source .venv/bin/activate
+scrapy crawl anthems  # spiders are addressed by their `name` attribute ("anthems"), not by class name
diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/items.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/items.py
index 8560523..c578484 100644
--- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/items.py
+++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/items.py
@@ -15,3 +15,12 @@ class WikipediaCountryScraperItem(scrapy.Item):
file_urls = scrapy.Field()
files = scrapy.Field()
+
+
+class AnthemsItem(scrapy.Item):  # One scraped national-anthem record, built in the anthems spider's parse().
+ country_name = scrapy.Field()  # link text of the row's first header cell
+ native_anthem_title = scrapy.Field()  # linked title(s) from the row's first data cell, newline-joined
+ english_title = scrapy.Field()  # cleaned text of the cell's <small> element, or None when absent
+
+ file_urls = scrapy.Field()  # list of anthem audio URLs set by the spider
+ files = scrapy.Field()  # not set by the spider; presumably filled in by the files pipeline — confirm
diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py
index c4e4160..4bcaef9 100644
--- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py
+++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/pipelines.py
@@ -26,3 +26,12 @@ class WikipediaCountryScraperFilesPipeline(FilesPipeline):
return f"files/flags/{filename}"
elif filename.endswith(".ogg") or filename.endswith("oga"):
return f"files/anthems/{filename}"
+
+
+class AnthemDownloadFilesPipeline(FilesPipeline):
+    def file_path(self, request, response=None, info=None, *, item=None):
+        """Store .ogg/.oga downloads under files/anthems/; other URLs keep Scrapy's default path."""
+        filename = request.url.rsplit("/", 1)[-1]  # last path segment of the URL
+        if filename.endswith((".ogg", ".oga")):
+            return f"files/anthems/{filename}"
+        return super().file_path(request, response=response, info=info, item=item)
diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/settings.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/settings.py
index 6193f0f..3b99f44 100644
--- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/settings.py
+++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/settings.py
@@ -65,10 +65,10 @@ DOWNLOADER_MIDDLEWARES = {
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
-ITEM_PIPELINES = {
- "wikipedia_country_scraper.pipelines.WikipediaCountryScraperFilesPipeline": 300,
- # "scrapy.pipelines.files.FilesPipeline": 1
-}
+# ITEM_PIPELINES = {
+# "wikipedia_country_scraper.pipelines.WikipediaCountryScraperFilesPipeline": 300,
+# # "scrapy.pipelines.files.FilesPipeline": 1
+# }
FILES_STORE = str(pathlib.Path(__file__).resolve().parents[3] / "data" / "scrapy" / "raw_country_data")
# Enable and configure the AutoThrottle extension (disabled by default)
@@ -92,10 +92,10 @@ FILES_STORE = str(pathlib.Path(__file__).resolve().parents[3] / "data" / "scrapy
# HTTPCACHE_IGNORE_HTTP_CODES = []
# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
-FEEDS = {
- pathlib.Path(__file__).resolve().parents[3]
- / "data"
- / "scrapy"
- / "raw_country_data"
- / "countries.json": {"format": "json", "encoding": "utf8", "store_empty": False, "indent": 2}
+# FEEDS = {
+# pathlib.Path(__file__).resolve().parents[3]
+# / "data"
+# / "scrapy"
+# / "raw_country_data"
+# / "countries.json": {"format": "json", "encoding": "utf8", "store_empty": False, "indent": 2}
-}
+# }  # closing brace must be commented out too, otherwise settings.py has an unmatched "}"
diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/anthems.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/anthems.py
new file mode 100644
index 0000000..30710a9
--- /dev/null
+++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/anthems.py
@@ -0,0 +1,88 @@
+import pathlib
+import re
+
+import scrapy
+from scrapy.http import TextResponse
+
+from wikipedia_country_scraper.items import AnthemsItem
+
+
+class AnthemsSpider(scrapy.Spider):
+    """Scrape Wikipedia's list of national anthems into ``AnthemsItem`` objects.
+
+    Each item holds a country name, the anthem's native and English titles
+    and the URL of the anthem's audio link, which is handed to the item
+    pipeline configured in ``custom_settings``.
+    """
+
+    name = "anthems"
+    start_urls = ["https://en.wikipedia.org/wiki/List_of_national_anthems"]
+    # Pipeline and feed export are configured per spider rather than project-wide.
+    custom_settings = {
+        "ITEM_PIPELINES": {"wikipedia_country_scraper.pipelines.AnthemDownloadFilesPipeline": 100},
+        "FEEDS": {
+            pathlib.Path(__file__).resolve().parents[4]
+            / "data"
+            / "scrapy"
+            / "raw_country_data"
+            / "anthems.json": {"format": "json", "encoding": "utf8", "store_empty": False, "indent": 2}
+        },
+    }
+
+    def parse(self, response: TextResponse):
+        """Yield one ``AnthemsItem`` per anthem row found in the page's wikitables.
+
+        Args:
+            response: The downloaded "List of national anthems" page.
+
+        Yields:
+            AnthemsItem: ``file_urls`` holds the absolute URL of the row's
+            audio link, or is empty when the row has no such link.
+        """
+        rows = "//table[contains(@class, 'wikitable')]/tbody/tr"
+
+        # Country names: link text of each row's first header cell; header
+        # cells without a link are skipped.
+        country_names = [
+            link_text
+            for cell in response.xpath(f"{rows}/th[1]")
+            if (link_text := cell.xpath("a/text()").get()) is not None
+        ]
+
+        native_anthem_titles = []
+        english_titles = []
+        for cell in response.xpath(f"{rows}/td[1]"):
+            # A cell may link several titles; keep them all, one per line.
+            # None (not an IndexError) when the cell has no linked title.
+            titles = cell.xpath("a/text()").getall()
+            native_anthem_titles.append("\n".join(titles) if titles else None)
+
+            english_title = cell.xpath("small/text()").get()
+            if english_title is not None:
+                # Skip leading non-word characters, then take everything up to the next double quote.
+                english_title = re.search(r"(?:[\W]*)(?P<title>[^\"]*)", english_title)["title"]
+            english_titles.append(english_title)
+
+        # Audio links are in column 5 of the first table (recognised countries)
+        # and column 6 of the second (partially recognised); zip() stops after
+        # those two, so any further wikitables contribute no URLs.
+        anthem_urls = []
+        tables = response.xpath("//table[contains(@class, 'wikitable')]/tbody")
+        for table, audio_column in zip(tables, (5, 6)):
+            anthem_urls.extend(cell.xpath("a/@href").get() for cell in table.xpath(f"tr/td[{audio_column}]"))
+
+        # NOTE(review): the four lists are built independently and paired by
+        # position; this assumes every data row adds exactly one entry to each
+        # list — confirm against the live page.
+        for country_name, native_anthem_title, english_title, anthem_url in zip(
+            country_names, native_anthem_titles, english_titles, anthem_urls
+        ):
+            anthem_item = AnthemsItem()
+            anthem_item["country_name"] = country_name
+            anthem_item["native_anthem_title"] = native_anthem_title
+            anthem_item["english_title"] = english_title
+            # Empty list, never [None]: the files pipeline builds a request from
+            # every entry. urljoin() also resolves protocol-relative hrefs.
+            anthem_item["file_urls"] = [response.urljoin(anthem_url)] if anthem_url is not None else []
+
+            yield anthem_item
diff --git a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/countrydownloader.py b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/countrydownloader.py
index e42b339..211cde2 100644
--- a/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/countrydownloader.py
+++ b/01_scrapy/wikipedia_country_scraper/wikipedia_country_scraper/spiders/countrydownloader.py
@@ -1,5 +1,6 @@
from __future__ import annotations
+import pathlib
import re
import scrapy
@@ -10,6 +11,16 @@ from wikipedia_country_scraper.items import WikipediaCountryScraperItem
class CountrydownloaderSpider(scrapy.Spider):
name = "CountrydownloaderSpider"
+ custom_settings = {
+ "ITEM_PIPELINES": {"wikipedia_country_scraper.pipelines.WikipediaCountryScraperFilesPipeline": 100},
+ "FEEDS": {
+ pathlib.Path(__file__).resolve().parents[4]
+ / "data"
+ / "scrapy"
+ / "raw_country_data"
+ / "countries.json": {"format": "json", "encoding": "utf8", "store_empty": False, "indent": 2}
+ },
+ }
def start_requests(self):
return [
diff --git a/docs/scraping.md b/docs/scraping.md
index 7d3fb87..9f16bb5 100644
--- a/docs/scraping.md
+++ b/docs/scraping.md
@@ -23,9 +23,13 @@ Using selectors:
Download files/images:
+Setting pipelines per spider:
+
Exporting JSON:
+Setting exports per spider:
+
### new project
diff --git a/playground/downloaded_data_inspection_lab/Untitled.ipynb b/playground/downloaded_data_inspection_lab/Untitled.ipynb
index 5f91a00..896acc4 100644
--- a/playground/downloaded_data_inspection_lab/Untitled.ipynb
+++ b/playground/downloaded_data_inspection_lab/Untitled.ipynb
@@ -2,15 +2,15 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 1,
"id": "d8185790-0793-4881-99e8-6730f95a8006",
"metadata": {
"execution": {
- "iopub.execute_input": "2022-06-24T21:31:57.297266Z",
- "iopub.status.busy": "2022-06-24T21:31:57.284090Z",
- "iopub.status.idle": "2022-06-24T21:31:57.366471Z",
- "shell.execute_reply": "2022-06-24T21:31:57.365193Z",
- "shell.execute_reply.started": "2022-06-24T21:31:57.293844Z"
+ "iopub.execute_input": "2022-06-24T22:04:54.386982Z",
+ "iopub.status.busy": "2022-06-24T22:04:54.386313Z",
+ "iopub.status.idle": "2022-06-24T22:04:54.854521Z",
+ "shell.execute_reply": "2022-06-24T22:04:54.853581Z",
+ "shell.execute_reply.started": "2022-06-24T22:04:54.386910Z"
},
"tags": []
},
@@ -24,15 +24,15 @@
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": 2,
"id": "ea2b3e33-d58e-4e30-a0cc-8218a1f252c9",
"metadata": {
"execution": {
- "iopub.execute_input": "2022-06-24T21:31:58.025200Z",
- "iopub.status.busy": "2022-06-24T21:31:58.024201Z",
- "iopub.status.idle": "2022-06-24T21:31:58.108904Z",
- "shell.execute_reply": "2022-06-24T21:31:58.107402Z",
- "shell.execute_reply.started": "2022-06-24T21:31:58.025121Z"
+ "iopub.execute_input": "2022-06-24T22:04:55.458615Z",
+ "iopub.status.busy": "2022-06-24T22:04:55.457695Z",
+ "iopub.status.idle": "2022-06-24T22:04:55.475878Z",
+ "shell.execute_reply": "2022-06-24T22:04:55.474706Z",
+ "shell.execute_reply.started": "2022-06-24T22:04:55.458548Z"
},
"tags": []
},
@@ -43,7 +43,7 @@
"[None]"
]
},
- "execution_count": 39,
+ "execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -58,15 +58,15 @@
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 3,
"id": "36149580-91d9-431d-99c3-51feee829e79",
"metadata": {
"execution": {
- "iopub.execute_input": "2022-06-24T21:31:58.650508Z",
- "iopub.status.busy": "2022-06-24T21:31:58.650001Z",
- "iopub.status.idle": "2022-06-24T21:31:58.670264Z",
- "shell.execute_reply": "2022-06-24T21:31:58.669296Z",
- "shell.execute_reply.started": "2022-06-24T21:31:58.650473Z"
+ "iopub.execute_input": "2022-06-24T22:04:56.134416Z",
+ "iopub.status.busy": "2022-06-24T22:04:56.133745Z",
+ "iopub.status.idle": "2022-06-24T22:04:56.140326Z",
+ "shell.execute_reply": "2022-06-24T22:04:56.138507Z",
+ "shell.execute_reply.started": "2022-06-24T22:04:56.134371Z"
},
"tags": []
},
@@ -79,15 +79,15 @@
},
{
"cell_type": "code",
- "execution_count": 74,
+ "execution_count": 4,
"id": "d03be94e-8642-4916-8a43-1711e0c21b36",
"metadata": {
"execution": {
- "iopub.execute_input": "2022-06-24T21:52:35.726961Z",
- "iopub.status.busy": "2022-06-24T21:52:35.726356Z",
- "iopub.status.idle": "2022-06-24T21:52:35.823456Z",
- "shell.execute_reply": "2022-06-24T21:52:35.822464Z",
- "shell.execute_reply.started": "2022-06-24T21:52:35.726932Z"
+ "iopub.execute_input": "2022-06-24T22:04:56.621163Z",
+ "iopub.status.busy": "2022-06-24T22:04:56.620692Z",
+ "iopub.status.idle": "2022-06-24T22:04:56.731001Z",
+ "shell.execute_reply": "2022-06-24T22:04:56.728392Z",
+ "shell.execute_reply.started": "2022-06-24T22:04:56.621128Z"
},
"tags": []
},
@@ -100,34 +100,48 @@
},
{
"cell_type": "code",
- "execution_count": 75,
+ "execution_count": 5,
"id": "29cca9ea-16d3-4534-8c9e-49fde37f8cdd",
"metadata": {
"execution": {
- "iopub.execute_input": "2022-06-24T21:52:36.453655Z",
- "iopub.status.busy": "2022-06-24T21:52:36.452202Z",
- "iopub.status.idle": "2022-06-24T21:52:36.555461Z",
- "shell.execute_reply": "2022-06-24T21:52:36.554096Z",
- "shell.execute_reply.started": "2022-06-24T21:52:36.453559Z"
+ "iopub.execute_input": "2022-06-24T22:04:57.257218Z",
+ "iopub.status.busy": "2022-06-24T22:04:57.256573Z",
+ "iopub.status.idle": "2022-06-24T22:04:57.333032Z",
+ "shell.execute_reply": "2022-06-24T22:04:57.332120Z",
+ "shell.execute_reply.started": "2022-06-24T22:04:57.257174Z"
},
"tags": []
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['country_url', 'flag_description_url', 'short_country_name',\n",
+ " 'country_html', 'flag_html', 'file_urls', 'files'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "df = pd.read_json(countries_file)"
+ "df = pd.read_json(countries_file)\n",
+ "df.columns"
]
},
{
"cell_type": "code",
- "execution_count": 76,
+ "execution_count": 6,
"id": "ef8bc3ce-08dd-4260-807c-2616b2e1c1ba",
"metadata": {
"execution": {
- "iopub.execute_input": "2022-06-24T21:52:36.950435Z",
- "iopub.status.busy": "2022-06-24T21:52:36.949946Z",
- "iopub.status.idle": "2022-06-24T21:52:36.959935Z",
- "shell.execute_reply": "2022-06-24T21:52:36.958581Z",
- "shell.execute_reply.started": "2022-06-24T21:52:36.950398Z"
+ "iopub.execute_input": "2022-06-24T22:04:59.223608Z",
+ "iopub.status.busy": "2022-06-24T22:04:59.222961Z",
+ "iopub.status.idle": "2022-06-24T22:04:59.229384Z",
+ "shell.execute_reply": "2022-06-24T22:04:59.228618Z",
+ "shell.execute_reply.started": "2022-06-24T22:04:59.223578Z"
},
"tags": []
},
@@ -151,15 +165,15 @@
},
{
"cell_type": "code",
- "execution_count": 77,
+ "execution_count": 7,
"id": "48db8f93-659b-45a4-8477-a7cec139bebc",
"metadata": {
"execution": {
- "iopub.execute_input": "2022-06-24T21:52:37.635010Z",
- "iopub.status.busy": "2022-06-24T21:52:37.634417Z",
- "iopub.status.idle": "2022-06-24T21:52:37.645162Z",
- "shell.execute_reply": "2022-06-24T21:52:37.643796Z",
- "shell.execute_reply.started": "2022-06-24T21:52:37.634953Z"
+ "iopub.execute_input": "2022-06-24T22:04:59.710467Z",
+ "iopub.status.busy": "2022-06-24T22:04:59.709874Z",
+ "iopub.status.idle": "2022-06-24T22:04:59.720517Z",
+ "shell.execute_reply": "2022-06-24T22:04:59.717623Z",
+ "shell.execute_reply.started": "2022-06-24T22:04:59.710431Z"
},
"tags": []
},
@@ -183,15 +197,15 @@
},
{
"cell_type": "code",
- "execution_count": 81,
+ "execution_count": 8,
"id": "a52f6aa2-5bbd-46e4-9b2f-cdbd7269cb6e",
"metadata": {
"execution": {
- "iopub.execute_input": "2022-06-24T21:52:49.070882Z",
- "iopub.status.busy": "2022-06-24T21:52:49.070107Z",
- "iopub.status.idle": "2022-06-24T21:52:49.076033Z",
- "shell.execute_reply": "2022-06-24T21:52:49.075243Z",
- "shell.execute_reply.started": "2022-06-24T21:52:49.070853Z"
+ "iopub.execute_input": "2022-06-24T22:04:59.950051Z",
+ "iopub.status.busy": "2022-06-24T22:04:59.949622Z",
+ "iopub.status.idle": "2022-06-24T22:04:59.956484Z",
+ "shell.execute_reply": "2022-06-24T22:04:59.955471Z",
+ "shell.execute_reply.started": "2022-06-24T22:04:59.950016Z"
},
"tags": []
},
@@ -201,8 +215,8 @@
"output_type": "stream",
"text": [
"(206,)\n",
- "[False True]\n",
- "[False True]\n"
+ "[False]\n",
+ "[False]\n"
]
}
],
@@ -215,15 +229,15 @@
},
{
"cell_type": "code",
- "execution_count": 84,
+ "execution_count": 9,
"id": "643e6512-1e5b-4eb2-9f0a-6b680ada787b",
"metadata": {
"execution": {
- "iopub.execute_input": "2022-06-24T21:53:15.797108Z",
- "iopub.status.busy": "2022-06-24T21:53:15.796761Z",
- "iopub.status.idle": "2022-06-24T21:53:15.809904Z",
- "shell.execute_reply": "2022-06-24T21:53:15.809157Z",
- "shell.execute_reply.started": "2022-06-24T21:53:15.797079Z"
+ "iopub.execute_input": "2022-06-24T22:05:00.166633Z",
+ "iopub.status.busy": "2022-06-24T22:05:00.166278Z",
+ "iopub.status.idle": "2022-06-24T22:05:00.178277Z",
+ "shell.execute_reply": "2022-06-24T22:05:00.177378Z",
+ "shell.execute_reply.started": "2022-06-24T22:05:00.166609Z"
},
"tags": []
},
@@ -250,7 +264,7 @@
" \n",
" | \n",
" country_url | \n",
- " flag_image_url | \n",
+ " flag_description_url | \n",
" short_country_name | \n",
" country_html | \n",
" flag_html | \n",
@@ -259,38 +273,17 @@
"
\n",
" \n",
" \n",
- " \n",
- " | 84 | \n",
- " https://en.wikipedia.org/wiki/Paraguay | \n",
- " https://en.wikipedia.org/wiki/File:Flag_of_Par... | \n",
- " Paraguay | \n",
- " [<tr><th colspan=\"2\" class=\"infobox-above adr\"... | \n",
- " None | \n",
- " [https:////upload.wikimedia.org/wikipedia/comm... | \n",
- " [{'url': 'https://upload.wikimedia.org/wikiped... | \n",
- "
\n",
" \n",
"\n",
""
],
"text/plain": [
- " country_url \\\n",
- "84 https://en.wikipedia.org/wiki/Paraguay \n",
- "\n",
- " flag_image_url short_country_name \\\n",
- "84 https://en.wikipedia.org/wiki/File:Flag_of_Par... Paraguay \n",
- "\n",
- " country_html flag_html \\\n",
- "84 [| \n",
" | | \n",
" country_url | \n",
- " flag_image_url | \n",
+ " flag_description_url | \n",
" short_country_name | \n",
" country_html | \n",
" flag_html | \n",
@@ -344,38 +338,17 @@
"
\n",
" \n",
" \n",
- " \n",
- " | 84 | \n",
- " https://en.wikipedia.org/wiki/Paraguay | \n",
- " https://en.wikipedia.org/wiki/File:Flag_of_Par... | \n",
- " Paraguay | \n",
- " [<tr><th colspan=\"2\" class=\"infobox-above adr\"... | \n",
- " None | \n",
- " [https:////upload.wikimedia.org/wikipedia/comm... | \n",
- " [{'url': 'https://upload.wikimedia.org/wikiped... | \n",
- "
\n",
" \n",
"\n",
""
],
"text/plain": [
- " country_url \\\n",
- "84 https://en.wikipedia.org/wiki/Paraguay \n",
- "\n",
- " flag_image_url short_country_name \\\n",
- "84 https://en.wikipedia.org/wiki/File:Flag_of_Par... Paraguay \n",
- "\n",
- " country_html flag_html \\\n",
- "84 [ | The flag of | Islamic Emirate of Afghanistan - د افغانستان اسلامي امارت\\xa0(Pashto)
Də Afġānistān Islāmī Imārat - امارت اسلامی افغانستان\\xa0(Dari)
Imārat-i Islāmī-yi Afghānistān
|
|---|
|
Anthem:\\xa0دا د باتورانو کور Dā Də Bātorāno Kor \"This is the Home of the Brave\"[2] |
Afghanistan on the globe Map of Afghanistan |
| Status | UN member state under an unrecognized government[3] |
|---|
| Capital and largest city | Kabul 34°31′N 69°11′E\\ufeff / \\ufeff34.517°N 69.183°E\\ufeff / 34.517; 69.183Coordinates: 34°31′N 69°11′E\\ufeff / \\ufeff34.517°N 69.183°E\\ufeff / 34.517; 69.183[4] |
|---|
| Major languages | |
|---|
| Ethnic\\xa0groups | |
|---|
| Religion | |
|---|
| Demonym(s) | Afghan[b][13][14] |
|---|
| Government | Unitary provisional theocratic Islamic emirate[15][16][17] |
|---|
|
| Hibatullah Akhundzada |
|---|
| Hasan Akhund (acting) |
|---|
| Abdul Hakim Ishaqzai |
|---|
| \\n |
| Legislature | Leadership Council (consultative body)[18] |
|---|
|
|
| 1709–1738 |
|---|
| 1747–1823 |
|---|
| 1823–1839 |
|---|
| 1839–1842 |
|---|
| 1842–1926 |
|---|
| 27 May 1863 |
|---|
| 26 May 1879 |
|---|
| 19 August 1919 |
|---|
| 9 June 1926 |
|---|
| 17 July 1973 |
|---|
| 27–28 April 1978 |
|---|
| 28 April 1992 |
|---|
| 27 September 1996 |
|---|
| 26 January 2004 |
|---|
| 15 August 2021 |
|---|
| \\n |
|
•\\xa0Total | 652,867[19]\\xa0km2 (252,073\\xa0sq\\xa0mi) (40th) |
|---|
•\\xa0Water\\xa0(%) | negligible |
|---|
|
•\\xa02021 estimate | 40,218,234[7] (37th) |
|---|
•\\xa0Density | 48.08/km2 (124.5/sq\\xa0mi) (174th) |
|---|
| GDP\\xa0(PPP) | 2018\\xa0estimate |
|---|
•\\xa0Total | $72.911\\xa0billion[20] (96th) |
|---|
•\\xa0Per capita | $2,024[20] (169th) |
|---|
| GDP\\xa0(nominal) | 2018\\xa0estimate |
|---|
•\\xa0Total | $21.657\\xa0billion[20] (111st) |
|---|
•\\xa0Per capita | $493[20] (177th) |
|---|
| HDI\\xa0(2019) | \\xa00.511[21] low\\xa0·\\xa0169th |
|---|
| Currency | Afghani (افغانی) (AFN) |
|---|
| Time zone | UTC+4:30 Solar Calendar (D†) |
|---|
| Driving side | right |
|---|
| Calling code | +93 |
|---|
| ISO 3166 code | AF |
|---|
| Internet TLD | .af افغانستان. |
'"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "type(df.iloc[0].country_html)\n",
+ "content = \"\".join(df.iloc[0].country_html)\n",
+ "content"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "97c1e41f-30f3-4116-aa11-5797e05b95ba",
"metadata": {},
"outputs": [],
"source": []