# python-VM/slack-bot/traffic-scraper/dev/scraper.py.old

from bs4 import BeautifulSoup
from selenium import webdriver
import emoji
from datetime import datetime
import re

# Page listing the current traffic incidents that this script reports on.
url = 'https://www.trafficdelays.co.uk/southern-england/'

# Run Chrome without a visible window.
options = webdriver.ChromeOptions()
options.add_argument('headless')

# Start the chrome driver, grab the rendered page and always shut the driver
# down again -- even when loading the page fails -- so that no headless
# Chrome process is left running after the script ends.
browser = webdriver.Chrome(options=options)
try:
    browser.get(url)
    html = browser.page_source
finally:
    browser.quit()

# Parse the rendered HTML with the lxml parser.
soup = BeautifulSoup(html, features='lxml')
# soup.find_all(class_='alerts-severity-Severe')


def printBreak():
    """Write two newline characters to standard output as a visual break."""
    # No positional arguments: the only text emitted is the `end` string.
    print(end='\n\n')


# table = soup.find_all('td')

# list = []

# for item in table:
#     list.append(item)

# for i in range(0, 4):
#     print(list[i])

# printBreak()

# totalItems = int(len(list) / 4)

# for i in range(0, 4):
#     print(list[i].string)
#     printBreak()

# newList = ([x.text for x in soup.find_all('td')])

# Collect the table cells once: the Tag objects are needed for the raw HTML
# of the fourth cell, their text for the summary line.  Both collections used
# to be built by statements that are now commented out, which left `newList`
# undefined and made `list[3]` index the builtin `list` type instead of the
# scraped cells.
cells = soup.find_all('td')
newList = [cell.text for cell in cells]

currentTime = datetime.now().strftime('%H:%M:%S')

# NOTE(review): newer releases of the `emoji` package appear to have replaced
# `use_aliases=True` with `language='alias'` -- confirm against the version
# installed for this bot before changing the call.
print(emoji.emojize('Did someone say M62 :anguished:!? Let'
                    '\'s check the latest updates from Highways'
                    ' England! :police_car::rotating_light:',
                    use_aliases=True))

# Matches "<label> : <value>" text that follows a '>' and runs to the end of
# the fragment; the two groups are the label and the value.
pattern = r"\>(.+?)\s+?\:\s+?(.+?)$"

if len(cells) < 4:
    # The summary uses cells 0-2 and the details use cell 3; with fewer cells
    # there is nothing meaningful to report, so say so instead of crashing
    # with an IndexError.
    print('No traffic incidents could be read from the page.')
else:
    print(f'As of {currentTime}, there is currently {newList[2]} {newList[1]}'
          f' on the {newList[0]}')

    # Work on the raw HTML of the fourth cell, split at every tag opening so
    # that each fragment holds at most one "label : value" pair.
    desc = str(cells[3])
    descSplit = desc.split('<')

    for item in descSplit:
        regex = re.findall(pattern, item)
        if not regex:
            # Fragment without a "label : value" pair: print nothing at all
            # (previously a blank break was emitted for every fragment).
            continue
        itemOne, itemTwo = regex[0]
        printBreak()
        print(f'itemOne: {itemOne}')
        print(f'itemTwo: {itemTwo}')