adding completed prototype for slack bot

This commit is contained in:
2019-10-04 22:24:53 +01:00
parent 9f28a9ddce
commit e154f6db75
7 changed files with 139 additions and 50 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 128 KiB

After

Width:  |  Height:  |  Size: 118 KiB

View File

@@ -4,7 +4,7 @@ import emoji
from datetime import datetime
import re
url = 'http://www.trafficengland.com/traffic-alerts'
url = 'https://www.trafficdelays.co.uk/a14-traffic-delays/'
# prepare the option for the chrome driver
options = webdriver.ChromeOptions()
options.add_argument('headless')
@@ -14,58 +14,55 @@ browser = webdriver.Chrome(options=options)
browser.get(url)
html = browser.page_source
soup = BeautifulSoup(html, features='lxml')
# soup.find_all(class_='alerts-severity-Severe')
congestion = browser.find_element_by_xpath('//*[@id="congestion"]')
accident = browser.find_element_by_xpath('//*[@id="accident"]')
congestionText = congestion.text
congestionHTML = congestion.get_attribute('innerHTML')
congestionCount = congestionHTML.count('<li>')
pattern = r".*\<li\>.*title=\".*\".*\>((.|\n)*?)\<br\>"
congestionRegexExtraction = re.findall(pattern, congestionHTML)
accidentText = accident.text
accidentHTML = accident.get_attribute('innerHTML')
accidentCount = accidentHTML.count('<li>')
accidentRegexExtraction = re.findall(pattern, accidentHTML)
def printBreak():
    """Print a vertical spacer to stdout.

    Emits a newline character followed by print's own line terminator,
    i.e. two newline characters in total.
    """
    print('\n')
table = soup.find_all('td')
list = []
for item in table:
list.append(item)
# for i in range(0, 4):
# print(list[i])
# printBreak()
# totalItems = int(len(list) / 4)
# for i in range(0, 4):
# print(list[i].string)
# printBreak()
newList = ([x.text for x in soup.find_all('td')])
currentTime = datetime.now().strftime('%H:%M:%S')
print(emoji.emojize('Did someone say M62 :anguished:!? Let'
'\'s check the latest updates from Highways'
' England! :police_car::rotating_light:',
f' England! as of {currentTime}'
':police_car::rotating_light:',
use_aliases=True))
print(f'As of {currentTime}, there is currently {newList[2]} {newList[1]}'
f' on the {newList[0]}')
# print(list[3].prettify())
desc = str(list[3])
descSplit = desc.split('<')
pattern = r"\>(.+?)\s+?\:\s+?(.+?)$"
for item in descSplit:
# print(item)
regex = re.findall(pattern, item)
# print(f'Type: {type(regex)}')
# print(f'Matches: {regex}')
try:
printBreak()
print(f'itemOne: {regex[0][0]}')
print(f'itemTwo: {regex[0][1]}')
except IndexError:
pass
# print(f'itemTwo: {regex[1]}')
printBreak()
if congestionCount == 0:
print(emoji.emojize(f'There are currently no reported congestions on the'
f' M62 :thinking_face:', use_aliases=True))
if congestionCount != 0:
print(emoji.emojize(f'There are currently {congestionCount} incident(s)'
f' on the M62 :scream:', use_aliases=True))
for i in range(0, congestionCount):
print(congestionRegexExtraction[i][0] + '\n')
printBreak()
if accidentCount == 0:
print(emoji.emojize(f'There are currently no reported accidents on the'
f' M62 :thinking_face:', use_aliases=True))
if accidentCount != 0:
print(emoji.emojize(f'There are currently {accidentCount} incident(s)'
f' on the M62 :scream:', use_aliases=True))
for i in range(0, accidentCount):
print(accidentRegexExtraction[i][0] + '\n')
printBreak()
print(f'Hey Andy, have you thought about getting the train?'
+ emoji.emojize(f' :bullettrain_front:', use_aliases=True))
print(f'Hey Andy, maybe flying would be quicker?'
+ emoji.emojize(f' :helicopter:', use_aliases=True))
print(f'Don\'t fret, he can always work from home!'
+ emoji.emojize(f' :house_with_garden:', use_aliases=True))

71
slack-bot/scraper.py.old Normal file
View File

@@ -0,0 +1,71 @@
from bs4 import BeautifulSoup
from selenium import webdriver
import emoji
from datetime import datetime
import re
url = 'https://www.trafficdelays.co.uk/southern-england/'

# Configure Chrome to run without a visible window.
options = webdriver.ChromeOptions()
options.add_argument('headless')

# Start the driver and capture the rendered page source.  The browser is
# always shut down afterwards so that no headless Chrome process is left
# running, even when loading the page raises.
browser = webdriver.Chrome(options=options)
try:
    browser.get(url)
    html = browser.page_source
finally:
    browser.quit()

# Parse the captured markup with the lxml backend.
soup = BeautifulSoup(html, features='lxml')
# soup.find_all(class_='alerts-severity-Severe')
def printBreak():
    """Print a vertical spacer to stdout.

    Emits a newline character followed by print's own line terminator,
    i.e. two newline characters in total.
    """
    print('\n')
# Collect every <td> cell once: the summary line reads the cells' text and the
# description parsing reads the fourth cell's markup.  (Previously these
# lookups were commented out, so `newList` was undefined and `list[3]`
# resolved to the builtin `list` type.)
cells = soup.find_all('td')
newList = [cell.text for cell in cells]

currentTime = datetime.now().strftime('%H:%M:%S')
print(emoji.emojize('Did someone say M62 :anguished:!? Let'
                    '\'s check the latest updates from Highways'
                    ' England! :police_car::rotating_light:',
                    use_aliases=True))

if len(cells) < 4:
    # The code below indexes cells 0..3; without them there is nothing to
    # report, so say so instead of failing with an IndexError.
    print(f'As of {currentTime}, no traffic table could be read from {url}')
else:
    print(f'As of {currentTime}, there is currently {newList[2]} {newList[1]}'
          f' on the {newList[0]}')

    # Split the fourth cell's raw markup on '<' so every fragment starts with
    # the remainder of a tag; the pattern then captures "label : value" pairs
    # that follow the closing '>'.
    desc = str(cells[3])
    descSplit = desc.split('<')
    pattern = r"\>(.+?)\s+?\:\s+?(.+?)$"
    for fragment in descSplit:
        matches = re.findall(pattern, fragment)
        # A spacer is printed for every fragment, matching or not.
        printBreak()
        if matches:
            label, value = matches[0]
            print(f'itemOne: {label}')
            print(f'itemTwo: {value}')

View File

@@ -11,13 +11,19 @@ options.headless = True
driver = webdriver.Chrome(options=options)
driver.get(URL)
S = lambda X: driver.execute_script('return document.body.parentNode.scroll'
+ X)
# driver.set_window_size(S('Width'), S('Height')+500) # May need manual adjustment
driver.set_window_size(S('Width'), 1000) # May need manual adjustment
def setWidth(var, adj=0):
    """Return a scroll dimension of the loaded page, plus an adjustment.

    Despite its name this function sets nothing: it asks the browser for
    ``document.body.parentNode.scroll<var>`` and returns that value with
    ``adj`` added.

    Args:
        var: Suffix appended to ``scroll`` to form the property name,
            e.g. ``'Width'`` or ``'Height'``.
        adj: Amount added to the value reported by the browser.

    Returns:
        The value returned by the script, plus ``adj``.
    """
    # `driver` is the module-level WebDriver instance.
    script = "return document.body.parentNode.scroll" + var
    return driver.execute_script(script) + adj
driver.set_window_size(setWidth('Width'), setWidth('Height'))
driver.find_element_by_tag_name('body').screenshot('web_screenshot.png')
# driver.find_element_by_css_selector('#post-4706').screenshot('web_screenshot.png')
print(driver.find_element_by_css_selector('#post-4706').text)
# print(driver.find_element_by_css_selector('#post-4706').text)
im = Image.open('web_screenshot.png')
width, height = im.size

Binary file not shown.

Before

Width:  |  Height:  |  Size: 161 KiB

After

Width:  |  Height:  |  Size: 833 KiB