Add completed prototype for Slack bot

2019-10-04 22:24:53 +01:00
parent 9f28a9ddce
commit e154f6db75
7 changed files with 139 additions and 50 deletions
--- a/slack-bot/cropped.png
+++ b/slack-bot/cropped.png
--- a/slack-bot/scraper.py
+++ b/slack-bot/scraper.py
@@ -4,7 +4,7 @@ import emoji
 from datetime import datetime
 import re

-url = 'http://www.trafficengland.com/traffic-alerts'
+url = 'https://www.trafficdelays.co.uk/a14-traffic-delays/'
 # prepare the  option for the chrome driver
 options = webdriver.ChromeOptions()
 options.add_argument('headless')
@@ -14,58 +14,55 @@ browser = webdriver.Chrome(options=options)
 browser.get(url)
 html = browser.page_source

-soup = BeautifulSoup(html, features='lxml')
-# soup.find_all(class_='alerts-severity-Severe')
+
+congestion = browser.find_element_by_xpath('//*[@id="congestion"]')
+accident = browser.find_element_by_xpath('//*[@id="accident"]')
+
+congestionText = congestion.text
+congestionHTML = congestion.get_attribute('innerHTML')
+congestionCount = congestionHTML.count('<li>')
+pattern = r".*\<li\>.*title=\".*\".*\>((.|\n)*?)\<br\>"
+congestionRegexExtraction = re.findall(pattern, congestionHTML)
+
+accidentText = accident.text
+accidentHTML = accident.get_attribute('innerHTML')
+accidentCount = accidentHTML.count('<li>')
+accidentRegexExtraction = re.findall(pattern, accidentHTML)


 def printBreak():
    print('\n')


-table = soup.find_all('td')
-
-list = []
-
-for item in table:
-    list.append(item)
-
-# for i in range(0, 4):
-#     print(list[i])
-
-# printBreak()
-
-# totalItems = int(len(list) / 4)
-
-# for i in range(0, 4):
-#     print(list[i].string)
-#     printBreak()
-
-newList = ([x.text for x in soup.find_all('td')])
-
 currentTime = datetime.now().strftime('%H:%M:%S')

 print(emoji.emojize('Did someone say M62 :anguished:!? Let'
                    '\'s check the latest updates from Highways'
-                    ' England! :police_car::rotating_light:',
+                    f' England! as of {currentTime}'
+                    ':police_car::rotating_light:',
                    use_aliases=True))
-print(f'As of {currentTime}, there is currently {newList[2]} {newList[1]}'
-      f' on the {newList[0]}')
-
-# print(list[3].prettify())
-desc = str(list[3])
-descSplit = desc.split('<')
-
-pattern = r"\>(.+?)\s+?\:\s+?(.+?)$"
-
-for item in descSplit:
-    # print(item)
-    regex = re.findall(pattern, item)
-    # print(f'Type: {type(regex)}')
-    # print(f'Matches: {regex}')
-    try:
-        printBreak()
-        print(f'itemOne: {regex[0][0]}')
-        print(f'itemTwo: {regex[0][1]}')
-    except IndexError:
-        pass
-    # print(f'itemTwo: {regex[1]}')
+printBreak()
+# Report congestion first, then accidents.  The counts come from counting
+# '<li>' in each section's innerHTML, while the descriptions come from a
+# separate regex pass, so the two can disagree.  Slicing the match list
+# (rather than indexing it by the count) avoids an IndexError when the
+# regex found fewer entries than there are '<li>' items.
+# NOTE(review): the messages say M62 but the scraped URL is the A14 page;
+# confirm which road is intended.
+if congestionCount == 0:
+    print(emoji.emojize(f'There are currently no reported congestions on the'
+                        f' M62 :thinking_face:', use_aliases=True))
+else:
+    print(emoji.emojize(f'There are currently {congestionCount} incident(s)'
+                        f' on the M62 :scream:', use_aliases=True))
+    # re.findall returns one tuple of groups per match; element 0 is the
+    # text captured between the opening tag and the first '<br>'.
+    for match in congestionRegexExtraction[:congestionCount]:
+        print(match[0] + '\n')
+printBreak()
+if accidentCount == 0:
+    print(emoji.emojize(f'There are currently no reported accidents on the'
+                        f' M62 :thinking_face:', use_aliases=True))
+else:
+    print(emoji.emojize(f'There are currently {accidentCount} incident(s)'
+                        f' on the M62 :scream:', use_aliases=True))
+    # Same tuple layout as the congestion matches above.
+    for match in accidentRegexExtraction[:accidentCount]:
+        print(match[0] + '\n')
+printBreak()
+print(f'Hey Andy, have you thought about getting the train?'
+      + emoji.emojize(f' :bullettrain_front:', use_aliases=True))
+print(f'Hey Andy, maybe flying would be quicker?'
+      + emoji.emojize(f' :helicopter:', use_aliases=True))
+print(f'Don\'t fret, he can always work from home!'
+      + emoji.emojize(f' :house_with_garden:', use_aliases=True))
--- a/slack-bot/scraper.py.old
+++ b/slack-bot/scraper.py.old
@@ -0,0 +1,71 @@
+from bs4 import BeautifulSoup
+from selenium import webdriver
+import emoji
+from datetime import datetime
+import re
+
+url = 'https://www.trafficdelays.co.uk/southern-england/'
+# prepare the  option for the chrome driver
+options = webdriver.ChromeOptions()
+options.add_argument('headless')
+
+# start the chrome driver
+browser = webdriver.Chrome(options=options)
+browser.get(url)
+html = browser.page_source
+
+soup = BeautifulSoup(html, features='lxml')
+# soup.find_all(class_='alerts-severity-Severe')
+
+
+def printBreak():
+    print('\n')
+
+
+# table = soup.find_all('td')
+
+# list = []
+
+# for item in table:
+#     list.append(item)
+
+# for i in range(0, 4):
+#     print(list[i])
+
+# printBreak()
+
+# totalItems = int(len(list) / 4)
+
+# for i in range(0, 4):
+#     print(list[i].string)
+#     printBreak()
+
+# newList = ([x.text for x in soup.find_all('td')])
+
+currentTime = datetime.now().strftime('%H:%M:%S')
+
+print(emoji.emojize('Did someone say M62 :anguished:!? Let'
+                    '\'s check the latest updates from Highways'
+                    ' England! :police_car::rotating_light:',
+                    use_aliases=True))
+# NOTE(review): `newList` is only assigned in a commented-out line above,
+# so this print raises NameError as written.  Likewise `list` is no longer
+# rebound by this script and refers to the builtin in `list[3]` below.
+print(f'As of {currentTime}, there is currently {newList[2]} {newList[1]}'
+      f' on the {newList[0]}')
+
+# print(list[3].prettify())
+desc = str(list[3])
+descSplit = desc.split('<')
+
+pattern = r"\>(.+?)\s+?\:\s+?(.+?)$"
+
+for item in descSplit:
+    # print(item)
+    regex = re.findall(pattern, item)
+    # print(f'Type: {type(regex)}')
+    # print(f'Matches: {regex}')
+    try:
+        printBreak()
+        print(f'itemOne: {regex[0][0]}')
+        print(f'itemTwo: {regex[0][1]}')
+    except IndexError:
+        pass
+    # print(f'itemTwo: {regex[1]}')
--- a/slack-bot/screenshotFull.py
+++ b/slack-bot/screenshotFull.py
@@ -11,13 +11,19 @@ options.headless = True
 driver = webdriver.Chrome(options=options)
 driver.get(URL)

-S = lambda X: driver.execute_script('return document.body.parentNode.scroll'
-                                    + X)
-# driver.set_window_size(S('Width'), S('Height')+500)  # May need manual adjustment
-driver.set_window_size(S('Width'), 1000)  # May need manual adjustment
+
+def setWidth(var, adj=0):
+    """Return a scroll dimension of the loaded page plus an adjustment.
+
+    Despite the name, nothing is set here: the function runs
+    ``return document.body.parentNode.scroll<var>`` in the browser via
+    ``driver.execute_script`` and returns that value with ``adj`` added.
+
+    var -- suffix appended to ``scroll`` in the script (the caller passes
+           'Width' and 'Height').
+    adj -- amount added to the value the script returns (default 0).
+    """
+    script = "return document.body.parentNode.scroll" + (var)
+    return driver.execute_script(script) + adj
+
+
+driver.set_window_size(setWidth('Width'), setWidth('Height'))
+
+
 driver.find_element_by_tag_name('body').screenshot('web_screenshot.png')
 # driver.find_element_by_css_selector('#post-4706').screenshot('web_screenshot.png')
-print(driver.find_element_by_css_selector('#post-4706').text)
+
+# print(driver.find_element_by_css_selector('#post-4706').text)

 im = Image.open('web_screenshot.png')
 width, height = im.size
--- a/slack-bot/web_screenshot.png
+++ b/slack-bot/web_screenshot.png