diff --git a/slack-bot/cropped.png b/slack-bot/cropped.png new file mode 100644 index 0000000..debe43b Binary files /dev/null and b/slack-bot/cropped.png differ diff --git a/slack-bot/geckodriver.log b/slack-bot/geckodriver.log index e69de29..6abb2ed 100644 --- a/slack-bot/geckodriver.log +++ b/slack-bot/geckodriver.log @@ -0,0 +1,39 @@ +1570055118361 mozrunner::runner INFO Running command: "/usr/bin/firefox" "-marionette" "-foreground" "-no-remote" "-profile" "/tmp/rust_mozprofilerB4DJp" +1570055118656 addons.webextension.screenshots@mozilla.org WARN Loading extension 'screenshots@mozilla.org': Reading manifest: Invalid extension permission: mozillaAddons +1570055118656 addons.webextension.screenshots@mozilla.org WARN Loading extension 'screenshots@mozilla.org': Reading manifest: Invalid extension permission: resource://pdf.js/ +1570055118656 addons.webextension.screenshots@mozilla.org WARN Loading extension 'screenshots@mozilla.org': Reading manifest: Invalid extension permission: about:reader* +IPDL protocol Error: Received an invalid file descriptor +IPDL protocol Error: Received an invalid file descriptor +1570055120485 Marionette INFO Listening on port 35931 +1570055120666 Marionette WARN TLS certificate errors will be ignored for this session +IPDL protocol Error: Received an invalid file descriptor +[Parent 11866, Gecko_IOThread] WARNING: pipe error (68): Connection reset by peer: file /builddir/build/BUILD/firefox-66.0.2/ipc/chromium/src/chrome/common/ipc_channel_posix.cc, line 357 +1570055124259 Marionette INFO Stopped listening on port 35931 +1570055140941 mozrunner::runner INFO Running command: "/usr/bin/firefox" "-marionette" "-foreground" "-no-remote" "-profile" "/tmp/rust_mozprofileC5m1OE" +1570055141207 addons.webextension.screenshots@mozilla.org WARN Loading extension 'screenshots@mozilla.org': Reading manifest: Invalid extension permission: mozillaAddons +1570055141207 addons.webextension.screenshots@mozilla.org WARN Loading extension 'screenshots@mozilla.org': Reading manifest: Invalid extension permission: resource://pdf.js/ +1570055141207 addons.webextension.screenshots@mozilla.org WARN Loading extension 'screenshots@mozilla.org': Reading manifest: Invalid extension permission: about:reader* +IPDL protocol Error: Received an invalid file descriptor +IPDL protocol Error: Received an invalid file descriptor +1570055141995 Marionette INFO Listening on port 39457 +1570055142049 Marionette WARN TLS certificate errors will be ignored for this session +IPDL protocol Error: Received an invalid file descriptor +[Parent 12493, Gecko_IOThread] WARNING: pipe error (71): Connection reset by peer: file /builddir/build/BUILD/firefox-66.0.2/ipc/chromium/src/chrome/common/ipc_channel_posix.cc, line 357 +[Parent 12493, Gecko_IOThread] WARNING: pipe error (80): Connection reset by peer: file /builddir/build/BUILD/firefox-66.0.2/ipc/chromium/src/chrome/common/ipc_channel_posix.cc, line 357 +1570055146451 Marionette INFO Stopped listening on port 39457 +1570055159366 mozrunner::runner INFO Running command: "/usr/bin/firefox" "-marionette" "-foreground" "-no-remote" "-profile" "/tmp/rust_mozprofileJGgS47" +1570055159635 addons.webextension.screenshots@mozilla.org WARN Loading extension 'screenshots@mozilla.org': Reading manifest: Invalid extension permission: mozillaAddons +1570055159635 addons.webextension.screenshots@mozilla.org WARN Loading extension 'screenshots@mozilla.org': Reading manifest: Invalid extension permission: resource://pdf.js/ +1570055159635 addons.webextension.screenshots@mozilla.org WARN Loading extension 'screenshots@mozilla.org': Reading manifest: Invalid extension permission: about:reader* +IPDL protocol Error: Received an invalid file descriptor +IPDL protocol Error: Received an invalid file descriptor +1570055160384 Marionette INFO Listening on port 41419 +1570055160477 Marionette WARN TLS certificate errors will be ignored for this session +IPDL protocol Error: Received an invalid file descriptor +1570055162449 Marionette INFO Stopped listening on port 41419 + +###!!! [Parent][DispatchAsyncMessage] Error: PClientSource::Msg_Teardown Route error: message sent to unknown actor ID + + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + diff --git a/slack-bot/regex.txt b/slack-bot/regex.txt index c434750..c2774f2 100644 --- a/slack-bot/regex.txt +++ b/slack-bot/regex.txt @@ -1,2 +1,4 @@ col 4 ^\<[a-zA-Z\/]+\>(.+?)\s+?\:\s+?(.+?)\<[a-zA-Z\/]+\>(.+?)\s+?\:\s+?(.+?)\<[a-zA-Z\/]+\>(.+?)\s+?\:\s+?(.+?)\<[a-zA-Z\/]+\>(.+?)\s+?\:\s+?(.+?)\<[a-zA-Z\/]+\>(.+?)\s+?\:\s+?(.+?)\<[a-zA-Z\/]+\>$ + +^\((.+?)\s+?\:\s+(.+?).+)<\/td\>$ diff --git a/slack-bot/scraper.py b/slack-bot/scraper.py index a1dad43..0cd8810 100644 --- a/slack-bot/scraper.py +++ b/slack-bot/scraper.py @@ -2,6 +2,7 @@ from bs4 import BeautifulSoup from selenium import webdriver import emoji from datetime import datetime +import re url = 'http://www.trafficengland.com/traffic-alerts' # prepare the option for the chrome driver @@ -9,7 +10,7 @@ options = webdriver.ChromeOptions() options.add_argument('headless') # start the chrome driver -browser = webdriver.Chrome(chrome_options=options) +browser = webdriver.Chrome(options=options) browser.get(url) html = browser.page_source @@ -28,16 +29,16 @@ list = [] for item in table: list.append(item) -for i in range(0, 4): - print(list[i]) +# for i in range(0, 4): +# print(list[i]) -printBreak() +# printBreak() -totalItems = int(len(list) / 4) +# totalItems = int(len(list) / 4) -for i in range(0, 4): - print(list[i].string) - printBreak() +# for i in range(0, 4): +# print(list[i].string) +# printBreak() newList = ([x.text for x in soup.find_all('td')]) @@ -47,8 +48,24 @@ print(emoji.emojize('Did someone say M62 :anguished:!? Let' '\'s check the latest updates from Highways' ' England! :police_car::rotating_light:', use_aliases=True)) -print(f'As of {currentTime}, there is currently a {newList[2]} {newList[1]}' +print(f'As of {currentTime}, there is currently {newList[2]} {newList[1]}' f' on the {newList[0]}') # print(list[3].prettify()) -print(str(list[3])) +desc = str(list[3]) +descSplit = desc.split('<') + +pattern = r"\>(.+?)\s+?\:\s+?(.+?)$" + +for item in descSplit: + # print(item) + regex = re.findall(pattern, item) + # print(f'Type: {type(regex)}') + # print(f'Matches: {regex}') + try: + printBreak() + print(f'itemOne: {regex[0][0]}') + print(f'itemTwo: {regex[0][1]}') + except IndexError: + pass + # print(f'itemTwo: {regex[1]}') diff --git a/slack-bot/screenshot.png b/slack-bot/screenshot.png new file mode 100644 index 0000000..2dab5f9 Binary files /dev/null and b/slack-bot/screenshot.png differ diff --git a/slack-bot/screenshot.py b/slack-bot/screenshot.py new file mode 100644 index 0000000..49c2bee --- /dev/null +++ b/slack-bot/screenshot.py @@ -0,0 +1,6 @@ +from selenium import webdriver + +driver = webdriver.Firefox() +driver.get('https://www.python.org') +driver.save_screenshot('screenshot.png') +driver.quit() diff --git a/slack-bot/screenshotFull.py b/slack-bot/screenshotFull.py new file mode 100644 index 0000000..bc17b0c --- /dev/null +++ b/slack-bot/screenshotFull.py @@ -0,0 +1,26 @@ +# https://stackoverflow.com/questions/41721734/take-screenshot-of-full-page-with-selenium-python-with-chromedriver/57338909#57338909 + +from selenium import webdriver +from PIL import Image + +URL = 'https://www.trafficdelays.co.uk/m62-traffic-delays/' + +options = webdriver.ChromeOptions() +options.headless = True + +driver = webdriver.Chrome(options=options) +driver.get(URL) + +S = lambda X: driver.execute_script('return document.body.parentNode.scroll' + + X) +# driver.set_window_size(S('Width'), S('Height')+500) # May need manual adjustment +driver.set_window_size(S('Width'), 1000) # May need manual adjustment +driver.find_element_by_tag_name('body').screenshot('web_screenshot.png') +# driver.find_element_by_css_selector('#post-4706').screenshot('web_screenshot.png') +print(driver.find_element_by_css_selector('#post-4706').text) + +im = Image.open('web_screenshot.png') +width, height = im.size +region = im.crop((0, 0, width, 880)) +region.save('cropped.png') +driver.quit() diff --git a/slack-bot/web_screenshot.png b/slack-bot/web_screenshot.png new file mode 100644 index 0000000..e191079 Binary files /dev/null and b/slack-bot/web_screenshot.png differ