v0.1 of send and receive scripts
This commit is contained in:
70
slack-bot/traffic-scraper/dev/scraper.py
Normal file
70
slack-bot/traffic-scraper/dev/scraper.py
Normal file
@@ -0,0 +1,70 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from selenium import webdriver
|
||||
import emoji
|
||||
from datetime import datetime
|
||||
import re
|
||||
|
||||
# Page that lists the current M62 congestion and accident reports.
url = 'https://www.trafficdelays.co.uk/m62-traffic-delays/'

# Applied to the innerHTML of each incident list. re.findall returns one
# tuple per match; element 0 of the tuple is the text captured lazily up to
# the next <br>.
pattern = r".*\<li\>.*title=\".*\".*\>((.|\n)*?)\<br\>"

# prepare the option for the chrome driver
options = webdriver.ChromeOptions()
options.add_argument('headless')

# start the chrome driver
browser = webdriver.Chrome(options=options)

try:
    browser.get(url)
    html = browser.page_source

    # The generic find_element(by, value) form is used because the
    # find_element_by_* helpers were removed in Selenium 4.3; 'xpath' is the
    # string value of By.XPATH, so no extra import is needed.
    congestion = browser.find_element('xpath', '//*[@id="congestion"]')
    accident = browser.find_element('xpath', '//*[@id="accident"]')

    # Congestion section: visible text, raw markup, number of <li> entries
    # and the per-entry text extracted by the regex.
    congestionText = congestion.text
    congestionHTML = congestion.get_attribute('innerHTML')
    congestionCount = congestionHTML.count('<li>')
    congestionRegexExtraction = re.findall(pattern, congestionHTML)

    # Accident section: same four values as above.
    accidentText = accident.text
    accidentHTML = accident.get_attribute('innerHTML')
    accidentCount = accidentHTML.count('<li>')
    accidentRegexExtraction = re.findall(pattern, accidentHTML)
except Exception:
    # Do not leave a headless Chrome process behind when scraping fails;
    # the original error is re-raised unchanged.
    browser.quit()
    raise
|
||||
|
||||
|
||||
def printBreak():
    """Write a blank separator (two newline characters) to stdout."""
    print(end='\n\n')
|
||||
|
||||
|
||||
# Timestamp shown in the greeting line.
currentTime = datetime.now().strftime('%H:%M:%S')

# NOTE(review): use_aliases=True is kept exactly as in the original calls;
# newer releases of the emoji package may expect language='alias' instead.
# Confirm against the installed version.
try:
    print(emoji.emojize('Did someone say M62 :anguished:!? Let'
                        '\'s check the latest updates from Highways'
                        f' England! as of {currentTime}'
                        ':police_car::rotating_light:',
                        use_aliases=True))
    printBreak()

    # Congestion summary followed by one line per extracted entry.
    if congestionCount == 0:
        print(emoji.emojize('There are currently no reported congestions on the'
                            ' M62 :thinking_face:', use_aliases=True))
    else:
        print(emoji.emojize(f'There are currently {congestionCount} incident(s)'
                            ' on the M62 :scream:', use_aliases=True))
        # Slice instead of indexing by the <li> count: the regex can yield
        # fewer matches than there are <li> tags, which used to raise
        # IndexError. At most congestionCount entries are still printed.
        for match in congestionRegexExtraction[:congestionCount]:
            print(match[0] + '\n')
    printBreak()

    # Accident summary, same layout as the congestion summary.
    if accidentCount == 0:
        print(emoji.emojize('There are currently no reported accidents on the'
                            ' M62 :thinking_face:', use_aliases=True))
    else:
        print(emoji.emojize(f'There are currently {accidentCount} incident(s)'
                            ' on the M62 :scream:', use_aliases=True))
        for match in accidentRegexExtraction[:accidentCount]:
            print(match[0] + '\n')
    printBreak()

    # Closing suggestions.
    print('Hey Andy, have you thought about getting the train?'
          + emoji.emojize(' :bullettrain_front:', use_aliases=True))
    print('Hey Andy, maybe flying would be quicker?'
          + emoji.emojize(' :helicopter:', use_aliases=True))
    print('Don\'t fret, he can always work from home!'
          + emoji.emojize(' :house_with_garden:', use_aliases=True))
finally:
    # Always shut the headless browser down, even if printing failed.
    browser.quit()
|
||||
71
slack-bot/traffic-scraper/dev/scraper.py.old
Normal file
71
slack-bot/traffic-scraper/dev/scraper.py.old
Normal file
@@ -0,0 +1,71 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from selenium import webdriver
|
||||
import emoji
|
||||
from datetime import datetime
|
||||
import re
|
||||
|
||||
# Page whose HTML is parsed below.
url = 'https://www.trafficdelays.co.uk/southern-england/'

# prepare the option for the chrome driver
options = webdriver.ChromeOptions()
options.add_argument('headless')

# start the chrome driver
browser = webdriver.Chrome(options=options)
try:
    browser.get(url)
    html = browser.page_source
finally:
    # Only the page source string is used from here on, so the headless
    # browser is shut down immediately; previously it was never quit.
    browser.quit()

soup = BeautifulSoup(html, features='lxml')
|
||||
# soup.find_all(class_='alerts-severity-Severe')
|
||||
|
||||
|
||||
def printBreak():
    """Write a blank separator (two newline characters) to stdout."""
    print(end='\n\n')
|
||||
|
||||
|
||||
# table = soup.find_all('td')
|
||||
|
||||
# list = []
|
||||
|
||||
# for item in table:
|
||||
# list.append(item)
|
||||
|
||||
# for i in range(0, 4):
|
||||
# print(list[i])
|
||||
|
||||
# printBreak()
|
||||
|
||||
# totalItems = int(len(list) / 4)
|
||||
|
||||
# for i in range(0, 4):
|
||||
# print(list[i].string)
|
||||
# printBreak()
|
||||
|
||||
# newList = ([x.text for x in soup.find_all('td')])
|
||||
|
||||
# Timestamp shown in the summary line.
currentTime = datetime.now().strftime('%H:%M:%S')

# Every <td> tag on the page, plus the text of each one. The assignments
# that used to provide these values are commented out elsewhere in the
# script, which left `newList` undefined and `list` bound to the builtin
# type.
cells = soup.find_all('td')
newList = [cell.text for cell in cells]

# Splits a "<label> : <value>" fragment that follows a closing '>'.
pattern = r"\>(.+?)\s+?\:\s+?(.+?)$"

print(emoji.emojize('Did someone say M62 :anguished:!? Let'
                    '\'s check the latest updates from Highways'
                    ' England! :police_car::rotating_light:',
                    use_aliases=True))

if len(cells) < 4:
    # The summary and description below need the first four cells.
    print('No incident table cells were found on the page')
else:
    # NOTE(review): assumes the first three cells are road, incident type
    # and count/severity, in that order (as the sentence below implies).
    # Confirm against the live page.
    print(f'As of {currentTime}, there is currently {newList[2]} {newList[1]}'
          f' on the {newList[0]}')

    # The fourth cell is handled as raw markup and cut at every '<' so each
    # fragment can be matched on its own.
    desc = str(cells[3])
    descSplit = desc.split('<')

    for item in descSplit:
        regex = re.findall(pattern, item)
        # A separator is printed for every fragment, matched or not, as
        # before.
        printBreak()
        if regex:
            print(f'itemOne: {regex[0][0]}')
            print(f'itemTwo: {regex[0][1]}')
|
||||
64
slack-bot/traffic-scraper/dev/sendToSQS_old.py
Normal file
64
slack-bot/traffic-scraper/dev/sendToSQS_old.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import boto3
|
||||
import os
|
||||
import sys
|
||||
sys.path.append(os.getcwd())
|
||||
sys.path.append('/home/dtomlinson/projects/slack-bot/traffic-scraper/prd')
|
||||
from pullTrafficInfo import getTrafficInfo
|
||||
import base64
|
||||
# import emoji
|
||||
import hashlib
|
||||
from datetime import datetime
|
||||
|
||||
# Fixed inputs for this run: the road to query and the FIFO queue URL.
motorway = 'A3'
queueURL = 'https://sqs.eu-west-1.amazonaws.com/745437999005/slack-bot.fifo'

currentTime = datetime.now().strftime('%H:%M:%S')

# SQS client created from the named local AWS profile.
session = boto3.Session(profile_name='plex-aws')
sqs = session.client('sqs')

# queue = sqs.create_queue(QueueName='test', Attributes={'DelaySeconds': '5'})

# Run the traffic lookup chain for the chosen motorway and keep the final
# object returned by generateOutput().
inst = (
    getTrafficInfo.getTrafficURL(motorway)
    .findIncidents()
    .getIncidentInformation()
    .generateOutput()
)

# encoded = base64.b64encode(inst.output[0].encode())
# decoded = base64.b64decode(encoded).decode()
|
||||
|
||||
# message = sqs.send_message(QueueUrl=queueURL,
|
||||
# MessageBody='string',
|
||||
# MessageGroupId='slack-bot-motorway')
|
||||
|
||||
# response = sqs.receive_message(QueueUrl=queueURL,
|
||||
# MaxNumberOfMessages=5)
|
||||
# if 'Messages' in response:
|
||||
# for message in response['Messages']:
|
||||
# print(message['Body'])
|
||||
# else:
|
||||
# print('Queue is empty')
|
||||
|
||||
# for item in inst.output:
|
||||
# print(item)
|
||||
# encoded = (base64.b64encode(item.encode())).decode()
|
||||
# dedupId = hashlib.md5((item + currentTime).encode()).hexdigest()
|
||||
# msg = sqs.send_message(QueueUrl=queueURL,
|
||||
# MessageBody=encoded,
|
||||
# MessageGroupId=f'slack-bot-{motorway}',
|
||||
# MessageDeduplicationId=dedupId)
|
||||
# if msg is not None:
|
||||
# print(msg["MessageId"])
|
||||
# # print(encoded)
|
||||
# # print(encoded.encode())
|
||||
# # print(base64.b64decode(encoded.encode()).decode())
|
||||
|
||||
|
||||
# Ask the queue for up to ten messages in a single call.
response = sqs.receive_message(QueueUrl=queueURL, MaxNumberOfMessages=10)

if 'Messages' not in response:
    print('Queue is empty')
else:
    # Each body is base64 text: turn it into bytes, decode the base64, then
    # decode the resulting bytes back into a string for printing.
    for msg in response['Messages']:
        body_bytes = msg['Body'].encode()
        print(base64.b64decode(body_bytes).decode())
|
||||
Reference in New Issue
Block a user