Add initial boilerplate scripts
257  python/aws/s3/downloadFromS3.py  Normal file
@@ -0,0 +1,257 @@
import boto3
import botocore
import os
from datetime import date, timedelta
import sys
import logging
import argparse
import glob

# Set Global Variables
log_location = 'pull.log'

remote_folder = ['bot_predictions/']
remote_file_prefix = ['blocking_suggestions_']

append_date = ['True']
date_format = ['%Y-%m-%d']

bucket = ['td-ingest-storage-williamhill']
access_key = ['AKIAYJXVWMRHQ2OGNHLA']
secret_key = ['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']


class downloadFiles(object):
    """Download new files from an S3 bucket, using a local history file
    to skip anything that has already been pulled."""
    today = date.today()
    yesterday = date.today() - timedelta(1)

    def __init__(self,
                 client,
                 resource,
                 bucket,
                 remote_folder,
                 remote_file_prefix,
                 local_path,
                 append_date=False,
                 date_format=''):
        super(downloadFiles, self).__init__()
        self.client = client
        self.resource = resource
        self.bucket = bucket
        self.append_date = append_date
        self.date_format = date_format
        self.remote_folder = self._folder_fixer(remote_folder)
        self.dest = f'{self.remote_folder!s}{remote_file_prefix!s}'
        self.local_path = local_path
        self.remote_list, self.local_list, self.local_file_list = \
            (list() for _ in range(3))

    @staticmethod
    def generate_date(date_format, relative_day='today'):
        if relative_day == 'today':
            date = downloadFiles.today.strftime(date_format)
        elif relative_day == 'yesterday':
            date = downloadFiles.yesterday.strftime(date_format)
        else:
            # guard against an unbound `date` for unexpected values
            raise ValueError(f'unsupported relative_day: {relative_day!r}')
        return date

    @staticmethod
    def _folder_fixer(folder):
        try:
            if folder[-1] != '/':
                folder = f'{folder}/'
        except IndexError:
            folder = ''
        return folder

    def get_path(self):
        if self.local_path:
            self.local_path = self._folder_fixer(self.local_path)
            logger.info(f'path entered is {self.local_path}')
            return self
        else:
            self.local_path = os.getcwd()
            self.local_path = self._folder_fixer(self.local_path)
            self.local_path = f'{self.local_path}blocking_suggestions/'
            logger.info(f'no path entered, using current directory '
                        f'{self.local_path}')
            return self

    def get_files(self):
        if self.append_date:
            date_today = self.generate_date(self.date_format)
            date_yesterday = self.generate_date(self.date_format, 'yesterday')
        else:
            date_today = ''
            date_yesterday = ''
        self.dest_list = []
        self.dest_list.append(f'{self.dest!s}{date_today!s}')
        self.dest_list.append(f'{self.dest!s}{date_yesterday!s}')
        for dest in self.dest_list:
            paginator = self.client.get_paginator('list_objects')
            iterator = paginator.paginate(Bucket=self.bucket, Prefix=dest)
            # search() yields None when the prefix matches no objects,
            # which surfaces as a TypeError on the slice below
            self.filtered = iterator.search('Contents[*].Key')
            for i in self.filtered:
                try:
                    self.remote_list.append(i)
                    self.local_list.append(
                        f'{self.local_path}{i[len(self.remote_folder):]}'
                    )
                    self.local_file_list.append(
                        f'{i[len(self.remote_folder):]}'
                    )
                except TypeError:
                    logger.info('no files available to download -- exiting')
                    raise SystemExit
        logger.debug(f'remote files are {self.remote_list}')
        logger.debug(f'saving files locally to {self.local_list}')
        return self

    def get_history(self):
        self.history_file = f'{self.local_path}.history.txt'
        try:
            logger.info('opening history file')
            open(self.history_file, 'a').close()
        except OSError:
            # covers both PermissionError and FileNotFoundError
            logger.critical('history file cannot be found or created'
                            ' - check permissions of the folder.')
            raise
        with open(self.history_file) as history:
            self.history_list = [line.rstrip('\n') for line in history]
        return self

    def remove_files(self):
        logger.info('attempting to clear current files')
        # '*' does not match the hidden .history.txt file, so the history
        # survives the clear-out
        current_files = glob.glob(f'{self.local_path}*')
        if current_files:
            for i in current_files:
                try:
                    os.remove(i)
                    logger.info(f'removed {i}')
                except OSError:
                    logger.exception('Error:')
        else:
            logger.info('no files to remove')
        return self

    def download_files(self):
        for remote_file, local_file_with_path, local_file in zip(
                self.remote_list, self.local_list, self.local_file_list):
            if local_file not in self.history_list:
                # boto3 creates the local file itself, so only the history
                # file needs to be held open here
                with open(self.history_file, 'a') as hist:
                    try:
                        self.resource.Bucket(self.bucket).download_file(
                            remote_file, local_file_with_path)
                        hist.write(f'\n{local_file}')
                        logger.info(f'downloaded {local_file}')
                    except botocore.exceptions.ClientError as e:
                        if e.response['Error']['Code'] == '404':
                            logger.warning(
                                f'The object {remote_file} does not exist.')
                        else:
                            raise
            else:
                logger.debug(f'{local_file} already downloaded - skipping')
        return self


def _call():
    global args, debug
    parser = argparse.ArgumentParser(description="""
        downloads any new files for the current day from an S3 bucket. \
        uses a local history file to track what has been \
        previously downloaded in the download path.
        """)
    parser.add_argument('--path', type=str,
                        help='enter pull path to download to. if left \
                        blank will use the same location as the script.',
                        default='')
    parser.add_argument('--debug', action='store_true', default=False,
                        help='Use this to log DEBUG information.')

    args = parser.parse_args()
    debug = vars(args)['debug']

    if debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    main(_clients=_clients,
         _resources=_resources,
         _buckets=_buckets,
         remote_folder=remote_folder,
         remote_file_prefix=remote_file_prefix,
         append_date=append_date,
         date_format=date_format,
         **vars(args))


def main(*args,
         _clients={'client0': ''},
         _resources={'resource0': ''},
         _buckets={'bucket0': ''},
         remote_folder=[''],
         remote_file_prefix=[''],
         append_date=['True'],
         date_format=[''],
         path='',
         **kwargs):
    logger.info('========= SCRIPT STARTED =========')
    instance = downloadFiles(client=_clients['client0'],
                             resource=_resources['resource0'],
                             bucket=_buckets['bucket0'],
                             remote_folder=remote_folder[0],
                             remote_file_prefix=remote_file_prefix[0],
                             local_path=path,
                             append_date=append_date[0],
                             date_format=date_format[0])
    instance.get_path().get_files().get_history().remove_files()\
        .download_files()
    logger.info('========= SCRIPT FINISHED =========')


if __name__ == '__main__':
    args, debug = '', ''

    # define logging
    logger = logging.getLogger(__name__)

    c_handler = logging.StreamHandler(sys.stdout)
    f_handler = logging.FileHandler(log_location)

    c_format = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    f_format = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    c_handler.setFormatter(c_format)
    f_handler.setFormatter(f_format)

    logger.addHandler(c_handler)
    logger.addHandler(f_handler)

    _clients = {}
    _resources = {}
    _buckets = {}

    for i in range(0, len(bucket)):
        _clients[f'client{i}'] =\
            boto3.client('s3',
                         aws_access_key_id=f'{access_key[i]}',
                         aws_secret_access_key=f'{secret_key[i]}')
        _resources[f'resource{i}'] =\
            boto3.resource('s3',
                           aws_access_key_id=f'{access_key[i]}',
                           aws_secret_access_key=f'{secret_key[i]}')
        _buckets[f'bucket{i}'] = f'{bucket[i]}'

    try:
        _length = len(remote_folder)
        if _length == 0:
            remote_folder = ['']
        elif remote_folder[0] == 'root':
            remote_folder = ['']
    except NameError:
        remote_folder = ['']
    _call()
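
# A minimal invocation sketch (flag values here are illustrative, not part
# of the script):
#   python downloadFromS3.py --path /tmp/blocking_suggestions --debug
# With --path omitted, files are written to ./blocking_suggestions/ under
# the current working directory.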
156
python/aws/s3/getS3Info.py
Normal file
156
python/aws/s3/getS3Info.py
Normal file
@@ -0,0 +1,156 @@
import boto3
import logging
import os
import datetime
from splunk_hec_handler import SplunkHecHandler
import warnings
import customLogLevel
from socket import gaierror


class getFileDetails(object):
    """Query an S3 bucket for information about the files stored there.

    Required arguments:
    bucket: a string with the name of the bucket
    remoteFiles: a string containing the path and filename of the files
    client: a boto3 s3 client
    resource: a boto3 s3 resource

    Optional arguments:
    logger: a logger fitted with a splunk HEC handler
    """

    def __init__(self,
                 bucket: str,
                 remoteFiles: str,
                 client: boto3.client,
                 resource: boto3.resource,
                 logger: logging.Logger = None
                 ):
        super().__init__()
        self.bucket = bucket
        self.remoteFiles = remoteFiles
        self.client = client
        self.resource = resource
        self.logger = logger

    @staticmethod
    def formatFolder(remoteFolder: str):
        try:
            if remoteFolder[-1] != '/':
                remoteFolder = f'{remoteFolder}/'
        except IndexError:
            remoteFolder = ''
        return remoteFolder

    @staticmethod
    def generateDate(date_format='%Y-%m-%d',
                     time: datetime.datetime = None
                     ):
        """Generate a human-readable time string for a
        datetime.datetime object.
        By default uses today with the %Y-%m-%d format."""
        if time is None:
            time = datetime.date.today()
        date = time.strftime(date_format)
        return date

    @staticmethod
    def getEpochTime(time: datetime.datetime):
        epoch = datetime.datetime.timestamp(time)
        return epoch

    def getS3Files(self):
        self.paginator = self.client.get_paginator('list_objects')
        self.iterator = self.paginator.paginate(Bucket=self.bucket,
                                                Prefix=self.remoteFiles)
        self.filtered = self.iterator.search('Contents[*]')
        self.fileDict = dict()
        counter = 0

        for i in self.filtered:
            if i is None:
                # search() yields None when the prefix matches no objects
                continue
            now = self.getEpochTime(datetime.datetime.now())
            i.update({'_time': now})
            i['LastModified'] = self.getEpochTime(i['LastModified'])
            self.fileDict[counter] = i
            counter += 1
        return self

    def sendToSplunk(self):
        for i in self.fileDict.values():
            try:
                self.logger.splunk(i)
            except AttributeError:
                raise Exception("No logger level exists for the custom Splunk"
                                " level. Try creating a customLogLevel for"
                                " splunk and try again.")
        return self


def _call():
    # Define env variables for AWS boto3
    os.environ['AWS_PROFILE'] = 'netacea'
    os.environ['AWS_DEFAULT_REGION'] = 'eu-west-1'

    # Define bucket and file names
    bucket = 'td-ingest-storage-williamhill'
    remoteFolder = 'bot_predictions/'
    remoteFilePrefix = 'blocking_suggestions_'
    append_date = True
    date_format = '%Y-%m-%d'

    # Define splunk hec handler for logging
    try:
        splunk_handler = SplunkHecHandler('sc1uxpremn81.prod.williamhill.plc',
                                          'ea641e31-870e-4f5f-965f'
                                          '-57e0c9a2aa3d',
                                          port=8088, proto='https',
                                          ssl_verify=False,
                                          sourcetype='httpevent')
    except gaierror:
        raise SystemExit

    logger = logging.getLogger('SplunkHecHandlerExample')
    logger.setLevel(logging.DEBUG)
    logger.addHandler(splunk_handler)
    customLogLevel.addLoggingLevel('splunk', 15, logging)

    main(bucket,
         remoteFolder,
         remoteFilePrefix,
         append_date,
         date_format,
         logger)


def main(bucket: str,
         remoteFolder: str,
         remoteFilePrefix: str,
         append_date: bool,
         date_format: str,
         logger: logging.Logger):

    if append_date:
        remoteFileDate = getFileDetails.generateDate(date_format)
    else:
        remoteFileDate = ''

    remoteFolder = getFileDetails.formatFolder(remoteFolder)
    remoteFiles = f'{remoteFolder}{remoteFilePrefix}{remoteFileDate}'

    client = boto3.client('s3')
    resource = boto3.resource('s3')

    instance = getFileDetails(bucket,
                              remoteFiles,
                              client,
                              resource,
                              logger)
    instance.getS3Files().sendToSplunk()


if __name__ == '__main__':
    warnings.filterwarnings("ignore")
    _call()
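
# A minimal usage sketch of the class on its own (the date in the prefix is
# illustrative; the bucket and folder names come from _call above):
#   client = boto3.client('s3')
#   resource = boto3.resource('s3')
#   inst = getFileDetails('td-ingest-storage-williamhill',
#                         'bot_predictions/blocking_suggestions_2020-01-01',
#                         client, resource, logger)
#   inst.getS3Files().sendToSplunk()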
141  python/aws/sqs/receiveFromSQS.py  Normal file
@@ -0,0 +1,141 @@
import boto3
import base64
import math
import json
from datetime import datetime


class receiveFromSQS(object):
    """Receive messages from an SQS queue, deleting them as they are read
    and optionally base64-decoding their bodies."""

    def __init__(self, session, queueURL):
        super(receiveFromSQS, self).__init__()
        self.session = session
        self.sqs = session.client('sqs')
        self.queueURL = queueURL
        self.messages = []

    @classmethod
    def createSession(cls, profileName, queueURL):
        session = boto3.Session(profile_name=profileName)
        return cls(session, queueURL)

    def getQueueLength(self):
        attributeNames = ['ApproximateNumberOfMessages']
        self.queueAttributes = self.sqs.get_queue_attributes(
            QueueUrl=self.queueURL, AttributeNames=attributeNames
        )
        self.queueLength = int(
            self.queueAttributes['Attributes']['ApproximateNumberOfMessages']
        )
        return self.queueLength

    def _receiveSQSMessage(
        self, totalNumberOfMessages, maxNumberOfMessages=10
    ):
        self.resp = []
        self.loops = int(
            math.ceil(totalNumberOfMessages / maxNumberOfMessages)
        )
        loopTrack = 0
        if totalNumberOfMessages <= 10:
            maxNumberOfMessages = totalNumberOfMessages
        else:
            # size of the final, partial batch once the full batches of ten
            # have been accounted for
            maxNumberOfMessagesFinal = 10 - (
                (self.loops * maxNumberOfMessages) - totalNumberOfMessages
            )
        if self.loops == 0:
            raise RuntimeError('No messages in the queue')
        for i in range(0, self.loops):
            if loopTrack == self.loops - 1 and totalNumberOfMessages > 10:
                maxNumberOfMessages = maxNumberOfMessagesFinal
            self.resp.append(
                self.sqs.receive_message(
                    QueueUrl=self.queueURL,
                    MaxNumberOfMessages=maxNumberOfMessages,
                )
            )
            try:
                entries = [
                    {
                        'Id': msg['MessageId'],
                        'ReceiptHandle': msg['ReceiptHandle'],
                    }
                    for msg in self.resp[i]['Messages']
                ]
                self._deleteSQSMessages(entries)
                loopTrack += 1
            except KeyError:
                print("No messages in the queue")
        return self

    def _extractMessageFromSQS(self, totalNumberOfMessages):
        self.extractedMessages = []
        self.receiptHandles = []
        try:
            for i in range(0, self.loops):
                _loops = len(self.resp[i]['Messages'])
                for j in range(0, _loops):
                    if 'Messages' in self.resp[i]:
                        self.extractedMessages.append(
                            self.resp[i]['Messages'][j]['Body']
                        )
                    else:
                        print('No messages in the queue')
        except KeyError:
            print('No messages in the queue key')
        return self

    def _deleteSQSMessages(self, entries):
        self.respDelete = self.sqs.delete_message_batch(
            QueueUrl=self.queueURL, Entries=entries
        )
        if len(self.respDelete['Successful']) != len(entries):
            raise RuntimeError(
                f'Failed to delete messages: entries={entries!r}'
                f' resp={self.respDelete!r}'
            )

    def _decodeMessages(self):
        if len(self.extractedMessages) == 0:
            print('No messages to process')
        else:
            for message in self.extractedMessages:
                decoded = base64.b64decode(message).decode()
                self.messages.append(decoded)
        return self

    def receiveAllMessages(self, b64=True, _totalNumberOfMessages=None):
        if _totalNumberOfMessages is None:
            totalNumberOfMessages = self.getQueueLength()
        else:
            totalNumberOfMessages = _totalNumberOfMessages
        self._receiveSQSMessage(totalNumberOfMessages)
        self._extractMessageFromSQS(
            totalNumberOfMessages=totalNumberOfMessages
        )
        if b64:
            self._decodeMessages()
        else:
            self.messages = self.extractedMessages
        return self

    def receiveNMessages(self, numberOfMessages, b64=True):
        self.receiveAllMessages(
            b64=b64, _totalNumberOfMessages=numberOfMessages
        )
        return self

    # def generateOutput(self, outputType='json'):
    #     if outputType == 'json':
    #         self.output = json.dumps(self.messages)
    #     return self.output

    def savetoDisk(self, path):
        self.timeNow = datetime.now().strftime('%d-%m-%Y_%H:%M:%S')
        if len(self.messages) > 0:
            with open(f'{path}/{self.timeNow}.json', 'w+') as outputFile:
                json.dump(self.messages, outputFile)
        else:
            print('No messages to save')
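

# A minimal usage sketch, in the same spirit as the commented example in
# sendToSQS.py (profile, queue URL and output path are illustrative):
#   inst = receiveFromSQS.createSession(
#       profileName='plex-aws',
#       queueURL='https://sqs.eu-west-1.amazonaws.com'
#                '/745437999005/slack-bot.fifo',
#   )
#   inst.receiveAllMessages().savetoDisk('/tmp')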
63  python/aws/sqs/sendToSQS.py  Normal file
@@ -0,0 +1,63 @@
import boto3
import base64
import hashlib
from datetime import datetime
import os
import sys

sys.path.append(os.getcwd())
from pullTrafficInfo import getTrafficInfo  # noqa: E402  local module


class sendToSQS(object):
    """Send messages to an SQS FIFO queue, optionally base64-encoding
    the body first."""

    def __init__(self, session, queueURL):
        super(sendToSQS, self).__init__()
        self.session = session
        self.sqs = session.client('sqs')
        self.queueURL = queueURL

    @classmethod
    def createSession(cls, profileName, queueURL):
        session = boto3.Session(profile_name=profileName)
        return cls(session, queueURL)

    def sendMessage(self, message, messageGroupId, b64=True, dedup=False):
        currentTime = datetime.now().strftime('%H:%M:%S.%f')
        if b64:
            message = (base64.b64encode(message.encode())).decode()
        if not dedup:
            # deduplication is off: salt the id with the current time so
            # identical bodies are still accepted by the FIFO queue
            dedupId = hashlib.md5((message + currentTime).encode()).hexdigest()
            msg = self.sqs.send_message(
                QueueUrl=self.queueURL,
                MessageBody=message,
                MessageGroupId=messageGroupId,
                MessageDeduplicationId=dedupId,
            )
        else:
            msg = self.sqs.send_message(
                QueueUrl=self.queueURL,
                MessageBody=message,
                MessageGroupId=messageGroupId,
            )
        if msg is not None:
            print(msg['MessageId'])


# inst = sendToSQS.createSession(
#     profileName='plex-aws',
#     queueURL='https://sqs.eu-west-1.amazonaws.com'
#              '/745437999005/slack-bot.fifo',
# )

# instM = (
#     getTrafficInfo.getTrafficURL('M62')
#     .findIncidents()
#     .getIncidentInformation()
#     .generateOutput()
# )

# for _ in range(0, 5):
#     for item in instM.output:
#         inst.sendMessage(message=item, messageGroupId='slack-bot-M62')