adding initial boilerplates

python/aws/s3/getS3Info.py (new file, 156 lines)

@@ -0,0 +1,156 @@
import datetime
import logging
import os
import warnings
from socket import gaierror

import boto3
from splunk_hec_handler import SplunkHecHandler

import customLogLevel


class getFileDetails(object):
    """Query an S3 bucket for information about the files stored in it.

    Required arguments:
    bucket: a string with the name of the bucket
    remoteFiles: a string with the path and filename prefix of the files
    client: a boto3 s3 client
    resource: a boto3 s3 resource

    Optional arguments:
    logger: a logging.Logger with a Splunk HEC handler attached
    """
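    # Minimal usage sketch (bucket name and prefix are hypothetical):
    #
    #   client = boto3.client('s3')
    #   resource = boto3.resource('s3')
    #   files = getFileDetails('some-bucket', 'some/prefix_', client, resource)
    #   files.getS3Files()  # without a logger, sendToSplunk() will raise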

    def __init__(self,
                 bucket: str,
                 remoteFiles: str,
                 client: boto3.client,
                 resource: boto3.resource,
                 logger: logging.Logger = None
                 ):
        super().__init__()
        self.bucket = bucket
        self.remoteFiles = remoteFiles
        self.client = client
        self.resource = resource
        self.logger = logger

    @staticmethod
    def formatFolder(remoteFolder: str):
        # Ensure the folder name ends with a trailing slash; an empty
        # string raises IndexError and is returned as ''.
        try:
            if remoteFolder[-1] != '/':
                remoteFolder = f'{remoteFolder}/'
        except IndexError:
            remoteFolder = ''
        return remoteFolder
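    # Examples:
    #   formatFolder('bot_predictions') returns 'bot_predictions/'
    #   formatFolder('bot_predictions/') returns 'bot_predictions/'
    #   formatFolder('') returns ''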

    @staticmethod
    def generateDate(date_format='%Y-%m-%d',
                     time: datetime.datetime = None
                     ):
        """Generate a human-readable date string from a
        datetime.datetime object.
        Defaults to today's date in %Y-%m-%d format."""
        if time is None:
            time = datetime.date.today()
        date = time.strftime(date_format)
        return date
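    # e.g. generateDate() returns today's date, such as '2020-05-04'
    #      (the date shown is illustrative only)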

    @staticmethod
    def getEpochTime(time: datetime.datetime):
        # Convert a datetime to Unix epoch seconds (float).
        epoch = datetime.datetime.timestamp(time)
        return epoch

    def getS3Files(self):
        # Paginate through the bucket and flatten the per-page object
        # listings with a JMESPath search over 'Contents'.
        self.paginator = self.client.get_paginator('list_objects')
        self.iterator = self.paginator.paginate(Bucket=self.bucket,
                                                Prefix=self.remoteFiles)
        self.filtered = self.iterator.search('Contents[*]')
        self.fileDict = dict()
        counter = 0

        for i in self.filtered:
            # search() yields None for pages without a 'Contents' key
            # (e.g. when nothing matches the prefix), so skip those.
            if i is None:
                continue
            # Stamp each record with the current time and convert the
            # object's LastModified datetime to epoch seconds.
            now = self.getEpochTime(datetime.datetime.now())
            i.update({'_time': now})
            i['LastModified'] = self.getEpochTime(i['LastModified'])
            self.fileDict[counter] = i
            counter += 1
        return self
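    # Each fileDict entry is an S3 object record, e.g. (fields abridged,
    # values illustrative only):
    #   {'Key': 'bot_predictions/blocking_suggestions_2020-05-04',
    #    'LastModified': 1588598400.0, 'Size': 1024, '_time': 1588600000.0}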
    def sendToSplunk(self):
        # Emit each record at the custom 'splunk' log level; AttributeError
        # fires when the level was never registered or no logger was given.
        for i in self.fileDict.values():
            try:
                self.logger.splunk(i)
            except AttributeError:
                raise Exception("No logger level exists for the custom Splunk"
                                " level. Try creating a customLogLevel for"
                                " splunk and try again.")
        return self


def _call():
    # Define env variables for AWS boto3
    os.environ['AWS_PROFILE'] = 'netacea'
    os.environ['AWS_DEFAULT_REGION'] = 'eu-west-1'

    # Define bucket and file names
    bucket = 'td-ingest-storage-williamhill'
    remoteFolder = 'bot_predictions/'
    remoteFilePrefix = 'blocking_suggestions_'
    append_date = True
    date_format = '%Y-%m-%d'

    # Define splunk hec handler for logging
    try:
        splunk_handler = SplunkHecHandler('sc1uxpremn81.prod.williamhill.plc',
                                          'ea641e31-870e-4f5f-965f'
                                          '-57e0c9a2aa3d',
                                          port=8088, proto='https',
                                          ssl_verify=False,
                                          sourcetype='httpevent')
    except gaierror:
        # The HEC host could not be resolved; there is nothing to do.
        raise SystemExit

    logger = logging.getLogger('SplunkHecHandlerExample')
    logger.setLevel(logging.DEBUG)
    logger.addHandler(splunk_handler)
    customLogLevel.addLoggingLevel('splunk', 15, logging)
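    # customLogLevel is a local module not shown in this commit; it is
    # assumed to follow the common addLoggingLevel recipe, registering a
    # 'splunk' level at priority 15 and a logger.splunk() convenience
    # method, which sendToSplunk() above relies on.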

    main(bucket,
         remoteFolder,
         remoteFilePrefix,
         append_date,
         date_format,
         logger)


def main(bucket: str,
         remoteFolder: str,
         remoteFilePrefix: str,
         append_date: bool,
         date_format: str,
         logger: logging.Logger):

    if append_date:
        remoteFileDate = getFileDetails.generateDate(date_format)
    else:
        remoteFileDate = ''

    # Compose the S3 key prefix, e.g. 'bot_predictions/blocking_suggestions_'
    # plus today's date when append_date is True.
    remoteFolder = getFileDetails.formatFolder(remoteFolder)
    remoteFiles = f'{remoteFolder}{remoteFilePrefix}{remoteFileDate}'

    client = boto3.client('s3')
    resource = boto3.resource('s3')

    instance = getFileDetails(bucket,
                              remoteFiles,
                              client,
                              resource,
                              logger)
    instance.getS3Files().sendToSplunk()


if __name__ == '__main__':
    warnings.filterwarnings("ignore")
    _call()