157 lines
4.7 KiB
Python
157 lines
4.7 KiB
Python
import boto3
|
|
import logging
|
|
import os
|
|
import datetime
|
|
from splunk_hec_handler import SplunkHecHandler
|
|
import warnings
|
|
import customLogLevel
|
|
from socket import gaierror
|
|
|
|
|
|
class getFileDetails(object):
    """Query an S3 bucket for information about files stored.

    Required arguments:
        bucket: a string with the name of the bucket
        remoteFiles: a string containing the path and filename of the files;
            it is passed as the ``Prefix`` of the listing request
        client: a boto3 s3 client
        resource: a boto3 s3 resource (stored on the instance; no method of
            this class uses it)

    Optional arguments:
        logger: a logger for the logging module that exposes a custom
            ``splunk`` method; only required by ``sendToSplunk``
    """

    # NOTE: ``client`` and ``resource`` are deliberately not annotated:
    # ``boto3.client`` and ``boto3.resource`` are factory functions, not
    # types, so using them as annotations is misleading.
    def __init__(self,
                 bucket: str,
                 remoteFiles: str,
                 client,
                 resource,
                 logger: logging.Logger = None
                 ):
        super().__init__()
        self.bucket = bucket
        self.remoteFiles = remoteFiles
        self.client = client
        self.resource = resource
        self.logger = logger
        # Start empty so sendToSplunk() is a harmless no-op when it is
        # called before getS3Files().
        self.fileDict = {}

    @staticmethod
    def formatFolder(remoteFolder: str):
        """Return ``remoteFolder`` with exactly one trailing '/' appended
        when it is missing. An empty (or None) folder yields ''."""
        if not remoteFolder:
            return ''
        if remoteFolder.endswith('/'):
            return remoteFolder
        return f'{remoteFolder}/'

    @staticmethod
    def generateDate(date_format='%Y-%m-%d',
                     time: datetime.datetime = None
                     ):
        """
        Generates a human readable time string for a
        datetime.datetime object.
        By default will use today with %Y-%m-%d format """
        if time is None:
            time = datetime.date.today()
        return time.strftime(date_format)

    @staticmethod
    def getEpochTime(time: datetime.datetime):
        """Return ``time`` as a POSIX timestamp (float seconds)."""
        return time.timestamp()

    def getS3Files(self):
        """List the objects under ``self.remoteFiles`` in ``self.bucket``.

        Every listed object description gets a ``_time`` key holding the
        current time as an epoch timestamp, and its ``LastModified`` value
        is converted to an epoch timestamp. The results are stored in
        ``self.fileDict`` keyed by a 0-based running index.

        Returns:
            self, so calls can be chained.
        """
        self.paginator = self.client.get_paginator('list_objects')
        self.iterator = self.paginator.paginate(Bucket=self.bucket,
                                                Prefix=self.remoteFiles)
        self.filtered = self.iterator.search('Contents[*]')
        self.fileDict = {}

        for entry in self.filtered:
            # The JMESPath search yields None for a page that has no
            # 'Contents' key (e.g. nothing matches the prefix); skip it
            # instead of failing on None.update().
            if entry is None:
                continue
            entry['_time'] = self.getEpochTime(datetime.datetime.now())
            entry['LastModified'] = self.getEpochTime(entry['LastModified'])
            self.fileDict[len(self.fileDict)] = entry
        return self

    def sendToSplunk(self):
        """Send every entry of ``self.fileDict`` through ``logger.splunk``.

        Returns:
            self, so calls can be chained.

        Raises:
            Exception: if there is something to send but the logger has no
                ``splunk`` method (including when no logger was supplied).
        """
        if not self.fileDict:
            return self

        # Resolve the custom level method once, outside the loop, so that an
        # AttributeError raised *inside* the logging call is not mistaken
        # for a missing custom level.
        try:
            emit = self.logger.splunk
        except AttributeError as err:
            raise Exception("No logger level exists for the custom Splunk"
                            " level. Try creating a customLogLevel for"
                            " splunk and try again.") from err

        for event in self.fileDict.values():
            emit(event)
        return self
|
|
|
|
|
|
def _call():
    """Set up the environment and the Splunk logger, then run ``main``.

    Sets the AWS profile and region environment variables, builds a logger
    with a Splunk HEC handler attached, registers the custom ``splunk``
    logging level and calls ``main`` with the hard-coded bucket settings.

    Raises:
        SystemExit: with a message and a non-zero exit status when creating
            the Splunk HEC handler raises ``socket.gaierror``.
    """
    # Define env variables for AWS boto3
    os.environ['AWS_PROFILE'] = 'netacea'
    os.environ['AWS_DEFAULT_REGION'] = 'eu-west-1'

    # Define bucket and file names
    bucket = 'td-ingest-storage-williamhill'
    remoteFolder = 'bot_predictions/'
    remoteFilePrefix = 'blocking_suggestions_'
    append_date = True
    date_format = '%Y-%m-%d'

    # Define splunk hec handler for logging
    # NOTE(review): the HEC token is committed in source and TLS
    # verification is disabled (ssl_verify=False); consider loading the
    # token from configuration and enabling verification.
    splunk_host = 'sc1uxpremn81.prod.williamhill.plc'
    try:
        splunk_handler = SplunkHecHandler(splunk_host,
                                          'ea641e31-870e-4f5f-965f'
                                          '-57e0c9a2aa3d',
                                          port=8088, proto='https',
                                          ssl_verify=False,
                                          sourcetype='httpevent')
    except gaierror as err:
        # A bare ``raise SystemExit`` exits with status 0, which reports the
        # failure as success; a string argument is printed to stderr and
        # gives exit status 1.
        raise SystemExit(
            f'Unable to set up the Splunk HEC handler for {splunk_host}: '
            f'{err}'
        ) from err

    logger = logging.getLogger('SplunkHecHandlerExample')
    logger.setLevel(logging.DEBUG)
    logger.addHandler(splunk_handler)
    # Registers the custom 'splunk' level (numeric value 15) on the logging
    # module.
    customLogLevel.addLoggingLevel('splunk', 15, logging)

    main(bucket,
         remoteFolder,
         remoteFilePrefix,
         append_date,
         date_format,
         logger)
|
|
|
|
|
|
def main(bucket: str,
         remoteFolder: str,
         remoteFilePrefix: str,
         append_date: bool,
         date_format: str,
         logger: logging.getLogger):
    """Build the S3 key prefix, list the matching files and log them.

    Arguments:
        bucket: name of the S3 bucket to query
        remoteFolder: folder inside the bucket; a trailing '/' is added
            when missing
        remoteFilePrefix: start of the file names to look for
        append_date: when true, today's date formatted with ``date_format``
            is appended to the prefix
        date_format: strftime format used for the appended date
        logger: logger handed to ``getFileDetails``
    """
    # Optional date suffix, e.g. today's date in the requested format.
    date_suffix = (getFileDetails.generateDate(date_format)
                   if append_date else '')

    folder = getFileDetails.formatFolder(remoteFolder)
    key_prefix = f'{folder}{remoteFilePrefix}{date_suffix}'

    details = getFileDetails(bucket=bucket,
                             remoteFiles=key_prefix,
                             client=boto3.client('s3'),
                             resource=boto3.resource('s3'),
                             logger=logger)

    # List first, then forward whatever the listing returned to Splunk.
    listed = details.getS3Files()
    listed.sendToSplunk()
|
|
|
|
|
|
# Script entry point: only runs when the file is executed directly.
if __name__ == '__main__':
    # Silence every warning for the duration of the run.
    warnings.filterwarnings(action="ignore")
    _call()
|