import datetime
import logging
import os
import warnings
from socket import gaierror

import boto3
from splunk_hec_handler import SplunkHecHandler

import customLogLevel


class getFileDetails(object):
    """Query an S3 bucket for information about files stored.

    Required arguments:
        bucket: a string with the name of the bucket
        remoteFiles: a string containing the path and filename of the files
        client: a boto3 s3 client
        resource: a boto3 s3 resource

    Optional arguments:
        logger: a splunk HEC handler for the logging module
    """

    def __init__(self,
                 bucket: str,
                 remoteFiles: str,
                 client: boto3.client,
                 resource: boto3.resource,
                 logger: logging.Logger = None):
        super().__init__()
        self.bucket = bucket
        self.remoteFiles = remoteFiles
        self.client = client
        self.resource = resource
        self.logger = logger

    @staticmethod
    def formatFolder(remoteFolder: str) -> str:
        """Ensure a non-empty folder path ends with '/'.

        An empty string is returned unchanged (as '') so that prefix
        concatenation in main() still produces a valid key prefix.
        """
        try:
            if remoteFolder[-1] != '/':
                remoteFolder = f'{remoteFolder}/'
        except IndexError:
            # remoteFolder was '' -- indexing [-1] raised, keep it empty.
            remoteFolder = ''
        return remoteFolder

    @staticmethod
    def generateDate(date_format: str = '%Y-%m-%d',
                     time: datetime.datetime = None) -> str:
        """Generate a human readable time string for a datetime object.

        By default will use today with %Y-%m-%d format.
        """
        if time is None:
            time = datetime.date.today()
        return time.strftime(date_format)

    @staticmethod
    def getEpochTime(time: datetime.datetime) -> float:
        """Return the POSIX (epoch) timestamp for *time*."""
        return datetime.datetime.timestamp(time)

    def getS3Files(self):
        """List objects under the configured prefix into self.fileDict.

        Each record gains a '_time' epoch field and has 'LastModified'
        converted from datetime to an epoch float (Splunk-friendly).
        Returns self so calls can be chained (e.g. .sendToSplunk()).
        """
        self.paginator = self.client.get_paginator('list_objects')
        self.iterator = self.paginator.paginate(Bucket=self.bucket,
                                                Prefix=self.remoteFiles)
        self.filtered = self.iterator.search('Contents[*]')
        self.fileDict = dict()
        counter = 0
        for i in self.filtered:
            # search() yields None for pages without a 'Contents' key
            # (prefix matched nothing) -- skip instead of crashing on
            # None.update() below.
            if i is None:
                continue
            now = self.getEpochTime(datetime.datetime.now())
            i.update({'_time': now})
            i['LastModified'] = self.getEpochTime(i['LastModified'])
            self.fileDict[counter] = i
            counter += 1
        return self

    def sendToSplunk(self):
        """Emit every collected file record at the custom 'splunk' level.

        Raises Exception (chained from AttributeError) if the custom
        log level was never registered on the logger.
        """
        for i in self.fileDict.values():
            try:
                self.logger.splunk(i)
            except AttributeError as err:
                raise Exception("No logger level exists for the custom Splunk"
                                " level. Try creating a customLogLevel for"
                                " splunk and try again.") from err
        return self


def _call():
    """Configure AWS/Splunk settings and run main().

    Exits via SystemExit if the Splunk host cannot be resolved.
    """
    # Define env variables for AWS boto3
    os.environ['AWS_PROFILE'] = 'netacea'
    os.environ['AWS_DEFAULT_REGION'] = 'eu-west-1'

    # Define bucket and file names
    bucket = 'td-ingest-storage-williamhill'
    remoteFolder = 'bot_predictions/'
    remoteFilePrefix = 'blocking_suggestions_'
    append_date = True
    date_format = '%Y-%m-%d'

    # Define splunk hec handler for logging.
    # NOTE(security): the HEC token is hard-coded in source -- move it to an
    # environment variable or secrets store rather than committing it.
    try:
        splunk_handler = SplunkHecHandler('sc1uxpremn81.prod.williamhill.plc',
                                          'ea641e31-870e-4f5f-965f'
                                          '-57e0c9a2aa3d',
                                          port=8088, proto='https',
                                          ssl_verify=False,
                                          sourcetype='httpevent')
    except gaierror:
        # Host name could not be resolved -- nothing useful can run.
        raise SystemExit

    logger = logging.getLogger('SplunkHecHandlerExample')
    logger.setLevel(logging.DEBUG)
    logger.addHandler(splunk_handler)

    # Register the custom 'splunk' level used by sendToSplunk().
    customLogLevel.addLoggingLevel('splunk', 15, logging)

    main(bucket, remoteFolder, remoteFilePrefix, append_date, date_format,
         logger)


def main(bucket: str,
         remoteFolder: str,
         remoteFilePrefix: str,
         append_date: bool,
         date_format: str,
         logger: logging.Logger):
    """Build the S3 prefix, list matching objects and ship them to Splunk."""
    remoteFileDate = (getFileDetails.generateDate(date_format)
                      if append_date else '')
    remoteFolder = getFileDetails.formatFolder(remoteFolder)
    remoteFiles = f'{remoteFolder}{remoteFilePrefix}{remoteFileDate}'

    client = boto3.client('s3')
    resource = boto3.resource('s3')

    instance = getFileDetails(bucket, remoteFiles, client, resource, logger)
    instance.getS3Files().sendToSplunk()


if __name__ == '__main__':
    warnings.filterwarnings("ignore")
    _call()