Python Boto3 CloudFront对象使用计数_Python_Amazon Web Services_Boto3_Amazon Cloudfront

Python Boto3 CloudFront对象使用计数

python amazon-web-services

Python Boto3 CloudFront对象使用计数,python,amazon-web-services,boto3,amazon-cloudfront,Python,Amazon Web Services,Boto3,Amazon Cloudfront。我想计算一下CloudFront分发（distribution）中所有对象被单独命中的次数，这样我就可以生成一个excel表来跟踪使用情况统计。我一直在查看CloudFront的boto3文档，但我无法确定在哪里可以访问这些信息。我看到AWS Cloudfront控制台生成了一个“流行对象”报告。我不确定是否有人知道如何在boto3中获取AWS为该报告生成的数字？如果无法通过Boto3访问，是否应该使用AWS CLI命令？更新：下面是我最后使用的伪代码，希望它是其他人的起点： import boto3 import …

我想计算一下CloudFront分发（distribution）中每个对象各自被命中的次数，这样我就可以生成一个Excel表来跟踪使用情况统计。我一直在查看CloudFront的boto3文档，但无法确定在哪里可以获取这些信息。我看到AWS CloudFront控制台会生成一个“热门对象”（Popular Objects）报告。不知道是否有人了解如何在boto3中获取AWS为该报告生成的这些数字？

如果无法通过Boto3访问，是否应该改用AWS CLI命令？

更新：

下面是我最后使用的伪代码，希望它是其他人的起点：

import boto3
import gzip
from datetime import datetime, date, timedelta
import shutil
from xlwt import Workbook

def _log_date_from_key(key, distribution_id):
    """Return the calendar date encoded in a log object key.

    The key is expected to look like
    ``<distribution_id>.YYYY-MM-DD-HH.<unique-id>.gz``; the trailing
    ``-HH`` hour component is discarded.

    :param key: S3 object key of the log file
    :param distribution_id: identifier prefix to strip from the key
    :return: ``datetime.date`` parsed from the key
    :raises ValueError: if the date components are not integers
    :raises IndexError: if fewer than three date components are present
    """
    stamp = str(key).replace(distribution_id + '.', '').split('.')[0]
    # stamp is 'YYYY-MM-DD-HH'; [:-3] removes the '-HH' suffix
    parts = stamp[:-3].split('-')
    return date(year=int(parts[0]), month=int(parts[1]), day=int(parts[2]))


def _tally_log_lines(lines, totals):
    """Accumulate per-URL hit and byte counts from one log file.

    :param lines: iterable of text lines from a single log file; the
        first two lines are skipped as header lines
    :param totals: dict mapping field 7 of each record to
        ``{'count': int, 'byteCount': int}``; updated in place
    """
    for index, line in enumerate(lines):
        if index < 2:
            continue
        fields = line.split('\t')
        if len(fields) < 8:
            # blank or truncated record: nothing to attribute the hit to
            continue
        entry = totals.setdefault(fields[7], {'count': 0, 'byteCount': 0})
        entry['count'] += 1
        entry['byteCount'] += int(fields[3])


def analyze(timeInterval):
    """
    Summarise CloudFront log files stored in S3 into an Excel workbook.

    Every object in ``AWS_STORAGE_BUCKET_NAME`` whose key encodes a date
    inside the requested window is downloaded to ``logFile.gz`` in the
    current directory (overwritten for each object) and parsed. For each
    distinct value of tab-separated field 7 the number of records and the
    sum of field 3 are accumulated, then written to
    ``<timeInterval>_Log_Result_<timestamp>.xls``.

    Each log file is tallied exactly once; records from earlier files are
    not re-counted when later files are processed.

    :param timeInterval: 'daily', 'weekly', 'monthly' or 'yearly'. Any
        other value disables the date filter, so every object is analysed.
    :return: None; the result is the saved workbook
    """
    # NOTE(review): the window is measured from the local date, while the
    # dates in log file names are presumably UTC - confirm if boundaries
    # matter.
    windows = {
        'daily': timedelta(days=1),
        'weekly': timedelta(days=7),
        'monthly': timedelta(weeks=4),
        'yearly': timedelta(weeks=52),
    }
    window = windows.get(timeInterval)
    today = date.today()

    s3 = boto3.resource('s3', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=PASSWORD)
    bucket = s3.Bucket(AWS_STORAGE_BUCKET_NAME)

    # create excel workbook/sheet that we'll save results to
    wb = Workbook()
    sheet1 = wb.add_sheet('Log Results By URL')
    sheet1.write(0, 1, 'File')
    sheet1.write(0, 2, 'Total Hit Count')
    sheet1.write(0, 3, 'Total Byte Count')

    totals = {}
    for item in bucket.objects.all():
        log_date = _log_date_from_key(item.key, CLOUDFRONT_IDENTIFIER)
        if window is not None and log_date <= today - window:
            # file not within datetime restrictions, skip it
            continue
        print('datetimeRef', log_date)
        print('currentDatetime', today)
        print('Analyzing File:', item.key)

        # download the file, then stream the decompressed text straight
        # into the tally (no intermediate uncompressed copy on disk)
        bucket.download_file(item.key, 'logFile.gz')
        with gzip.open('logFile.gz', 'rt') as log_file:
            _tally_log_lines(log_file, totals)

    # write one row per URL, starting below the header row
    for row, (url, stats) in enumerate(totals.items(), start=1):
        sheet1.write(row, 1, str(url))
        sheet1.write(row, 2, stats['count'])
        sheet1.write(row, 3, stats['byteCount'])

    # ':' is not allowed in file names on some platforms
    safeDateTime = str(datetime.now()).replace(':', '.')

    # save the workbook
    wb.save(str(timeInterval) + '_Log_Result_' + safeDateTime + '.xls')


# Script entry point: when this file is executed directly (rather than
# imported), run the analysis with the 'daily' interval.
if __name__ == '__main__':
    analyze('daily')

import boto3
import gzip
from datetime import datetime, date, timedelta
import shutil
from xlwt import Workbook

def analyze(timeInterval):
    """
    analyze usage data in cloudfront
    :param domain:
    :param id:
    :param password:
    :return: usage data
    """
    outputList = []
    outputDict = {}

    s3 = boto3.resource('s3', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=PASSWORD)
    data = s3.Bucket(AWS_STORAGE_BUCKET_NAME)
    count = 0
    currentDatetime = str(datetime.now()).split(' ')
    currentDatetime = currentDatetime[0].split('-')
    currentdatetimeYear = int(currentDatetime[0])
    currentdatetimeMonth = int(currentDatetime[1])
    currentdatetimeDay = int(currentDatetime[2])
    currentDatetime = date(year=currentdatetimeYear, month=currentdatetimeMonth, day=currentdatetimeDay)

    # create excel workbook/sheet that we'll save results to
    wb = Workbook()
    sheet1 = wb.add_sheet('Log Results By URL')
    sheet1.write(0, 1, 'File')
    sheet1.write(0, 2, 'Total Hit Count')
    sheet1.write(0, 3, 'Total Byte Count')

    for item in data.objects.all():
        count += 1
        # print(count, '\n', item)
        # print(item.key)
        datetimeRef = str(item.key).replace(CLOUDFRONT_IDENTIFIER+'.', '')
        datetimeRef = datetimeRef.split('.')
        datetimeRef = datetimeRef[0]
        datetimeRef = str(datetimeRef[:-3]).split('-')
        datetimeRefYear = int(datetimeRef[0])
        datetimeRefMonth = int(datetimeRef[1])
        datetimeRefDay = int(datetimeRef[2])
        datetimeRef = date(year=datetimeRefYear, month=datetimeRefMonth, day=datetimeRefDay)
        # print('comparing', datetimeRef - timedelta(days=1), currentDatetime)
        if timeInterval == 'daily':
            if datetimeRef > currentDatetime - timedelta(days=1):
                pass
            else:
                # file not within datetime restrictions, don't do stuff
                continue
        elif timeInterval == 'weekly':
            if datetimeRef > currentDatetime - timedelta(days=7):
                pass
            else:
                # file not within datetime restrictions, don't do stuff
                continue
        elif timeInterval == 'monthly':
            if datetimeRef > currentDatetime - timedelta(weeks=4):
                pass
            else:
                # file not within datetime restrictions, don't do stuff
                continue
        elif timeInterval == 'yearly':
            if datetimeRef > currentDatetime - timedelta(weeks=52):
                pass
            else:
                # file not within datetime restrictions, don't do stuff
                continue
        print('datetimeRef', datetimeRef)
        print('currentDatetime', currentDatetime)
        print('Analyzing File:', item.key)

        # download the file
        s3.Bucket(AWS_STORAGE_BUCKET_NAME).download_file(item.key, 'logFile.gz')

        # unzip the file
        with gzip.open('logFile.gz', 'rb') as f_in:
            with open('logFile.txt', 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)

        # read the text file and add contents to a list
        with open('logFile.txt', 'r') as f:
            lines = f.readlines()
            localcount = -1
            for line in lines:
                localcount += 1
                if localcount < 2:
                    continue
                else:
                    outputList.append(line)

        # print(outputList)
        # iterate through the data collecting hit counts and byte size
        for dataline in outputList:
            data = dataline.split('\t')
            # print(data)
            if outputDict.get(data[7]) is None:
                outputDict[data[7]] = {'count': 1, 'byteCount': int(data[3])}
            else:
                td = outputDict[data[7]]
                outputDict[data[7]] = {'count': int(td['count']) + 1, 'byteCount': int(td['byteCount']) + int(data[3])}

    # print(outputDict)
    #  iterate through the result dictionary and write to the excel sheet
    outputDictKeys = outputDict.keys()
    count = 1
    for outputDictKey in outputDictKeys:
        sheet1.write(count, 1, str(outputDictKey))
        sheet1.write(count, 2, outputDict[outputDictKey]['count'])
        sheet1.write(count, 3, outputDict[outputDictKey]['byteCount'])
        count += 1
    safeDateTime = str(datetime.now()).replace(':', '.')

    # save the workbook
    wb.save(str(timeInterval)+str('_Log_Result_'+str(safeDateTime)) + '.xls')


if __name__ == '__main__':
    analyze('daily')

来自Amazon CloudFront文档（访问日志）：

您可以将CloudFront配置为创建日志文件，其中包含CloudFront收到的每个用户请求的详细信息。这些称为标准日志，也称为访问日志。这些标准日志可用于Web和RTMP分发。如果启用标准日志，还可以指定希望CloudFront保存文件的Amazon S3存储桶。

日志文件可能相当大，但您可以使用Amazon Athena对其进行查询分析。

因此，我最后要做的是使用boto3获取并分析上述标准日志！谢谢你的帮助，约翰！