Python: How do I copy a file stored in S3 (e.g. an Excel file) to Google Drive from a Lambda function?

Tags: python, amazon-web-services, amazon-s3, google-drive-api

I spent two days digging into this and couldn't find a straightforward answer online, so now that I've worked it out, I figured a self-answered Q&A here would be worthwhile.

Essentially, I run Python code in AWS Lambda that gathers a bunch of data, processes it, and produces an Excel file with the information my team needs. I need to push that file to Google Drive (a shared folder) so everyone on the team can see it.

The problem is that I tried to do this with MediaFileUpload from the Google API client. That method takes the filename as a string, as shown below (third line from the end):
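(The original snippet didn't survive the copy into this post; below is a minimal sketch of that local-file usage, assuming a Drive v3 `service` client has already been built as in the answer further down, with the path and folder ID as placeholders.)

# Minimal sketch of the local-file approach (placeholders throughout).
from googleapiclient.http import MediaFileUpload

# 'service' is assumed to be a Drive v3 client built as in the answer below.
media = MediaFileUpload('/tmp/my_file.xlsx', mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
upload_metadata = {'name': 'my_file.xlsx', 'parents': ['<GDRIVE_FOLDER_ID>']}
service.files().create(body=upload_metadata, media_body=media, fields='id').execute()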


That works fine for a local file on my machine, but what do I do with an S3 file? I can't just pass 's3://mybucket/my_file.xlsx' to MediaFileUpload().

To solve this, I just needed to use the MediaIoBaseUpload() method from the Google Drive API instead. The trick is to read the xlsx contents out of the S3 file, pull them into a BytesIO, and push that into Google Drive's MediaIoBaseUpload. Then everything worked:

import pickle
import json
import s3fs
from io import BytesIO
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseUpload
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request as gRequest


def upload_file_to_gdrive(folder_id, filename, CRED_FOLDER, S3_FOLDER):
    # s3fs lets us treat S3 objects like files; the AWS keys stay as placeholders here
    s3 = s3fs.S3FileSystem(anon=False, key=<AWS_KEY>, secret=<AWS_SECRET>)

    # If modifying these scopes, delete the file token.pickle.
    SCOPES = ['https://www.googleapis.com/auth/drive.file',
              'https://www.googleapis.com/auth/drive',
              'https://www.googleapis.com/auth/drive.appdata']
    creds = None
    # The file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time. It lives in S3 here, so check for it with s3fs rather than os.path.
    if s3.exists(CRED_FOLDER + 'token.pickle'):
        with s3.open(CRED_FOLDER + 'token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        print('creds did not exist or were invalid')
        if creds and creds.expired and creds.refresh_token:
            print('creds were expired')
            creds.refresh(gRequest())
        else:
            # from_client_secrets_file() expects a local path, so load the JSON
            # from S3 ourselves and hand the dict to from_client_config().
            with s3.open(CRED_FOLDER + 'credentials.json', 'rb') as f:
                flow = InstalledAppFlow.from_client_config(json.load(f), SCOPES)
                creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with s3.open(CRED_FOLDER + 'token.pickle', 'wb') as token:
            pickle.dump(creds, token)

    service = build('drive', 'v3', credentials=creds, cache_discovery=False)
    ### filename is still like '2020-07-02_site_ads_txt.xlsx'
    ### but S3_FOLDER is a valid s3 bucket path
    with s3.open(S3_FOLDER + filename, 'rb') as f:
        # Read the xlsx bytes out of S3 and wrap them in BytesIO so the Drive
        # client can stream them via MediaIoBaseUpload.
        fbytes = BytesIO(f.read())
        media = MediaIoBaseUpload(fbytes, mimetype='application/vnd.google-apps.spreadsheet')
        upload_metadata = {'name': filename, 'parents': [folder_id],
                           'mimeType': 'application/vnd.google-apps.spreadsheet'}
        return service.files().create(body=upload_metadata, media_body=media, fields='id').execute()
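
For reference, a hypothetical call; the folder ID, bucket prefixes, and filename below are all placeholders for illustration:

# Hypothetical invocation; every value below is a placeholder.
result = upload_file_to_gdrive(
    folder_id='<GDRIVE_FOLDER_ID>',            # Drive folder shared with the team
    filename='2020-07-02_site_ads_txt.xlsx',   # object name within the S3 "folder"
    CRED_FOLDER='mybucket/credentials/',       # S3 prefix holding credentials.json / token.pickle
    S3_FOLDER='mybucket/reports/')             # S3 prefix holding the Excel file
print(result['id'])  # ID of the newly created Drive file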
