Python 如何通过Lambda函数将存储在S3中的文件(如Excel文件)复制到Google Drive中?
我花了两天时间研究这个问题，在网上找不到直接的答案，所以既然现在已经解决了，我认为在这里自问自答是明智的。本质上，我在 AWS Lambda 中运行 Python 代码，该代码收集一组数据、对其进行处理，并生成一个包含团队所需信息的 Excel 文件。我需要把这个文件推送到 Google Drive 的一个共享文件夹中，这样团队成员都可以看到这些信息。问题是，我试图使用 Google API 中的 MediaFileUpload 来实现这一点。此方法将文件名作为字符串输入，如下所示（见倒数第三行）：
嗯,这对我计算机上的本地文件很好,但是对于s3文件我该怎么做呢?我无法将“s3://mybucket/my_file.xlsx”传递给MediaFileUpload()方法。为了解决这个问题,我只需要使用Google Drive API中的MediaIoBaseUpload()方法。诀窍是从s3文件中读取xlsx文件内容,将其拉入BytesIO,然后将其推入GoogleDrive MediaIoBaseUpload方法。然后一切都成功了
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request as gRequest
from apiclient.http import MediaFileUpload, BytesIO, MediaIoBaseUpload
def upload_file_to_gdrive(folder_id, filename, CRED_FOLDER, S3_FOLDER):
    """Copy an .xlsx file stored in S3 into a Google Drive folder.

    Parameters
    ----------
    folder_id : str
        Google Drive ID of the destination (shared) folder.
    filename : str
        Bare file name, e.g. '2020-07-02_site_ads_txt.xlsx'.
    CRED_FOLDER : str
        S3 prefix holding 'credentials.json' and 'token.pickle'.
    S3_FOLDER : str
        S3 prefix (bucket path) holding the file to upload.

    Returns
    -------
    dict
        Drive API response containing the created file's 'id'.
    """
    # Local imports keep this fix self-contained; s3fs is third-party
    # (pip install s3fs) and was used but never imported in the original.
    import json
    from io import BytesIO
    import s3fs

    # BUG FIX: the original had invalid syntax (key=<AWS_KEY>). Never
    # hard-code credentials; with no key/secret, s3fs falls back to the
    # standard boto credential chain (env vars, the Lambda's IAM role, ...).
    s3 = s3fs.S3FileSystem(anon=False)

    # If modifying these scopes, delete the file token.pickle.
    SCOPES = [
        'https://www.googleapis.com/auth/drive.file',
        'https://www.googleapis.com/auth/drive',
        'https://www.googleapis.com/auth/drive.appdata',
    ]
    creds = None
    # token.pickle stores the user's access and refresh tokens; it is
    # created automatically the first time the authorization flow completes.
    # BUG FIX: the token lives in S3, so existence must be tested with
    # s3.exists() — os.path.exists() on an S3 key is always False, which
    # forced a fresh OAuth flow on every call.
    if s3.exists(CRED_FOLDER + 'token.pickle'):
        with s3.open(CRED_FOLDER + 'token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        print('creds didnt exist or were invalid')
        if creds and creds.expired and creds.refresh_token:
            print('creds were expired')
            creds.refresh(gRequest())
        else:
            # BUG FIX: from_client_secrets_file() expects a *path*, not an
            # open file object. The secrets sit in S3, so parse the JSON
            # ourselves and hand the dict to from_client_config().
            with s3.open(CRED_FOLDER + 'credentials.json', 'rb') as f:
                client_config = json.load(f)
            flow = InstalledAppFlow.from_client_config(client_config, SCOPES)
            # NOTE(review): run_local_server() needs a browser; it cannot
            # complete inside Lambda — generate token.pickle once locally
            # and upload it to CRED_FOLDER.
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run.
        with s3.open(CRED_FOLDER + 'token.pickle', 'wb') as token:
            pickle.dump(creds, token)

    service = build('drive', 'v3', credentials=creds, cache_discovery=False)

    # filename is still like '2020-07-02_site_ads_txt.xlsx';
    # S3_FOLDER is a valid s3 bucket path.
    with s3.open(S3_FOLDER + filename, 'rb') as f:
        fbytes = BytesIO(f.read())
    # BUG FIX: the media mimetype must describe the *source* bytes (.xlsx).
    # Putting the Google Sheets type only in the metadata asks Drive to
    # convert the upload into a native spreadsheet.
    media = MediaIoBaseUpload(
        fbytes,
        mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    )
    upload_metadata = {
        'name': filename,
        'parents': [folder_id],
        'mimeType': 'application/vnd.google-apps.spreadsheet',
    }
    return service.files().create(
        body=upload_metadata, media_body=media, fields='id',
    ).execute()
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request as gRequest
from apiclient.http import MediaFileUpload, BytesIO, MediaIoBaseUpload

def upload_file_to_gdrive(folder_id, filename, CRED_FOLDER, S3_FOLDER):
    s3 = s3fs.S3FileSystem(anon=False, key=<AWS_KEY>, secret=<AWS_SECRET>)
    # 如果修改这些作用域，请删除文件 token.pickle。
    SCOPES = ['https://www.googleapis.com/auth/drive.file', 'https://www.googleapis.com/auth/drive',
              'https://www.googleapis.com/auth/drive.appdata']
    creds = None
    # 文件 token.pickle 存储用户的访问令牌和刷新令牌，
    # 在第一次完成授权流时自动创建。
    if os.path.exists(CRED_FOLDER + 'token.pickle'):
        with s3.open(CRED_FOLDER + 'token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # 如果没有可用的（有效）凭据，请让用户登录。
    if not creds or not creds.valid:
        print('creds didnt exist or were invalid')
        if creds and creds.expired and creds.refresh_token:
            print('creds were expired')
            creds.refresh(gRequest())
        else:
            with s3.open(CRED_FOLDER + 'credentials.json', 'rb') as f:
                flow = InstalledAppFlow.from_client_secrets_file(f, SCOPES)
                creds = flow.run_local_server(port=0)
        # 保存凭据以供下次运行使用
        with s3.open(CRED_FOLDER + 'token.pickle', 'wb') as token:
            pickle.dump(creds, token)
    service = build('drive', 'v3', credentials=creds, cache_discovery=False)
    ### 文件名仍然类似于 '2020-07-02_site_ads_txt.xlsx'
    ### 但 S3_FOLDER 是一个有效的 s3 存储桶路径
    with s3.open(S3_FOLDER + filename, 'rb') as f:
        fbytes = BytesIO(f.read())
    media = MediaIoBaseUpload(fbytes, mimetype='application/vnd.google-apps.spreadsheet')
    upload_metadata = {'name': filename, 'parents': [folder_id], 'mimeType': 'application/vnd.google-apps.spreadsheet'}
    return service.files().create(body=upload_metadata, media_body=media, fields='id').execute()
为了解决这个问题,我只需要使用Google Drive API中的MediaIoBaseUpload()方法。诀窍是从s3文件中读取xlsx文件内容,将其拉入BytesIO,然后将其推入GoogleDrive MediaIoBaseUpload方法。然后一切都成功了
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request as gRequest
from apiclient.http import MediaFileUpload, BytesIO, MediaIoBaseUpload
def upload_file_to_gdrive(folder_id, filename, CRED_FOLDER, S3_FOLDER):
    """Copy an .xlsx file stored in S3 into a Google Drive folder.

    Parameters
    ----------
    folder_id : str
        Google Drive ID of the destination (shared) folder.
    filename : str
        Bare file name, e.g. '2020-07-02_site_ads_txt.xlsx'.
    CRED_FOLDER : str
        S3 prefix holding 'credentials.json' and 'token.pickle'.
    S3_FOLDER : str
        S3 prefix (bucket path) holding the file to upload.

    Returns
    -------
    dict
        Drive API response containing the created file's 'id'.
    """
    # Local imports keep this fix self-contained; s3fs is third-party
    # (pip install s3fs) and was used but never imported in the original.
    import json
    from io import BytesIO
    import s3fs

    # BUG FIX: the original had invalid syntax (key=<AWS_KEY>). Never
    # hard-code credentials; with no key/secret, s3fs falls back to the
    # standard boto credential chain (env vars, the Lambda's IAM role, ...).
    s3 = s3fs.S3FileSystem(anon=False)

    # If modifying these scopes, delete the file token.pickle.
    SCOPES = [
        'https://www.googleapis.com/auth/drive.file',
        'https://www.googleapis.com/auth/drive',
        'https://www.googleapis.com/auth/drive.appdata',
    ]
    creds = None
    # token.pickle stores the user's access and refresh tokens; it is
    # created automatically the first time the authorization flow completes.
    # BUG FIX: the token lives in S3, so existence must be tested with
    # s3.exists() — os.path.exists() on an S3 key is always False, which
    # forced a fresh OAuth flow on every call.
    if s3.exists(CRED_FOLDER + 'token.pickle'):
        with s3.open(CRED_FOLDER + 'token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        print('creds didnt exist or were invalid')
        if creds and creds.expired and creds.refresh_token:
            print('creds were expired')
            creds.refresh(gRequest())
        else:
            # BUG FIX: from_client_secrets_file() expects a *path*, not an
            # open file object. The secrets sit in S3, so parse the JSON
            # ourselves and hand the dict to from_client_config().
            with s3.open(CRED_FOLDER + 'credentials.json', 'rb') as f:
                client_config = json.load(f)
            flow = InstalledAppFlow.from_client_config(client_config, SCOPES)
            # NOTE(review): run_local_server() needs a browser; it cannot
            # complete inside Lambda — generate token.pickle once locally
            # and upload it to CRED_FOLDER.
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run.
        with s3.open(CRED_FOLDER + 'token.pickle', 'wb') as token:
            pickle.dump(creds, token)

    service = build('drive', 'v3', credentials=creds, cache_discovery=False)

    # filename is still like '2020-07-02_site_ads_txt.xlsx';
    # S3_FOLDER is a valid s3 bucket path.
    with s3.open(S3_FOLDER + filename, 'rb') as f:
        fbytes = BytesIO(f.read())
    # BUG FIX: the media mimetype must describe the *source* bytes (.xlsx).
    # Putting the Google Sheets type only in the metadata asks Drive to
    # convert the upload into a native spreadsheet.
    media = MediaIoBaseUpload(
        fbytes,
        mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    )
    upload_metadata = {
        'name': filename,
        'parents': [folder_id],
        'mimeType': 'application/vnd.google-apps.spreadsheet',
    }
    return service.files().create(
        body=upload_metadata, media_body=media, fields='id',
    ).execute()
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request as gRequest
from apiclient.http import MediaFileUpload, BytesIO, MediaIoBaseUpload

def upload_file_to_gdrive(folder_id, filename, CRED_FOLDER, S3_FOLDER):
    s3 = s3fs.S3FileSystem(anon=False, key=<AWS_KEY>, secret=<AWS_SECRET>)
    # 如果修改这些作用域，请删除文件 token.pickle。
    SCOPES = ['https://www.googleapis.com/auth/drive.file', 'https://www.googleapis.com/auth/drive',
              'https://www.googleapis.com/auth/drive.appdata']
    creds = None
    # 文件 token.pickle 存储用户的访问令牌和刷新令牌，
    # 在第一次完成授权流时自动创建。
    if os.path.exists(CRED_FOLDER + 'token.pickle'):
        with s3.open(CRED_FOLDER + 'token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # 如果没有可用的（有效）凭据，请让用户登录。
    if not creds or not creds.valid:
        print('creds didnt exist or were invalid')
        if creds and creds.expired and creds.refresh_token:
            print('creds were expired')
            creds.refresh(gRequest())
        else:
            with s3.open(CRED_FOLDER + 'credentials.json', 'rb') as f:
                flow = InstalledAppFlow.from_client_secrets_file(f, SCOPES)
                creds = flow.run_local_server(port=0)
        # 保存凭据以供下次运行使用
        with s3.open(CRED_FOLDER + 'token.pickle', 'wb') as token:
            pickle.dump(creds, token)
    service = build('drive', 'v3', credentials=creds, cache_discovery=False)
    ### 文件名仍然类似于 '2020-07-02_site_ads_txt.xlsx'
    ### 但 S3_FOLDER 是一个有效的 s3 存储桶路径
    with s3.open(S3_FOLDER + filename, 'rb') as f:
        fbytes = BytesIO(f.read())
    media = MediaIoBaseUpload(fbytes, mimetype='application/vnd.google-apps.spreadsheet')
    upload_metadata = {'name': filename, 'parents': [folder_id], 'mimeType': 'application/vnd.google-apps.spreadsheet'}
    return service.files().create(body=upload_metadata, media_body=media, fields='id').execute()