Python Lambda fn to get an S3 file, use it to alter another S3 file, then write it back to S3
Here is a piece of Python code that I use to manipulate a file, table1, using a reference file, pds_ref. So pds_ref looks like this:
|THE_TABLE|THE_KEY
|table1|3
|table1|1
And table1 looks like this:
|ID|NAME
|1|Imran
|2|Peter
|3|Pedro
|4|Carlos
The idea is to use the references in pds_ref to delete, from whichever table is listed, the records with the corresponding keys... in this case, delete 1 and 3 from table1.
This Python code works when run locally as plain Python:

import csv
with open("pds_ref", "rb") as ref_file:
    refreader = csv.DictReader(ref_file, delimiter='|')
    reftable = [row for row in refreader]
    refheader = refreader.fieldnames
for refrow in reftable:
    print refrow['THE_TABLE']
    print refrow['THE_KEY']
    with open(refrow['THE_TABLE'], "rb") as infile:
        reader = csv.DictReader(infile, delimiter='|')
        table = [row for row in reader]
        header = reader.fieldnames
    with open(refrow['THE_TABLE'], "wb") as outfile:
        writer = csv.DictWriter(outfile, header, delimiter='|')
        writer.writeheader()
        for row in table:
            if row['ID'] != refrow['THE_KEY']:
                writer.writerow(row)
Now I want to do this with Lambda, so that the function is triggered whenever someone uploads the pds_ref file.
I got as far as fetching the pds_ref file and reading each row, but I am having trouble opening the table1 file and writing the modified version back. Any help appreciated.
import boto3
import csv
import io

def lambda_handler(event, context):
    s3 = boto3.client("s3")
    if event:
        print("Event : ", event)
        file_obj = event["Records"][0]
        filename = str(file_obj['s3']['object']['key'])
        bucketname = str(file_obj['s3']['bucket']['name'])
        print("Filename: ", filename)
        print("Bucket: ", bucketname)
        fileObj = s3.get_object(Bucket="lambda-trig1", Key=filename)
        print("fileObj: ", fileObj)
        file_content = fileObj["Body"].read().decode('utf-8')
        print(file_content)
        f_pds_ref = s3.get_object(Bucket="lambda-trig1", Key='pds_ref')
        fc_pds_ref = f_pds_ref['Body'].read().decode('utf-8').splitlines(True)
        for refrow in csv.DictReader(fc_pds_ref, delimiter='|'):
            print(refrow['THE_TABLE'])
            print(refrow['THE_KEY'])
            current_table = refrow['THE_TABLE']
            current_key = refrow['THE_KEY']
            f_the_next_table = s3.get_object(Bucket="lambda-trig1", Key=current_table)
            fc_the_next_table = f_the_next_table['Body'].read().decode('utf-8').splitlines(True)
            with open(refrow[f_the_next_table], "rbw") as infile:
                reader = csv.DictReader(infile, delimiter='|')
                # table = [row for row in reader]
                # header = reader.fieldnames
                # print (header)
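The missing piece, writing the filtered rows back to S3, does not need a local file at all: the object can be rebuilt in memory and uploaded with put_object. A minimal sketch of just that step, assuming the filtered rows and the fieldnames are already in hand (the function and variable names here are illustrative, not from the code above):

import io
import csv
import boto3

s3 = boto3.client("s3")

def write_rows_back(bucket, key, fieldnames, rows):
    # Rebuild the pipe-delimited file in memory instead of on local disk.
    buf = io.StringIO()
    writer = csv.DictWriter(buf, fieldnames=fieldnames, delimiter='|')
    writer.writeheader()
    writer.writerows(rows)
    # Overwrite the table object in S3 with the filtered content.
    s3.put_object(Bucket=bucket, Key=key, Body=buf.getvalue())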
Before running the process that updates the other tables, you want to make sure it only runs for Put events on pds_ref.

Here are some additions to your current steps, after reading pds_ref:

- Group all THE_KEYs by THE_TABLE. This lets you do a single pass per table object instead of fetching and rewriting the same table once per key (a short sketch of the grouped structure follows this list).
- For each table group, read the table object, filter out the rows whose IDs are in that table's key group, and write the filtered content back to the table object.
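For illustration, grouping the pds_ref rows by table produces a mapping from table name to the keys to delete; a minimal standalone sketch with the rows hard-coded:

from collections import defaultdict
from csv import DictReader

pds_ref_lines = ["|THE_TABLE|THE_KEY", "|table1|3", "|table1|1"]
table_group = defaultdict(list)
for row in DictReader(pds_ref_lines, delimiter='|'):
    table_group[row['THE_TABLE']].append(row['THE_KEY'])
print(dict(table_group))  # {'table1': ['3', '1']}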
from contextlib import contextmanager
from csv import DictReader, DictWriter
from collections import defaultdict
import io
import boto3

s3 = boto3.client("s3")
BUCKET = "creeper-bank"
DELIMITER = "|"
TABLE_OBJECT_COLUMNS = ['', 'ID', 'NAME']
WATCH_KEY = "pds_ref"


def content_as_dict_reader(content):
    yield DictReader(
        content.splitlines(),
        delimiter=DELIMITER)


@contextmanager
def tables_and_lines_for_deletion():
    object_ = s3.get_object(
        Bucket=BUCKET, Key=WATCH_KEY
    )
    content = object_["Body"].read().decode('utf-8')
    return content_as_dict_reader(content)


@contextmanager
def table_record(table):
    object_ = s3.get_object(
        Bucket=BUCKET, Key=table
    )
    content = object_["Body"].read().decode('utf-8')
    return content_as_dict_reader(content)


def object_table(table, record):
    with io.StringIO() as file_:
        writer = DictWriter(
            file_,
            fieldnames=TABLE_OBJECT_COLUMNS,
            delimiter=DELIMITER
        )
        writer.writeheader()
        writer.writerows(list(record))
        s3.put_object(
            Bucket=BUCKET,
            Key=table,
            Body=file_.getvalue()
        )


def lambda_handler(event, context):
    if not event:
        print("Function must be triggered via a published event")
        return
    event_record, *_ = event["Records"]
    match_watchkey = True
    try:
        event_name = str(event_record['eventName'])
        if "Put" not in event_name:
            match_watchkey = False
        s3_event = event_record['s3']
        print("checking if S3 event is a put one for :WATCH_KEY")
        key = s3_event['object']['key']
        bucket = s3_event['bucket']['name']
        if key != WATCH_KEY:
            match_watchkey = False
        if bucket != BUCKET:
            match_watchkey = False
    except KeyError:
        # Handle when event_record isn't an S3 one.
        match_watchkey = False
    if not match_watchkey:
        print("Published event did not match :WATCH_KEY.")
        return
    print("S3 event is a put one for :WATCH_KEY!")
    table_group = defaultdict(list)
    print("Reading :WATCH_KEY content")
    with tables_and_lines_for_deletion() as tables:
        for dct in tables:
            table_k = dct['THE_TABLE']
            table_v = dct['THE_KEY']
            table_group[table_k].append(table_v)
    print("Updating objects found in :WATCH_KEY content")
    for t, ids in table_group.items():
        record_update = None
        with table_record(t) as record:
            record_update = (
                dct
                for dct in record
                if dct["ID"] not in ids
            )
        object_table(t, record_update)
    print("Update completed!")
    return
A question from the comments: why is TABLE_OBJECT_COLUMNS set to ['', 'ID', 'NAME'] rather than just ['ID', 'NAME']? Because every row of your tables begins with a leading pipe character (|ID|NAME), so when a row is parsed as CSV there is always an empty first column: splitting |ID|NAME on | gives ['', 'ID', 'NAME'].
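A quick standalone illustration of that parsing behaviour (not part of the handler):

from csv import DictReader

rows = ["|ID|NAME", "|1|Imran"]
reader = DictReader(rows, delimiter="|")
print(reader.fieldnames)  # ['', 'ID', 'NAME'], note the empty first field
print(next(reader))       # first data row: {'': '', 'ID': '1', 'NAME': 'Imran'}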
Testing with a sample event:
sample_event = {
    'Records': [
        {
            'eventName': 'ObjectCreated:Put',
            's3': {
                'bucket': {
                    'name': 'creeper-bank',
                },
                'object': {
                    'key': 'pds_ref',
                }
            },
        }
    ]
}
lambda_handler(sample_event, {})
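If the bucket really contains the pds_ref and table1 objects shown above, running the handler with this event should rewrite table1 with keys 1 and 3 removed. A quick check, reusing the client and constants defined earlier (hypothetical, assuming those objects exist in creeper-bank):

updated = s3.get_object(Bucket=BUCKET, Key="table1")["Body"].read().decode("utf-8")
print(updated)
# Expected content after the update:
# |ID|NAME
# |2|Peter
# |4|Carlos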