Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/opengl/4.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python Lambda fn要获取s3文件,请使用它更改另一个s3文件,然后将其重写为s3_Python_Csv_Amazon S3_Aws Lambda - Fatal编程技术网

Python Lambda fn要获取s3文件,请使用它更改另一个s3文件,然后将其重写为s3

Python Lambda fn要获取s3文件,请使用它更改另一个s3文件,然后将其重写为s3,python,csv,amazon-s3,aws-lambda,Python,Csv,Amazon S3,Aws Lambda,这是一段python代码,我用它来使用参考文件pds\u ref操作文件table1 所以pds_ref看起来像这样: |THE_TABLE|THE_KEY |table1|3 |table1|1 表1是这样的 |ID|NAME |1|Imran |2|Peter |3|Pedro |4|Carlos 其思想是使用pds_ref中的引用来删除正在列出的任何表中的记录,并删除相应的键…在这种情况下,删除1和3 此python代码的工作原理与python相同 导入csv with open("p

这是一段python代码，我用它来使用参考文件 pds_ref 操作文件 table1。pds_ref 看起来像这样：

|THE_TABLE|THE_KEY
|table1|3
|table1|1
表1是这样的

|ID|NAME
|1|Imran
|2|Peter
|3|Pedro
|4|Carlos
其思想是使用pds_ref中的引用来删除正在列出的任何表中的记录,并删除相应的键…在这种情况下,删除1和3

此python代码的工作原理与python相同 导入csv

import csv

# For each (THE_TABLE, THE_KEY) row in the reference file pds_ref,
# rewrite the named table file with the rows whose ID matches THE_KEY removed.
with open("pds_ref", newline='') as ref_file:
    refreader = csv.DictReader(ref_file, delimiter='|')
    reftable = list(refreader)

for refrow in reftable:
    print(refrow['THE_TABLE'])
    print(refrow['THE_KEY'])
    # Read the whole target table into memory first so it can be
    # reopened for writing afterwards ("rbw" is not a valid open mode).
    with open(refrow['THE_TABLE'], newline='') as infile:
        reader = csv.DictReader(infile, delimiter='|')
        table = list(reader)
        header = reader.fieldnames
    # Rewrite the table, keeping only rows whose ID is not the key to delete.
    with open(refrow['THE_TABLE'], 'w', newline='') as outfile:
        writer = csv.DictWriter(outfile, header, delimiter='|')
        writer.writeheader()
        for row in table:
            if row['ID'] != refrow['THE_KEY']:
                writer.writerow(row)
现在,我想使用lambda来实现这一点,这样每当有人上传pds_ref文件时,就会触发该函数

我尽可能地获取pds_ref文件并读取每一行,但在打开和写回修改后的table1文件时遇到了麻烦。谢谢你的帮助

import boto3
import csv
import io



def lambda_handler(event, context):
    """Triggered by an S3 upload of pds_ref.

    Reads pds_ref from the bucket, and for each (THE_TABLE, THE_KEY) row
    fetches the named table object, drops the rows whose ID matches the
    key, and writes the filtered table back to S3.

    Parameters
    ----------
    event : dict
        S3 notification event containing the uploaded object's key/bucket.
    context : object
        Lambda context (unused).
    """
    s3 = boto3.client("s3")

    if not event:
        return

    print("Event : ", event)
    file_obj = event["Records"][0]
    filename = str(file_obj['s3']['object']['key'])
    bucketname = str(file_obj['s3']['bucket']['name'])
    print("Filename: ", filename)
    print("Bucket: ", bucketname)

    # Read the reference file listing which keys to delete from which tables.
    f_pds_ref = s3.get_object(Bucket="lambda-trig1", Key='pds_ref')
    fc_pds_ref = f_pds_ref['Body'].read().decode('utf-8').splitlines(True)

    for refrow in csv.DictReader(fc_pds_ref, delimiter='|'):
        current_table = refrow['THE_TABLE']
        current_key = refrow['THE_KEY']
        print(current_table)
        print(current_key)

        # S3 object content cannot be opened with open(); parse the
        # in-memory text lines directly with csv.DictReader instead.
        f_the_next_table = s3.get_object(Bucket="lambda-trig1", Key=current_table)
        fc_the_next_table = f_the_next_table['Body'].read().decode('utf-8').splitlines(True)
        reader = csv.DictReader(fc_the_next_table, delimiter='|')
        header = reader.fieldnames
        # Keep only the rows whose ID is not marked for deletion.
        kept_rows = [row for row in reader if row['ID'] != current_key]

        # Serialize the filtered table and write it back to the same key.
        out = io.StringIO()
        writer = csv.DictWriter(out, fieldnames=header, delimiter='|')
        writer.writeheader()
        writer.writerows(kept_rows)
        s3.put_object(Bucket="lambda-trig1", Key=current_table, Body=out.getvalue())

在运行更新其他表的过程之前，您希望确保该函数仅针对 Put 事件运行。

在读取 pds_ref 之后，以下是对当前步骤的一些补充：

  • 通过
    表格对所有
    按键
    s进行分组

    这允许您执行唯一的迭代来更新表对象 而不是对同一个表对象中的内容使用多个

  • 对于每个
    组,
    读取表格对象并过滤掉\u键组中的行,
    将筛选的内容写入表对象

这可以通过以下方式实现

from contextlib import contextmanager
from csv import DictReader, DictWriter
from collections import defaultdict
import io

import boto3

s3 = boto3.client("s3")

BUCKET = "creeper-bank"
DELIMITER = "|"
TABLE_OBJECT_COLUMNS = ['', 'ID', 'NAME']
WATCH_KEY = "pds_ref"


def content_as_dict_reader(content):
    """Yield a DictReader over *content*, split into lines.

    Generator form, so the @contextmanager helpers below can hand the
    reader through as the managed resource.
    """
    reader = DictReader(content.splitlines(), delimiter=DELIMITER)
    yield reader

@contextmanager
def tables_and_lines_for_deletion():
    """Context manager yielding a DictReader over the WATCH_KEY object.

    A @contextmanager-decorated function must itself be a generator;
    the original returned a generator from a helper, which only worked
    by accident. Yield the reader directly instead.
    """
    object_ = s3.get_object(
        Bucket=BUCKET, Key=WATCH_KEY
    )
    content = object_["Body"].read().decode('utf-8')
    yield DictReader(content.splitlines(), delimiter=DELIMITER)

@contextmanager
def table_record(table):
    """Context manager yielding a DictReader over the S3 object *table*.

    A @contextmanager-decorated function must itself be a generator;
    yield the reader directly rather than returning one from a helper.
    """
    object_ = s3.get_object(
        Bucket=BUCKET, Key=table
    )
    content = object_["Body"].read().decode('utf-8')
    yield DictReader(content.splitlines(), delimiter=DELIMITER)

def object_table(table, record):
    """Serialize *record* rows as pipe-delimited CSV and upload to *table*.

    The rows are written with TABLE_OBJECT_COLUMNS as the header (the
    leading empty column mirrors the leading '|' in the stored files).
    """
    buffer_ = io.StringIO()
    try:
        csv_writer = DictWriter(
            buffer_,
            fieldnames=TABLE_OBJECT_COLUMNS,
            delimiter=DELIMITER,
        )
        csv_writer.writeheader()
        csv_writer.writerows(list(record))
        s3.put_object(
            Bucket=BUCKET,
            Key=table,
            Body=buffer_.getvalue(),
        )
    finally:
        buffer_.close()

def lambda_handler(event, context):
    """Entry point: on an S3 Put of WATCH_KEY, rewrite each table object
    listed in it with the rows whose ID appears in THE_KEY removed.

    Parameters
    ----------
    event : dict
        S3 notification event; only Put events for WATCH_KEY in BUCKET
        are processed, everything else is ignored with a log message.
    context : object
        Lambda context (unused).
    """
    if not event:
        print("Function must be triggered via a published event")
        return

    # Only the first record is inspected.
    event_record, *_ = event["Records"]
    match_watchkey = True
    try:
        event_name = str(event_record['eventName'])
        if "Put" not in event_name:
            match_watchkey = False

        s3_event = event_record['s3']
        print("checking if S3 event is a put one for :WATCH_KEY")

        key = s3_event['object']['key']
        bucket = s3_event['bucket']['name']

        if key != WATCH_KEY:
            match_watchkey = False
        if bucket != BUCKET:
            match_watchkey = False
    except KeyError:
        # Handle when event_record isn't an S3 one.
        match_watchkey = False
    if not match_watchkey:
        print("Published event did not match :WATCH_KEY.")
        return

    print("S3 event is a put one for :WATCH_KEY!")

    # Group every key slated for deletion by its owning table, so each
    # table object is fetched and rewritten exactly once.
    table_group = defaultdict(list)

    print("Reading :WATCH_KEY content")
    with tables_and_lines_for_deletion() as tables:
        for dct in tables:
            table_k = dct['THE_TABLE']
            table_v = dct['THE_KEY']
            table_group[table_k].append(table_v)

    print("Updating objects found in :WATCH_KEY content")
    for t, ids in table_group.items():
        record_update = None
        with table_record(t) as record:
            # Lazy generator; the in-memory reader stays valid after the
            # context exits, so it is safe to consume in object_table.
            record_update = (
                dct
                for dct in record
                if dct["ID"] not in ids
            )
        object_table(t, record_update)
    print("Update completed!")
    return
使用示例事件进行测试
您好，为什么您将 TABLE_OBJECT_COLUMNS 设为 ['', 'ID', 'NAME'] 而不仅仅是 ['ID', 'NAME']？—— 因为您的表的每一行（如 |ID|NAME）都以管道字符开头。按 | 拆分解析为 CSV 时，开头总会多出一个空列，因此表头会变成 ['', 'ID', 'NAME']。
# Sample S3 ObjectCreated:Put notification for WATCH_KEY, used to
# exercise lambda_handler locally without deploying to AWS.
sample_event = {
    'Records': [
        {
            'eventName':  'ObjectCreated:Put',

            's3': {
                'bucket': {
                    'name': 'creeper-bank',
                },
                'object': {
                    'key': 'pds_ref',
                }
            },
        }
    ]
}
# Invoke the handler directly with an empty context for a local test run.
lambda_handler(sample_event, {})