os.environ变量的Python 3.6 Lambda代码错误
lambda上的这部分代码失败,出现错误:os.environ变量的Python 3.6 Lambda代码错误,python,python-3.x,aws-lambda,Python,Python 3.x,Aws Lambda,lambda上的这部分代码失败,出现错误: try : sf = Salesforce(username = sfdc_username, password = sfdc_password, security_token = sfdc_security_token,
try :
sf = Salesforce(username = sfdc_username,
password = sfdc_password,
security_token = sfdc_security_token,
instance_url = sfdc_salesforce_instance_url,
domain = sfdc_sandbox)
print('salesforce login good')
except (SalesforceGeneralError,
SalesforceMoreThanOneRecord,
SalesforceMalformedRequest,
SalesforceExpiredSession,
SalesforceRefusedRequest,
SalesforceResourceNotFound) as e :
print(e.content[0]['message'])
sys.exit(1)
当我将此代码移动到EC2 amazon linux上的测试环境中,并将sfdc_sandox设置为“test”时,它不会出现任何问题。我尝试使用os.environb[“L_-SFDC\u-SANDBOX”]和os.environo[“L_-SFDC\u-SANDBOX”].encode('utf8'),但也没有帮助,因为它会产生相同的错误。在Lambda中引入此变量时,如何修复类型错误
这是整个脚本,可能错误不是因为那段特定的代码,尽管看起来是这样
a bytes-like object is required, not 'str': TypeError
Traceback (most recent call last):
File "/var/task/sfdc_etl/bin/sfdc_etl.py", line 80, in lambda_handler
domain = sfdc_sandbox)
File "/var/task/sfdc_etl/lib/python3.6/site-packages/simple_salesforce/api.py", line 146, in __init__
domain=self.domain)
File "/var/task/sfdc_etl/lib/python3.6/site-packages/simple_salesforce/login.py", line 80, in SalesforceLogin
username = escape(username)
File "/var/lang/lib/python3.6/html/__init__.py", line 19, in escape
s = s.replace("&", "&") # Must be done first!
TypeError: a bytes-like object is required, not 'str'
问题是下面这行
import os
import sys
# this adds the parent directory of bin so we can find the module
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
sys.path.append(parent_dir)
#This addes venv lib/python2.7/site-packages/ to the search path
mod_path = os.path.abspath(parent_dir+"/lib/python"+str(sys.version_info[0])+"."+str(sys.version_info[1])+"/site-packages/")
sys.path.append(mod_path)
from awsmgr.awsmgr import S3Helper
from base64 import b64decode
import boto3
from collections import OrderedDict
import datetime
from dateutil.parser import parse
import logging
import json
import math
import pandas as pd
from simple_salesforce import Salesforce, SalesforceLogin
from simple_salesforce.exceptions import SalesforceGeneralError, SalesforceMoreThanOneRecord, SalesforceMalformedRequest, SalesforceExpiredSession, SalesforceRefusedRequest, SalesforceResourceNotFound
from sqlalchemy import create_engine
from sqlalchemy import exc
current_path = os.path.dirname(os.path.realpath(__file__))
# Use this one for the parent directory
ENV_ROOT = os.path.abspath(os.path.join(current_path, os.path.pardir))
# Use this one for the current directory
#ENV_ROOT = os.path.abspath(os.path.join(current_path))
sys.path.append(ENV_ROOT)
def lambda_handler(event, context):
###############################
# Global Variable Definitions #
###############################
d_parse = parse
TMP_PATH = '/tmp'
igersUser = 'admin'
igersPwd = boto3.client('kms').decrypt(CiphertextBlob=b64decode(os.environ["RS_PASSWORD"]))['Plaintext']
igersHost = os.environ["RS_HOST"]
igers = create_engine('postgres://{}:{}@{}/ibdrs'.format(igersUser, igersPwd, igersHost), encoding="utf-8")
igersSchema = os.environ["RS_SCHEMA"]
s3 = S3Helper(debug=os.environ["DEBUG"])
nextObjFile = s3.get_s3_file('s3://test-sfdc-sds-team/sfdc-etl-jp-test/sfdc_etl/objects/next_object.txt',os.path.abspath(os.path.join(TMP_PATH,'next_object.txt')))
s3Destination = 's3://test-sfdc-sds-team/sfdc-etl-jp-test/sfdc_etl/json/'
s3Path = '{}_json'
s3NextObjDestination = 's3://test-sfdc-sds-team/sfdc-etl-jp-test/sfdc_etl/objects/{}'
fileCount = 1
sfdc_username = os.environ["L_USERNAME"].encode('utf8')
sfdc_salesforce_instance_url = os.environ["L_SALESFORCE_INSTANCE_URL"].encode('utf8')
sfdc_password = boto3.client('kms').decrypt(CiphertextBlob=b64decode(os.environ["L_PASSWORD"]))['Plaintext']
sfdc_security_token = boto3.client('kms').decrypt(CiphertextBlob=b64decode(os.environ["L_SECURITY_TOKEN"]))['Plaintext']
sfdc_sandbox = os.environ["L_SFDC_SANDBOX"].encode('utf8')
print(type(sfdc_username), type(sfdc_password), type(sfdc_security_token), type(sfdc_salesforce_instance_url), type(sfdc_sandbox))
try :
sf = Salesforce(username = sfdc_username,
password = sfdc_password,
security_token = sfdc_security_token,
instance_url = sfdc_salesforce_instance_url,
domain = sfdc_sandbox)
print('salesforce login good')
except (SalesforceGeneralError,
SalesforceMoreThanOneRecord,
SalesforceMalformedRequest,
SalesforceExpiredSession,
SalesforceRefusedRequest,
SalesforceResourceNotFound) as e :
print(e.content[0]['message'])
sys.exit(1)
# get nextobj from s3
with open(nextObjFile, 'r') as f :
nextObjItem = f.read().strip().lower()
nextObj = nextObjItem.lower()
print('Processing {}'.format(nextObj))
######################################################
# get rs table group permissions, store in dataframe #
######################################################
def rsGetGroupPerms(igers, nextObj) :
global groupPerms
groupPerms = {}
existingGroupPerms = '''
SELECT
namespace, item, type, groname
FROM
(
SELECT
use.usename AS subject,
nsp.nspname AS NAMESPACE,
cls.relname AS item,
cls.relkind AS TYPE,
use2.usename AS OWNER,
cls.relacl
FROM
pg_user use
CROSS JOIN pg_class cls
LEFT JOIN pg_namespace nsp ON cls.relnamespace = nsp.oid
LEFT JOIN pg_user use2 ON cls.relowner = use2.usesysid
WHERE
cls.relowner = use.usesysid
AND nsp.nspname NOT IN ( 'pg_catalog', 'pg_toast', 'information_schema' )
AND nsp.nspname IN ( 'salesforce' )
AND relacl IS NOT NULL
ORDER BY
subject,
NAMESPACE,
item
)
JOIN pg_group pu ON array_to_string( relacl, '|' ) LIKE'%%' || pu.groname || '%%'
WHERE item = '{}'
'''.format(nextObj)
groupPerms = pd.read_sql(existingGroupPerms, igers)
print('got the group permissions')
return groupPerms
#####################################################
# get rs table user permissions, store in dataframe #
# NOT CURRENTLY IN USE #
#####################################################
#def rsGetUseerPerms(igers, nextObj) :
# existingUserPerms = '''
# SELECT *
# FROM
# (
# SELECT
# schemaname
# ,objectname
# ,usename
# ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'select') AND has_schema_privilege(usrs.usename, schemaname, 'usage') AS sel
# ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'insert') AND has_schema_privilege(usrs.usename, schemaname, 'usage') AS ins
# ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'update') AND has_schema_privilege(usrs.usename, schemaname, 'usage') AS upd
# ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'delete') AND has_schema_privilege(usrs.usename, schemaname, 'usage') AS del
# ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'references') AND has_schema_privilege(usrs.usename, schemaname, 'usage') AS ref
# FROM
# (
# SELECT schemaname, 't' AS obj_type, tablename AS objectname, schemaname + '.' + tablename AS fullobj FROM pg_tables
# UNION
# SELECT schemaname, 'v' AS obj_type, viewname AS objectname, schemaname + '.' + viewname AS fullobj FROM pg_views
# ) AS objs
# ,(SELECT * FROM pg_user) AS usrs
# ORDER BY fullobj
# )
# WHERE (sel = true or ins = true or upd = true or del = true or ref = true)
# and objectname = '{}'
# '''.format(nextObj)
#
# userPerms = pd.read_sql_query(existingUserPerms, igers)
# return userPerms
####################################################
# Connect to Salesforce, Query JSON, and Copy to S3#
####################################################
def sfToS3(fileCount, sf, nextObj) :
# Initiate list for returned data
pulls = []
# Pull initial Query
sfobject = sf.restful('sobjects/{}/describe/'.format(nextObj), params=None)
fields_list = [record['name'] for record in sfobject['fields']]
initialQuery = sf.query("SELECT {} FROM {}".format(','.join(fields_list),nextObj))
#Send a single file or the first file to S3
data = initialQuery['records']
try :
send_temp_jsonl_to_s3(data, nextObj, s3, s3Destination, fileCount, s3Path)
# Append initial query data to pulls
if 'nextRecordsUrl' in initialQuery :
pulls.append(initialQuery['nextRecordsUrl'])
nextChunk = initialQuery['nextRecordsUrl']
nextQuery = sf.query_more(nextChunk,True)
if 'nextRecordsUrl' in nextQuery :
pulls.append(nextQuery['nextRecordsUrl'])
x = True
fileCount = 2
while x == True:
try:
# set up while loop to re-query salesforce until returned
# query does not have a 'nextRecordsUrl' return value
# Query new 'nextRecordsUrl'
nextQuery = sf.query_more(nextQuery['nextRecordsUrl'],True)
# append new query to pulls
pulls.append(nextQuery['nextRecordsUrl'])
except: # This triggers when nextQuery['nextRecordsUrl'] does not exist
# set x to False to end loop
x = False
#if there was a follow on set of records, query it and add to S3
if len(pulls) >= 1 :
for i in range(len(pulls)) :
data = sf.query_more(str(pulls[i].split('/')[5]))['records']
send_temp_jsonl_to_s3(data, nextObj, s3, s3Destination, fileCount, s3Path)
fileCount += 1
print('completed sending JSON files to S3')
except :
print('Salesforce Object Empty, ending execution')
updateNextObj(nextObj, s3NextObjDestination)
sys.exit(1)
####################
# JSONL to S3 #
####################
def send_temp_jsonl_to_s3(data, nextObj, s3, s3Destination, fileCount, s3Path) :
fileName = '{}_file{}.json'
localFilePath ='/tmp/'
for element in data :
item = data.pop()
item.pop('attributes', None)
tempdict = OrderedDict({})
for k,v in item.items() :
if 'date' in k.lower() or 'stamp' in k.lower() :
if not v is None :
d = d_parse(v)
v = d.strftime('%Y-%m-%d %I:%M:%S')
tempdict[k.lower()] = v
else :
tempdict[k.lower()] = v
with open(localFilePath+fileName.format(nextObj,fileCount), 'a') as outfile :
outfile.write(json.dumps(tempdict))
outfile.write('\n')
s3.put_s3_file_datedpath(localFilePath+fileName.format(nextObj,fileCount),s3Destination+s3Path.format(nextObj))
os.remove(localFilePath+fileName.format(nextObj,fileCount))
#################################################
# maps SFDC type to SQL type - used for ddl #
#################################################
def map_data_type(sfdc_type, length):
"""
Definition to map Salesforce datatype to Redshift datatype.
"""
__MULTIPLIER = 1.3 # may not be Zero!
if length == 0:
length = 1092
if length == 4095:
length = 15000
if length > 65535:
length = 65534
if sfdc_type == u'boolean':
return u'varchar(5)'
elif sfdc_type == u'date':
return u'timestamp'
elif sfdc_type == u'datetime':
return u'timestamp'
elif sfdc_type == u'currency':
return u'decimal(38,6)'
elif sfdc_type == u'double':
return u'decimal(38,6)'
elif sfdc_type == u'int':
return u'numeric(10)'
elif sfdc_type == u'picklist':
return u'varchar({})'.format(length)
elif sfdc_type == u'id':
return u'varchar({})'.format(length)
elif sfdc_type == u'reference':
return u'varchar({})'.format(length)
elif sfdc_type == u'textarea':
if length >= (65535/length*__MULTIPLIER):
return u'varchar({})'.format(65534)
else:
return u'varchar({})'.format( math.ceil(length*__MULTIPLIER))
elif sfdc_type == u'email':
return u'varchar({})'.format(length)
elif sfdc_type == u'phone':
return u'varchar({})'.format(length)
elif sfdc_type == u'url':
return u'varchar({})'.format(length)
elif sfdc_type == u'multipicklist':
return u'varchar({})'.format(length)
elif sfdc_type == u'anyType':
if length >= 65535:
return u'varchar({})'.format(65534)
else:
return u'varchar({})'.format(math.ceil(length*__MULTIPLIER))
elif sfdc_type == u'percent':
return u'numeric(38,6)'
elif sfdc_type == u'combobox':
return u'varchar({})'.format(length)
elif sfdc_type == u'base64':
return u'varchar({})'.format(length)
elif sfdc_type == u'time':
return u'varchar(255)'
elif sfdc_type == u'string':
if length >= 65535:
return u'varchar({})'.format(65534)
else:
return u'varchar({})'.format(math.ceil(length*__MULTIPLIER))
else:
return u'varchar(65535)'
####################################
# Turn SFDC metadata into SQL #
####################################
def get_ddl(sf, nextObj, igersSchema, col_remove=None):
md = sf.restful("sobjects/{}/describe/".format(nextObj), params=None)
target_table=nextObj
total_field_count = 0
global ddl_str
ddl_str = ''
ddl_str += 'CREATE TABLE '+ igersSchema+"."+target_table +' ('
for x in md["fields"]:
#print x["name"]
if col_remove:
if x["name"].lower() in [element.lower() for element in col_remove]:
print("Skipping: {}".format(x["name"]))
continue
ddl_str += x["name"] + ' ' + map_data_type(x["type"],x["length"])
if x["name"] == 'Id':
ddl_str += ' NOT NULL DISTKEY'
ddl_str += ","
total_field_count = total_field_count + 1
ddl_str = ddl_str[:-1]
ddl_str += ')'
logging.info('DDL Successfully created...')
# print("Total Field Count: "+str(total_field_count))
return ddl_str
#########################
# Create Table from DDL, execute the copy query and update permissions #
#########################
def rs_operations(ddl_str, groupPerms, igersSchema, nextObj, s3Destination, s3Path, igers) :
today = datetime.date.today()
dated_path = today.strftime('%Y/%m/%d')
perms_statement = ''
drop_table = '''
DROP TABLE IF EXISTS {}.{} CASCADE
'''.format(igersSchema, nextObj)
loadQuery = '''
COPY {}.{}
FROM '{}{}/{}/'
iam_role 'arn:aws:iam::087024238921:role/LambdaFullAccessRole'
TRUNCATECOLUMNS
FORMAT AS JSON 'auto'
'''.format(igersSchema, nextObj, s3Destination, s3Path.format(nextObj), dated_path)
grantPerms = '''
GRANT SELECT ON {}.{} TO GROUP {}
'''
with igers.connect() as conn:
try :
conn.execute(drop_table)
print('completed drop table')
conn.execute(ddl_str)
print('completed create table')
conn.execute(loadQuery)
print('completed load query')
for row in range(len(groupPerms)) :
perms_statement = grantPerms.format(groupPerms['namespace'].iloc[row],groupPerms['item'].iloc[row],groupPerms['groname'].iloc[row])
conn.execute(perms_statement)
print('completed grant group permissions')
conn.close()
except exc.SQLAlchemyError as e :
print(e)
######################################
# Update Next Object and Write to S3 #
######################################
def updateNextObj(nextObj, s3NextObjDestination) :
objectsList = []
objectsFile = s3.get_s3_file('s3://test-sfdc-sds-team/sfdc-etl-jp-test/sfdc_etl/objects/sfdc_etl_objects.txt',os.path.abspath(os.path.join(TMP_PATH,'sfdc_etl_objects.txt')))
localNobjTempFile = os.path.abspath(os.path.join(TMP_PATH,'next_object.txt'))
nextObjText = ''
with open (objectsFile, 'r') as objs :
for line in objs :
objectsList.append(line.strip("\n"))
for i in range(len(objectsList)-1) :
if objectsList[i].lower() == nextObj :
nextObjText = objectsList[i+1]
print(nextObjText)
with open (localNobjTempFile, 'w') as f :
f.write(nextObjText)
s3.put_s3_file(localNobjTempFile,s3NextObjDestination.format('next_object.txt'))
print('completed Updating the next object')
################################################
# Test if the object exists and execute #
################################################
try :
getattr(sf,nextObj).describe()
except (SalesforceGeneralError,
SalesforceMoreThanOneRecord,
SalesforceMalformedRequest,
SalesforceExpiredSession,
SalesforceRefusedRequest,
SalesforceResourceNotFound) as e :
print(e.content[0]['message'] +', writing next object and ending')
updateNextObj(nextObj, s3NextObjDestination)
sys.exit(1)
rsGetGroupPerms(igers, nextObj)
sfToS3(fileCount, sf, nextObj)
get_ddl(sf, nextObj, igersSchema, col_remove=None)
rs_operations(ddl_str, groupPerms, igersSchema, nextObj, s3Destination, s3Path, igers)
updateNextObj(nextObj, s3NextObjDestination)
正在返回一个bytes对象,另一方面,Salesforce
类需要一个字符串
解决方案:
请注意,
sfdc\u password
变量正在使用.decode
bytes方法转换为字符串问题在于以下行
import os
import sys
# this adds the parent directory of bin so we can find the module
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
sys.path.append(parent_dir)
#This addes venv lib/python2.7/site-packages/ to the search path
mod_path = os.path.abspath(parent_dir+"/lib/python"+str(sys.version_info[0])+"."+str(sys.version_info[1])+"/site-packages/")
sys.path.append(mod_path)
from awsmgr.awsmgr import S3Helper
from base64 import b64decode
import boto3
from collections import OrderedDict
import datetime
from dateutil.parser import parse
import logging
import json
import math
import pandas as pd
from simple_salesforce import Salesforce, SalesforceLogin
from simple_salesforce.exceptions import SalesforceGeneralError, SalesforceMoreThanOneRecord, SalesforceMalformedRequest, SalesforceExpiredSession, SalesforceRefusedRequest, SalesforceResourceNotFound
from sqlalchemy import create_engine
from sqlalchemy import exc
current_path = os.path.dirname(os.path.realpath(__file__))
# Use this one for the parent directory
ENV_ROOT = os.path.abspath(os.path.join(current_path, os.path.pardir))
# Use this one for the current directory
#ENV_ROOT = os.path.abspath(os.path.join(current_path))
sys.path.append(ENV_ROOT)
def lambda_handler(event, context):
###############################
# Global Variable Definitions #
###############################
d_parse = parse
TMP_PATH = '/tmp'
igersUser = 'admin'
igersPwd = boto3.client('kms').decrypt(CiphertextBlob=b64decode(os.environ["RS_PASSWORD"]))['Plaintext']
igersHost = os.environ["RS_HOST"]
igers = create_engine('postgres://{}:{}@{}/ibdrs'.format(igersUser, igersPwd, igersHost), encoding="utf-8")
igersSchema = os.environ["RS_SCHEMA"]
s3 = S3Helper(debug=os.environ["DEBUG"])
nextObjFile = s3.get_s3_file('s3://test-sfdc-sds-team/sfdc-etl-jp-test/sfdc_etl/objects/next_object.txt',os.path.abspath(os.path.join(TMP_PATH,'next_object.txt')))
s3Destination = 's3://test-sfdc-sds-team/sfdc-etl-jp-test/sfdc_etl/json/'
s3Path = '{}_json'
s3NextObjDestination = 's3://test-sfdc-sds-team/sfdc-etl-jp-test/sfdc_etl/objects/{}'
fileCount = 1
sfdc_username = os.environ["L_USERNAME"].encode('utf8')
sfdc_salesforce_instance_url = os.environ["L_SALESFORCE_INSTANCE_URL"].encode('utf8')
sfdc_password = boto3.client('kms').decrypt(CiphertextBlob=b64decode(os.environ["L_PASSWORD"]))['Plaintext']
sfdc_security_token = boto3.client('kms').decrypt(CiphertextBlob=b64decode(os.environ["L_SECURITY_TOKEN"]))['Plaintext']
sfdc_sandbox = os.environ["L_SFDC_SANDBOX"].encode('utf8')
print(type(sfdc_username), type(sfdc_password), type(sfdc_security_token), type(sfdc_salesforce_instance_url), type(sfdc_sandbox))
try :
sf = Salesforce(username = sfdc_username,
password = sfdc_password,
security_token = sfdc_security_token,
instance_url = sfdc_salesforce_instance_url,
domain = sfdc_sandbox)
print('salesforce login good')
except (SalesforceGeneralError,
SalesforceMoreThanOneRecord,
SalesforceMalformedRequest,
SalesforceExpiredSession,
SalesforceRefusedRequest,
SalesforceResourceNotFound) as e :
print(e.content[0]['message'])
sys.exit(1)
# get nextobj from s3
with open(nextObjFile, 'r') as f :
nextObjItem = f.read().strip().lower()
nextObj = nextObjItem.lower()
print('Processing {}'.format(nextObj))
######################################################
# get rs table group permissions, store in dataframe #
######################################################
def rsGetGroupPerms(igers, nextObj) :
global groupPerms
groupPerms = {}
existingGroupPerms = '''
SELECT
namespace, item, type, groname
FROM
(
SELECT
use.usename AS subject,
nsp.nspname AS NAMESPACE,
cls.relname AS item,
cls.relkind AS TYPE,
use2.usename AS OWNER,
cls.relacl
FROM
pg_user use
CROSS JOIN pg_class cls
LEFT JOIN pg_namespace nsp ON cls.relnamespace = nsp.oid
LEFT JOIN pg_user use2 ON cls.relowner = use2.usesysid
WHERE
cls.relowner = use.usesysid
AND nsp.nspname NOT IN ( 'pg_catalog', 'pg_toast', 'information_schema' )
AND nsp.nspname IN ( 'salesforce' )
AND relacl IS NOT NULL
ORDER BY
subject,
NAMESPACE,
item
)
JOIN pg_group pu ON array_to_string( relacl, '|' ) LIKE'%%' || pu.groname || '%%'
WHERE item = '{}'
'''.format(nextObj)
groupPerms = pd.read_sql(existingGroupPerms, igers)
print('got the group permissions')
return groupPerms
#####################################################
# get rs table user permissions, store in dataframe #
# NOT CURRENTLY IN USE #
#####################################################
#def rsGetUseerPerms(igers, nextObj) :
# existingUserPerms = '''
# SELECT *
# FROM
# (
# SELECT
# schemaname
# ,objectname
# ,usename
# ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'select') AND has_schema_privilege(usrs.usename, schemaname, 'usage') AS sel
# ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'insert') AND has_schema_privilege(usrs.usename, schemaname, 'usage') AS ins
# ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'update') AND has_schema_privilege(usrs.usename, schemaname, 'usage') AS upd
# ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'delete') AND has_schema_privilege(usrs.usename, schemaname, 'usage') AS del
# ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'references') AND has_schema_privilege(usrs.usename, schemaname, 'usage') AS ref
# FROM
# (
# SELECT schemaname, 't' AS obj_type, tablename AS objectname, schemaname + '.' + tablename AS fullobj FROM pg_tables
# UNION
# SELECT schemaname, 'v' AS obj_type, viewname AS objectname, schemaname + '.' + viewname AS fullobj FROM pg_views
# ) AS objs
# ,(SELECT * FROM pg_user) AS usrs
# ORDER BY fullobj
# )
# WHERE (sel = true or ins = true or upd = true or del = true or ref = true)
# and objectname = '{}'
# '''.format(nextObj)
#
# userPerms = pd.read_sql_query(existingUserPerms, igers)
# return userPerms
####################################################
# Connect to Salesforce, Query JSON, and Copy to S3#
####################################################
def sfToS3(fileCount, sf, nextObj) :
# Initiate list for returned data
pulls = []
# Pull initial Query
sfobject = sf.restful('sobjects/{}/describe/'.format(nextObj), params=None)
fields_list = [record['name'] for record in sfobject['fields']]
initialQuery = sf.query("SELECT {} FROM {}".format(','.join(fields_list),nextObj))
#Send a single file or the first file to S3
data = initialQuery['records']
try :
send_temp_jsonl_to_s3(data, nextObj, s3, s3Destination, fileCount, s3Path)
# Append initial query data to pulls
if 'nextRecordsUrl' in initialQuery :
pulls.append(initialQuery['nextRecordsUrl'])
nextChunk = initialQuery['nextRecordsUrl']
nextQuery = sf.query_more(nextChunk,True)
if 'nextRecordsUrl' in nextQuery :
pulls.append(nextQuery['nextRecordsUrl'])
x = True
fileCount = 2
while x == True:
try:
# set up while loop to re-query salesforce until returned
# query does not have a 'nextRecordsUrl' return value
# Query new 'nextRecordsUrl'
nextQuery = sf.query_more(nextQuery['nextRecordsUrl'],True)
# append new query to pulls
pulls.append(nextQuery['nextRecordsUrl'])
except: # This triggers when nextQuery['nextRecordsUrl'] does not exist
# set x to False to end loop
x = False
#if there was a follow on set of records, query it and add to S3
if len(pulls) >= 1 :
for i in range(len(pulls)) :
data = sf.query_more(str(pulls[i].split('/')[5]))['records']
send_temp_jsonl_to_s3(data, nextObj, s3, s3Destination, fileCount, s3Path)
fileCount += 1
print('completed sending JSON files to S3')
except :
print('Salesforce Object Empty, ending execution')
updateNextObj(nextObj, s3NextObjDestination)
sys.exit(1)
####################
# JSONL to S3 #
####################
def send_temp_jsonl_to_s3(data, nextObj, s3, s3Destination, fileCount, s3Path) :
fileName = '{}_file{}.json'
localFilePath ='/tmp/'
for element in data :
item = data.pop()
item.pop('attributes', None)
tempdict = OrderedDict({})
for k,v in item.items() :
if 'date' in k.lower() or 'stamp' in k.lower() :
if not v is None :
d = d_parse(v)
v = d.strftime('%Y-%m-%d %I:%M:%S')
tempdict[k.lower()] = v
else :
tempdict[k.lower()] = v
with open(localFilePath+fileName.format(nextObj,fileCount), 'a') as outfile :
outfile.write(json.dumps(tempdict))
outfile.write('\n')
s3.put_s3_file_datedpath(localFilePath+fileName.format(nextObj,fileCount),s3Destination+s3Path.format(nextObj))
os.remove(localFilePath+fileName.format(nextObj,fileCount))
#################################################
# maps SFDC type to SQL type - used for ddl #
#################################################
def map_data_type(sfdc_type, length):
"""
Definition to map Salesforce datatype to Redshift datatype.
"""
__MULTIPLIER = 1.3 # may not be Zero!
if length == 0:
length = 1092
if length == 4095:
length = 15000
if length > 65535:
length = 65534
if sfdc_type == u'boolean':
return u'varchar(5)'
elif sfdc_type == u'date':
return u'timestamp'
elif sfdc_type == u'datetime':
return u'timestamp'
elif sfdc_type == u'currency':
return u'decimal(38,6)'
elif sfdc_type == u'double':
return u'decimal(38,6)'
elif sfdc_type == u'int':
return u'numeric(10)'
elif sfdc_type == u'picklist':
return u'varchar({})'.format(length)
elif sfdc_type == u'id':
return u'varchar({})'.format(length)
elif sfdc_type == u'reference':
return u'varchar({})'.format(length)
elif sfdc_type == u'textarea':
if length >= (65535/length*__MULTIPLIER):
return u'varchar({})'.format(65534)
else:
return u'varchar({})'.format( math.ceil(length*__MULTIPLIER))
elif sfdc_type == u'email':
return u'varchar({})'.format(length)
elif sfdc_type == u'phone':
return u'varchar({})'.format(length)
elif sfdc_type == u'url':
return u'varchar({})'.format(length)
elif sfdc_type == u'multipicklist':
return u'varchar({})'.format(length)
elif sfdc_type == u'anyType':
if length >= 65535:
return u'varchar({})'.format(65534)
else:
return u'varchar({})'.format(math.ceil(length*__MULTIPLIER))
elif sfdc_type == u'percent':
return u'numeric(38,6)'
elif sfdc_type == u'combobox':
return u'varchar({})'.format(length)
elif sfdc_type == u'base64':
return u'varchar({})'.format(length)
elif sfdc_type == u'time':
return u'varchar(255)'
elif sfdc_type == u'string':
if length >= 65535:
return u'varchar({})'.format(65534)
else:
return u'varchar({})'.format(math.ceil(length*__MULTIPLIER))
else:
return u'varchar(65535)'
####################################
# Turn SFDC metadata into SQL #
####################################
def get_ddl(sf, nextObj, igersSchema, col_remove=None):
md = sf.restful("sobjects/{}/describe/".format(nextObj), params=None)
target_table=nextObj
total_field_count = 0
global ddl_str
ddl_str = ''
ddl_str += 'CREATE TABLE '+ igersSchema+"."+target_table +' ('
for x in md["fields"]:
#print x["name"]
if col_remove:
if x["name"].lower() in [element.lower() for element in col_remove]:
print("Skipping: {}".format(x["name"]))
continue
ddl_str += x["name"] + ' ' + map_data_type(x["type"],x["length"])
if x["name"] == 'Id':
ddl_str += ' NOT NULL DISTKEY'
ddl_str += ","
total_field_count = total_field_count + 1
ddl_str = ddl_str[:-1]
ddl_str += ')'
logging.info('DDL Successfully created...')
# print("Total Field Count: "+str(total_field_count))
return ddl_str
#########################
# Create Table from DDL, execute the copy query and update permissions #
#########################
def rs_operations(ddl_str, groupPerms, igersSchema, nextObj, s3Destination, s3Path, igers) :
today = datetime.date.today()
dated_path = today.strftime('%Y/%m/%d')
perms_statement = ''
drop_table = '''
DROP TABLE IF EXISTS {}.{} CASCADE
'''.format(igersSchema, nextObj)
loadQuery = '''
COPY {}.{}
FROM '{}{}/{}/'
iam_role 'arn:aws:iam::087024238921:role/LambdaFullAccessRole'
TRUNCATECOLUMNS
FORMAT AS JSON 'auto'
'''.format(igersSchema, nextObj, s3Destination, s3Path.format(nextObj), dated_path)
grantPerms = '''
GRANT SELECT ON {}.{} TO GROUP {}
'''
with igers.connect() as conn:
try :
conn.execute(drop_table)
print('completed drop table')
conn.execute(ddl_str)
print('completed create table')
conn.execute(loadQuery)
print('completed load query')
for row in range(len(groupPerms)) :
perms_statement = grantPerms.format(groupPerms['namespace'].iloc[row],groupPerms['item'].iloc[row],groupPerms['groname'].iloc[row])
conn.execute(perms_statement)
print('completed grant group permissions')
conn.close()
except exc.SQLAlchemyError as e :
print(e)
######################################
# Update Next Object and Write to S3 #
######################################
def updateNextObj(nextObj, s3NextObjDestination) :
objectsList = []
objectsFile = s3.get_s3_file('s3://test-sfdc-sds-team/sfdc-etl-jp-test/sfdc_etl/objects/sfdc_etl_objects.txt',os.path.abspath(os.path.join(TMP_PATH,'sfdc_etl_objects.txt')))
localNobjTempFile = os.path.abspath(os.path.join(TMP_PATH,'next_object.txt'))
nextObjText = ''
with open (objectsFile, 'r') as objs :
for line in objs :
objectsList.append(line.strip("\n"))
for i in range(len(objectsList)-1) :
if objectsList[i].lower() == nextObj :
nextObjText = objectsList[i+1]
print(nextObjText)
with open (localNobjTempFile, 'w') as f :
f.write(nextObjText)
s3.put_s3_file(localNobjTempFile,s3NextObjDestination.format('next_object.txt'))
print('completed Updating the next object')
################################################
# Test if the object exists and execute #
################################################
try :
getattr(sf,nextObj).describe()
except (SalesforceGeneralError,
SalesforceMoreThanOneRecord,
SalesforceMalformedRequest,
SalesforceExpiredSession,
SalesforceRefusedRequest,
SalesforceResourceNotFound) as e :
print(e.content[0]['message'] +', writing next object and ending')
updateNextObj(nextObj, s3NextObjDestination)
sys.exit(1)
rsGetGroupPerms(igers, nextObj)
sfToS3(fileCount, sf, nextObj)
get_ddl(sf, nextObj, igersSchema, col_remove=None)
rs_operations(ddl_str, groupPerms, igersSchema, nextObj, s3Destination, s3Path, igers)
updateNextObj(nextObj, s3NextObjDestination)
正在返回一个bytes对象,另一方面,Salesforce
类需要一个字符串
解决方案:
请注意,
sfdc\u password
变量正在使用.decode
bytes方法转换为字符串刚刚放入print(type(sfdc\u sandbox))
,它说我真的对这个问题感到困惑。似乎sandbox
kwarg已经被弃用了(),你能试着改用域
kwarg吗?是的,我以前用过,让我切换回去试试。从回溯中我可以看到错误本身与密码有关,你能打印出sfdc\u密码
类型(即打印(类型(sfdc\u密码))
)。看来html.escape
()函数接收到错误的类型。我更新到了域,同样的问题。至于打印类型:print(type(sfdc\u用户名)、type(sfdc\u密码)、type(sfdc\u安全性\u令牌)、type(sfdc\u salesforce\u实例\u url)、type(sfdc\u沙盒))
生成以下
只需放入打印(type(sfdc\u沙盒))
它说我对这个问题真的很茫然。看起来沙盒kwarg已经被弃用了(),你能试着用域kwarg来代替吗?是的,我以前用过,让我切换回去试试。从回溯中我可以看到错误本身与密码有关,您能否打印出sfdc\u密码
type(即print(type(sfdc\u密码))
)。看来html.escape
()函数接收到错误的类型。我更新到了域,同样的问题。至于打印类型:print(type(sfdc\u用户名)、type(sfdc\u密码)、type(sfdc\u安全性\u令牌)、type(sfdc\u salesforce\u实例\u url)、type(sfdc\u沙盒))
产生了以下
松弛,这让我更加思考这个问题。所以,是密码和安全令牌从boto3解码以字节而不是str的形式传入。我很高兴能帮助:)Slack,这让我更多地思考了这个问题。因此,是密码和安全令牌从boto3解码以字节形式传入,而不是str。我很乐意提供帮助:)
try :
sf = Salesforce(username=sfdc_username,
password=sfdc_password.decode("utf-8"),
security_token=sfdc_security_token,
instance_url=sfdc_salesforce_instance_url,
domain=sfdc_sandbox)
print('salesforce login good')