
Python: google.api_core.exceptions.BadRequest: 400 Invalid credentials


I'm trying to run a Docker image through Airflow, but I can't authenticate to GCP. I also tried setting the credentials via os.environ, but that didn't work either.
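For reference, the os.environ approach usually means exporting GOOGLE_APPLICATION_CREDENTIALS before any client is constructed, roughly like this (a minimal sketch; cert/key.json is the path the script below already uses):

import os

# Point the Google client libraries at the service-account key.
# This has to happen before bigquery.Client() / storage.Client() are created,
# and the file must actually exist at this path inside the container.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'cert/key.json'

Note that this only helps if the key file is really present at that path inside the running container.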

What is the best way to include the service key so the script can run?

Below is the script the image executes. Any pointers to adjustments in the code that would let the process run are appreciated.

import gspread
from oauth2client.service_account import ServiceAccountCredentials
import base64
import io
import avro.io
import avro.schema  # avro.schema.Parse is used below
from avro.datafile import DataFileWriter
import os
import gcloud
from gcloud import storage
from google.cloud import bigquery
from datetime import datetime, timedelta

#Bigquery Credentials and settings
scope = ["https://spreadsheets.google.com/feeds",
         'https://www.googleapis.com/auth/spreadsheets',
         "https://www.googleapis.com/auth/drive.file",
         "https://www.googleapis.com/auth/drive",
         "https://www.googleapis.com/auth/urlshortener",
         "https://www.googleapis.com/auth/sqlservice.admin",
         "https://www.googleapis.com/auth/cloud-platform",
         "https://www.googleapis.com/auth/compute",
         "https://www.googleapis.com/auth/devstorage.full_control",
         "https://www.googleapis.com/auth/logging.admin",
         "https://www.googleapis.com/auth/logging.write",
         "https://www.googleapis.com/auth/monitoring",
         "https://www.googleapis.com/auth/servicecontrol",
         "https://www.googleapis.com/auth/service.management.readonly",
         "https://www.googleapis.com/auth/bigquery",
         "https://www.googleapis.com/auth/datastore",
         "https://www.googleapis.com/auth/taskqueue",
         "https://www.googleapis.com/auth/userinfo.email",
         "https://www.googleapis.com/auth/trace.append",
         "https://www.googleapis.com/auth/plus.login",
         "https://www.googleapis.com/auth/plus.me",
         "https://www.googleapis.com/auth/userinfo.email",
         "https://www.googleapis.com/auth/userinfo.profile"]


creds = ServiceAccountCredentials.from_json_keyfile_name('cert/key.json', scope)
client = gspread.authorize(creds)

folder = str((datetime.now() - timedelta(days=15)).strftime('%Y-%m-%d'))
data_folder = str((datetime.now() - timedelta(days=15)).strftime('%Y%m%d'))
bucket_name = 'gs://bucket/*.csv'
dataset = 'dataset'
tabela = 'table'

new_file = 'cert/register_' + data_folder + '.avro'
file_schema = 'cert/schema.avsc'
new_filename = 'register_' + data_folder + '.avro'


# Check whether the file exists before trying to delete it
if os.path.exists(new_file):
    os.remove(new_file)
    print("Delete file", new_file)
else:
    print("Can not delete the file as it doesn't exists")

bq1 = bigquery.Client()
#Delete IDs
query1 = """DELETE FROM dataset.ids WHERE ID IS NOT NULL"""
query_job1 = bq1.query(query1)

def insert_bigquery(target_uri, dataset_id, table_id):
    bigquery_client = bigquery.Client()
    dataset_ref = bigquery_client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.schema = [
        bigquery.SchemaField('id','STRING',mode='REQUIRED')
    ]
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.field_delimiter = ";"
    uri = target_uri
    load_job = bigquery_client.load_table_from_uri(
        uri,
        dataset_ref.table(table_id),
        job_config=job_config
        )
    print('Starting job {}'.format(load_job.job_id))
    load_job.result()
    print('Job finished.')

insert_bigquery(bucket_name, dataset, tabela)

def get_data_from_bigquery():
    """query bigquery to get data to import to PSQL"""
    bq = bigquery.Client()
    # Fetch IDs
    query = """SELECT id FROM dataset.ids"""
    query_job = bq.query(query)
    data = query_job.result()
    rows = list(data)
    return rows

a = get_data_from_bigquery()
length = len(a)
line_count = 0
schema = avro.schema.Parse(open(file_schema, "rb").read())  # the writer needs the schema (Apache Avro 1.8.2)
writer = DataFileWriter(open(new_file, "wb"), avro.io.DatumWriter(), schema)

for row in range(length):
    bytes = base64.b64decode(str(a[row][0]))
    bytes = bytes[5:]
    buf = io.BytesIO(bytes)
    decoder = avro.io.BinaryDecoder(buf)
    rec_reader = avro.io.DatumReader(avro.schema.Parse(open(file_schema).read()))
    out=rec_reader.read(decoder)
    writer.append(out)
writer.close()

def upload_blob(bucket_name, source_file_name, destination_blob_name):
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob("insert/" + destination_blob_name)
    blob.upload_from_filename(source_file_name)
    print('File {} uploaded to {}'.format(
        source_file_name,
        destination_blob_name
    ))

upload_blob('bucket', new_file, new_filename)

def insert_bigquery_avro(target_uri, dataset_id, table_id):
    bigquery_client = bigquery.Client()
    dataset_ref = bigquery_client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.autodetect = True
    job_config.source_format = bigquery.SourceFormat.AVRO
    job_config.use_avro_logical_types = True
    time_partitioning = bigquery.table.TimePartitioning()
    job_config.time_partitioning = time_partitioning
    uri = target_uri
    load_job = bigquery_client.load_table_from_uri(
        uri,
        dataset_ref.table(table_id),
        job_config=job_config
        )
    print('Starting job {}'.format(load_job.job_id))
    load_job.result()
    print('Job finished.')

dataset1 = 'dataset'
tabela1 = 'test'
bucket_name1 = 'gs://bucket/insert/' + new_filename

insert_bigquery_avro(bucket_name1, dataset1, tabela1)

if os.path.exists(new_file):
    os.remove(new_file)
    print("Delete file", new_file)
else:
    print("Can not delete the file as it doesn't exists")
Error message:

[2019-12-03 18:18:55,176] {taskinstance.py:859} INFO - Executing <Task(KubernetesPodOperator): script> on 2019-12-03T18:17:47.034888+00:00
[2019-12-03 18:18:55,176] {base_task_runner.py:133} INFO - Running: ['airflow', 'run', 'test_script', 'script', '2019-12-03T18:17:47.034888+00:00', '--job_id', '37988', '--pool', 'default_pool', '--raw', '-sd', '/airflow/dags/git/test_script.py', '--cfg_path', '/tmp/tmp0j3b1n2u']
[2019-12-03 18:18:55,649] {base_task_runner.py:115} INFO - Job 37988: Subtask script /usr/local/lib/python3.7/site-packages/airflow/config_templates/airflow_local_settings.py:65: DeprecationWarning: The elasticsearch_host option in [elasticsearch] has been renamed to host - the old setting has been used, but please update your config.
[2019-12-03 18:18:55,650] {base_task_runner.py:115} INFO - Job 37988: Subtask script   ELASTICSEARCH_HOST = conf.get('elasticsearch', 'HOST')
[2019-12-03 18:18:55,650] {base_task_runner.py:115} INFO - Job 37988: Subtask script /usr/local/lib/python3.7/site-packages/airflow/config_templates/airflow_local_settings.py:67: DeprecationWarning: The elasticsearch_log_id_template option in [elasticsearch] has been renamed to log_id_template - the old setting has been used, but please update your config.
[2019-12-03 18:18:55,650] {base_task_runner.py:115} INFO - Job 37988: Subtask script   ELASTICSEARCH_LOG_ID_TEMPLATE = conf.get('elasticsearch', 'LOG_ID_TEMPLATE')
[2019-12-03 18:18:55,650] {base_task_runner.py:115} INFO - Job 37988: Subtask script /usr/local/lib/python3.7/site-packages/airflow/config_templates/airflow_local_settings.py:69: DeprecationWarning: The elasticsearch_end_of_log_mark option in [elasticsearch] has been renamed to end_of_log_mark - the old setting has been used, but please update your config.
[2019-12-03 18:18:55,650] {base_task_runner.py:115} INFO - Job 37988: Subtask script   ELASTICSEARCH_END_OF_LOG_MARK = conf.get('elasticsearch', 'END_OF_LOG_MARK')
[2019-12-03 18:18:55,817] {base_task_runner.py:115} INFO - Job 37988: Subtask script /usr/local/lib/python3.7/site-packages/psycopg2/__init__.py:144: UserWarning: The psycopg2 wheel package will be renamed from release 2.8; in order to keep installing from binary please use "pip install psycopg2-binary" instead. For details see: <http://initd.org/psycopg/docs/install.html#binary-install-from-pypi>.
[2019-12-03 18:18:55,817] {base_task_runner.py:115} INFO - Job 37988: Subtask script   """)
[2019-12-03 18:18:55,932] {base_task_runner.py:115} INFO - Job 37988: Subtask script [2019-12-03 18:18:55,932] {__init__.py:51} INFO - Using executor LocalExecutor
[2019-12-03 18:18:56,233] {base_task_runner.py:115} INFO - Job 37988: Subtask script [2019-12-03 18:18:56,233] {dagbag.py:90} INFO - Filling up the DagBag from /airflow/dags/git/test_script.py
[2019-12-03 18:18:56,979] {base_task_runner.py:115} INFO - Job 37988: Subtask script [2019-12-03 18:18:56,979] {cli.py:516} INFO - Running <TaskInstance: bexs_script.script 2019-12-03T18:17:47.034888+00:00 [running]> on host bexspaytransferpaytransfer-c5050aad788b4547974f8ec02ca25232
[2019-12-03 18:18:57,040] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:18:57,040[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:18:58,047] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:18:58,047[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:18:59,054] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:18:59,054[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:00,061] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:00,060[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:01,066] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:01,066[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:02,072] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:02,072[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:03,078] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:03,078[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:04,084] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:04,084[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:05,090] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:05,090[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:06,097] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:06,097[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:07,107] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:07,107[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:08,114] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:08,114[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:09,121] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:09,121[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:10,128] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:10,128[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mRunning[0m[0m
[2019-12-03 18:19:12,738] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,735[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'Traceback (most recent call last):\n'[0m
[2019-12-03 18:19:12,738] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,738[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'  File "script.py", line 101, in <module>\n'[0m
[2019-12-03 18:19:12,738] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,738[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'    insert_bigquery(bucket_name, dataset, tabela)\n'[0m
[2019-12-03 18:19:12,739] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,738[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'  File "script.py", line 98, in insert_bigquery\n'[0m
[2019-12-03 18:19:12,739] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,739[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'    load_job.result()\n'[0m
[2019-12-03 18:19:12,740] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,739[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'  File "/usr/local/lib/python3.7/site-packages/google/cloud/bigquery/job.py", line 697, in result\n'[0m
[2019-12-03 18:19:12,740] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,740[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'    return super(_AsyncJob, self).result(timeout=timeout)\n'[0m
[2019-12-03 18:19:12,740] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,740[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'  File "/usr/local/lib/python3.7/site-packages/google/api_core/future/polling.py", line 127, in result\n'[0m
[2019-12-03 18:19:12,740] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,740[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'    raise self._exception\n'[0m
[2019-12-03 18:19:12,740] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,740[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'google.api_core.exceptions.BadRequest: 400 Invalid credential\n'[0m
[2019-12-03 18:19:12,741] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,741[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b"Can not delete the file as it doesn't exists\n"[0m
[2019-12-03 18:19:12,742] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,741[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'Starting job 71ea5742-37c6-4152-a171-8f558b83da76\n'[0m
[2019-12-03 18:19:17,752] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:17,752[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mFailed[0m[0m
[2019-12-03 18:19:17,753] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:17,752[0m] {[34mpod_launcher.py:[0m208} INFO[0m - Event with job id [1mdag_test-f50779cc[0m Failed[0m
[2019-12-03 18:19:17,757] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:17,757[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mFailed[0m[0m
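The traceback points at load_job.result() inside insert_bigquery, i.e. a BigQuery client created with bigquery.Client() and no explicit credentials. Inside the container that client falls back to Application Default Credentials, and the 400 Invalid credential response suggests those defaults are missing or are not the intended service account. One quick way to verify the key file itself is to refresh it manually (a minimal diagnostic sketch, assuming the same cert/key.json path the script uses):

from google.oauth2 import service_account
from google.auth.transport.requests import Request

# Load the key and force a token refresh; this raises immediately if the
# key file is missing, malformed, or the service account is disabled.
creds = service_account.Credentials.from_service_account_file(
    'cert/key.json',
    scopes=['https://www.googleapis.com/auth/cloud-platform'],
)
creds.refresh(Request())
print('Token obtained, expires at', creds.expiry)

The revised script below creates the credentials explicitly from the key file once and hands them to every BigQuery and Cloud Storage client instead of relying on the environment: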
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import base64
import io
import avro.io
import avro.schema  # avro.schema.Parse is used below
from avro.datafile import DataFileWriter
from google.oauth2 import service_account
import os
import gcloud
from gcloud import storage
from google.cloud import bigquery
from datetime import datetime, timedelta

key_path = 'cert/key.json'

credentials = service_account.Credentials.from_service_account_file(
key_path,
scopes=["https://www.googleapis.com/auth/cloud-platform",
     "https://spreadsheets.google.com/feeds",
     'https://www.googleapis.com/auth/spreadsheets',
     "https://www.googleapis.com/auth/drive.file",
     "https://www.googleapis.com/auth/drive",
     "https://www.googleapis.com/auth/urlshortener",
     "https://www.googleapis.com/auth/sqlservice.admin",
     "https://www.googleapis.com/auth/cloud-platform",
     "https://www.googleapis.com/auth/compute",
     "https://www.googleapis.com/auth/devstorage.full_control",
     "https://www.googleapis.com/auth/logging.admin",
     "https://www.googleapis.com/auth/logging.write",
     "https://www.googleapis.com/auth/monitoring",
     "https://www.googleapis.com/auth/servicecontrol",
     "https://www.googleapis.com/auth/service.management.readonly",
     "https://www.googleapis.com/auth/bigquery",
     "https://www.googleapis.com/auth/datastore",
     "https://www.googleapis.com/auth/taskqueue",
     "https://www.googleapis.com/auth/userinfo.email",
     "https://www.googleapis.com/auth/trace.append",
     "https://www.googleapis.com/auth/plus.login",
     "https://www.googleapis.com/auth/plus.me",
     "https://www.googleapis.com/auth/userinfo.email",
     "https://www.googleapis.com/auth/userinfo.profile"],
)

client = bigquery.Client(
    credentials=credentials,
    project=credentials.project_id,
)

folder = str((datetime.now() - timedelta(days=15)).strftime('%Y-%m-%d'))
data_folder = str((datetime.now() - timedelta(days=15)).strftime('%Y%m%d'))
bucket_name = 'gs://bucket/*.csv'
dataset = 'dataset'
tabela = 'table'

new_file = 'cert/register_' + data_folder + '.avro'
file_schema = 'cert/schema.avsc'
new_filename = 'register_' + data_folder + '.avro'


# Check whether the file exists before trying to delete it
if os.path.exists(new_file):
    os.remove(new_file)
    print("Delete file", new_file)
else:
    print("Can not delete the file as it doesn't exists")

bq1 = bigquery.Client(credentials=credentials, project=credentials.project_id)
#Delete IDs
query1 = """DELETE FROM dataset.ids WHERE ID IS NOT NULL"""
query_job1 = bq1.query(query1)

def insert_bigquery(target_uri, dataset_id, table_id):
    bigquery_client = bigquery.Client(credentials=credentials, project=credentials.project_id)
    dataset_ref = bigquery_client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.schema = [
        bigquery.SchemaField('id','STRING',mode='REQUIRED')
    ]
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.field_delimiter = ";"
    uri = target_uri
    load_job = bigquery_client.load_table_from_uri(
        uri,
        dataset_ref.table(table_id),
        job_config=job_config
        )
    print('Starting job {}'.format(load_job.job_id))
    load_job.result()
    print('Job finished.')

insert_bigquery(bucket_name, dataset, tabela)

def get_data_from_bigquery():
    """query bigquery to get data to import to PSQL"""
    bq = bigquery.Client(credentials=credentials, project=credentials.project_id)
    # Fetch IDs
    query = """SELECT id FROM dataset.ids"""
    query_job = bq.query(query)
    data = query_job.result()
    rows = list(data)
    return rows

a = get_data_from_bigquery()
length = len(a)
line_count = 0
schema = avro.schema.Parse(open(file_schema, "rb").read())  # the writer needs the schema (Apache Avro 1.8.2)
writer = DataFileWriter(open(new_file, "wb"), avro.io.DatumWriter(), schema)

for row in range(length):
    bytes = base64.b64decode(str(a[row][0]))
    bytes = bytes[5:]
    buf = io.BytesIO(bytes)
    decoder = avro.io.BinaryDecoder(buf)
    rec_reader = avro.io.DatumReader(avro.schema.Parse(open(file_schema).read()))
    out=rec_reader.read(decoder)
    writer.append(out)
writer.close()

def upload_blob(bucket_name, source_file_name, destination_blob_name):
    storage_client = storage.Client.from_service_account_json('cert/key.json')
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob("insert/" + destination_blob_name)
    blob.upload_from_filename(source_file_name)
    print('File {} uploaded to {}'.format(
        source_file_name,
        destination_blob_name
    ))

upload_blob('bucket', new_file, new_filename)

def insert_bigquery_avro(target_uri, dataset_id, table_id):
    bigquery_client = bigquery.Client(credentials=credentials, project=credentials.project_id)
    dataset_ref = bigquery_client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.autodetect = True
    job_config.source_format = bigquery.SourceFormat.AVRO
    job_config.use_avro_logical_types = True
    time_partitioning = bigquery.table.TimePartitioning()
    job_config.time_partitioning = time_partitioning
    uri = target_uri
    load_job = bigquery_client.load_table_from_uri(
        uri,
        dataset_ref.table(table_id),
        job_config=job_config
        )
    print('Starting job {}'.format(load_job.job_id))
    load_job.result()
    print('Job finished.')

dataset1 = 'dataset'
tabela1 = 'test'
bucket_name1 = 'gs://bucket/insert/' + new_filename

insert_bigquery_avro(bucket_name1, dataset1, tabela1)

if os.path.exists(new_file):
    os.remove(new_file)
    print("Delete file", new_file)
else:
    print("Can not delete the file as it doesn't exists")