
Python: google.api_core.exceptions.BadRequest: 400 Invalid credentials


I'm trying to run a Docker image through Airflow, but I can't authenticate to GCP. I also tried setting the credentials via os.environ, but that didn't work either.
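For reference, the os.environ approach usually means exporting GOOGLE_APPLICATION_CREDENTIALS before any client is constructed, roughly like this (a minimal sketch; cert/key.json is the path the script below already uses):

import os

# Point the Google client libraries at the service-account key.
# This has to happen before bigquery.Client() / storage.Client() are created,
# and the file must actually exist at this path inside the container.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'cert/key.json'

Note that this only helps if the key file is really present at that path inside the running container.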

What is the best way to include the service key so the script can run?

Below is the script the image executes. Any pointers to adjustments in the code that would let the process run are appreciated.

import gspread
from oauth2client.service_account import ServiceAccountCredentials
import base64
import io
import avro.io
import avro.schema  # avro.schema.Parse is used below
from avro.datafile import DataFileWriter
import os
import gcloud
from gcloud import storage
from google.cloud import bigquery
from datetime import datetime, timedelta

#Bigquery Credentials and settings
scope = ["https://spreadsheets.google.com/feeds",
         'https://www.googleapis.com/auth/spreadsheets',
         "https://www.googleapis.com/auth/drive.file",
         "https://www.googleapis.com/auth/drive",
         "https://www.googleapis.com/auth/urlshortener",
         "https://www.googleapis.com/auth/sqlservice.admin",
         "https://www.googleapis.com/auth/cloud-platform",
         "https://www.googleapis.com/auth/compute",
         "https://www.googleapis.com/auth/devstorage.full_control",
         "https://www.googleapis.com/auth/logging.admin",
         "https://www.googleapis.com/auth/logging.write",
         "https://www.googleapis.com/auth/monitoring",
         "https://www.googleapis.com/auth/servicecontrol",
         "https://www.googleapis.com/auth/service.management.readonly",
         "https://www.googleapis.com/auth/bigquery",
         "https://www.googleapis.com/auth/datastore",
         "https://www.googleapis.com/auth/taskqueue",
         "https://www.googleapis.com/auth/userinfo.email",
         "https://www.googleapis.com/auth/trace.append",
         "https://www.googleapis.com/auth/plus.login",
         "https://www.googleapis.com/auth/plus.me",
         "https://www.googleapis.com/auth/userinfo.email",
         "https://www.googleapis.com/auth/userinfo.profile"]


creds = ServiceAccountCredentials.from_json_keyfile_name('cert/key.json', scope)
client = gspread.authorize(creds)

folder = str((datetime.now() - timedelta(days=15)).strftime('%Y-%m-%d'))
data_folder = str((datetime.now() - timedelta(days=15)).strftime('%Y%m%d'))
bucket_name = 'gs://bucket/*.csv'
dataset = 'dataset'
tabela = 'table'

new_file = 'cert/register_' + data_folder + '.avro'
file_schema = 'cert/schema.avsc'
new_filename = 'register_' + data_folder + '.avro'


# Check whether the file exists before trying to delete it
if os.path.exists(new_file):
    os.remove(new_file)
    print("Delete file", new_file)
else:
    print("Can not delete the file as it doesn't exists")

bq1 = bigquery.Client()
#Delete IDs
query1 = """DELETE FROM dataset.ids WHERE ID IS NOT NULL"""
query_job1 = bq1.query(query1)

def insert_bigquery(target_uri, dataset_id, table_id):
    bigquery_client = bigquery.Client()
    dataset_ref = bigquery_client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.schema = [
        bigquery.SchemaField('id','STRING',mode='REQUIRED')
    ]
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.field_delimiter = ";"
    uri = target_uri
    load_job = bigquery_client.load_table_from_uri(
        uri,
        dataset_ref.table(table_id),
        job_config=job_config
        )
    print('Starting job {}'.format(load_job.job_id))
    load_job.result()
    print('Job finished.')

insert_bigquery(bucket_name, dataset, tabela)

def get_data_from_bigquery():
    """query bigquery to get data to import to PSQL"""
    bq = bigquery.Client()
    # Fetch IDs
    query = """SELECT id FROM dataset.ids"""
    query_job = bq.query(query)
    data = query_job.result()
    rows = list(data)
    return rows

a = get_data_from_bigquery()
length = len(a)
line_count = 0
schema = avro.schema.Parse(open(file_schema, "rb").read())  # the writer needs the schema (Apache Avro 1.8.2)
writer = DataFileWriter(open(new_file, "wb"), avro.io.DatumWriter(), schema)

for row in range(length):
    bytes = base64.b64decode(str(a[row][0]))
    bytes = bytes[5:]
    buf = io.BytesIO(bytes)
    decoder = avro.io.BinaryDecoder(buf)
    rec_reader = avro.io.DatumReader(avro.schema.Parse(open(file_schema).read()))
    out=rec_reader.read(decoder)
    writer.append(out)
writer.close()

def upload_blob(bucket_name, source_file_name, destination_blob_name):
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob("insert/" + destination_blob_name)
    blob.upload_from_filename(source_file_name)
    print('File {} uploaded to {}'.format(
        source_file_name,
        destination_blob_name
    ))

upload_blob('bucket', new_file, new_filename)

def insert_bigquery_avro(target_uri, dataset_id, table_id):
    bigquery_client = bigquery.Client()
    dataset_ref = bigquery_client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.autodetect = True
    job_config.source_format = bigquery.SourceFormat.AVRO
    job_config.use_avro_logical_types = True
    time_partitioning = bigquery.table.TimePartitioning()
    job_config.time_partitioning = time_partitioning
    uri = target_uri
    load_job = bigquery_client.load_table_from_uri(
        uri,
        dataset_ref.table(table_id),
        job_config=job_config
        )
    print('Starting job {}'.format(load_job.job_id))
    load_job.result()
    print('Job finished.')

dataset1 = 'dataset'
tabela1 = 'test'
bucket_name1 = 'gs://bucket/insert/' + new_filename

insert_bigquery_avro(bucket_name1, dataset1, tabela1)

if os.path.exists(new_file):
    os.remove(new_file)
    print("Delete file", new_file)
else:
    print("Can not delete the file as it doesn't exists")
Error message:

[2019-12-03 18:18:55,176] {taskinstance.py:859} INFO - Executing <Task(KubernetesPodOperator): script> on 2019-12-03T18:17:47.034888+00:00
[2019-12-03 18:18:55,176] {base_task_runner.py:133} INFO - Running: ['airflow', 'run', 'test_script', 'script', '2019-12-03T18:17:47.034888+00:00', '--job_id', '37988', '--pool', 'default_pool', '--raw', '-sd', '/airflow/dags/git/test_script.py', '--cfg_path', '/tmp/tmp0j3b1n2u']
[2019-12-03 18:18:55,649] {base_task_runner.py:115} INFO - Job 37988: Subtask script /usr/local/lib/python3.7/site-packages/airflow/config_templates/airflow_local_settings.py:65: DeprecationWarning: The elasticsearch_host option in [elasticsearch] has been renamed to host - the old setting has been used, but please update your config.
[2019-12-03 18:18:55,650] {base_task_runner.py:115} INFO - Job 37988: Subtask script   ELASTICSEARCH_HOST = conf.get('elasticsearch', 'HOST')
[2019-12-03 18:18:55,650] {base_task_runner.py:115} INFO - Job 37988: Subtask script /usr/local/lib/python3.7/site-packages/airflow/config_templates/airflow_local_settings.py:67: DeprecationWarning: The elasticsearch_log_id_template option in [elasticsearch] has been renamed to log_id_template - the old setting has been used, but please update your config.
[2019-12-03 18:18:55,650] {base_task_runner.py:115} INFO - Job 37988: Subtask script   ELASTICSEARCH_LOG_ID_TEMPLATE = conf.get('elasticsearch', 'LOG_ID_TEMPLATE')
[2019-12-03 18:18:55,650] {base_task_runner.py:115} INFO - Job 37988: Subtask script /usr/local/lib/python3.7/site-packages/airflow/config_templates/airflow_local_settings.py:69: DeprecationWarning: The elasticsearch_end_of_log_mark option in [elasticsearch] has been renamed to end_of_log_mark - the old setting has been used, but please update your config.
[2019-12-03 18:18:55,650] {base_task_runner.py:115} INFO - Job 37988: Subtask script   ELASTICSEARCH_END_OF_LOG_MARK = conf.get('elasticsearch', 'END_OF_LOG_MARK')
[2019-12-03 18:18:55,817] {base_task_runner.py:115} INFO - Job 37988: Subtask script /usr/local/lib/python3.7/site-packages/psycopg2/__init__.py:144: UserWarning: The psycopg2 wheel package will be renamed from release 2.8; in order to keep installing from binary please use "pip install psycopg2-binary" instead. For details see: <http://initd.org/psycopg/docs/install.html#binary-install-from-pypi>.
[2019-12-03 18:18:55,817] {base_task_runner.py:115} INFO - Job 37988: Subtask script   """)
[2019-12-03 18:18:55,932] {base_task_runner.py:115} INFO - Job 37988: Subtask script [2019-12-03 18:18:55,932] {__init__.py:51} INFO - Using executor LocalExecutor
[2019-12-03 18:18:56,233] {base_task_runner.py:115} INFO - Job 37988: Subtask script [2019-12-03 18:18:56,233] {dagbag.py:90} INFO - Filling up the DagBag from /airflow/dags/git/test_script.py
[2019-12-03 18:18:56,979] {base_task_runner.py:115} INFO - Job 37988: Subtask script [2019-12-03 18:18:56,979] {cli.py:516} INFO - Running <TaskInstance: bexs_script.script 2019-12-03T18:17:47.034888+00:00 [running]> on host bexspaytransferpaytransfer-c5050aad788b4547974f8ec02ca25232
[2019-12-03 18:18:57,040] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:18:57,040[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:18:58,047] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:18:58,047[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:18:59,054] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:18:59,054[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:00,061] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:00,060[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:01,066] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:01,066[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:02,072] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:02,072[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:03,078] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:03,078[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:04,084] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:04,084[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:05,090] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:05,090[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:06,097] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:06,097[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:07,107] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:07,107[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:08,114] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:08,114[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:09,121] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:09,121[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mPending[0m[0m
[2019-12-03 18:19:10,128] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:10,128[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mRunning[0m[0m
[2019-12-03 18:19:12,738] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,735[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'Traceback (most recent call last):\n'[0m
[2019-12-03 18:19:12,738] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,738[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'  File "script.py", line 101, in <module>\n'[0m
[2019-12-03 18:19:12,738] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,738[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'    insert_bigquery(bucket_name, dataset, tabela)\n'[0m
[2019-12-03 18:19:12,739] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,738[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'  File "script.py", line 98, in insert_bigquery\n'[0m
[2019-12-03 18:19:12,739] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,739[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'    load_job.result()\n'[0m
[2019-12-03 18:19:12,740] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,739[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'  File "/usr/local/lib/python3.7/site-packages/google/cloud/bigquery/job.py", line 697, in result\n'[0m
[2019-12-03 18:19:12,740] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,740[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'    return super(_AsyncJob, self).result(timeout=timeout)\n'[0m
[2019-12-03 18:19:12,740] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,740[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'  File "/usr/local/lib/python3.7/site-packages/google/api_core/future/polling.py", line 127, in result\n'[0m
[2019-12-03 18:19:12,740] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,740[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'    raise self._exception\n'[0m
[2019-12-03 18:19:12,740] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,740[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'google.api_core.exceptions.BadRequest: 400 Invalid credential\n'[0m
[2019-12-03 18:19:12,741] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,741[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b"Can not delete the file as it doesn't exists\n"[0m
[2019-12-03 18:19:12,742] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:12,741[0m] {[34mpod_launcher.py:[0m105} INFO[0m - b'Starting job 71ea5742-37c6-4152-a171-8f558b83da76\n'[0m
[2019-12-03 18:19:17,752] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:17,752[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mFailed[0m[0m
[2019-12-03 18:19:17,753] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:17,752[0m] {[34mpod_launcher.py:[0m208} INFO[0m - Event with job id [1mdag_test-f50779cc[0m Failed[0m
[2019-12-03 18:19:17,757] {logging_mixin.py:95} INFO - [[34m2019-12-03 18:19:17,757[0m] {[34mpod_launcher.py:[0m122} INFO[0m - Event: [1mdag_test-f50779cc[0m had an event of type [1mFailed[0m[0m
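The traceback points at load_job.result() inside insert_bigquery, i.e. a BigQuery client created with bigquery.Client() and no explicit credentials. Inside the container that client falls back to Application Default Credentials, and the 400 Invalid credential response suggests those defaults are missing or are not the intended service account. One quick way to verify the key file itself is to refresh it manually (a minimal diagnostic sketch, assuming the same cert/key.json path the script uses):

from google.oauth2 import service_account
from google.auth.transport.requests import Request

# Load the key and force a token refresh; this raises immediately if the
# key file is missing, malformed, or the service account is disabled.
creds = service_account.Credentials.from_service_account_file(
    'cert/key.json',
    scopes=['https://www.googleapis.com/auth/cloud-platform'],
)
creds.refresh(Request())
print('Token obtained, expires at', creds.expiry)

The revised script below creates the credentials explicitly from the key file once and hands them to every BigQuery and Cloud Storage client instead of relying on the environment: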
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import base64
import io
import avro.io
import avro.schema  # avro.schema.Parse is used below
from avro.datafile import DataFileWriter
from google.oauth2 import service_account
import os
import gcloud
from gcloud import storage
from google.cloud import bigquery
from datetime import datetime, timedelta

key_path = 'cert/key.json'

credentials = service_account.Credentials.from_service_account_file(
key_path,
scopes=["https://www.googleapis.com/auth/cloud-platform",
     "https://spreadsheets.google.com/feeds",
     'https://www.googleapis.com/auth/spreadsheets',
     "https://www.googleapis.com/auth/drive.file",
     "https://www.googleapis.com/auth/drive",
     "https://www.googleapis.com/auth/urlshortener",
     "https://www.googleapis.com/auth/sqlservice.admin",
     "https://www.googleapis.com/auth/cloud-platform",
     "https://www.googleapis.com/auth/compute",
     "https://www.googleapis.com/auth/devstorage.full_control",
     "https://www.googleapis.com/auth/logging.admin",
     "https://www.googleapis.com/auth/logging.write",
     "https://www.googleapis.com/auth/monitoring",
     "https://www.googleapis.com/auth/servicecontrol",
     "https://www.googleapis.com/auth/service.management.readonly",
     "https://www.googleapis.com/auth/bigquery",
     "https://www.googleapis.com/auth/datastore",
     "https://www.googleapis.com/auth/taskqueue",
     "https://www.googleapis.com/auth/userinfo.email",
     "https://www.googleapis.com/auth/trace.append",
     "https://www.googleapis.com/auth/plus.login",
     "https://www.googleapis.com/auth/plus.me",
     "https://www.googleapis.com/auth/userinfo.email",
     "https://www.googleapis.com/auth/userinfo.profile"],
)

client = bigquery.Client(
    credentials=credentials,
    project=credentials.project_id,
)

folder = str((datetime.now() - timedelta(days=15)).strftime('%Y-%m-%d'))
data_folder = str((datetime.now() - timedelta(days=15)).strftime('%Y%m%d'))
bucket_name = 'gs://bucket/*.csv'
dataset = 'dataset'
tabela = 'table'

new_file = 'cert/register_' + data_folder + '.avro'
file_schema = 'cert/schema.avsc'
new_filename = 'register_' + data_folder + '.avro'


# Check whether the file exists before trying to delete it
if os.path.exists(new_file):
    os.remove(new_file)
    print("Delete file", new_file)
else:
    print("Can not delete the file as it doesn't exists")

bq1 = bigquery.Client(credentials=credentials, project=credentials.project_id)
#Delete IDs
query1 = """DELETE FROM dataset.ids WHERE ID IS NOT NULL"""
query_job1 = bq1.query(query1)

def insert_bigquery(target_uri, dataset_id, table_id):
    bigquery_client = bigquery.Client(credentials=credentials, project=credentials.project_id)
    dataset_ref = bigquery_client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.schema = [
        bigquery.SchemaField('id','STRING',mode='REQUIRED')
    ]
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.field_delimiter = ";"
    uri = target_uri
    load_job = bigquery_client.load_table_from_uri(
        uri,
        dataset_ref.table(table_id),
        job_config=job_config
        )
    print('Starting job {}'.format(load_job.job_id))
    load_job.result()
    print('Job finished.')

insert_bigquery(bucket_name, dataset, tabela)

def get_data_from_bigquery():
    """query bigquery to get data to import to PSQL"""
    bq = bigquery.Client(credentials=credentials, project=credentials.project_id)
    # Fetch IDs
    query = """SELECT id FROM dataset.ids"""
    query_job = bq.query(query)
    data = query_job.result()
    rows = list(data)
    return rows

a = get_data_from_bigquery()
length = len(a)
line_count = 0
schema = avro.schema.Parse(open(file_schema, "rb").read())  # the writer needs the schema (Apache Avro 1.8.2)
writer = DataFileWriter(open(new_file, "wb"), avro.io.DatumWriter(), schema)

for row in range(length):
    bytes = base64.b64decode(str(a[row][0]))
    bytes = bytes[5:]
    buf = io.BytesIO(bytes)
    decoder = avro.io.BinaryDecoder(buf)
    rec_reader = avro.io.DatumReader(avro.schema.Parse(open(file_schema).read()))
    out=rec_reader.read(decoder)
    writer.append(out)
writer.close()

def upload_blob(bucket_name, source_file_name, destination_blob_name):
    storage_client = storage.Client.from_service_account_json('cert/key.json')
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob("insert/" + destination_blob_name)
    blob.upload_from_filename(source_file_name)
    print('File {} uploaded to {}'.format(
        source_file_name,
        destination_blob_name
    ))

upload_blob('bucket', new_file, new_filename)

def insert_bigquery_avro(target_uri, dataset_id, table_id):
    bigquery_client = bigquery.Client(credentials=credentials, project=credentials.project_id)
    dataset_ref = bigquery_client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.autodetect = True
    job_config.source_format = bigquery.SourceFormat.AVRO
    job_config.use_avro_logical_types = True
    time_partitioning = bigquery.table.TimePartitioning()
    job_config.time_partitioning = time_partitioning
    uri = target_uri
    load_job = bigquery_client.load_table_from_uri(
        uri,
        dataset_ref.table(table_id),
        job_config=job_config
        )
    print('Starting job {}'.format(load_job.job_id))
    load_job.result()
    print('Job finished.')

dataset1 = 'dataset'
tabela1 = 'test'
bucket_name1 = 'gs://bucket/insert/' + new_filename

insert_bigquery_avro(bucket_name1, dataset1, tabela1)

if os.path.exists(new_file):
    os.remove(new_file)
    print("Delete file", new_file)
else:
    print("Can not delete the file as it doesn't exists")