使用Python从Google云存储加载数据--未指定架构运行时错误

使用Python从Google云存储加载数据--未指定架构运行时错误,python,google-bigquery,google-cloud-storage,Python,Google Bigquery,Google Cloud Storage,我试图编写一个函数,将我在Google云存储上的JSON文件加载到BigQuery数据集中,但是,即使我显式地传递模式,它仍然会说“没有在作业或表上指定模式” 我已解决问题,在本例中,我忘记在开始作业之前调用table.create()。我已解决问题,在本例中,我忘记在开始作业之前调用table.create() import oauth2client import uuid import time from google.cloud import bigquery as bq # from o

我试图编写一个函数,将Google云存储(GCS)上的JSON文件加载到BigQuery数据集中。但是,即使我显式地传入了架构(schema),作业仍然报错:“没有在作业或表上指定架构”。


我已解决该问题:在本例中,我忘记了在开始作业(job.begin())之前调用 table.create()。

我已解决问题,在本例中,我忘记在开始作业之前调用table.create()

import oauth2client
import uuid
import time
from google.cloud import bigquery as bq
# from oauth2client.client import GoogleCredentials

# Configuration
# NOTE: the `---` values below are redacted placeholders, not valid Python;
# replace each one with a real string before running the script.
BILLING_PROJECT_ID = ---
DATASET_NAME = ---
TABLE_NAME = ---
BUCKET_NAME = ---
FILE = ---
# Cloud Storage URI of the file to load, built from the bucket and file names.
SOURCE = 'gs://{}/{}'.format(BUCKET_NAME, FILE)

# Schema fields for the destination table: three INTEGER columns.
SCHEMA = [
    bq.SchemaField('question_id', 'INTEGER'),
    bq.SchemaField('accepted_answer', 'INTEGER'),
    bq.SchemaField('answer_count', 'INTEGER')
]

# CREDENTIALS = GoogleCredentials.get_application_default()

# Module-level BigQuery client bound to the billing project.
client = bq.Client(project=BILLING_PROJECT_ID)


# Dataset
# Check if the dataset exists
def create_datasets(name):
    """Create the BigQuery dataset ``name`` unless it already exists.

    Uses the module-level ``client``. Prints a confirmation message only
    when the dataset was created by this call and is visible afterwards.

    Args:
        name: Name of the dataset to create.
    """
    dataset = client.dataset(name)
    # Explicit checks instead of ``assert``: assertions are stripped when
    # Python runs with -O, which would make this call create() on a dataset
    # that already exists.
    if dataset.exists():
        return
    dataset.create()
    if dataset.exists():
        print("Dataset {} created".format(name))


def load_data_from_gcs(dataset_name, table_name, source, schema):
    """Load a newline-delimited JSON file from Cloud Storage into BigQuery.

    Uses the module-level ``client``. Starts a load job, blocks until it
    finishes, then prints the number of rows loaded.

    Args:
        dataset_name: Name of the destination dataset.
        table_name: Name of the destination table inside that dataset.
        source: Source URI passed to the load job (a ``gs://`` URI).
        schema: List of schema fields assigned to the destination table.

    Raises:
        RuntimeError: Raised by ``wait_for_job`` when the job finishes
            with an error result.
    """
    dataset = client.dataset(dataset_name)
    table = dataset.table(table_name)
    table.schema = schema
    # The table has to be created (with its schema) before the job starts;
    # skipping this step is what produced the "no schema specified on job
    # or table" error. Guarded so an existing table is reused.
    if not table.exists():
        table.create()

    job_name = str(uuid.uuid4())
    job = client.load_table_from_storage(
        job_name, table, source)
    job.source_format = 'NEWLINE_DELIMITED_JSON'

    job.begin()
    wait_for_job(job)

    print('Loaded {} rows into {}:{}.'.format(
        job.output_rows, dataset_name, table_name))


def wait_for_job(job):
    """Block until ``job`` reports the state ``'DONE'``.

    The job is reloaded once up front and then once per second until its
    state is ``'DONE'``.

    Args:
        job: Object exposing ``reload()``, ``state``, ``error_result`` and
            ``errors``.

    Raises:
        RuntimeError: If the finished job has a truthy ``error_result``;
            the exception carries ``job.errors``.
    """
    job.reload()
    while job.state != 'DONE':
        time.sleep(1)
        job.reload()

    # The job is finished here; surface a failure to the caller.
    if job.error_result:
        raise RuntimeError(job.errors)


# Run the load with the values from the configuration section.
load_data_from_gcs(DATASET_NAME, TABLE_NAME, SOURCE, SCHEMA)