Google BigQuery：使用 REST API 加载表时出现 Dataset“未找到”（Not Found）错误
我正在尝试使用python脚本将CSV文件加载到BigQuery,该脚本以python示例代码为模型,如下所示: 但是,当我尝试用REST API加载表时,遇到了以下错误:Google bigquery BigQuery:Dataset";“未找到”;使用restapi加载表,google-bigquery,Google Bigquery,我正在尝试使用python脚本将CSV文件加载到BigQuery,该脚本以python示例代码为模型,如下所示: 但是,当我尝试用REST API加载表时,遇到了以下错误: {'status': '200', 'content-length': '1492', 'expires': 'Fri, 01 Jan 1990 00:00:00 GMT', 'server': 'HTTP Upload Server Built on Jun 14 2012 02:12:09 (1339665129)',
{'status': '200', 'content-length': '1492', 'expires': 'Fri, 01 Jan 1990 00:00:00 GMT', 'server': 'HTTP Upload Server Built on Jun 14 2012 02:12:09 (1339665129)', 'etag': '"tcivyOj9QvKAbuEJ5MEMf9we85w/-mxYhUDjvvydxcebR8fXI6l_5RQ"', 'pragma': 'no-cache', 'cache-control': 'no-cache, no-store, must-revalidate', 'date': 'Fri, 06 Jul 2012 22:30:55 GMT', 'content-type': 'application/json'}
{
"kind": "bigquery#job",
"etag": "\"tcivyOj9QvKAbuEJ5MEMf9we85w/-mxYhUDjvvydxcebR8fXI6l_5RQ\"",
"id": "firespotter.com:firespotter:job_d6b99265278b4c0da9c3033acf39d6b2",
"selfLink": "https://www.googleapis.com/bigquery/v2/projects/firespotter.com:firespotter/jobs/job_d6b99265278b4c0da9c3033acf39d6b2",
"jobReference": {
"projectId": "firespotter.com:firespotter",
"jobId": "job_d6b99265278b4c0da9c3033acf39d6b2"
},
"configuration": {
"load": {
"schema": {
"fields": [
{
"name": "date",
"type": "STRING"
},
{
"name": "time",
"type": "STRING"
},
{
"name": "call_uuid",
"type": "STRING"
},
{
"name": "log_level",
"type": "STRING"
},
{
"name": "file_line",
"type": "STRING"
},
{
"name": "message",
"type": "STRING"
}
]
},
"destinationTable": {
"projectId": "385479794093",
"datasetId": "telephony_logs",
"tableId": "table_name"
},
"createDisposition": "CREATE_IF_NEEDED",
"writeDisposition": "WRITE_TRUNCATE",
"encoding": "UTF-8"
}
},
"status": {
"state": "DONE",
"errorResult": {
"reason": "notFound",
"message": "Not Found: Dataset 385479794093:telephony_logs"
},
"errors": [
{
"reason": "notFound",
"message": "Not Found: Dataset 385479794093:telephony_logs"
}
]
}
}
错误中列出的 projectId“385479794093”并不是我传入的 projectId，而是项目编号（project number）。projectId 本应是“firespotter.com:firespotter”：
当我已在三个不同的地方传入了正确的值时，为什么 REST API 仍坚持使用它自己推断出的、不正确的 projectId？是否还有其他地方需要我传入或设置项目 ID？
以下是相关的代码片段供参考:
# Target project and dataset for the upload script.
# NOTE(review): this is a "domain-scoped" project id ("domain.com:project");
# BigQuery error messages may echo back the numeric project *number* instead.
PROJECT = 'firespotter.com:firespotter'
DATASET = 'telephony_logs'

# OAuth2 installed-application flow used to obtain BigQuery credentials
# (OAuth2WebServerFlow comes from the oauth2client library, imported elsewhere).
FLOW = OAuth2WebServerFlow(
    client_id='385479794093.apps.googleusercontent.com',
    client_secret='<a_secret_here>',  # placeholder -- never commit a real secret
    scope='https://www.googleapis.com/auth/bigquery',
    user_agent='firespotter-upload-script/1.0')
def loadTable(http, projectId, datasetId, tableId, file_path, replace=False):
    """Upload a local CSV file into a BigQuery table via a multipart load job.

    Args:
        http: an authorized httplib2.Http-like object (must expose .request).
        projectId: project that owns the job and the destination table.
        datasetId: dataset containing the destination table.
        tableId: name of the destination table.
        file_path: path of the local CSV file; its first (header) line is skipped.
        replace: if True, truncate the table before loading; otherwise append.
    """
    import json  # local import: this file's top-level import block is not visible here

    url = ('https://www.googleapis.com/upload/bigquery/v2/projects/'
           + projectId + '/jobs')

    # Build the job resource with json.dumps rather than hand-concatenated
    # strings: guarantees well-formed JSON and proper escaping of the ids.
    # BUG FIX: the original also sent a job-level "projectId" property, which
    # is not part of the job resource (the project is identified by the URL);
    # it is dropped here.
    job = {
        'configuration': {
            'load': {
                'schema': {
                    'fields': [
                        {'name': 'date', 'type': 'STRING'},
                        {'name': 'time', 'type': 'STRING'},
                        {'name': 'call_uuid', 'type': 'STRING'},
                        {'name': 'log_level', 'type': 'STRING'},
                        {'name': 'file_line', 'type': 'STRING'},
                        {'name': 'message', 'type': 'STRING'},
                    ],
                },
                'destinationTable': {
                    'projectId': projectId,
                    'datasetId': datasetId,
                    'tableId': tableId,
                },
                'createDisposition': 'CREATE_IF_NEEDED',
                'writeDisposition': 'WRITE_TRUNCATE' if replace else 'WRITE_APPEND',
                'encoding': 'UTF-8',
            },
        },
    }

    # multipart/related body, boundary "xxx": part 1 is the job JSON,
    # part 2 is the raw CSV payload.
    mime_data = ('--xxx\n'
                 'Content-Type: application/json; charset=UTF-8\n'
                 '\n'
                 + json.dumps(job, indent=1) + '\n'
                 '--xxx\n'
                 'Content-Type: application/octet-stream\n'
                 '\n')

    # BUG FIX: use 'with' so the file is closed even if reading fails
    # (the original leaked the file handle).
    with open(file_path, 'r') as f:
        f.readline()  # skip the CSV header line
        mime_data += f.read()

    # Closing boundary signifies the end of the body.
    mime_data += '--xxx--\n'

    headers = {'Content-Type': 'multipart/related; boundary=xxx'}
    resp, content = http.request(url, method='POST', body=mime_data,
                                 headers=headers)
    # Parenthesized single-argument print works under both Python 2 and 3
    # (the original Python-2-only print statements were a syntax error on 3).
    print(str(resp) + '\n')
    print(content)
# --- Main ----------------------------------------------
def main(argv):
    """Authorize against BigQuery and load the CSV named on the command line."""
    # BUG FIX: the original read 'args[0]', but 'args' is undefined (NameError);
    # the CSV path arrives as the first real command-line argument, argv[1].
    csv_path = argv[1]

    # If the credentials don't exist or are invalid, run the native client
    # auth flow. The Storage object will ensure that if successful the good
    # credentials will get written back to a file.
    storage = Storage('bigquery2_credentials.dat')  # local credential cache
    credentials = storage.get()
    if credentials is None or credentials.invalid:
        credentials = run(FLOW, storage)

    # Create an httplib2.Http object to handle our HTTP requests and
    # authorize it with our good credentials.
    http = httplib2.Http()
    http = credentials.authorize(http)

    loadTable(http, PROJECT, DATASET, 'table_name', csv_path, replace=True)

if __name__ == '__main__':
    main(sys.argv)
（以下为上文代码清单的修复版：此处原有的机器翻译已破坏了代码本身，现按原始英文代码恢复。）

PROJECT = 'firespotter.com:firespotter'
DATASET = 'telephony_logs'

FLOW = OAuth2WebServerFlow(
    client_id='385479794093.apps.googleusercontent.com',
    client_secret='<a_secret_here>',
    scope='https://www.googleapis.com/auth/bigquery',
    user_agent='firespotter-upload-script/1.0')

def loadTable(http, projectId, datasetId, tableId, file_path, replace=False):
    url = "https://www.googleapis.com/upload/bigquery/v2/projects/" + projectId + "/jobs"
    # Create the body of the request, separated by a boundary of xxx
    mime_data = ('--xxx\n' +
                 'Content-Type: application/json; charset=UTF-8\n' + '\n' +
                 '{\n' +
                 '  "projectId": "' + projectId + '",\n' +
                 '  "configuration": {\n' +
                 '    "load": {\n' +
                 '      "schema": {\n' +
                 '        "fields": [\n' +
                 '          {"name":"date", "type":"STRING"},\n' +
                 '          {"name":"time", "type":"STRING"},\n' +
                 '          {"name":"call_uuid", "type":"STRING"},\n' +
                 '          {"name":"log_level", "type":"STRING"},\n' +
                 '          {"name":"file_line", "type":"STRING"},\n' +
                 '          {"name":"message", "type":"STRING"}\n' +
                 '        ]\n' +
                 '      },\n' +
                 '      "destinationTable": {\n' +
                 '        "projectId": "' + projectId + '",\n' +
                 '        "datasetId": "' + datasetId + '",\n' +
                 '        "tableId": "' + tableId + '"\n' +
                 '      },\n' +
                 '      "createDisposition": "CREATE_IF_NEEDED",\n' +
                 '      "writeDisposition": "' + ('WRITE_TRUNCATE' if replace else 'WRITE_APPEND') + '",\n' +
                 '      "encoding": "UTF-8"\n' +
                 '    }\n' +
                 '  }\n' +
                 '}\n' +
                 '--xxx\n' +
                 'Content-Type: application/octet-stream\n' +
                 '\n')
    # Append data from the specified file to the request body
    f = open(file_path, 'r')
    header_line = f.readline()  # skip header line
    mime_data += f.read()
    # Signify the end of the body
    mime_data += ('--xxx--\n')
    headers = {'Content-Type': 'multipart/related; boundary=xxx'}
    resp, content = http.request(url, method="POST", body=mime_data, headers=headers)
    print str(resp) + "\n"
    print content

# --- Main ----------------------------------------------
def main(argv):
    csv_path = args[0]
    # If the credentials don't exist or are invalid, run the native client
    # auth flow. The Storage object will ensure that if successful the good
    # credentials will get written back to a file.
    storage = Storage('bigquery2_credentials.dat')  # Choose a file name to store the credentials.
    credentials = storage.get()
    if credentials is None or credentials.invalid:
        credentials = run(FLOW, storage)
    # Create an httplib2.Http object to handle our HTTP requests and authorize it
    # with our good credentials.
    http = httplib2.Http()
    http = credentials.authorize(http)
    loadTable(http, PROJECT, DATASET, 'table_name', csv_path, replace=True)

if __name__ == '__main__':
    main(sys.argv)
这里有几个问题:
- 为什么我的加载作业失败？只是想确认一下：这是您发送的全部请求吗？如果是，那么看起来并没有要加载的数据，即 sourceUris 为空。如果是这样，那就是问题所在——而我们显然返回了世界上最糟糕的错误消息。
- 为什么出现数字项目 ID？BigQuery 会交替使用项目名称及其关联的数字 ID，您看到的正是我们倾向于把项目名称转换成数字 ID 的结果。只是确认一下：如果您访问 Google API 控制台并查找您的项目，您是否在 URL 中看到相同的数字 ID？
- 为什么要在多个位置指定项目ID?首先,您似乎将项目ID指定为作业中的顶级属性;这应该没有必要。(我怀疑它只是覆盖了您在作业引用本身中指定的任何项目ID。)这就留下了两个位置——一个作为作业引用的一部分,另一个作为表引用的一部分。这实际上意味着两件不同的事情——作业中的一件指定要将作业插入哪个项目,即谁为作业付费;表中的一件指定结果表所在的项目,即谁拥有结果数据。一般来说,它们是相同的,但API允许它们是不同的。(例如,如果您构建了一个需要将数据插入到最终由客户拥有的表中的服务,这可能会很有用。)
# Target project and dataset for the upload script.
# NOTE(review): this is a "domain-scoped" project id ("domain.com:project");
# BigQuery error messages may echo back the numeric project *number* instead.
PROJECT = 'firespotter.com:firespotter'
DATASET = 'telephony_logs'

# OAuth2 installed-application flow used to obtain BigQuery credentials
# (OAuth2WebServerFlow comes from the oauth2client library, imported elsewhere).
FLOW = OAuth2WebServerFlow(
    client_id='385479794093.apps.googleusercontent.com',
    client_secret='<a_secret_here>',  # placeholder -- never commit a real secret
    scope='https://www.googleapis.com/auth/bigquery',
    user_agent='firespotter-upload-script/1.0')
def loadTable(http, projectId, datasetId, tableId, file_path, replace=False):
    """Upload a local CSV file into a BigQuery table via a multipart load job.

    Args:
        http: an authorized httplib2.Http-like object (must expose .request).
        projectId: project that owns the job and the destination table.
        datasetId: dataset containing the destination table.
        tableId: name of the destination table.
        file_path: path of the local CSV file; its first (header) line is skipped.
        replace: if True, truncate the table before loading; otherwise append.
    """
    import json  # local import: this file's top-level import block is not visible here

    url = ('https://www.googleapis.com/upload/bigquery/v2/projects/'
           + projectId + '/jobs')

    # Build the job resource with json.dumps rather than hand-concatenated
    # strings: guarantees well-formed JSON and proper escaping of the ids.
    # BUG FIX: the original also sent a job-level "projectId" property, which
    # is not part of the job resource (the project is identified by the URL);
    # it is dropped here.
    job = {
        'configuration': {
            'load': {
                'schema': {
                    'fields': [
                        {'name': 'date', 'type': 'STRING'},
                        {'name': 'time', 'type': 'STRING'},
                        {'name': 'call_uuid', 'type': 'STRING'},
                        {'name': 'log_level', 'type': 'STRING'},
                        {'name': 'file_line', 'type': 'STRING'},
                        {'name': 'message', 'type': 'STRING'},
                    ],
                },
                'destinationTable': {
                    'projectId': projectId,
                    'datasetId': datasetId,
                    'tableId': tableId,
                },
                'createDisposition': 'CREATE_IF_NEEDED',
                'writeDisposition': 'WRITE_TRUNCATE' if replace else 'WRITE_APPEND',
                'encoding': 'UTF-8',
            },
        },
    }

    # multipart/related body, boundary "xxx": part 1 is the job JSON,
    # part 2 is the raw CSV payload.
    mime_data = ('--xxx\n'
                 'Content-Type: application/json; charset=UTF-8\n'
                 '\n'
                 + json.dumps(job, indent=1) + '\n'
                 '--xxx\n'
                 'Content-Type: application/octet-stream\n'
                 '\n')

    # BUG FIX: use 'with' so the file is closed even if reading fails
    # (the original leaked the file handle).
    with open(file_path, 'r') as f:
        f.readline()  # skip the CSV header line
        mime_data += f.read()

    # Closing boundary signifies the end of the body.
    mime_data += '--xxx--\n'

    headers = {'Content-Type': 'multipart/related; boundary=xxx'}
    resp, content = http.request(url, method='POST', body=mime_data,
                                 headers=headers)
    # Parenthesized single-argument print works under both Python 2 and 3
    # (the original Python-2-only print statements were a syntax error on 3).
    print(str(resp) + '\n')
    print(content)
# --- Main ----------------------------------------------
def main(argv):
    """Authorize against BigQuery and load the CSV named on the command line."""
    # BUG FIX: the original read 'args[0]', but 'args' is undefined (NameError);
    # the CSV path arrives as the first real command-line argument, argv[1].
    csv_path = argv[1]

    # If the credentials don't exist or are invalid, run the native client
    # auth flow. The Storage object will ensure that if successful the good
    # credentials will get written back to a file.
    storage = Storage('bigquery2_credentials.dat')  # local credential cache
    credentials = storage.get()
    if credentials is None or credentials.invalid:
        credentials = run(FLOW, storage)

    # Create an httplib2.Http object to handle our HTTP requests and
    # authorize it with our good credentials.
    http = httplib2.Http()
    http = credentials.authorize(http)

    loadTable(http, PROJECT, DATASET, 'table_name', csv_path, replace=True)

if __name__ == '__main__':
    main(sys.argv)