Google bigquery BigQuery:Dataset"；“未找到”；使用restapi加载表_Google Bigquery

Google bigquery BigQuery:Dataset"；“未找到”；使用restapi加载表

google-bigquery

Google bigquery BigQuery:Dataset"；“未找到”；使用restapi加载表,google-bigquery,Google Bigquery,我正在尝试使用python脚本将CSV文件加载到BigQuery，该脚本以python示例代码为模型，如下所示：但是，当我尝试用REST API加载表时，遇到了以下错误： {'status': '200', 'content-length': '1492', 'expires': 'Fri, 01 Jan 1990 00:00:00 GMT', 'server': 'HTTP Upload Server Built on Jun 14 2012 02:12:09 (1339665129)',

我正在尝试使用python脚本将CSV文件加载到BigQuery，该脚本以python示例代码为模型，如下所示：

但是，当我尝试用REST API加载表时，遇到了以下错误：

{'status': '200', 'content-length': '1492', 'expires': 'Fri, 01 Jan 1990 00:00:00 GMT', 'server': 'HTTP Upload Server Built on Jun 14 2012 02:12:09 (1339665129)', 'etag': '"tcivyOj9QvKAbuEJ5MEMf9we85w/-mxYhUDjvvydxcebR8fXI6l_5RQ"', 'pragma': 'no-cache', 'cache-control': 'no-cache, no-store, must-revalidate', 'date': 'Fri, 06 Jul 2012 22:30:55 GMT', 'content-type': 'application/json'}

{
 "kind": "bigquery#job",
 "etag": "\"tcivyOj9QvKAbuEJ5MEMf9we85w/-mxYhUDjvvydxcebR8fXI6l_5RQ\"",
 "id": "firespotter.com:firespotter:job_d6b99265278b4c0da9c3033acf39d6b2",
 "selfLink": "https://www.googleapis.com/bigquery/v2/projects/firespotter.com:firespotter/jobs/job_d6b99265278b4c0da9c3033acf39d6b2",
 "jobReference": {
  "projectId": "firespotter.com:firespotter",
  "jobId": "job_d6b99265278b4c0da9c3033acf39d6b2"
 },
 "configuration": {
  "load": {
   "schema": {
    "fields": [
     {
      "name": "date",
      "type": "STRING"
     },
     {
      "name": "time",
      "type": "STRING"
     },
     {
      "name": "call_uuid",
      "type": "STRING"
     },
     {
      "name": "log_level",
      "type": "STRING"
     },
     {
      "name": "file_line",
      "type": "STRING"
     },
     {
      "name": "message",
      "type": "STRING"
     }
    ]
   },
   "destinationTable": {
    "projectId": "385479794093",
    "datasetId": "telephony_logs",
    "tableId": "table_name"
   },
   "createDisposition": "CREATE_IF_NEEDED",
   "writeDisposition": "WRITE_TRUNCATE",
   "encoding": "UTF-8"
  }
 },
 "status": {
  "state": "DONE",
  "errorResult": {
   "reason": "notFound",
   "message": "Not Found: Dataset 385479794093:telephony_logs"
  },
  "errors": [
   {
    "reason": "notFound",
    "message": "Not Found: Dataset 385479794093:telephony_logs"
   }
  ]
 }
}

错误“385479794093”中列出的projectId不是我传入的projectId，而是“projectnumber”。projectId应该是“firespotter.com:firespotter”：

当我在三个不同的地方传递正确的值时，为什么restapi坚持提供自己不正确的projectId？是否还有其他地方需要我传入或设置项目ID

以下是相关的代码片段供参考：

PROJECT = 'firespotter.com:firespotter'
DATASET = 'telephony_logs'


FLOW = OAuth2WebServerFlow(
    client_id='385479794093.apps.googleusercontent.com',
    client_secret='<a_secret_here>',
    scope='https://www.googleapis.com/auth/bigquery',
    user_agent='firespotter-upload-script/1.0')

def loadTable(http, projectId, datasetId, tableId, file_path, replace=False):
  url = "https://www.googleapis.com/upload/bigquery/v2/projects/" + projectId + "/jobs"
  # Create the body of the request, separated by a boundary of xxx
  mime_data = ('--xxx\n' +
            'Content-Type: application/json; charset=UTF-8\n' + '\n' +
            '{\n' +
            '   "projectId": "' + projectId + '",\n' +
            '   "configuration": {\n' +
            '     "load": {\n' +
            '       "schema": {\n' +
            '         "fields": [\n' +
            '          {"name":"date", "type":"STRING"},\n' +
            '          {"name":"time", "type":"STRING"},\n' +
            '          {"name":"call_uuid", "type":"STRING"},\n' +
            '          {"name":"log_level", "type":"STRING"},\n' +
            '          {"name":"file_line", "type":"STRING"},\n' +
            '          {"name":"message", "type":"STRING"}\n' +
            '        ]\n' +
            '      },\n' +
            '      "destinationTable": {\n' +
            '        "projectId": "' + projectId + '",\n' +
            '        "datasetId": "' + datasetId + '",\n' +
            '        "tableId": "' + tableId + '"\n' +
            '      },\n' +
            '     "createDisposition": "CREATE_IF_NEEDED",\n' +
            '     "writeDisposition": "' + ('WRITE_TRUNCATE' if replace else 'WRITE_APPEND') + '",\n' +
            '     "encoding": "UTF-8"\n' +
            '    }\n' +
            '  }\n' +
            '}\n' +
            '--xxx\n' +
            'Content-Type: application/octet-stream\n' +
            '\n')
  # Append data from the specified file to the request body
  f = open(file_path, 'r')
  header_line = f.readline()  # skip header line
  mime_data += f.read()

  # Signify the end of the body
  mime_data += ('--xxx--\n')

  headers = {'Content-Type': 'multipart/related; boundary=xxx'}
  resp, content = http.request(url, method="POST", body=mime_data, headers=headers)
  print str(resp) + "\n"
  print content

# --- Main ----------------------------------------------
def main(argv):

  csv_path = args[0]

  # If the credentials don't exist or are invalid, run the native client
  # auth flow. The Storage object will ensure that if successful the good
  # credentials will get written back to a file.
  storage = Storage('bigquery2_credentials.dat') # Choose a file name to store the credentials.
  credentials = storage.get()
  if credentials is None or credentials.invalid:
    credentials = run(FLOW, storage)

  # Create an httplib2.Http object to handle our HTTP requests and authorize it
  # with our good credentials.
  http = httplib2.Http()
  http = credentials.authorize(http)

  loadTable(http, PROJECT, DATASET, 'table_name', csv_path, replace=True)

if __name__ == '__main__':
  main(sys.argv)

PROJECT='firespotter.com:firespotter'
数据集='telephony_logs'
FLOW=OAuth2WebServerFlow(
client_id='385479794093.apps.googleusercontent.com'，
客户_机密=“”，
范围=https://www.googleapis.com/auth/bigquery',
用户（agent='firespotter-upload-script/1.0'）
def loadTable（http、projectId、datasetId、tableId、file_path、replace=False）：
url=”https://www.googleapis.com/upload/bigquery/v2/projects/“+projectId+”/jobs”
#创建请求主体，以xxx为边界分隔
mime_数据=（'-xxx\n'+
'内容类型：application/json；charset=UTF-8\n'+'\n'+
“{\n”+
““projectId”：“+projectId+”，\n”+
““配置”：{\n”+
““加载”：{\n”+
““架构”：{\n”+
““字段”：[\n”+
“{”名称“：”日期“，”类型“：”字符串“}\n”+
“{”name:“time”，“type:“STRING”}\n”+
“{”name:“call_uuid”，“type:“STRING”}\n”+
{“名称”：“日志级别”，“类型”：“字符串”}\n+
“{”名称“：”文件行“，”类型“：”字符串“}，\n”+
“{”名称“：”消息“，”类型“：”字符串“}\n”+
“]\n”+
“}，\n”+
““destinationTable”：{\n”+
““projectId”：“+projectId+”，\n”+
““datasetId”：“+datasetId+”，\n”+
““tableId”：“'+tableId+”\n”+
“}，\n”+
““createDisposition”：“如果需要，则创建”，\n”+
““writeDisposition”：“+”（“WRITE_TRUNCATE”if replace else“WRITE_APPEND”）+”，\n”+
““编码”：“UTF-8”\n”+
“}\n”+
“}\n”+
“}\n”+
“--xxx\n”+
'内容类型：应用程序/八位字节流\n'+
“\n”）
#将指定文件中的数据追加到请求正文
f=打开（文件路径“r”）
header_line=f.readline（）#跳过页眉行
mime_data+=f.read（）
#表示身体的结束
mime_data+=（'--xxx--\n'）
headers={'Content-Type'：'multipart/related；boundary=xxx'}
resp，content=http.request（url，method=“POST”，body=mime\u data，headers=headers）
打印str（resp）+“\n”
印刷内容
#---梅因----------------------------------------------
def总管（argv）：
csv_path=args[0]
#如果凭据不存在或无效，请运行本机客户端
#验证流。存储对象将确保，如果成功，则
#凭据将被写回文件。
storage=storage（'bigquery2_credentials.dat'）#选择一个文件名来存储凭据。
凭据=存储。获取（）
如果凭据为无或凭据无效：
凭据=运行（流、存储）
#创建一个httplib2.Http对象来处理我们的Http请求并对其进行授权
#以我们良好的信誉。
http=httplib2.http（）
http=凭据。授权（http）
loadTable（http、项目、数据集、“表名”、csv路径，replace=True）
如果uuuu name uuuuuu='\uuuuuuu main\uuuuuuu'：
主（系统argv）

这里有几个问题：

为什么我的加载作业失败？只是想检查一下，这是您发送的全部请求吗？如果是，则看起来没有要加载的数据，即
```
sourceUris
```
为空。如果是这样，那就是问题所在，我们显然返回了世界上最糟糕的错误消息
为什么使用数字项目ID？BigQuery交替使用项目名称和关联的数字ID，因此您看到的是，我们倾向于将项目名称转换为ID。只是确认一下，如果您访问并查找您的项目，您是否在url中看到相同的数字ID
为什么要在多个位置指定项目ID？首先，您似乎将项目ID指定为作业中的顶级属性；这应该没有必要。（我怀疑它只是覆盖了您在作业引用本身中指定的任何项目ID。）这就留下了两个位置——一个作为作业引用的一部分，另一个作为表引用的一部分。这实际上意味着两件不同的事情——作业中的一件指定要将作业插入哪个项目，即谁为作业付费；表中的一件指定结果表所在的项目，即谁拥有结果数据。一般来说，它们是相同的，但API允许它们是不同的。（例如，如果您构建了一个需要将数据插入到最终由客户拥有的表中的服务，这可能会很有用。）

您最近是否将项目id设置为firespotter.com:firespotter？如果数据集是在项目命名之前创建的，则旧项目id和新项目id之间将不匹配。有一个自动系统可以更新项目ID，但是它可能还没有运行或者有问题（我现在正在度假，所以无法检查）。希望，如果你很快再试一次，它能正常工作。如果没有，请告诉我们。

我正在将csv文件附加到MIME数据。我能够通过浏览器工具手动上传相同的CSV文件，在返回的结果中，数据集被列为“fir”

PROJECT = 'firespotter.com:firespotter'
DATASET = 'telephony_logs'


FLOW = OAuth2WebServerFlow(
    client_id='385479794093.apps.googleusercontent.com',
    client_secret='<a_secret_here>',
    scope='https://www.googleapis.com/auth/bigquery',
    user_agent='firespotter-upload-script/1.0')

def loadTable(http, projectId, datasetId, tableId, file_path, replace=False):
  url = "https://www.googleapis.com/upload/bigquery/v2/projects/" + projectId + "/jobs"
  # Create the body of the request, separated by a boundary of xxx
  mime_data = ('--xxx\n' +
            'Content-Type: application/json; charset=UTF-8\n' + '\n' +
            '{\n' +
            '   "projectId": "' + projectId + '",\n' +
            '   "configuration": {\n' +
            '     "load": {\n' +
            '       "schema": {\n' +
            '         "fields": [\n' +
            '          {"name":"date", "type":"STRING"},\n' +
            '          {"name":"time", "type":"STRING"},\n' +
            '          {"name":"call_uuid", "type":"STRING"},\n' +
            '          {"name":"log_level", "type":"STRING"},\n' +
            '          {"name":"file_line", "type":"STRING"},\n' +
            '          {"name":"message", "type":"STRING"}\n' +
            '        ]\n' +
            '      },\n' +
            '      "destinationTable": {\n' +
            '        "projectId": "' + projectId + '",\n' +
            '        "datasetId": "' + datasetId + '",\n' +
            '        "tableId": "' + tableId + '"\n' +
            '      },\n' +
            '     "createDisposition": "CREATE_IF_NEEDED",\n' +
            '     "writeDisposition": "' + ('WRITE_TRUNCATE' if replace else 'WRITE_APPEND') + '",\n' +
            '     "encoding": "UTF-8"\n' +
            '    }\n' +
            '  }\n' +
            '}\n' +
            '--xxx\n' +
            'Content-Type: application/octet-stream\n' +
            '\n')
  # Append data from the specified file to the request body
  f = open(file_path, 'r')
  header_line = f.readline()  # skip header line
  mime_data += f.read()

  # Signify the end of the body
  mime_data += ('--xxx--\n')

  headers = {'Content-Type': 'multipart/related; boundary=xxx'}
  resp, content = http.request(url, method="POST", body=mime_data, headers=headers)
  print str(resp) + "\n"
  print content

# --- Main ----------------------------------------------
def main(argv):

  csv_path = args[0]

  # If the credentials don't exist or are invalid, run the native client
  # auth flow. The Storage object will ensure that if successful the good
  # credentials will get written back to a file.
  storage = Storage('bigquery2_credentials.dat') # Choose a file name to store the credentials.
  credentials = storage.get()
  if credentials is None or credentials.invalid:
    credentials = run(FLOW, storage)

  # Create an httplib2.Http object to handle our HTTP requests and authorize it
  # with our good credentials.
  http = httplib2.Http()
  http = credentials.authorize(http)

  loadTable(http, PROJECT, DATASET, 'table_name', csv_path, replace=True)

if __name__ == '__main__':
  main(sys.argv)