Google Cloud Storage:如何使用CDAP创建从BigQuery到Google Cloud Storage的ETL?

Google cloud storage 如何使用CDAP创建从BigQuery到Google存储的ETL?,google-cloud-storage,google-cloud-data-fusion,cdap,google-bigquery,Google Cloud Storage,Google Cloud Data Fusion,Cdap,Google Bigquery,我正在Google云环境中设置CDAP,但在执行以下管道时遇到问题:在BigQuery上运行查询,并将结果保存在Google Storage上的CSV文件中 我的过程是: 使用Google Marketplace上的安装CDAP 构建以下管道: 凭证密钥具有所有者权限,我可以使用“预览”选项访问查询结果 管道结果: 档案: _成功(空) 第r部分-00000(查询结果) 没有生成csv文件,我也找不到可以在CDAP中为输出文件设置名称的位置。我是否错过了任何配置步骤 更新: 我们最终放弃了C

我正在Google云环境中设置CDAP,但在执行以下管道时遇到了问题:在BigQuery上运行查询,并将结果保存为Google Cloud Storage上的CSV文件。

我的过程是:

  • 通过Google Marketplace安装CDAP

  • 构建以下管道:

  • 凭证密钥具有所有者权限,我可以使用“预览”选项访问查询结果

    管道结果:

    生成的文件:

    • _SUCCESS(空文件)
    • part-r-00000(查询结果)
    没有生成CSV文件,我也找不到可以在CDAP中为输出文件设置名称的位置。我是否遗漏了某个配置步骤?

    更新:
    我们最终放弃了CDAP,我们正在使用Google数据流。

    在管道中配置GCS接收器(sink)时,有一个“格式”(format)字段,您目前将其设置为了JSON。将其改为CSV即可得到所需的输出格式。

    我猜你是想把整张表导出到存储中,而不是执行某个特定的查询,对吗?你在BigQuery日志中看到什么了吗? part-r-00000中的数据是JSON格式的吗? @AliAnwar 是的,确实如此。
    {
        "artifact": {
            "name": "cdap-data-pipeline",
            "version": "6.0.0",
            "scope": "SYSTEM"
        },
        "description": "Data Pipeline Application",
        "name": "cdap_dsc_test",
        "config": {
            "resources": {
                "memoryMB": 2048,
                "virtualCores": 1
            },
            "driverResources": {
                "memoryMB": 2048,
                "virtualCores": 1
            },
            "connections": [
                {
                    "from": "BigQuery",
                    "to": "Google Cloud Storage"
                }
            ],
            "comments": [],
            "postActions": [],
            "properties": {},
            "processTimingEnabled": true,
            "stageLoggingEnabled": true,
            "stages": [
                {
                    "name": "BigQuery",
                    "plugin": {
                        "name": "BigQueryTable",
                        "type": "batchsource",
                        "label": "BigQuery",
                        "artifact": {
                            "name": "google-cloud",
                            "version": "0.12.2",
                            "scope": "SYSTEM"
                        },
                        "properties": {
                            "project": "bi-data-science",
                            "serviceFilePath": "/home/ubuntu/bi-data-science-cdap-4cbf526de374.json",
                            "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"destination_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"destination_country\",\"type\":[\"string\",\"null\"]},{\"name\":\"timestamp\",\"type\":[\"double\",\"null\"]},{\"name\":\"desktop\",\"type\":[\"double\",\"null\"]},{\"name\":\"tablet\",\"type\":[\"double\",\"null\"]},{\"name\":\"mobile\",\"type\":[\"double\",\"null\"]}]}",
                            "referenceName": "test_tables",
                            "dataset": "google_trends",
                            "table": "devices"
                        }
                    },
                    "outputSchema": [
                        {
                            "name": "etlSchemaBody",
                            "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"destination_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"destination_country\",\"type\":[\"string\",\"null\"]},{\"name\":\"timestamp\",\"type\":[\"double\",\"null\"]},{\"name\":\"desktop\",\"type\":[\"double\",\"null\"]},{\"name\":\"tablet\",\"type\":[\"double\",\"null\"]},{\"name\":\"mobile\",\"type\":[\"double\",\"null\"]}]}"
                        }
                    ]
                },
                {
                    "name": "Google Cloud Storage",
                    "plugin": {
                        "name": "GCS",
                        "type": "batchsink",
                        "label": "Google Cloud Storage",
                        "artifact": {
                            "name": "google-cloud",
                            "version": "0.12.2",
                            "scope": "SYSTEM"
                        },
                        "properties": {
                            "project": "bi-data-science",
                            "suffix": "yyyy-MM-dd",
                            "format": "json",
                            "serviceFilePath": "/home/ubuntu/bi-data-science-cdap-4cbf526de374.json",
                            "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"destination_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"destination_country\",\"type\":[\"string\",\"null\"]},{\"name\":\"timestamp\",\"type\":[\"double\",\"null\"]},{\"name\":\"desktop\",\"type\":[\"double\",\"null\"]},{\"name\":\"tablet\",\"type\":[\"double\",\"null\"]},{\"name\":\"mobile\",\"type\":[\"double\",\"null\"]}]}",
                            "delimiter": ",",
                            "referenceName": "gcs_cdap",
                            "path": "gs://hurb_sandbox/cdap_experiments/"
                        }
                    },
                    "outputSchema": [
                        {
                            "name": "etlSchemaBody",
                            "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"destination_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"destination_country\",\"type\":[\"string\",\"null\"]},{\"name\":\"timestamp\",\"type\":[\"double\",\"null\"]},{\"name\":\"desktop\",\"type\":[\"double\",\"null\"]},{\"name\":\"tablet\",\"type\":[\"double\",\"null\"]},{\"name\":\"mobile\",\"type\":[\"double\",\"null\"]}]}"
                        }
                    ],
                    "inputSchema": [
                        {
                            "name": "BigQuery",
                            "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"destination_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"destination_country\",\"type\":[\"string\",\"null\"]},{\"name\":\"timestamp\",\"type\":[\"double\",\"null\"]},{\"name\":\"desktop\",\"type\":[\"double\",\"null\"]},{\"name\":\"tablet\",\"type\":[\"double\",\"null\"]},{\"name\":\"mobile\",\"type\":[\"double\",\"null\"]}]}"
                        }
                    ]
                }
            ],
            "schedule": "0 * * * *",
            "engine": "mapreduce",
            "numOfRecordsPreview": 100,
            "description": "Data Pipeline Application",
            "maxConcurrentRuns": 1
        }
    }