Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/rest/5.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python beam.error.RuntimeValueProviderError:RuntimeValueProvider(选项:output\u path,类型:str,默认值:None)。未从运行时上下文调用get()_Python_Google Cloud Dataflow_Apache Beam - Fatal编程技术网

Python beam.error.RuntimeValueProviderError:RuntimeValueProvider(选项:output\u path,类型:str,默认值:None)。未从运行时上下文调用get()

Python beam.error.RuntimeValueProviderError:RuntimeValueProvider(选项:output\u path,类型:str,默认值:None)。未从运行时上下文调用get(),python,google-cloud-dataflow,apache-beam,Python,Google Cloud Dataflow,Apache Beam,尝试在dataflow pubsub to gcs作业中运行值提供程序时遇到错误 import argparse import logging import random from datetime import datetime import apache_beam as beam from apache_beam import DoFn, GroupByKey, io, ParDo, Pipeline, PTransform, WindowInto, WithKeys from apach

尝试在dataflow pubsub to gcs作业中运行值提供程序时遇到错误

import argparse
import logging
import random
from datetime import datetime

import apache_beam as beam
from apache_beam import DoFn, GroupByKey, io, ParDo, Pipeline, PTransform, WindowInto, WithKeys
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.transforms.window import FixedWindows

class CustomPipelineOptions(PipelineOptions):

    @classmethod
    def _add_argparse_args(cls, parser):
        parser.add_value_provider_argument(
            "--output_path",
            type=str,
            help="Path of the output GCS file including the prefix.",
        )

class WriteToGCS(DoFn):
    def __init__(self, output_path):
        self.output_path = output_path

    def process(self, key_value, window=DoFn.WindowParam):
        """Write messages in a batch to Google Cloud Storage."""

        ts_format = "%H:%M"
        window_start = window.start.to_utc_datetime().strftime(ts_format)
        window_end = window.end.to_utc_datetime().strftime(ts_format)
        shard_id, batch = key_value
        seq = self.output_path, window_start, window_end, str(shard_id)
        filename = "-".join(seq)

        with io.gcsio.GcsIO().open(filename=filename, mode="w") as f:
            for message_body in batch:
                f.write("{}\n".format(message_body).encode("utf-8"))

def run(input_topic, num_shards, window_size):


    # Set `save_main_session` to True so DoFns can access globally imported modules.
    pipeline_options = PipelineOptions(
        pipeline_args, streaming=True, save_main_session=True
    )

    custom_options = pipeline_options.view_as(CustomPipelineOptions)
    
    output_path = custom_options.output_path.get()
    
    with Pipeline(options=pipeline_options) as pipeline:
        (
            pipeline
            | "Read from Pub/Sub" >> io.ReadFromPubSub(topic=input_topic)
            | "Write to GCS" >> ParDo(WriteToGCS(custom_options.output_path))
        )

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    
    parser.add_argument(
            "--input_topic",
            help="The Cloud Pub/Sub topic to read from."
            '"projects/<PROJECT_ID>/topics/<TOPIC_ID>".',
        )
    parser.add_argument(
            "--num_shards",
            default=5,
            type=int,
            help="Number of shards to use when writing windowed elements to GCS.",
        )
    parser.add_argument(
            "--window_size",
            default=1,
            type=int,
            help="Output file's window size in minutes.",
        )
    known_args, pipeline_args = parser.parse_known_args()

    run(
        known_args.input_topic,
        known_args.num_shards,
        known_args.window_size
        )
import argparse
导入日志记录
随机输入
从日期时间导入日期时间
将apache_梁作为梁导入
从apache_beam导入DoFn、GroupByKey、io、ParDo、Pipeline、PTransform、WindowInto、withkey
从apache_beam.options.pipeline_options导入PipelineOptions
从apache_beam.transforms.window导入FixedWindows
类CustomPipelineOptions(PipelineOptions):
@类方法
def_add_argparse_args(cls,解析器):
parser.add_value_provider_参数(
“--输出路径”,
类型=str,
help=“输出GCS文件的路径,包括前缀。”,
)
类写入GCS(DoFn):
定义初始化(自,输出路径):
self.output\u path=输出路径
def进程(自身,键值,窗口=DoFn.WindowParam):
“”“将一批邮件写入Google云存储。”“”
ts_format=“%H:%M”
window\u start=window.start.to\u utc\u datetime().strftime(ts\U格式)
window\u end=window.end.to\u utc\u datetime().strftime(ts\U格式)
分片id,批处理=键值
seq=self.output\u path、window\u start、window\u end、str(shard\u id)
filename=“-”.join(seq)
将io.gcsio.gcsio()打开(filename=filename,mode=“w”)作为f:
对于批处理的消息体:
f、 写入(“{}\n”.format(消息体).encode(“utf-8”))
def运行(输入主题、数量碎片、窗口大小):
#将“save_main_session”设置为True,以便DOFN可以访问全局导入的模块。
管道选项=管道选项(
管道参数,流=True,保存主会话=True
)
自定义\选项=管道\选项。将\视为(CustomPipelineOptions)
output\u path=custom\u options.output\u path.get()
将管道(选项=管道选项)作为管道:
(
管道
|“从发布/订阅中读取”>>io.ReadFromPubSub(主题=输入\主题)
|“写入GCS”>>ParDo(写入GCS(自定义选项。输出路径))
)
如果uuuu name uuuuuu='\uuuuuuu main\uuuuuuu':
parser=argparse.ArgumentParser()
parser.add_参数(
“--输入主题”,
help=“要读取的云发布/子主题。”
““项目//主题/”,
)
parser.add_参数(
“--num_碎片”,
默认值=5,
类型=int,
help=“将窗口元素写入GCS时要使用的碎片数。”,
)
parser.add_参数(
“--窗口大小”,
默认值=1,
类型=int,
help=“输出文件的窗口大小(分钟)。”,
)
已知参数,管道参数=解析器。解析已知参数()
跑(
已知参数输入主题,
已知的_args.num_碎片,
已知参数窗口大小
)
错误跟踪:

回溯(最近一次呼叫最后一次):
文件“first.py”,第107行,大小已知
文件“first.py”,第68行,在runoutput\u path=custom\u options.output\u path.get()中
文件“/home/env/lib/python3.7/site packages/apache_beam/options/value_provider.py”,第125行,在get“%s”中。get()未从运行时上下文“%self”调用
apache_beam.error.RuntimeValueProviderError:RuntimeValueProvider(选项:output_path,类型:str,默认值:None)。未从运行时上下文调用get()

正如回溯所指出的,问题在于
输出路径=自定义选项。输出路径。get()

此时无法访问运行时值提供程序,因为这是在管道构造时。您只能在管道运行时访问它(即在
WriteToGCS
类中)


删除该行,并确保仅在管道运行时函数(例如,在DoFn内部)中调用get on
RuntimeValueProvider
s)。

错误是什么?您可以编辑您的帖子并添加完整的错误消息吗?回溯(最近一次调用):文件“first.py”,第107行,在已知的_args.window_大小文件“first.py”中,第68行,在运行输出路径=自定义选项。输出路径。get()文件“/home/env/lib/python3.7/site packages/apache_beam/options/value_provider.py”,第125行,在get“%s.get”中()未从运行时上下文“%self”调用apache_beam.error.RuntimeValueProviderError:RuntimeValueProvider(选项:output_path,类型:str,默认值:None)。get()未从运行时上下文调用尝试此操作时,我遇到不同的错误