Google Cloud Dataflow SpannerIO java.lang.IllegalStateException:Sorter should be null here

Google Cloud Dataflow SpannerIO java.lang.IllegalStateException:Sorter should be null here,google-cloud-dataflow,apache-beam,google-cloud-spanner,Google Cloud Dataflow,Apache Beam,Google Cloud Spanner,我正试图使用以下依赖从 Dataflow 作业写入 Cloud Spanner <dependency> <groupId>org.apache.beam</groupId> <artifactId>beam-sdks-java-io-google-cloud-platform</artifactId> <version>2.18.0</version> </dependency> 然而,它抛出 java.lang.IllegalStateException

我正试图使用以下依赖,从 Dataflow 作业写入 Cloud Spanner:

<dependency>
  <groupId>org.apache.beam</groupId>
  <artifactId>beam-sdks-java-io-google-cloud-platform</artifactId>
  <version>2.18.0</version>
</dependency>
然而,它抛出

java.lang.IllegalStateException: Sorter should be null here
        at org.apache.beam.sdk.io.gcp.spanner.SpannerIO$GatherBundleAndSortFn.startBundle (SpannerIO.java:1080)
出现这种异常的原因是什么?

以下管道会产生该异常。我用 20 个 worker 测试了它,但看起来它与数据负载无关。

import com.google.cloud.spanner.Mutation;

import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage;
import org.apache.beam.sdk.io.gcp.spanner.SpannerIO;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.windowing.AfterProcessingTime;
import org.apache.beam.sdk.transforms.windowing.GlobalWindows;
import org.apache.beam.sdk.transforms.windowing.Repeatedly;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.joda.time.Duration;

import java.util.UUID;

public final class TestPipeline {

    private static final Duration WINDOW_DURATION = Duration.standardSeconds(1);
    private static final String DATABASE_ID = "test";
    private static final String INSTANCE_ID = "test-spanner";
    private static final String TEST_TABLE = "test";

    public static void main(String[] args) {
        TestPipelineOptions options = PipelineOptionsFactory
                .fromArgs(args)
                .withValidation()
                .as(TestPipelineOptions.class);

        Pipeline pipeline = Pipeline.create(options);

        pipeline
                .apply("Read pubsub", PubsubIO.readMessagesWithAttributes()
                        .fromSubscription(options.getInputSubscription()))
                .apply("Parse message", ParDo.of(new ProcessMessage()))
                .apply("Windowing", Window.<Mutation>into(new GlobalWindows())
                        .triggering(Repeatedly.forever(
                                AfterProcessingTime.pastFirstElementInPane()
                                        .plusDelayOf(WINDOW_DURATION)))
                        .withAllowedLateness(Duration.ZERO)
                        .discardingFiredPanes())
                .apply("Write mutations", SpannerIO.write()
                        .withInstanceId(INSTANCE_ID)
                        .withDatabaseId(DATABASE_ID)
                );

        pipeline.run();
    }

    private static class ProcessMessage extends DoFn<PubsubMessage, Mutation> {

        @ProcessElement
        public void processElement(@Element final PubsubMessage message,
                                   final OutputReceiver<Mutation> out) {
            out.output(Mutation.newInsertOrUpdateBuilder(TEST_TABLE)
                    .set("id").to(UUID.randomUUID().toString())
                    .set("string").to("test")
                    .set("count").to(Long.MAX_VALUE)
                    .build()
            );
        }
    }

    interface TestPipelineOptions extends DataflowPipelineOptions {

        void setInputSubscription(String inputSubscription);

        @Description("Google Pubsub subscription id.")
        String getInputSubscription();
    }

}
import com.google.cloud.panner.Mutation;
导入org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
导入org.apache.beam.sdk.Pipeline;
导入org.apache.beam.sdk.io.gcp.pubsub.PubsubIO;
导入org.apache.beam.sdk.io.gcp.pubsub.pubsub子消息;
导入org.apache.beam.sdk.io.gcp.span.span;
导入org.apache.beam.sdk.options.Description;
导入org.apache.beam.sdk.options.pipelineoptions工厂;
导入org.apache.beam.sdk.transforms.DoFn;
导入org.apache.beam.sdk.transforms.ParDo;
导入org.apache.beam.sdk.transforms.windowing.AfterProcessingTime;
导入org.apache.beam.sdk.transforms.windowing.GlobalWindows;
重复导入org.apache.beam.sdk.transforms.windowing.com;
导入org.apache.beam.sdk.transforms.windowing.Window;
导入org.joda.time.Duration;
导入java.util.UUID;
公共最终类测试管道{
专用静态最终持续时间窗口\u持续时间=持续时间。标准秒(1);
私有静态最终字符串数据库\u ID=“test”;
私有静态最终字符串实例\u ID=“测试扳手”;
专用静态最终字符串测试\u TABLE=“TEST”;
公共静态void main(字符串[]args){
TestPipelineOptions=PipelineOptions工厂
.fromArgs(args)
.withValidation()
.as(TestPipelineOptions.class);
Pipeline=Pipeline.create(选项);
管道
.apply(“Read pubsub”,PubsubIO.readMessagesWithAttributes())
.fromSubscription(options.getInputSubscription())
.apply(“解析消息”,ParDo.of(new ProcessMessage()))
.apply(“窗口化”,Window.into(新的全局窗口())
.触发(反复地、永远地)(
AfterProcessingTime.pastFirstElementInPane()之后
.plusDelayOf(窗口时间)
.允许延迟(持续时间为零)
.discardingFiredPanes())
.apply(“写入突变”,span.Write()
.withInstanceId(实例ID)
.withDatabaseId(数据库ID)
);
pipeline.run();
}
私有静态类ProcessMessage扩展了DoFn{
@过程元素
public void processElement(@Element最终发布子消息消息,
最终输出(接收端输出){
out.output(Mutation.newInsertOrUpdateBuilder(测试表)
.set(“id”).to(UUID.randomUUID().toString())
.将(“字符串”)设置为(“测试”)
.将(“计数”)设置为(长最大值)
.build()
);
}
}
接口TestPipelineOptions扩展了DataflowPipelineOptions{
void setInputSubscription(字符串inputSubscription);
@说明(“Google Pubsub订阅id”)
字符串getInputSubscription();
}
}

Table
CREATE TABLE test (id STRING(50) NOT NULL, string STRING(50) NOT NULL, count INT64) PRIMARY KEY (id)

此问题似乎在apache beam 2.18版中出现,但在2.17版中没有出现


apache beam 2.18版的问题如下:

多次调用StartBundle时,似乎会引发此异常。如果您能提供一个完整/更大的代码示例,这将非常有用。谢谢。@RoseLiu我已经添加了导致此问题的完整测试管道。感谢您提供完整的代码示例@allen[1]进行了一些实验,发现apache beam 2.18版存在同样的问题,但2.17版却没有。[1] :谢谢你的确认。是的,2.17对我有用。
import com.google.cloud.spanner.Mutation;

import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage;
import org.apache.beam.sdk.io.gcp.spanner.SpannerIO;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.windowing.AfterProcessingTime;
import org.apache.beam.sdk.transforms.windowing.GlobalWindows;
import org.apache.beam.sdk.transforms.windowing.Repeatedly;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.joda.time.Duration;

import java.util.UUID;

public final class TestPipeline {

    private static final Duration WINDOW_DURATION = Duration.standardSeconds(1);
    private static final String DATABASE_ID = "test";
    private static final String INSTANCE_ID = "test-spanner";
    private static final String TEST_TABLE = "test";

    public static void main(String[] args) {
        TestPipelineOptions options = PipelineOptionsFactory
                .fromArgs(args)
                .withValidation()
                .as(TestPipelineOptions.class);

        Pipeline pipeline = Pipeline.create(options);

        pipeline
                .apply("Read pubsub", PubsubIO.readMessagesWithAttributes()
                        .fromSubscription(options.getInputSubscription()))
                .apply("Parse message", ParDo.of(new ProcessMessage()))
                .apply("Windowing", Window.<Mutation>into(new GlobalWindows())
                        .triggering(Repeatedly.forever(
                                AfterProcessingTime.pastFirstElementInPane()
                                        .plusDelayOf(WINDOW_DURATION)))
                        .withAllowedLateness(Duration.ZERO)
                        .discardingFiredPanes())
                .apply("Write mutations", SpannerIO.write()
                        .withInstanceId(INSTANCE_ID)
                        .withDatabaseId(DATABASE_ID)
                );

        pipeline.run();
    }

    private static class ProcessMessage extends DoFn<PubsubMessage, Mutation> {

        @ProcessElement
        public void processElement(@Element final PubsubMessage message,
                                   final OutputReceiver<Mutation> out) {
            out.output(Mutation.newInsertOrUpdateBuilder(TEST_TABLE)
                    .set("id").to(UUID.randomUUID().toString())
                    .set("string").to("test")
                    .set("count").to(Long.MAX_VALUE)
                    .build()
            );
        }
    }

    interface TestPipelineOptions extends DataflowPipelineOptions {

        void setInputSubscription(String inputSubscription);

        @Description("Google Pubsub subscription id.")
        String getInputSubscription();
    }

}