Warning: file_get_contents(/data/phpspider/zhask/data//catemap/9/java/304.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181

Warning: file_get_contents(/data/phpspider/zhask/data//catemap/9/google-apps-script/6.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
CoderException:java.io.EOFException在对使用Jackson的CustomCoder编码的Json值执行GroupByKey时_Java_Jackson_Google Cloud Dataflow - Fatal编程技术网

CoderException:java.io.EOFException在对使用Jackson的CustomCoder编码的Json值执行GroupByKey时

CoderException:java.io.EOFException在对使用Jackson的CustomCoder编码的Json值执行GroupByKey时,java,jackson,google-cloud-dataflow,Java,Jackson,Google Cloud Dataflow,为什么在执行以下代码时会出现此EOFEException 我已经成功地在更简单的情况下使用了GroupByKey,我认为触发错误的似乎是使用自定义编码器(用于Json对象)。有人能解释为什么会这样吗 以下是错误: com.google.cloud.dataflow.sdk.Pipeline$PipelineExecutionException: com.google.cloud.dataflow.sdk.coders.CoderException: java.io.EOFException

为什么在执行以下代码时会出现此EOFEException

我已经成功地在更简单的情况下使用了
GroupByKey
,我认为触发错误的似乎是使用自定义编码器(用于Json对象)。有人能解释为什么会这样吗

以下是错误:

com.google.cloud.dataflow.sdk.Pipeline$PipelineExecutionException: com.google.cloud.dataflow.sdk.coders.CoderException: java.io.EOFException

    at com.google.cloud.dataflow.sdk.Pipeline.run(Pipeline.java:186)
    at com.google.cloud.dataflow.sdk.testing.TestPipeline.run(TestPipeline.java:106)
    at com.example.dataflow.TestGroupByKeyCustomCoder.testPipeline(TestGroupByKeyCustomCoder.java:85)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:497)
    at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
    at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
    at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
    at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
    at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
    at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
    at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
    at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
    at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
    at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
    at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
    at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
    at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
    at org.junit.runner.JUnitCore.run(JUnitCore.java:137)
    at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:119)
    at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:42)
    at com.intellij.rt.execution.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:234)
    at com.intellij.rt.execution.junit.JUnitStarter.main(JUnitStarter.java:74)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:497)
    at com.intellij.rt.execution.application.AppMain.main(AppMain.java:144)
Caused by: com.google.cloud.dataflow.sdk.coders.CoderException: java.io.EOFException
    at com.google.cloud.dataflow.sdk.coders.BigEndianLongCoder.decode(BigEndianLongCoder.java:62)
    at com.google.cloud.dataflow.sdk.coders.InstantCoder.decode(InstantCoder.java:83)
    at com.google.cloud.dataflow.sdk.util.WindowedValue$FullWindowedValueCoder.decode(WindowedValue.java:621)
    at com.google.cloud.dataflow.sdk.util.WindowedValue$FullWindowedValueCoder.decode(WindowedValue.java:553)
    at com.google.cloud.dataflow.sdk.coders.KvCoder.decode(KvCoder.java:98)
    at com.google.cloud.dataflow.sdk.coders.KvCoder.decode(KvCoder.java:42)
    at com.google.cloud.dataflow.sdk.util.CoderUtils.decodeFromSafeStream(CoderUtils.java:157)
    at com.google.cloud.dataflow.sdk.util.CoderUtils.decodeFromByteArray(CoderUtils.java:140)
    at com.google.cloud.dataflow.sdk.util.CoderUtils.decodeFromByteArray(CoderUtils.java:134)
    at com.google.cloud.dataflow.sdk.util.MutationDetectors$CodedValueMutationDetector.<init>(MutationDetectors.java:107)
    at com.google.cloud.dataflow.sdk.util.MutationDetectors.forValueWithCoder(MutationDetectors.java:44)
Caused by: java.io.EOFException
    at java.io.DataInputStream.readFully(DataInputStream.java:197)
    at java.io.DataInputStream.readLong(DataInputStream.java:416)
    at com.google.cloud.dataflow.sdk.coders.BigEndianLongCoder.decode(BigEndianLongCoder.java:58)
    at com.google.cloud.dataflow.sdk.coders.InstantCoder.decode(InstantCoder.java:83)
    at com.google.cloud.dataflow.sdk.util.WindowedValue$FullWindowedValueCoder.decode(WindowedValue.java:621)
    at com.google.cloud.dataflow.sdk.util.WindowedValue$FullWindowedValueCoder.decode(WindowedValue.java:553)
    at com.google.cloud.dataflow.sdk.coders.KvCoder.decode(KvCoder.java:98)
    at com.google.cloud.dataflow.sdk.coders.KvCoder.decode(KvCoder.java:42)
    at com.google.cloud.dataflow.sdk.util.CoderUtils.decodeFromSafeStream(CoderUtils.java:157)
    at com.google.cloud.dataflow.sdk.util.CoderUtils.decodeFromByteArray(CoderUtils.java:140)
    at com.google.cloud.dataflow.sdk.util.CoderUtils.decodeFromByteArray(CoderUtils.java:134)
    at com.google.cloud.dataflow.sdk.util.MutationDetectors$CodedValueMutationDetector.<init>(MutationDetectors.java:107)
    at com.google.cloud.dataflow.sdk.util.MutationDetectors.forValueWithCoder(MutationDetectors.java:44)
    at com.google.cloud.dataflow.sdk.transforms.ParDo$ImmutabilityCheckingOutputManager.output(ParDo.java:1303)
    at com.google.cloud.dataflow.sdk.util.DoFnRunnerBase$DoFnContext.outputWindowedValue(DoFnRunnerBase.java:287)
    at com.google.cloud.dataflow.sdk.util.DoFnRunnerBase$DoFnProcessContext.output(DoFnRunnerBase.java:449)
    at com.google.cloud.dataflow.sdk.util.ReifyTimestampAndWindowsDoFn.processElement(ReifyTimestampAndWindowsDoFn.java:38)
    at com.google.cloud.dataflow.sdk.util.SimpleDoFnRunner.invokeProcessElement(SimpleDoFnRunner.java:49)
    at com.google.cloud.dataflow.sdk.util.DoFnRunnerBase.processElement(DoFnRunnerBase.java:138)
    at com.google.cloud.dataflow.sdk.transforms.ParDo.evaluateHelper(ParDo.java:1229)
    at com.google.cloud.dataflow.sdk.transforms.ParDo.evaluateSingleHelper(ParDo.java:1098)
    at com.google.cloud.dataflow.sdk.transforms.ParDo.access$300(ParDo.java:457)
    at com.google.cloud.dataflow.sdk.transforms.ParDo$1.evaluate(ParDo.java:1084)
    at com.google.cloud.dataflow.sdk.transforms.ParDo$1.evaluate(ParDo.java:1079)
    at com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner$Evaluator.visitTransform(DirectPipelineRunner.java:858)
    at com.google.cloud.dataflow.sdk.runners.TransformTreeNode.visit(TransformTreeNode.java:219)
    at com.google.cloud.dataflow.sdk.runners.TransformTreeNode.visit(TransformTreeNode.java:215)
    at com.google.cloud.dataflow.sdk.runners.TransformTreeNode.visit(TransformTreeNode.java:215)
    at com.google.cloud.dataflow.sdk.runners.TransformTreeNode.visit(TransformTreeNode.java:215)
    at com.google.cloud.dataflow.sdk.runners.TransformHierarchy.visit(TransformHierarchy.java:102)
    at com.google.cloud.dataflow.sdk.Pipeline.traverseTopologically(Pipeline.java:259)
    at com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner$Evaluator.run(DirectPipelineRunner.java:814)
    at com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner.run(DirectPipelineRunner.java:526)
    at com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner.run(DirectPipelineRunner.java:96)
    at com.google.cloud.dataflow.sdk.Pipeline.run(Pipeline.java:180)
    at com.google.cloud.dataflow.sdk.testing.TestPipeline.run(TestPipeline.java:106)
    at com.example.dataflow.TestGroupByKeyCustomCoder.testPipeline(TestGroupByKeyCustomCoder.java:85)

堆栈跟踪指示在解析时间戳的大端
long
时到达文件末尾

是您的编码值,后跟时间戳,后跟窗口,最后是窗格元数据。因此,这反过来意味着
JsonCoder
从输入流中消耗了太多字节(可能是所有字节?),因此时间戳的解码到达了文件的末尾

SDK提供了许多实用程序,用于在中测试编码器。实际上,您可以通过使用coder
WindowedValue.FullWindowedValueCoder.of(JsonCoder.of(),new GlobalWindow.coder())运行,在全局窗口中直接测试这个案例

有一个传递给
encode
decode
的标志,您可能必须注意:
Coder.Context

  • Coder.Context.OUTER
    表示您的编码器是最外层的
    Coder
    并拥有整个流。在这种情况下,编码时可以利用EOF信号并省略元数据,如长度前缀或括号,而解码时可以随意使用
  • Coder.Context.NESTED
    表示您的
    Coder
    只对值的一部分进行编码,因此它需要写入足够的元数据,以便能够智能地使用自己编码中的字节

感谢您的回复@Kennnowles。我尝试按照您所描述的创建一个测试,并将其添加到上述代码中。它确实会产生同样的错误。我深入研究了一下,发现
ObjectMapper.readTree()
忽略并丢弃了有效JSON文档后面的任何输入。让每个数据流编码器在编码时写入其输出的长度,以便解码器知道在何处停止,这是标准做法吗?在嵌套上下文中编码时,确实需要对此进行管理。这就是为什么我在答案中添加了对这一点的描述。如果查看ByteArrayCoder,您会发现如果上下文是整个流,它会去掉长度前缀。
package com.example.dataflow;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.cloud.dataflow.sdk.coders.CustomCoder;
import com.google.cloud.dataflow.sdk.testing.CoderProperties;
import com.google.cloud.dataflow.sdk.testing.TestPipeline;
import com.google.cloud.dataflow.sdk.transforms.*;
import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow;
import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
import com.google.cloud.dataflow.sdk.util.WindowedValue;
import org.joda.time.Instant;
import org.junit.Assert;
import org.junit.Test;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;


class ParseJson extends DoFn<String, JsonNode> {

    private static final long serialVersionUID = 1L;
    private transient ObjectMapper om;

    { init(); }

    private void init() {
        om = new ObjectMapper();
    }

    private void readObject(java.io.ObjectInputStream in)
            throws IOException, ClassNotFoundException {
        init();
    }

    @Override
    public void processElement(ProcessContext c) throws Exception {
        JsonNode node = om.readTree(c.element());
        c.output(node);
    }
}

class JsonNodeCoder extends CustomCoder<JsonNode> {

    private static final long serialVersionUID = 1L;

    private ObjectMapper mapper = new ObjectMapper();

    private static final JsonNodeCoder INSTANCE = new JsonNodeCoder();

    public static JsonNodeCoder of() {
        return INSTANCE;
    }

    @Override
    public void encode(JsonNode value, OutputStream outStream, Context context) throws IOException {
        mapper.configure(JsonGenerator.Feature.AUTO_CLOSE_TARGET, false).writeValue(outStream, value);
    }

    @Override
    public JsonNode decode(InputStream inStream, Context context) throws IOException {
        return mapper.configure(JsonParser.Feature.AUTO_CLOSE_SOURCE, false).readTree(inStream);
    }
}

public class TestGroupByKeyCustomCoder {

    @Test // original code the produces the error
    public void testPipeline() throws IOException {

        TestPipeline p = TestPipeline.create();

        p.getCoderRegistry().registerCoder(JsonNode.class, JsonNodeCoder.class);

        p.apply(Create.of("{}"))
                .apply(ParDo.of(new ParseJson()))
                .apply(WithKeys.of("foo"))
                .apply("GroupByAction", GroupByKey.create());

        p.run();
    }

    // Test as per Kenn Knowles' suggestion
    // this throws the same error
    @Test
    public void testCustomCoder() throws Exception {
        ObjectMapper mapper = new ObjectMapper();
        JsonNode value = mapper.readTree("{}");

        WindowedValue.FullWindowedValueCoder<JsonNode> windowedValueCoder
                = WindowedValue.FullWindowedValueCoder
                    .of(JsonNodeCoder.of(), GlobalWindow.Coder.INSTANCE);

        WindowedValue<JsonNode> x = WindowedValue.of(
                value, Instant.now(), GlobalWindow.INSTANCE, PaneInfo.ON_TIME_AND_ONLY_FIRING);
        CoderProperties.coderDecodeEncodeEqual(windowedValueCoder, x);
    }
}
@Test
public void testJackson() throws IOException {
    ObjectMapper mapper = new ObjectMapper();
    ByteArrayInputStream bis = new ByteArrayInputStream("{}1".getBytes());
    mapper.readTree(bis);
    Assert.assertNotEquals(bis.read(), -1); // assertion fails
}