Java Apache Flink可以写入基于密钥命名的文件吗？_Java_Apache Flink

Java Apache Flink可以写入基于密钥命名的文件吗？
java apache-flink
Java Apache Flink可以写入基于密钥命名的文件吗？,java,apache-flink,Java,Apache Flink,在Apache Flink中，是否可以根据密钥写入多个文本文件？例如，我有一些这样的数据 key1，foo，bar 钥匙2，巴兹，福键3等密钥的值在编译时是未知的；新的密钥将会出现，我想将该密钥的结果写入一个与其他密钥的结果分开的文件中我希望看到3个文件，分别名为“key1.txt”、“key2.txt”和“key3.txt” 这是弗林克能在开箱即用的吗？这在开箱即用是不可能的。但是，您可以实现自己的输出格式，并通过result.out（…）（对于批处理API）使用它；看对于流式API，
在Apache Flink中，是否可以根据密钥写入多个文本文件？例如，我有一些这样的数据
key1, foo, bar
key2, baz, foo
key3, 等等
密钥的值在编译时是未知的；新的密钥将会出现，我想将该密钥的结果写入一个与其他密钥的结果分开的文件中
我希望看到3个文件，分别名为“key1.txt”、“key2.txt”和“key3.txt”
Flink 能否开箱即用地做到这一点？
Flink 并不直接（开箱即用地）支持这一点。不过，您可以实现自己的输出格式（OutputFormat），并通过 result.output(…)
（对于批处理 API）使用它；
对于流式 API，则应使用 stream.addSink(…)
（原回答在这两处各附有一个文档链接，此处已缺失）。
您可以尝试以下接收器的实现，该实现可与KeyedStream
一起使用：
// Key the stream by tuple position 0, i.e. the tuple field named f0.
KeyedStream<Tuple2<String, String>, Tuple> keyedDataStream = dataStream.keyBy(0);

// The sink's element type has to match the element type of the keyed stream,
// otherwise addSink(...) does not type-check.
StreamKeyPartitionerSink<Tuple2<String, String>> sinkFunction =
    new StreamKeyPartitionerSink<Tuple2<String, String>>(
        "../data/key_grouping", "f0"); // f0 is the key field name
keyedDataStream.addSink(sinkFunction);

KeyedStream keyedDataStream=dataStream.keyBy（0）；
StreamKeyPartitionerSink sinkFunction=新的StreamKeyPartitionerSink(
“./数据/键组”，“f0”）；//f0是键字段名
keyedDataStream.addSink（sinkFunction）；

有关 Flink 状态管理的更多信息，请参阅 Flink 官方文档；下面的实现使用键控状态（keyed state）来管理每个键的状态。
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.lang.reflect.Field;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.List;

import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

/**
 * * Flink sink writes tuples to files partitioned by their keys, which also writes the records as
 * batches.
 *
 * @param <IN> Input tuple type
 * 
 * @author ehabqadah
 */
public class StreamKeyPartitionerSink<IN> extends RichSinkFunction<IN> {

  private transient ValueState<String> outputFilePath;
  private transient ValueState<List<IN>> inputTupleList;
  /**
   * Number of rcords to be hold before writing.
   */
  private int writeBatchSize;
  /**
   * The output directory path
   */
  private String outputDirPath;
  /**
   * The name of the input tuple key
   */
  private String keyFieldName;



  public StreamKeyPartitionerSink(String outputDirPath, String keyFieldName) {

    this(outputDirPath, keyFieldName, 1);
  }

  /**
   * 
   * @param outputDirPath- writeBatchSize the size of on hold batch before write
   * @param writeBatchSize - output directory
   */
  public StreamKeyPartitionerSink(String outputDirPath, String keyFieldName, int writeBatchSize) {

    this.writeBatchSize = writeBatchSize;
    this.outputDirPath = outputDirPath;
    this.keyFieldName = keyFieldName;
  }

  @Override
  public void open(Configuration config) {
    // initialize state holders
 `//for more info about state management check  `//
    ValueStateDescriptor<String> outputFilePathDesc =
        new ValueStateDescriptor<String>("outputFilePathDesc",
            TypeInformation.of(new TypeHint<String>() {}));

    ValueStateDescriptor<List<IN>> inputTupleListDesc =
        new ValueStateDescriptor<List<IN>>("inputTupleListDesc",
            TypeInformation.of(new TypeHint<List<IN>>() {}));

    outputFilePath = getRuntimeContext().getState(outputFilePathDesc);
    inputTupleList = getRuntimeContext().getState(inputTupleListDesc);

  }

  @Override
  public void invoke(IN value) throws Exception {
    List<IN> inputTuples =
        inputTupleList.value() == null ? new ArrayList<IN>() : inputTupleList.value();

    inputTuples.add(value);
    if (inputTuples.size() == writeBatchSize) {

      writeInputList(inputTuples);
      inputTuples = new ArrayList<IN>();
    }

    // update the state
    inputTupleList.update(inputTuples);

  }

  /**
   * Write the tuple list, each record in separate line
   * 
   * @param tupleList
   * @throws Exception
   */
  public void writeInputList(List<IN> tupleList) {

    String path = getOrInitFilePath(tupleList);
    try (PrintWriter outStream = new PrintWriter(new BufferedWriter(new FileWriter(path, true)))) {
      for (IN tupleToWrite : tupleList) {
        outStream.println(tupleToWrite);
      }
    } catch (IOException e) {
      throw new RuntimeException("Exception occured while writing file " + path, e);
    }
  }

  private String getOrInitFilePath(List<IN> tupleList) {

    IN firstInstance = tupleList.get(0);
    String path = null;
    try {
      path = outputFilePath.value();

      if (path == null) {
        Field keyField = firstInstance.getClass().getField(keyFieldName);
        String keyValue = keyField.get(firstInstance).toString();
        path = Paths.get(outputDirPath, keyValue + ".txt").toString();

        setUpOutputFilePathPath(outputDirPath, path);
        // save the computed path for this key
        outputFilePath.update(path);
      }
    } catch (IOException | NoSuchFieldException | SecurityException | IllegalArgumentException
        | IllegalAccessException e) {
      throw new RuntimeException(
          "ExceptionsetUpOutputFilePathPath occured while fetching the value of key field " + path,
          e);
    }
    return path;
  }

  private void setUpOutputFilePathPath(String outputDirPath, String path) throws IOException {
    if (!Files.exists(Paths.get(outputDirPath))) {
      Files.createDirectories(Paths.get(outputDirPath));

    }
    // create the file if it does not exist and delete its content
    Files.write(Paths.get(path), "".getBytes(), StandardOpenOption.CREATE,
        StandardOpenOption.TRUNCATE_EXISTING);

  }
}

导入java.io.BufferedWriter；
导入java.io.FileWriter；
导入java.io.IOException；
导入java.io.PrintWriter；
导入java.lang.reflect.Field；
导入java.nio.file.Files；
导入java.nio.file.path；
导入java.nio.file.StandardOpenOption；
导入java.util.ArrayList；
导入java.util.List；
导入org.apache.flink.api.common.state.ValueState；
导入org.apache.flink.api.common.state.ValueStateDescriptor；
导入org.apache.flink.api.common.typeinfo.TypeHint；
导入org.apache.flink.api.common.typeinfo.TypeInformation；
导入org.apache.flink.configuration.configuration；
导入org.apache.flink.streaming.api.functions.sink.RichSinkFunction；
/**
**Flink sink将元组写入按其键分区的文件，这也会将记录写入
*批次。
*
*@param输入元组类型
* 
*@作者ehabqadah
*/
公共类StreamKeyPartitionsLink扩展了RichSink函数{
私有瞬态值状态输出文件路径；
私有瞬时值状态输入列表；
/**
*写入前要保留的RCORD数。
*/
私有int-writeBatchSize；
/**
*输出目录路径
*/
私有字符串outputDirPath；
/**
*输入元组键的名称
*/
私有字符串keyFieldName；
public StreamKeyPartitionsLink（字符串outputDirPath，字符串keyFieldName）{
这（outputDirPath，keyFieldName，1）；
}
/**
* 
*@param outputDirPath-writeBatchSize写入前保留批处理的大小
*@param writeBatchSize-输出目录
*/
public StreamKeyPartitionsLink（字符串outputDirPath、字符串keyFieldName、int writeBatchSize）{
this.writeBatchSize=writeBatchSize；
this.outputDirPath=outputDirPath；
this.keyFieldName=keyFieldName；
}
@凌驾
公共无效打开（配置）{
//初始化状态持有者
`//有关状态管理检查的更多信息`//
ValueStateDescriptor输出文件路径描述=
新的ValueStateDescriptor（“outputFilePathDesc”，
TypeInformation.of（newtypehint（）{}）；
ValueStateDescriptor InputUppleListDesc=
新的ValueStateDescriptor（“InputUpleListDesc”，
TypeInformation.of（newtypehint（）{}）；
outputFilePath=getRuntimeContext（）.getState（outputFilePathDesc）；
InputUpleList=getRuntimeContext（）.getState（InputUpleListDesc）；
}
@凌驾
public void invoke（值中）引发异常{
列出整数=
InputUpleList.value（）==null？新建ArrayList（）：InputUpleList.value（）；
输入两倍。添加（值）；
if（inputUples.size（）==writeBatchSize）{
写入输入列表（输入个数）；
InputUples=新的ArrayList（）；
}
//更新状态
inputUpleList.更新（inputUples）；
}
/**
*编写元组列表，每条记录在单独的行中
* 
*@param-tupleList
*@抛出异常
*/
public void writeInputList（列表元组列表）{
字符串路径=getOrInitFilePath（tupleList）；
try（PrintWriter outStream=new PrintWriter（new BufferedWriter（new FileWriter，path，true）））{
for（在tupleToWrite:tupleList中）{
exptream.println（tupleToWrite）；
}
}捕获（IOE异常）{
抛出新的RuntimeException（“写入文件时发生异常”+path，e）；
}
}
私有字符串getOrInitFilePath（列表元组列表）{
在firstInstance=tupleList.get（0）；
字符串路径=null；
试一试{
path=outputFilePath.value（）；
if（路径==null）{
Field keyField=firstInstance.getClass（）.getField（keyFieldName）；
字符串keyValue=keyField.get（firstInstance.toString（）；
path=path.get（outputDirPath，keyValue+“.txt”）.toString（）；
setUpOutputFilePathPath（outputDirPath，path）；
//保存此键的计算路径
outputFilePath.update（路径）；
}
}捕获（IOException | NoSuchFieldException | SecurityException | IllegalArgumentException）
|非法访问（例外e）{
抛出新的运行时异常(
“获取键字段“+path”的值时发生异常SetupOutputFilePathPath，
e） )；
}
返回路径；
}
私有void setUpOutputFilePathPath（字符串outputDirPath，字符串路径）引发IOException{
如果（！Files.exists（path.get（outputDirPath）））{
Files.createDirectories（path.get（outputDirPath））；
}
//如果文件不存在，请创建该文件并删除其内容
Files.write（路径）.get（路径），“”.getBytes（），StandardOpenOption.CREATE，
StandardOpenOption。截断_（现有）；
}
}
我在这里找到了一些相关信息，原作者也回答了这个问题。