
Integrating Apache Flink with Elasticsearch in Java


I am trying to integrate Flink with Elasticsearch 2.1.1, using this Maven dependency:

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-elasticsearch2_2.10</artifactId>
        <version>1.1-SNAPSHOT</version>
    </dependency>

Here is the Java code. Reading events from the Kafka queue works fine, but somehow those events are never published to Elasticsearch. If I change anything related to the Elasticsearch port, hostname, cluster name, or index name, I immediately see an error; as it stands, however, it shows no error and creates no new documents in Elasticsearch.

public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // parse user parameters
    ParameterTool parameterTool = ParameterTool.fromArgs(args);

    DataStream<String> messageStream = env.addSource(new FlinkKafkaConsumer082<>(parameterTool.getRequired("topic"), new SimpleStringSchema(), parameterTool.getProperties()));

    messageStream.print();

    Map<String, String> config = new HashMap<>();
    config.put(ElasticsearchSink.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "1");
    config.put(ElasticsearchSink.CONFIG_KEY_BULK_FLUSH_INTERVAL_MS, "1");

    config.put("cluster.name", "FlinkDemo");

    List<InetSocketAddress> transports = new ArrayList<>();
    transports.add(new InetSocketAddress(InetAddress.getByName("localhost"), 9300));

    messageStream.addSink(new ElasticsearchSink<String>(config, transports, new TestElasticsearchSinkFunction()));

    env.execute();
}
private static class TestElasticsearchSinkFunction implements ElasticsearchSinkFunction<String> {
    private static final long serialVersionUID = 1L;

    public IndexRequest createIndexRequest(String element) {
        Map<String, Object> json = new HashMap<>();
        json.put("data", element);

        return Requests.indexRequest()
                .index("flink").id("hash"+element).source(json);
    }

    @Override
    public void process(String element, RuntimeContext ctx, RequestIndexer indexer) {
        indexer.add(createIndexRequest(element));
    }
}

I did run it on my local machine and debugged it; the only thing I was missing was properly configured logging, since most Elasticsearch problems are reported in log.warn statements. The actual problem was an exception inside BulkRequestHandler.java in the elasticsearch-2.2.1 client API, which threw the error "org.elasticsearch.action.ActionRequestValidationException: Validation Failed: 1: type is missing", because I had created the index but not a type. I find that strange, as the client should mainly be concerned with the index and create the type by default.
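Given that validation error, the fix on the question's side is simply to set an explicit type on the index request. A minimal sketch of the corrected createIndexRequest from the code above (the type name "logs" is a hypothetical placeholder):

public IndexRequest createIndexRequest(String element) {
    Map<String, Object> json = new HashMap<>();
    json.put("data", element);

    return Requests.indexRequest()
            .index("flink")
            .type("logs") // hypothetical type name; ES 2.x rejects index requests without a type
            .id("hash" + element)
            .source(json);
}

The working answer below takes the same approach with an explicit .type("viper-log").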

I found a Flink & Elasticsearch connector setup that works very well.

First, the Maven dependency:

<dependency>
  <groupId>org.apache.flink</groupId>
  <artifactId>flink-connector-elasticsearch2_2.10</artifactId>
  <version>1.1-SNAPSHOT</version>
</dependency>

Second, the example Java code:

public static void writeElastic(DataStream<String> input) {

    Map<String, String> config = new HashMap<>();

    // This instructs the sink to emit after every element, otherwise they would be buffered
    config.put("bulk.flush.max.actions", "1");
    config.put("cluster.name", "es_keira");

    try {
        // Add elasticsearch hosts on startup
        List<InetSocketAddress> transports = new ArrayList<>();
        transports.add(new InetSocketAddress("127.0.0.1", 9300)); // port is 9300 not 9200 for ES TransportClient

        ElasticsearchSinkFunction<String> indexLog = new ElasticsearchSinkFunction<String>() {
            public IndexRequest createIndexRequest(String element) {
                String[] logContent = element.trim().split("\t");
                Map<String, String> esJson = new HashMap<>();
                esJson.put("IP", logContent[0]);
                esJson.put("info", logContent[1]);

                return Requests
                        .indexRequest()
                        .index("viper-test")
                        .type("viper-log")
                        .source(esJson);
            }

            @Override
            public void process(String element, RuntimeContext ctx, RequestIndexer indexer) {
                indexer.add(createIndexRequest(element));
            }
        };

        ElasticsearchSink<String> esSink = new ElasticsearchSink<>(config, transports, indexLog);
        input.addSink(esSink);
    } catch (Exception e) {
        System.out.println(e);
    }
}
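For context, a sketch of how writeElastic might be wired into the question's Kafka pipeline (assuming the same FlinkKafkaConsumer082 source and parameters as above; the job name is arbitrary):

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
ParameterTool parameterTool = ParameterTool.fromArgs(args);

// Same Kafka source as in the question
DataStream<String> messageStream = env.addSource(
        new FlinkKafkaConsumer082<>(parameterTool.getRequired("topic"),
                new SimpleStringSchema(), parameterTool.getProperties()));

writeElastic(messageStream);
env.execute("kafka-to-elasticsearch");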

Since you are executing the code locally, I assume you can easily run it from your IDE and watch what happens with a debugger. For example, I would set a breakpoint in the createIndexRequest method to check whether it gets called at all and what happens afterwards.

Hi @rmetzer, I did run it on my local machine and debugged it, but the one thing I was missing was properly configured logging, since most Elasticsearch problems are reported in log.warn statements. As described above, the problem was the exception in BulkRequestHandler.java in the elasticsearch-2.2.1 client API, "org.elasticsearch.action.ActionRequestValidationException: Validation Failed: 1: type is missing", because I had created the index but not a type, which I find strange since the client should mostly care about the index and create the type by default.

Could you explain in detail how you got it working?

You can add a log4j.properties file to get the full logs; the error shows up there. In my case it was an ES authentication problem on my machine.
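As the last comment suggests, putting a log4j.properties file on the classpath makes the connector's log.warn output visible. A minimal sketch (the levels and pattern are just one reasonable choice):

log4j.rootLogger=INFO, console

log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %c - %m%n

# Raise Elasticsearch client logging so bulk-request failures are not silently dropped
log4j.logger.org.elasticsearch=DEBUG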