Apache Flink unable to send data to Kafka


I am trying to run Flink locally, allocating 8 task slots for a job with a parallelism of 8. I am using FlinkKafkaProducer011 as the sink. The job runs fine for about 20 minutes, during which checkpoints complete successfully. After that, however, the job fails with the following error:

org.apache.flink.streaming.connectors.kafka.FlinkKafka011Exception: Failed to send data to Kafka: The server disconnected before a response was received.


I have already tried increasing request.timeout.ms to 15 minutes, but with no success.
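
For context, here is a minimal, self-contained sketch of how producer properties such as request.timeout.ms are typically passed to a FlinkKafkaProducer011 sink. The topic name, the socket stand-in source, and the 60-second checkpoint interval are assumptions and not taken from the original job; transaction.timeout.ms is only relevant if the producer is created with EXACTLY_ONCE semantics.

import java.util.Properties

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011

object ProducerSketch {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(8)           // parallelism 8, matching the 8 task slots
    env.enableCheckpointing(60000)  // checkpointing enabled; the 60 s interval is an assumption

    val producerProps = new Properties()
    producerProps.setProperty("bootstrap.servers", "localhost:9092") // broker address is an assumption
    producerProps.setProperty("request.timeout.ms", "900000")        // 15 minutes, as attempted above
    // Only matters if the sink is constructed with Semantic.EXACTLY_ONCE:
    producerProps.setProperty("transaction.timeout.ms", "900000")

    // Stand-in source for illustration; the real job's sources and transformations go here.
    val stream = env.socketTextStream("localhost", 9999)

    // "output-topic" is a placeholder for the real target topic.
    stream.addSink(new FlinkKafkaProducer011[String](
      "output-topic",
      new SimpleStringSchema(),
      producerProps))

    env.execute("producer sketch")
  }
}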

Try this example, it works for me:

package com.aws.examples.kafka.consumer

import java.util.{Properties, UUID}

import org.apache.flink.api.common.restartstrategy.RestartStrategies
import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.table.api.{TableEnvironment, Types}
import org.apache.flink.table.descriptors.{Json, Kafka, Rowtime, Schema}
import org.apache.flink.types.Row
import org.apache.flink.api.java.io.jdbc
import org.apache.flink.api.java.io.jdbc.JDBCInputFormat.JDBCInputFormatBuilder
import org.apache.flink.api.java.io.jdbc.JDBCOutputFormat
import org.apache.flink.table.sinks.CsvTableSink

object KafkaConsumer {

  def main(args: Array[String]): Unit = {

    val params: ParameterTool = ParameterTool.fromArgs(args)

    /*
    if (params.getNumberOfParameters < 4) {
      System.out.println("\nUsage: ReadWriteKafkaTableSQLAPI --read-topic <topic> --write-topic <topic> --bootstrap.servers <kafka brokers> --group.id <groupid>")
      return
    }
    */

    val kparams: Properties = params.getProperties
    kparams.setProperty("auto.offset.reset", "earliest")
    kparams.setProperty("flink.starting-position", "earliest")
    kparams.setProperty("group.id", UUID.randomUUID.toString)
    kparams.setProperty("kafka.bootstrap.servers", "localhost:9092")


    // setup streaming environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000))
    env.enableCheckpointing(300000) // 300 seconds

    env.getConfig.setGlobalJobParameters(params)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.setParallelism(1)

    val tableEnv = TableEnvironment.getTableEnvironment(env)

    tableEnv.connect(new Kafka().version("0.11").topic("creditcardTransaction").
      property("bootstrap.servers", "localhost:9092"))
      .withSchema(new Schema()
        .field("cc_num", org.apache.flink.table.api.Types.STRING)
        .field("first_column", org.apache.flink.table.api.Types.STRING)
        .field("last_column", org.apache.flink.table.api.Types.STRING)
        .field("trans_num", org.apache.flink.table.api.Types.STRING)
        .field("trans_time", org.apache.flink.table.api.Types.STRING)
        .field("category_column", org.apache.flink.table.api.Types.STRING)
        .field("merchant_column", org.apache.flink.table.api.Types.STRING)
        .field("amt_column", org.apache.flink.table.api.Types.STRING)
        .field("merch_lat", org.apache.flink.table.api.Types.STRING)
        .field("merch_long", org.apache.flink.table.api.Types.STRING)
        .field("ts", org.apache.flink.table.api.Types.SQL_TIMESTAMP)
        .rowtime(new Rowtime().timestampsFromSource.watermarksPeriodicBounded(1000)))
        .withFormat(new Json().deriveSchema).inAppendMode.registerTableSource("creditcardTransactiontable")

    val query = "SELECT distinct cc_num,first_column,last_column,trans_num,trans_time,category_column,merchant_column,amt_column,merch_lat,merch_long from creditcardTransactiontable where cc_num not in ('cc_num')"
    val table = tableEnv.sqlQuery(query)

    table.printSchema()

    //tEnv.toAppendStream(table, classOf[Row]).print
    tableEnv.toRetractStream(table, classOf[org.apache.flink.types.Row]).print


    /* write to kafka */

    val test = tableEnv.connect(new Kafka().version("0.11").topic("creditcardTransactionTargetTopic").
      property("bootstrap.servers", "localhost:9092").sinkPartitionerRoundRobin).
      withSchema(new Schema()
        .field("cc_num", org.apache.flink.table.api.Types.STRING)
        .field("first_column", org.apache.flink.table.api.Types.STRING)
        .field("last_column", org.apache.flink.table.api.Types.STRING)
        .field("trans_num", org.apache.flink.table.api.Types.STRING)
        .field("trans_time", org.apache.flink.table.api.Types.STRING)
        .field("category_column", org.apache.flink.table.api.Types.STRING)
        .field("merchant_column", org.apache.flink.table.api.Types.STRING)
        .field("amt_column", org.apache.flink.table.api.Types.STRING)
        .field("merch_lat", org.apache.flink.table.api.Types.STRING)
        .field("merch_long", org.apache.flink.table.api.Types.STRING))
      .withFormat(new Json().deriveSchema).inAppendMode.registerTableSink("sinkTopic")


    val sql = "INSERT INTO sinkTopic SELECT distinct cc_num,first_column,last_column,trans_num,trans_time,category_column,merchant_column,amt_column,merch_lat,merch_long from creditcardTransactiontable where cc_num not in ('cc_num')"

    //val csvSink: CsvTableSink = new CsvTableSink("/path/to/file","~")


    tableEnv.sqlUpdate(sql)

    env.execute()


  }

}
POM:

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>FlinkStreamAndSql</groupId>
    <artifactId>FlinkStreamAndSql</artifactId>
    <version>1.0-SNAPSHOT</version>
    <build>
        <sourceDirectory>src/main/scala</sourceDirectory>
        <testSourceDirectory>src/test/scala</testSourceDirectory>
        <plugins>
            <plugin>
                <!-- see http://davidb.github.com/scala-maven-plugin -->
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.1.3</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                        <configuration>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.13</version>
                <configuration>
                    <useFile>false</useFile>
                    <disableXmlReport>true</disableXmlReport>
                    <!-- If you have classpath issue like NoDefClassError,... -->
                    <!-- useManifestOnlyJar>false</useManifestOnlyJar -->
                    <includes>
                        <include>**/*Test.*</include>
                        <include>**/*Suite.*</include>
                    </includes>
                </configuration>
            </plugin>

            <!-- "package" command plugin -->
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.4.1</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
    <dependencies>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-core</artifactId>
            <version>1.8.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.11</artifactId>
            <version>1.8.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.derby</groupId>
            <artifactId>derby</artifactId>
            <version>10.13.1.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-jdbc_2.11</artifactId>
            <version>1.8.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-scala_2.11</artifactId>
            <version>1.8.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java</artifactId>
            <version>1.8.0</version>
        </dependency>


        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table</artifactId>
            <version>1.8.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_2.11</artifactId>
            <version>1.8.0</version>
        </dependency>


        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>1.8.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.8.0</version>
        </dependency>

        <!--   -->
               <dependency>
                   <groupId>org.apache.flink</groupId>
                   <artifactId>flink-scala_2.11</artifactId>
                   <version>1.8.0</version>
               </dependency>

               <dependency>
                   <groupId>org.apache.flink</groupId>
                   <artifactId>flink-streaming-scala_2.11</artifactId>
                   <version>1.8.0</version>
               </dependency>


               <dependency>
                   <groupId>org.apache.flink</groupId>
                   <artifactId>flink-streaming-java_2.11</artifactId>
                   <version>1.8.0</version>
               </dependency>

               <dependency>
                   <groupId>org.apache.flink</groupId>
                   <artifactId>flink-connector-kinesis_2.11</artifactId>
                   <version>1.8.0</version>
               </dependency>

               <dependency>
                   <groupId>org.apache.flink</groupId>
                   <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
                   <version>1.8.0</version>
               </dependency>

               <dependency>
                   <groupId>org.scala-lang</groupId>
                   <artifactId>scala-library</artifactId>
                   <!-- must match the Scala version of the _2.11 Flink artifacts -->
                   <version>2.11.12</version>
               </dependency>

               <dependency>
                   <groupId>com.amazonaws</groupId>
                   <artifactId>amazon-kinesis-client</artifactId>
                   <version>1.8.8</version>
               </dependency>

               <dependency>
                   <groupId>com.amazonaws</groupId>
                   <artifactId>aws-java-sdk-kinesis</artifactId>
                   <version>1.11.579</version>
               </dependency>

               <dependency>
                   <groupId>commons-dbcp</groupId>
                   <artifactId>commons-dbcp</artifactId>
                   <version>1.2.2</version>
               </dependency>
               <dependency>
                   <groupId>com.google.code.gson</groupId>
                   <artifactId>gson</artifactId>
                   <version>2.1</version>
               </dependency>

               <dependency>
                   <groupId>commons-cli</groupId>
                   <artifactId>commons-cli</artifactId>
                   <version>1.4</version>
               </dependency>

               <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-csv -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-csv</artifactId>
            <version>1.7</version>
        </dependency>

        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-compress</artifactId>
            <version>1.4.1</version>
        </dependency>

        <dependency>
            <groupId>com.amazonaws</groupId>
            <artifactId>dynamodb-streams-kinesis-adapter</artifactId>
            <version>1.4.0</version>
        </dependency>

        <dependency>
            <groupId>com.amazonaws</groupId>
            <artifactId>aws-java-sdk</artifactId>
            <version>1.11.579</version>
        </dependency>

    </dependencies>

</project>
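
For reference, running "mvn clean package" with this POM should produce a jar-with-dependencies artifact (via the maven-assembly-plugin execution above), which can then be submitted to the cluster, e.g. with the flink run command.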