How to connect to Cloud SQL from Google Cloud Dataflow
I am trying to create a pipeline job with the Beam Java SDK and Google Dataflow that moves data from Cloud SQL to Elasticsearch. I created the following class with a main method:
public static void main(String[] args) throws Exception {
    DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    options.setProject("staging");
    options.setTempLocation("gs://csv_to_sql_staging/temp");
    options.setRunner(DataflowRunner.class);
    options.setGcpTempLocation("gs://csv_to_sql_staging/temp");
    options.setUsePublicIps(false);
    options.setJobName("tamer-new");
    options.setSubnetwork("regions/us-central1/subnetworks/new-network");
    final List<String> SCOPES = Arrays.asList(
        "https://www.googleapis.com/auth/cloud-platform",
        "https://www.googleapis.com/auth/devstorage.full_control",
        "https://www.googleapis.com/auth/userinfo.email",
        "https://www.googleapis.com/auth/datastore",
        "https://www.googleapis.com/auth/sqlservice.admin",
        "https://www.googleapis.com/auth/pubsub");
    options.setGcpCredential(ServiceAccountCredentials
        .fromStream(new ElasticSearchIO().getClass().getResourceAsStream("/staging-b648da5d2b9b.json"))
        .createScoped(SCOPES));
    options.setServiceAccount("data-flow@staging.iam.gserviceaccount.com");

    Pipeline p = Pipeline.create(options);
    p.begin();
    PCollection<List<String>> rows = p.apply(JdbcIO.<List<String>>read()
        .withQuery("select u.id, u.name from user_table u")
        .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create(
            "com.mysql.jdbc.Driver",
            "jdbc:mysql://google/nameDB_new?cloudSqlInstance=staging:europe-west1:sql-staging-instance"
                + "&socketFactory=com.google.cloud.sql.mysql.SocketFactory"
                + "&useUnicode=true&characterEncoding=UTF-8&user=user&password=password&useSSL=false"))
        .withRowMapper(new JdbcIO.RowMapper<List<String>>() {
            @Override
            public List<String> mapRow(ResultSet resultSet) throws Exception {
                List<String> addRow = new ArrayList<>();
                for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) {
                    addRow.add(resultSet.getString(i));
                }
                return addRow;
            }
        })
        .withCoder(ListCoder.of(StringUtf8Coder.of())));

    ElasticsearchIO.Write w = ElasticsearchIO.write().withConnectionConfiguration(
        ElasticsearchIO.ConnectionConfiguration
            .create(new String[] {"https://host:9243"}, "user-temp", "String")
            .withUsername("elastic")
            .withPassword("password"));

    rows.apply(w.compose(new SerializableFunction() {
        @Override
        public Object apply(Object input) {
            // TODO Auto-generated method stub
            return input;
        }
    }));
    p.run().waitUntilFinish();
}
And here is the pom.xml file:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.harmonica.dataflow</groupId>
  <artifactId>com-harmonica-dataflow</artifactId>
  <version>0.0.1-SNAPSHOT</version>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven-compiler-plugin.version>3.7.0</maven-compiler-plugin.version>
    <exec-maven-plugin.version>1.6.0</exec-maven-plugin.version>
    <slf4j.version>1.7.25</slf4j.version>
    <beam.version>2.19.0</beam.version>
  </properties>

  <repositories>
    <repository>
      <id>ossrh.snapshots</id>
      <name>Sonatype OSS Repository Hosting</name>
      <url>https://oss.sonatype.org/content/repositories/snapshots/</url>
      <releases>
        <enabled>false</enabled>
      </releases>
      <snapshots>
        <enabled>true</enabled>
      </snapshots>
    </repository>
  </repositories>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>${maven-compiler-plugin.version}</version>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
        </configuration>
      </plugin>
    </plugins>
    <pluginManagement>
      <plugins>
        <plugin>
          <groupId>org.codehaus.mojo</groupId>
          <artifactId>exec-maven-plugin</artifactId>
          <version>${exec-maven-plugin.version}</version>
          <configuration>
            <cleanupDaemonThreads>false</cleanupDaemonThreads>
          </configuration>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>

  <dependencies>
    <!-- Beam Lib -->
    <dependency>
      <groupId>org.apache.beam</groupId>
      <artifactId>beam-sdks-java-core</artifactId>
      <version>${beam.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.beam</groupId>
      <artifactId>beam-runners-google-cloud-dataflow-java</artifactId>
      <version>${beam.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.beam</groupId>
      <artifactId>beam-sdks-java-io-elasticsearch</artifactId>
      <version>${beam.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.beam</groupId>
      <artifactId>beam-sdks-java-io-jdbc</artifactId>
      <version>${beam.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.beam</groupId>
      <artifactId>beam-sdks-java-io-google-cloud-platform</artifactId>
      <version>${beam.version}</version>
    </dependency>
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>8.0.19</version>
    </dependency>
    <dependency>
      <groupId>com.google.cloud.sql</groupId>
      <artifactId>mysql-socket-factory-connector-j-8</artifactId>
      <version>1.0.15</version>
    </dependency>
    <!-- slf4j API frontend binding with JUL backend -->
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>${slf4j.version}</version>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-jdk14</artifactId>
      <version>${slf4j.version}</version>
    </dependency>
  </dependencies>
</project>
The workers start successfully, but they then fail to connect to Cloud SQL, even though I have done the following:

- I created a service account with project-owner access and passed it to the runner options.
- I created a VPC network named new-network with the IP range 190.10.0.0/16, assigned it to the pipeline options, and allowlisted that range in Cloud SQL.

I run the job with:

mvn compile exec:java -Dexec.mainClass=com.dataflow.ElasticSearchIO
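For reference, the project, network, and service-account settings could equally be supplied as standard Dataflow pipeline options on the command line instead of being hard-coded in main. A sketch, assuming the options are parsed with PipelineOptionsFactory.fromArgs(args):

mvn compile exec:java -Dexec.mainClass=com.dataflow.ElasticSearchIO \
  -Dexec.args="--runner=DataflowRunner \
    --project=staging \
    --gcpTempLocation=gs://csv_to_sql_staging/temp \
    --usePublicIps=false \
    --subnetwork=regions/us-central1/subnetworks/new-network \
    --serviceAccount=data-flow@staging.iam.gserviceaccount.com"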
For comparison, here is a configuration that connects to Cloud SQL successfully. First, a plain JDBC smoke test confirms that the connection works (connectToCloudSql is a helper that opens the connection; "query" is a placeholder for a real statement):

// Open a JDBC connection to Cloud SQL and run a smoke-test query.
connection = connectToCloudSql(map.get(LiteralConstant.URL.toString()),
    map.get(LiteralConstant.USERNAME.toString()),
    map.get(LiteralConstant.PASSWORD.toString()));
statement = connection.prepareCall("query"); // placeholder for a real query
statement.execute();
resultSet = statement.getResultSet();
ResultSetMetaData rsmd = resultSet.getMetaData();
int count = rsmd.getColumnCount();
// No row or no columns means the connection check failed.
if (!resultSet.next() || count < 1)
    throw new ConnectionFailureException("Failed to connect to Cloud SQL");
// Copy the first row's columns into row, a container declared elsewhere.
for (int k = 1; k <= count; k++) {
    row.set(rsmd.getColumnName(k), resultSet.getString(k));
}
The dependencies used for this connection check:

<dependency>
  <groupId>mysql</groupId>
  <artifactId>mysql-connector-java</artifactId>
  <version>5.1.25</version>
</dependency>
<dependency>
  <groupId>com.google.cloud.sql</groupId>
  <artifactId>mysql-socket-factory</artifactId> <!-- mysql-socket-factory-connector-j-6 if using 6.x.x -->
  <version>1.0.0</version>
</dependency>
Inside the pipeline itself, the connection goes through a pooled data source rather than the driver-class/URL overload:

// Increase the pool size based on your record volume.
ComboPooledDataSource dataSource = new ComboPooledDataSource();
dataSource.setDriverClass("com.mysql.jdbc.Driver");
dataSource.setJdbcUrl(
    "jdbc:mysql://google/test?cloudSqlInstance=dataflowtest-:us-central1:sql-test&socketFactory=com.google.cloud.sql.mysql.SocketFactory");
dataSource.setUser("root");
dataSource.setPassword("root");
dataSource.setMaxPoolSize(10);
dataSource.setInitialPoolSize(6);
JdbcIO.DataSourceConfiguration config = JdbcIO.DataSourceConfiguration.create(dataSource);
// Add rewriteBatchedStatements=true to the JDBC URL to improve write speed.
PCollection<KV<String, String>> sqlResult = p.apply(JdbcIO.<KV<String, String>>read()
    .withDataSourceConfiguration(config)
    .withQuery("select * from test_table")
    .withCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()))
    .withRowMapper(new JdbcIO.RowMapper<KV<String, String>>() {
        private static final long serialVersionUID = 1L;
        public KV<String, String> mapRow(ResultSet resultSet) throws Exception {
            return KV.of(resultSet.getString(1), resultSet.getString(2));
        }
    }));
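The rewriteBatchedStatements flag mentioned in the comment above is a standard MySQL Connector/J URL parameter; appending it to the JDBC URL would look like this (instance and database names as in the snippet above):

dataSource.setJdbcUrl(
    "jdbc:mysql://google/test?cloudSqlInstance=dataflowtest-:us-central1:sql-test"
        + "&socketFactory=com.google.cloud.sql.mysql.SocketFactory"
        + "&rewriteBatchedStatements=true");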
And the combination of dependency versions used with this setup:

<dependency>
  <groupId>org.apache.beam</groupId>
  <artifactId>beam-sdks-java-io-jdbc</artifactId>
  <version>2.17.0</version>
</dependency>
<dependency>
  <groupId>mysql</groupId>
  <artifactId>mysql-connector-java</artifactId>
  <version>5.1.25</version>
</dependency>
<dependency>
  <groupId>com.google.cloud.sql</groupId>
  <artifactId>mysql-socket-factory</artifactId>
  <version>1.0.0</version>
</dependency>
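To complete the Cloud SQL to Elasticsearch flow, the KV records read above still have to be serialized to JSON documents before ElasticsearchIO can write them. A minimal sketch, assuming the user-temp index and credentials from the question and a trivial two-field JSON layout (imports for MapElements, TypeDescriptors, and ElasticsearchIO assumed):

// Turn each KV row into a JSON document string.
PCollection<String> jsonDocs = sqlResult.apply(
    MapElements.into(TypeDescriptors.strings())
        .via((KV<String, String> kv) -> String.format(
            "{\"id\":\"%s\",\"name\":\"%s\"}", kv.getKey(), kv.getValue())));

// Write the JSON documents to the Elasticsearch index.
jsonDocs.apply(ElasticsearchIO.write().withConnectionConfiguration(
    ElasticsearchIO.ConnectionConfiguration
        .create(new String[] {"https://host:9243"}, "user-temp", "String")
        .withUsername("elastic")
        .withPassword("password")));

p.run().waitUntilFinish();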