如何运行ImplementSpringBatch逐行处理csv文件?
我有一个spring批处理应用程序,它从csv文件中读取数据,传递所有行并对其进行处理,传递所有已处理行并将其写入数据库。非常经典。现在我的问题是csv文件太大了,我遇到了一个
java堆空间
,所以我想我可以通过每x行处理一个文件来优化它,比如说每10000行(每10000行释放一次内存,而不是在内存中加载所有行)
是否有必要告诉SpringBatch以递归方式处理步骤?
还是有别的办法解决我的问题
任何建议都将不胜感激。
谢谢。

下面是一个将以下csv文件处理为bean的示例:
headerA,headerB,headerC
col1,col2,col3
忽略第一行(标题),其他列直接映射到“匹配”对象。(这种方式只是为了简洁起见)
下面是使用Spring批处理开箱即用组件的作业配置
<?xml version="1.0" encoding="UTF-8"?>
<!-- Job configuration: reads a delimited CSV file (header skipped) and
     batch-inserts each row into the simple_entity table, using only
     out-of-the-box Spring Batch components. -->
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:batch="http://www.springframework.org/schema/batch"
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch.xsd">
<batch:job id="fileJob">
<batch:step id="fileJob.step1">
<batch:tasklet>
<!-- Chunk-oriented processing: items are read and written in chunks of
     10000, so only one chunk is held in memory at a time - the whole
     file is never loaded at once. No processor is configured, so items
     flow straight from reader to writer. -->
<batch:chunk reader="fileReader" writer="databaseWriter" commit-interval="10000"/>
</batch:tasklet>
</batch:step>
<!-- Fail fast at launch time if the 'fileName' job parameter is missing. -->
<batch:validator>
<bean class="org.springframework.batch.core.job.DefaultJobParametersValidator">
<property name="requiredKeys" value="fileName"/>
</bean>
</batch:validator>
</batch:job>
<!-- Streams the file line by line. scope="step" is required so the
     #{jobParameters[...]} expression can be resolved per step execution. -->
<bean id="fileReader"
class="org.springframework.batch.item.file.FlatFileItemReader" scope="step">
<property name="lineMapper" ref="lineMapper"/>
<property name="resource" value="file:#{jobParameters['fileName']}"/>
<!-- skip the header line of the CSV -->
<property name="linesToSkip" value="1"/>
</bean>
<!-- lineMapper = tokenizer (line -> FieldSet) + fieldSetMapper (FieldSet -> bean) -->
<bean id="lineMapper"
class="org.springframework.batch.item.file.mapping.DefaultLineMapper">
<property name="fieldSetMapper" ref="fieldSetMapper"/>
<property name="lineTokenizer" ref="lineTokenizer"/>
</bean>
<bean id="lineTokenizer"
class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer">
<property name="delimiter" value=","/>
<!-- field names must match the SimpleEntity property names below -->
<property name="names" value="col1,col2,col3"/>
</bean>
<bean id="fieldSetMapper"
class="org.springframework.batch.item.file.mapping.BeanWrapperFieldSetMapper">
<property name="targetType" value="de.incompleteco.spring.batch.domain.SimpleEntity"/>
</bean>
<!-- Writes each chunk with a single JDBC batch insert; named parameters
     (:col1 etc.) are bound from the item's bean properties. -->
<bean id="databaseWriter"
class="org.springframework.batch.item.database.JdbcBatchItemWriter">
<property name="dataSource" ref="dataSource"/>
<property name="itemSqlParameterSourceProvider">
<bean class="org.springframework.batch.item.database.BeanPropertyItemSqlParameterSourceProvider"/>
</property>
<property name="sql" value="insert into simple_entity (col1,col2,col3) values (:col1,:col2,:col3)"/>
</bean>
</beans>
有几个注意事项
<?xml version="1.0" encoding="UTF-8"?>
<!-- Batch infrastructure: job repository, explorer and launcher, plus a
     'junit' profile that provides an embedded H2 datasource, an async task
     executor and a transaction manager for tests. -->
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:batch="http://www.springframework.org/schema/batch"
xmlns:jdbc="http://www.springframework.org/schema/jdbc"
xmlns:task="http://www.springframework.org/schema/task"
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch.xsd
http://www.springframework.org/schema/jdbc http://www.springframework.org/schema/jdbc/spring-jdbc.xsd
http://www.springframework.org/schema/task http://www.springframework.org/schema/task/spring-task.xsd">
<!-- uses the default 'dataSource' and 'transactionManager' bean names -->
<batch:job-repository id="jobRepository"/>
<bean id="jobExplorer"
class="org.springframework.batch.core.explore.support.JobExplorerFactoryBean">
<property name="dataSource" ref="dataSource"/>
</bean>
<!-- NOTE: with an async taskExecutor, jobLauncher.run(...) returns
     immediately with a running JobExecution - callers must poll the
     jobExplorer for completion (as the integration test does). -->
<bean id="jobLauncher"
class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
<property name="jobRepository" ref="jobRepository"/>
<property name="taskExecutor" ref="taskExecutor"/>
</bean>
<!-- These beans are only registered when the 'junit' profile is active. -->
<beans profile="junit">
<!-- in-memory H2 seeded with the Spring Batch metadata schema and the
     application schema (simple_entity) -->
<jdbc:embedded-database id="dataSource" type="H2">
<jdbc:script location="classpath:/org/springframework/batch/core/schema-h2.sql"/>
<jdbc:script location="classpath:/META-INF/sql/schema-h2.sql"/>
</jdbc:embedded-database>
<task:executor id="taskExecutor"/>
<bean id="transactionManager" class="org.springframework.jdbc.datasource.DataSourceTransactionManager">
<property name="dataSource" ref="dataSource"/>
</bean>
</beans>
</beans>
这里还有一个单元测试
package de.incompleteco.spring.batch;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.FileOutputStream;
import java.nio.charset.StandardCharsets;
import javax.sql.DataSource;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.explore.JobExplorer;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.test.context.ActiveProfiles;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration({"classpath:/META-INF/spring/*-context.xml"})
@ActiveProfiles("junit")
public class FileJobIntegrationTest {

    @Autowired
    private Job job;
    @Autowired
    private JobLauncher jobLauncher;
    @Autowired
    private JobExplorer jobExplorer;
    @Autowired
    private DataSource dataSource;

    /** Number of data rows written to the CSV fixture (header excluded). */
    private int recordCount = 1000000;
    private String fileName = System.getProperty("java.io.tmpdir") + File.separator + "test.csv";

    /**
     * Removes a stale fixture file from a previous run so each test starts
     * from a clean slate.
     */
    @Before
    public void before() throws Exception {
        File file = new File(fileName);
        if (file.exists() && !file.delete()) {
            // a leftover file would silently corrupt the row count below
            throw new IllegalStateException("could not delete stale fixture: " + fileName);
        }
    }

    /**
     * End-to-end check: generates a large CSV, runs the (asynchronously
     * launched) fileJob, waits for completion and verifies every data row
     * landed in simple_entity.
     */
    @Test
    public void test() throws Exception {
        // Build the CSV fixture. try-with-resources guarantees the stream is
        // closed even if a write fails (the original leaked it on error).
        try (FileOutputStream fos = new FileOutputStream(fileName)) {
            // The header line must be newline-terminated; without it the first
            // data record was fused onto the header line and silently dropped
            // by the reader's linesToSkip=1.
            fos.write("col1,col2,col3\n".getBytes(StandardCharsets.UTF_8));
            // Write exactly recordCount data rows. (The original loop ran
            // i<=recordCount, i.e. recordCount+1 rows, and only passed
            // because one row was swallowed by the missing header newline.)
            for (int i = 0; i < recordCount; i++) {
                fos.write((i + "," + (i + 1) + "," + (i + 2) + "\n").getBytes(StandardCharsets.UTF_8));
            }
            // single flush at the end; per-record flushing was pure overhead
            fos.flush();
        }
        // lets get the size of the file
        long length = new File(fileName).length();
        System.out.println("file size: " + ((length / 1024) / 1024));
        // execute the job; 'fileName' is a required parameter (see job validator)
        JobParameters jobParameters = new JobParametersBuilder().addString("fileName", fileName).toJobParameters();
        JobExecution execution = jobLauncher.run(job, jobParameters);
        // the launcher uses an async taskExecutor, so poll until the job ends
        while (jobExplorer.getJobExecution(execution.getId()).isRunning()) {
            Thread.sleep(1000);
        }
        // reload the finished execution for its final status
        execution = jobExplorer.getJobExecution(execution.getId());
        assertEquals(ExitStatus.COMPLETED.getExitCode(), execution.getExitStatus().getExitCode());
        // verify every data row was inserted
        int count = new JdbcTemplate(dataSource).queryForObject("select count(*) from simple_entity", Integer.class);
        // assertEquals reports expected/actual on failure; assertTrue(==) did not
        assertEquals(recordCount, count);
    }
}
package de.incompleteco.spring.batch;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.FileOutputStream;
import javax.sql.DataSource;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.explore.JobExplorer;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.test.context.ActiveProfiles;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration({"classpath:/META-INF/spring/*-context.xml"})
@ActiveProfiles("junit")
public class FileJobIntegrationTest {
@Autowired
private Job job;
@Autowired
private JobLauncher jobLauncher;
@Autowired
private JobExplorer jobExplorer;
@Autowired
private DataSource dataSource;
private int recordCount = 1000000;
private String fileName = System.getProperty("java.io.tmpdir") + File.separator + "test.csv";
@Before
public void before() throws Exception {
if (new File(fileName).exists()) {
new File(fileName).delete();
}//end if
}
@Test
public void test() throws Exception {
//create a file
FileOutputStream fos = new FileOutputStream(fileName);
fos.write("col1,col2,col3".getBytes());
fos.flush();
for (int i=0;i<=recordCount;i++) { ... }

如果使用批处理块(读-处理器-写程序)进行处理,它不会加载内存中的所有内容。

很好,这不是我想要的答案(这是我自己的代码),但我真的很喜欢你的sample。

Hi storm_buster: 你能继续吗?