如何用Spring Batch实现分批(逐行)处理csv文件?

如何运行ImplementSpringBatch逐行处理csv文件?,spring,spring-batch,Spring,Spring Batch,我有一个spring批处理应用程序,它从csv文件中读取数据,传递所有行并对其进行处理,传递所有已处理行并将其写入数据库。非常经典。现在我的问题是csv文件太大了,我有一个java堆空间,所以我想我可以通过每x行处理一个文件来优化它,比如说每10000行(每10000行释放一次内存,而不是在内存中加载所有行) 是否有必要告诉SpringBatch以递归方式处理步骤? 还是有别的办法解决我的问题 任何建议都将不胜感激。 谢谢下面是一个将以下csv文件处理为bean的示例 headerA,heade

我有一个Spring Batch应用程序,它从csv文件中读取数据:先读入所有行并对其进行处理,再把所有处理过的行写入数据库。这是非常经典的做法。现在我的问题是csv文件太大了,运行时出现了Java堆空间不足(java heap space)的错误,所以我想可以通过每次只处理x行来优化它,比如说每10000行处理一次(每处理10000行就释放一次内存,而不是把所有行都加载到内存中)。

是否有办法告诉Spring Batch以递归(分批重复)的方式执行某个步骤?还是有别的办法可以解决我的问题?

任何建议都将不胜感激。
谢谢

下面是一个将以下csv文件处理为bean的示例

headerA,headerB,headerC
col1,col2,col3
第一行(标题行)会被忽略,其余各行的列直接映射到属性名与之相匹配的对象上。(这样做只是为了简洁起见)

下面是使用Spring批处理开箱即用组件的作业配置

<?xml version="1.0" encoding="UTF-8"?>
<!--
    Job configuration for "fileJob": reads a delimited file with a FlatFileItemReader and
    writes the mapped beans to the database with a JdbcBatchItemWriter.
    The file to read is supplied at launch time through the "fileName" job parameter.
-->
<beans xmlns="http://www.springframework.org/schema/beans"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xmlns:batch="http://www.springframework.org/schema/batch"
    xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
        http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch.xsd">

    <!-- Single-step job; the step is chunk-oriented with reader "fileReader" and writer "databaseWriter". -->
    <batch:job id="fileJob">
        <batch:step id="fileJob.step1">
            <batch:tasklet>
                <!-- commit-interval is the chunk size: items are handed to the writer 10000 at a time,
                     so the whole file does not have to be held in memory at once. No processor is configured. -->
                <batch:chunk reader="fileReader" writer="databaseWriter" commit-interval="10000"/>
            </batch:tasklet>
        </batch:step>
        <!-- Rejects a launch request that does not carry the "fileName" parameter. -->
        <batch:validator>
            <bean class="org.springframework.batch.core.job.DefaultJobParametersValidator">
                <property name="requiredKeys" value="fileName"/>
            </bean>
        </batch:validator>
    </batch:job>

    <!-- Step-scoped so that the jobParameters expression below is resolved when the step runs. -->
    <bean id="fileReader"
        class="org.springframework.batch.item.file.FlatFileItemReader" scope="step">
        <property name="lineMapper" ref="lineMapper"/>
        <property name="resource" value="file:#{jobParameters['fileName']}"/>
        <!-- Skip the first line of the file (the header row). -->
        <property name="linesToSkip" value="1"/>
    </bean>

    <!-- Turns one line into one item: tokenize the line, then map the resulting field set to a bean. -->
    <bean id="lineMapper"
        class="org.springframework.batch.item.file.mapping.DefaultLineMapper">
        <property name="fieldSetMapper" ref="fieldSetMapper"/>
        <property name="lineTokenizer" ref="lineTokenizer"/>
    </bean>


    <!-- Splits each line on "," and names the three resulting fields col1, col2 and col3. -->
    <bean id="lineTokenizer"
        class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer">
        <property name="delimiter" value=","/>
        <property name="names" value="col1,col2,col3"/>
    </bean>

    <!-- Creates a SimpleEntity per line; the field names above are expected to match its bean properties. -->
    <bean id="fieldSetMapper"
        class="org.springframework.batch.item.file.mapping.BeanWrapperFieldSetMapper">
        <property name="targetType" value="de.incompleteco.spring.batch.domain.SimpleEntity"/>
    </bean>

    <!-- Inserts items into simple_entity; the named SQL parameters are taken from the item's bean properties.
         The "dataSource" bean is not defined in this file and must come from another context file. -->
    <bean id="databaseWriter"
        class="org.springframework.batch.item.database.JdbcBatchItemWriter">
        <property name="dataSource" ref="dataSource"/>
        <property name="itemSqlParameterSourceProvider">
            <bean class="org.springframework.batch.item.database.BeanPropertyItemSqlParameterSourceProvider"/>
        </property>
        <property name="sql" value="insert into simple_entity (col1,col2,col3) values (:col1,:col2,:col3)"/>
    </bean>
</beans>

有几个注意事项

  • 此作业需要一个参数“fileName”来告诉fileReader在何处查找文件
  • 设置了jobParametersValidator以确保参数存在
  • 这里是批处理资源配置

    <?xml version="1.0" encoding="UTF-8"?>
    <!--
        Batch infrastructure: job repository, job explorer and job launcher, plus a "junit"
        profile that supplies the data source, task executor and transaction manager they reference.
    -->
    <beans xmlns="http://www.springframework.org/schema/beans"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xmlns:batch="http://www.springframework.org/schema/batch"
        xmlns:jdbc="http://www.springframework.org/schema/jdbc"
        xmlns:task="http://www.springframework.org/schema/task"
        xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
            http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch.xsd
            http://www.springframework.org/schema/jdbc http://www.springframework.org/schema/jdbc/spring-jdbc.xsd
            http://www.springframework.org/schema/task http://www.springframework.org/schema/task/spring-task.xsd">
    
        <!-- NOTE(review): no data-source / transaction-manager attributes are given here; presumably the
             namespace defaults ("dataSource", "transactionManager") are relied on - verify. -->
        <batch:job-repository id="jobRepository"/>
    
        <!-- Read access to job execution metadata, backed by the same data source. -->
        <bean id="jobExplorer"
            class="org.springframework.batch.core.explore.support.JobExplorerFactoryBean">
            <property name="dataSource" ref="dataSource"/>
        </bean>
        <!-- The launcher is given a task executor, so a launched job may still be running
             when run(...) returns; callers have to poll for completion. -->
        <bean id="jobLauncher"
            class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
            <property name="jobRepository" ref="jobRepository"/>
            <property name="taskExecutor" ref="taskExecutor"/>
        </bean>
    
        <!-- Beans below exist only when the "junit" profile is active. -->
        <beans profile="junit">
            <!-- Embedded H2 database initialised with the Spring Batch schema script and the application schema script. -->
            <jdbc:embedded-database id="dataSource" type="H2">
                <jdbc:script location="classpath:/org/springframework/batch/core/schema-h2.sql"/>
                <jdbc:script location="classpath:/META-INF/sql/schema-h2.sql"/>
            </jdbc:embedded-database>
    
            <task:executor id="taskExecutor"/>
    
            <bean id="transactionManager" class="org.springframework.jdbc.datasource.DataSourceTransactionManager">
                <property name="dataSource" ref="dataSource"/>
            </bean>
        </beans>
    </beans>
    
    
    
    这里还有一个单元测试

    package de.incompleteco.spring.batch;
    
    import static org.junit.Assert.assertEquals;
    import static org.junit.Assert.assertTrue;
    
    import java.io.File;
    import java.io.FileOutputStream;
    
    import javax.sql.DataSource;
    
    import org.junit.Before;
    import org.junit.Test;
    import org.junit.runner.RunWith;
    import org.springframework.batch.core.ExitStatus;
    import org.springframework.batch.core.Job;
    import org.springframework.batch.core.JobExecution;
    import org.springframework.batch.core.JobParameters;
    import org.springframework.batch.core.JobParametersBuilder;
    import org.springframework.batch.core.explore.JobExplorer;
    import org.springframework.batch.core.launch.JobLauncher;
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.jdbc.core.JdbcTemplate;
    import org.springframework.test.context.ActiveProfiles;
    import org.springframework.test.context.ContextConfiguration;
    import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
    
    /**
     * Integration test for the CSV file job.
     *
     * <p>Generates a CSV file with a header line followed by {@code recordCount} data rows,
     * launches the job with that file as the {@code fileName} parameter, waits for the
     * execution to finish and checks that the {@code simple_entity} table holds exactly
     * {@code recordCount} rows.
     */
    @RunWith(SpringJUnit4ClassRunner.class)
    @ContextConfiguration({"classpath:/META-INF/spring/*-context.xml"})
    @ActiveProfiles("junit")
    public class FileJobIntegrationTest {

        @Autowired
        private Job job;

        @Autowired
        private JobLauncher jobLauncher;

        @Autowired
        private JobExplorer jobExplorer;

        @Autowired
        private DataSource dataSource;

        /** Number of data rows written to the file and expected in the database. */
        private int recordCount = 1000000;

        /** Location of the generated CSV file inside the JVM temp directory. */
        private String fileName = System.getProperty("java.io.tmpdir") + File.separator + "test.csv";

        /**
         * Removes a CSV file left over from a previous run so each test starts from scratch.
         *
         * @throws Exception if the stale file exists but cannot be deleted
         */
        @Before
        public void before() throws Exception {
            File staleFile = new File(fileName);
            // Fail early instead of silently appending to, or reusing, an old file.
            if (staleFile.exists() && !staleFile.delete()) {
                throw new IllegalStateException("could not delete stale test file " + fileName);
            }
        }

        /**
         * Runs the job against a freshly generated file and verifies the imported row count.
         *
         * @throws Exception if the file cannot be written or the job cannot be launched
         */
        @Test
        public void test() throws Exception {
            writeCsvFile();
            // report the size of the generated file (in whole megabytes)
            long length = new File(fileName).length();
            System.out.println("file size: " + ((length / 1024) / 1024));
            // execute the job
            JobParameters jobParameters = new JobParametersBuilder().addString("fileName", fileName).toJobParameters();
            JobExecution execution = jobLauncher.run(job, jobParameters);
            // The launcher may return before the job has finished, so poll the explorer until it is done.
            while (jobExplorer.getJobExecution(execution.getId()).isRunning()) {
                Thread.sleep(1000);
            }
            // reload to pick up the final state
            execution = jobExplorer.getJobExecution(execution.getId());
            assertEquals(ExitStatus.COMPLETED.getExitCode(), execution.getExitStatus().getExitCode());
            // every data row must have been inserted
            int count = new JdbcTemplate(dataSource).queryForObject("select count(*) from simple_entity", Integer.class);
            assertEquals(recordCount, count);
        }

        /**
         * Writes the header line followed by exactly {@code recordCount} data rows.
         *
         * <p>The header is terminated with a newline so that the first data row sits on its own
         * line; otherwise it would be fused onto the header and skipped together with it.
         */
        private void writeCsvFile() throws Exception {
            FileOutputStream fos = new FileOutputStream(fileName);
            try {
                fos.write("col1,col2,col3\n".getBytes());
                for (int i = 0; i < recordCount; i++) {
                    fos.write((i + "," + (i + 1) + "," + (i + 2) + "\n").getBytes());
                }
            } finally {
                // release the file handle even when a write fails
                fos.close();
            }
        }

    }
    
    package de.incompleteco.spring.batch;
    导入静态org.junit.Assert.assertEquals;
    导入静态org.junit.Assert.assertTrue;
    导入java.io.File;
    导入java.io.FileOutputStream;
    导入javax.sql.DataSource;
    导入org.junit.Before;
    导入org.junit.Test;
    导入org.junit.runner.RunWith;
    导入org.springframework.batch.core.ExitStatus;
    导入org.springframework.batch.core.Job;
    导入org.springframework.batch.core.JobExecution;
    导入org.springframework.batch.core.JobParameters;
    导入org.springframework.batch.core.JobParametersBuilder;
    导入org.springframework.batch.core.explore.JobExplorer;
    导入org.springframework.batch.core.launch.JobLauncher;
    导入org.springframework.beans.factory.annotation.Autowired;
    导入org.springframework.jdbc.core.jdbc模板;
    导入org.springframework.test.context.ActiveProfiles;
    导入org.springframework.test.context.ContextConfiguration;
    导入org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
    @RunWith(SpringJUnit4ClassRunner.class)
    @ContextConfiguration({“classpath:/META-INF/spring/*-context.xml”})
    @ActiveProfiles(“junit”)
    公共类FileJobIntegrationTest{
    @自动连线
    私人工作;
    @自动连线
    私有JobLauncher JobLauncher;
    @自动连线
    私人JobExplorer;
    @自动连线
    私有数据源;
    私有int记录计数=1000000;
    私有字符串文件名=System.getProperty(“java.io.tmpdir”)+File.separator+“test.csv”;
    @以前
    public void before()引发异常{
    if(新文件(文件名).exists()){
    新文件(文件名).delete();
    }//如果结束
    }
    @试验
    public void test()引发异常{
    //创建一个文件
    FileOutputStream fos=新的FileOutputStream(文件名);
    fos.write(“col1,col2,col3.getBytes());
    fos.flush();
    
    对于(int i=0;ii ……(代码在此处被截断)

    如果使用批处理的chunk方式(reader-processor-writer)进行处理,它不会把所有内容都加载到内存中。

    很好,这不是我想要的答案(这是我自己的代码),但我真的很喜欢你的示例。

    Hi storm_buster:你能继续吗?