Spring Batch 中使用 MultiResourcePartitioner 分区后,速度与不分区时相同

MultiResourcePartitioning的速度与spring批处理中没有分区的速度相同,spring,spring-batch,Spring,Spring Batch,批处理作业: 从txt文件读取(50k记录) 将其拆分为10个txt文件(每个5k记录) 为txt中的每条记录创建一个xml文件 问题是没有分割分区(从原始txt读取)的同一个作业需要相同的时间 我的配置: <beans xmlns="http://www.springframework.org/schema/beans" xmlns:context="http://www.springframework.org/schema/context" xmlns:batch="h

批处理作业:

  • 从txt文件读取(50k记录)
  • 将其拆分为10个txt文件(每个5k记录)
  • 为txt中的每条记录创建一个xml文件
  • 问题是:同一个作业在不拆分、不分区(直接从原始txt读取)的情况下,耗时与分区后完全相同

    我的配置:

    <beans xmlns="http://www.springframework.org/schema/beans"
        xmlns:context="http://www.springframework.org/schema/context"
        xmlns:batch="http://www.springframework.org/schema/batch" xmlns:task="http://www.springframework.org/schema/task"
        xmlns:util="http://www.springframework.org/schema/util" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://www.springframework.org/schema/batch
            http://www.springframework.org/schema/batch/spring-batch-2.2.xsd
            http://www.springframework.org/schema/beans 
            http://www.springframework.org/schema/beans/spring-beans-3.2.xsd
            http://www.springframework.org/schema/util 
            http://www.springframework.org/schema/util/spring-util-3.2.xsd
            http://www.springframework.org/schema/context
            http://www.springframework.org/schema/context/spring-context-3.0.xsd">
    
        <!-- Turn on annotation-driven configuration for the beans in this context. -->
        <context:annotation-config />

        <!-- Application-specific beans (classes from com.example.*) -->

        <!-- Prototype-scoped model object; the same class is the target type of
             fieldSetMapper and the "Task" alias of the XStream marshaller. -->
        <bean id="task"
              class="com.example.model.MyModel"
              scope="prototype" />

        <!-- Step listeners; both are registered on step1 and on step2. -->
        <bean id="noInputException"
              class="com.example.listener.NoWorkFoundStepExecutionListener" />
        <bean id="idNameListener"
              class="com.example.listener.IdNameListener">
            <!-- Gives the listener access to the writer used by step2. -->
            <property name="mriw" ref="multiResourceItemWriter" />
        </bean>

        <bean id="txtsuffix"
              class="com.example.filename.TxtSuffix" />
        <bean id="headerCallback"
              class="com.example.listener.HeaderCallBack" />
    
        <!-- Partitioner used by step2master. It matches the txt files under
             ../xml/xmlfiles/ and uses "inputFile" as the key name; fileReaderstep2
             reads that same key from the step execution context. -->
        <bean id="partitioner"
              scope="step"
              class="org.springframework.batch.core.partition.support.MultiResourcePartitioner">
            <property name="keyName" value="inputFile" />
            <property name="resources" value="file:../xml/xmlfiles/*.txt" />
        </bean>
    
        <!-- Executor handed to the partition handler of step2master.
             SyncTaskExecutor runs each task in the calling thread, so the
             partitions are processed one after another rather than in parallel. -->
        <bean id="taskExecutor"
              class="org.springframework.core.task.SyncTaskExecutor" />
        <!-- Disabled alternative: thread pool with core and max size of 10.
        <bean id="taskExecutor" class="org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor">
            <property name="corePoolSize" value="10" />
            <property name="maxPoolSize" value="10" />
        </bean>
        -->
    
        <!-- Batch Job declaration -->
        <batch:job id="txttoxml">
            <!-- step1: read the original txt file with fileReader and write it back out
                 through TXTExporter, committing every 1000 items. Continues with
                 step2master. -->
            <batch:step id="step1" next="step2master">
                <batch:tasklet>
                    <batch:chunk reader="fileReader" writer="TXTExporter" commit-interval="1000" />
                    <batch:listeners>
                        <batch:listener ref="noInputException" />
                        <batch:listener ref="idNameListener" />
                    </batch:listeners>
                </batch:tasklet>
            </batch:step>

            <!-- step2master: partitioned master step. It runs step2 once per partition
                 produced by the "partitioner" bean, using "taskExecutor" and a grid size
                 of 10.
                 The partition and handler elements must carry the batch: prefix: the
                 default namespace of this file is the beans namespace, so unprefixed
                 elements would not resolve to the batch schema. -->
            <batch:step id="step2master">
                <batch:partition step="step2" partitioner="partitioner">
                    <batch:handler grid-size="10" task-executor="taskExecutor" />
                </batch:partition>
            </batch:step>
        </batch:job>
    
        <!-- Worker step referenced by the partition element of step2master. Reads with
             fileReaderstep2, writes with multiResourceItemWriter, and commits after
             every single item. -->
        <batch:step id="step2">
            <batch:tasklet>
                <batch:chunk commit-interval="1"
                             reader="fileReaderstep2"
                             writer="multiResourceItemWriter" />
                <batch:listeners>
                    <batch:listener ref="noInputException" />
                    <batch:listener ref="idNameListener" />
                </batch:listeners>
            </batch:tasklet>
        </batch:step>
    
        <!-- step1 writer -->
        <!-- Writer of step1. Delegates to flatFileItemWriter and rolls over to a new
             output resource, derived from file:../xml/xmlfiles/xml, once the per-resource
             item limit of 5000 is reached.
             NOTE(review): no bean with id "xmlsuffix" is declared in this file (only
             "txtsuffix" is); confirm it is defined elsewhere or that "txtsuffix" was
             intended. -->
        <bean id="TXTExporter"
              class="org.springframework.batch.item.file.MultiResourceItemWriter">
            <property name="resource" value="file:../xml/xmlfiles/xml" />
            <property name="delegate" ref="flatFileItemWriter" />
            <property name="itemCountLimitPerResource" value="5000" />
            <property name="resourceSuffixCreator" ref="xmlsuffix" />
        </bean>
    
        <!-- Delegate of TXTExporter. Each item becomes one line: the five named
             properties are extracted from the item and joined with the pound-sign
             delimiter. No resource is configured on this bean itself. -->
        <bean id="flatFileItemWriter"
              class="org.springframework.batch.item.file.FlatFileItemWriter">
            <property name="lineAggregator">
                <bean class="org.springframework.batch.item.file.transform.DelimitedLineAggregator">
                    <property name="delimiter" value="£" />
                    <property name="fieldExtractor">
                        <bean class="org.springframework.batch.item.file.transform.BeanWrapperFieldExtractor">
                            <property name="names" value="ID,TYPE,NAME,DATESTARTED,DATEENDED" />
                        </bean>
                    </property>
                </bean>
            </property>
        </bean>
    
        <!-- Step-scoped reader used by step2. Its resource is resolved late from the
             "inputFile" entry of the step execution context, which is the keyName
             configured on the partitioner. Lines are parsed by the shared lineMapper. -->
        <bean id="fileReaderstep2"
              scope="step"
              autowire-candidate="false"
              class="org.springframework.batch.item.file.FlatFileItemReader">
            <property name="resource" value="#{stepExecutionContext[inputFile]}" />
            <property name="lineMapper" ref="lineMapper" />
        </bean>
    
        <!-- Writer of step2. With a per-resource item limit of 1, a new output resource
             derived from file:../xml/P is used for each item; the actual XML output is
             produced by the XMLwriter delegate. -->
        <bean id="multiResourceItemWriter"
              class="org.springframework.batch.item.file.MultiResourceItemWriter">
            <property name="resource" value="file:../xml/P" />
            <property name="delegate" ref="XMLwriter" />
            <property name="itemCountLimitPerResource" value="1" />
        </bean>
    
        <!-- StAX-based XML writer used as the delegate of multiResourceItemWriter.
             Items are marshalled by the XStream marshaller bean (named
             "taskUnmarshaller") inside a root tag called "Task". -->
        <bean id="XMLwriter"
              class="org.springframework.batch.item.xml.StaxEventItemWriter">
            <property name="marshaller" ref="taskUnmarshaller" />
            <property name="rootTagName" value="Task" />
        </bean>
    
        <!-- XStream marshaller referenced by XMLwriter. Despite the bean id, it is wired
             in as a marshaller. The alias maps the name "Task" to the MyModel class. -->
        <bean id="taskUnmarshaller"
              class="org.springframework.oxm.xstream.XStreamMarshaller">
            <property name="aliases">
                <map>
                    <entry key="Task" value="com.example.model.MyModel" />
                </map>
            </property>
        </bean>
    
    
      <!-- txt format -->
      <bean id="fileReader"
            class="org.springframework.batch.item.file.FlatFileItemReader">
          <!-- Reader of step1: parses file:../original.txt with the shared lineMapper.
               Strict mode is switched off; skipping a header line is left disabled. -->
          <property name="lineMapper" ref="lineMapper" />
          <property name="resource" value="file:../original.txt" />
          <property name="strict" value="false" />
          <!-- <property name="linesToSkip" value="1"/> -->
      </bean>
    
        <!-- Line mapper shared by fileReader and fileReaderstep2: lineTokenizer splits a
             line into fields, fieldSetMapper turns the fields into an object. -->
        <bean id="lineMapper"
              class="org.springframework.batch.item.file.mapping.DefaultLineMapper">
            <property name="fieldSetMapper" ref="fieldSetMapper" />
            <property name="lineTokenizer" ref="lineTokenizer" />
        </bean>
    
    
        <!-- Tokenizer for the pound-sign-delimited input lines. The field names match
             the ones written by flatFileItemWriter. Strict mode is switched off. -->
        <bean id="lineTokenizer"
              class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer">
            <property name="delimiter" value="£" />
            <property name="names" value="ID,TYPE,NAME,DATESTARTED,DATEENDED" />
            <property name="strict" value="false" />
        </bean>
    
        <!-- Maps each tokenized field set onto a com.example.model.MyModel instance. -->
        <bean id="fieldSetMapper"
              class="org.springframework.batch.item.file.mapping.BeanWrapperFieldSetMapper">
            <property name="targetType" value="com.example.model.MyModel" />
        </bean>
    
    </beans>
    
    
    

    有没有办法提高性能?

    您的配置使用的是SyncTaskExecutor,这意味着各个分区是逐个依次执行的(而不是并行执行)。我注意到ThreadPoolTaskExecutor被注释掉了,您试过它吗?当我使用ThreadPoolTaskExecutor时,作业在处理完第一个txt后就崩溃了,报错为:分区处理程序返回了一个不成功的步骤(Partition handler returned an unsuccessful step)。我需要添加transactionManager和JobRepository的配置吗?我看到的一个小问题可能正是您使用ThreadPoolTaskExecutor时出错的原因……请把读取器设为step作用域。现在它们是单例的,而FlatFileItemReader不是线程安全的。如果它们是step作用域,那么每个分区(因为它是一个完整的步骤实例)都会获得自己的实例。不过,分区步骤的读取器(fileReaderstep2)已经是step作用域了,因此我需要看一下那个不成功的步骤返回的异常。无论哪种情况,使用SyncTaskExecutor都不会获得更好的性能。