MultiResourcePartitioning的速度与spring批处理中没有分区的速度相同
批处理作业:MultiResourcePartitioning 的速度与 Spring Batch 中不分区时的速度相同,spring,spring-batch,Spring,Spring Batch,批处理作业:(1)从 txt 文件读取(5 万条记录);(2)将其拆分为 10 个 txt 文件(每个 5000 条记录);(3)为 txt 中的每条记录创建一个 xml 文件。问题是:不做拆分和分区(直接从原始 txt 读取)的同一个作业所需的时间与之相同。我的配置: <beans xmlns="http://www.springframework.org/schema/beans" xmlns:context="http://www.springframework.org/schema/context" xmlns:batch="h……(此处被截断,完整配置见下文)
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:context="http://www.springframework.org/schema/context"
xmlns:batch="http://www.springframework.org/schema/batch" xmlns:task="http://www.springframework.org/schema/task"
xmlns:util="http://www.springframework.org/schema/util" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.springframework.org/schema/batch
http://www.springframework.org/schema/batch/spring-batch-2.2.xsd
http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-3.2.xsd
http://www.springframework.org/schema/util
http://www.springframework.org/schema/util/spring-util-3.2.xsd
http://www.springframework.org/schema/context
http://www.springframework.org/schema/context/spring-context-3.0.xsd">
<!-- Turns on annotation-based configuration for the beans declared in this context. -->
<context:annotation-config />

<!-- Application beans -->

<!-- Prototype-scoped MyModel bean. -->
<bean id="task"
      class="com.example.model.MyModel"
      scope="prototype" />

<!-- Listener attached to both step1 and step2 (see their batch:listeners sections). -->
<bean id="noInputException"
      class="com.example.listener.NoWorkFoundStepExecutionListener" />

<!--
  Listener attached to both step1 and step2; it receives the per-record XML
  writer (multiResourceItemWriter) through its "mriw" property.
-->
<bean id="idNameListener" class="com.example.listener.IdNameListener">
  <property name="mriw" ref="multiResourceItemWriter" />
</bean>

<!-- presumably a resource-suffix creator producing .txt file names; TODO confirm -->
<bean id="txtsuffix" class="com.example.filename.TxtSuffix" />

<!-- NOTE(review): no bean in this file references headerCallback by id; confirm it is still needed. -->
<bean id="headerCallback" class="com.example.listener.HeaderCallBack" />
<!--
  Partitioner used by step2master. It creates one partition per file matching
  the "resources" pattern and stores each file's location in that partition's
  execution context under the key "inputFile", which fileReaderstep2 reads back.
  Per the MultiResourcePartitioner documentation the grid size is ignored:
  the number of partitions equals the number of matching files.
-->
<bean id="partitioner"
      scope="step"
      class="org.springframework.batch.core.partition.support.MultiResourcePartitioner">
  <property name="resources" value="file:../xml/xmlfiles/*.txt" />
  <property name="keyName" value="inputFile" />
</bean>
<!--
  Executor handed to the step2master partition handler.
  NOTE(review): SyncTaskExecutor runs each task in the calling thread, so the
  partitions execute one after another and partitioning brings no speed-up.
  The thread-pool variant below is left disabled. Before enabling it, confirm
  that everything step2 shares between partitions is safe for concurrent use:
  multiResourceItemWriter, XMLwriter and both listeners are singletons here.
-->
<bean id="taskExecutor" class="org.springframework.core.task.SyncTaskExecutor" />
<!--
<bean id="taskExecutor" class="org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor">
  <property name="corePoolSize" value="10" />
  <property name="maxPoolSize" value="10" />
</bean>
-->
<!-- Batch Job declaration -->
<!--
  Job "txttoxml"
    step1        reads the source file with fileReader and writes it out
                 through TXTExporter, committing every 1000 items.
    step2master  partitions the work with the "partitioner" bean and runs
                 the top-level "step2" definition once per partition.
-->
<batch:job id="txttoxml">
  <batch:step id="step1" next="step2master">
    <batch:tasklet>
      <batch:chunk reader="fileReader"
                   writer="TXTExporter"
                   commit-interval="1000" />
      <batch:listeners>
        <batch:listener ref="noInputException" />
        <batch:listener ref="idNameListener" />
      </batch:listeners>
    </batch:tasklet>
  </batch:step>

  <batch:step id="step2master">
    <!--
      partition and handler carry the batch: prefix explicitly. The default
      namespace of this file is Spring beans, so unprefixed elements would be
      looked up in the beans schema, which defines neither of them.
    -->
    <batch:partition step="step2" partitioner="partitioner">
      <batch:handler grid-size="10" task-executor="taskExecutor" />
    </batch:partition>
  </batch:step>
</batch:job>
<!--
  Worker step, executed once per partition by step2master. Reads one split
  file through fileReaderstep2 and writes through multiResourceItemWriter.
  NOTE(review): commit-interval is 1, i.e. one transaction per record. The
  MultiResourceItemWriter documentation states that new resources are only
  created at chunk boundaries, so with itemCountLimitPerResource=1 this value
  presumably has to stay at 1 to get one output file per record; confirm
  before raising it.
-->
<batch:step id="step2">
  <batch:tasklet>
    <batch:chunk reader="fileReaderstep2"
                 writer="multiResourceItemWriter"
                 commit-interval="1" />
    <batch:listeners>
      <batch:listener ref="noInputException" />
      <batch:listener ref="idNameListener" />
    </batch:listeners>
  </batch:tasklet>
</batch:step>
<!-- step1 writer -->
<!--
  Writer for step1: splits the incoming records into several files of at most
  5000 items each, delegating the actual line writing to flatFileItemWriter.
  Output files are created from the base location "file:../xml/xmlfiles/xml"
  plus the suffix supplied by the resourceSuffixCreator.
-->
<bean id="TXTExporter"
      class="org.springframework.batch.item.file.MultiResourceItemWriter">
  <property name="resource" value="file:../xml/xmlfiles/xml" />
  <property name="delegate" ref="flatFileItemWriter" />
  <property name="itemCountLimitPerResource" value="5000" />
  <!--
    References txtsuffix: no bean named "xmlsuffix" is defined in this context,
    and the partitioner picks these files up with a *.txt pattern.
  -->
  <property name="resourceSuffixCreator" ref="txtsuffix" />
</bean>
<!--
  Delegate of TXTExporter: writes each item as one line containing the fields
  ID, TYPE, NAME, DATESTARTED and DATEENDED, separated by the pound sign.
  The same delimiter and field names are used by lineTokenizer when reading.
-->
<bean id="flatFileItemWriter"
      class="org.springframework.batch.item.file.FlatFileItemWriter">
  <property name="lineAggregator">
    <bean class="org.springframework.batch.item.file.transform.DelimitedLineAggregator">
      <property name="delimiter" value="£" />
      <property name="fieldExtractor">
        <bean class="org.springframework.batch.item.file.transform.BeanWrapperFieldExtractor">
          <property name="names" value="ID,TYPE,NAME,DATESTARTED,DATEENDED" />
        </bean>
      </property>
    </bean>
  </property>
</bean>
<!--
  Reader for step2. It is step-scoped so that the "resource" expression is
  resolved against the execution context of the running step, where the
  partitioner stored the file location under the key "inputFile".
  autowire-candidate="false" excludes this bean from autowiring.
-->
<bean id="fileReaderstep2"
      scope="step"
      autowire-candidate="false"
      class="org.springframework.batch.item.file.FlatFileItemReader">
  <property name="resource" value="#{stepExecutionContext[inputFile]}" />
  <property name="lineMapper" ref="lineMapper" />
</bean>
<!--
  Writer for step2: starts a new output resource after every item
  (itemCountLimitPerResource=1), delegating the XML output to XMLwriter.
  Output files are created from the base location "file:../xml/P".
  NOTE(review): this bean is a singleton, so all partitions of step2 and the
  idNameListener bean share the same instance.
-->
<bean id="multiResourceItemWriter"
      class="org.springframework.batch.item.file.MultiResourceItemWriter">
  <property name="resource" value="file:../xml/P" />
  <property name="delegate" ref="XMLwriter" />
  <property name="itemCountLimitPerResource" value="1" />
</bean>
<!--
  StAX-based XML writer used as the delegate of multiResourceItemWriter.
  Items are marshalled by the taskUnmarshaller bean inside a root element
  named "Task".
-->
<bean id="XMLwriter"
      class="org.springframework.batch.item.xml.StaxEventItemWriter">
  <property name="rootTagName" value="Task" />
  <property name="marshaller" ref="taskUnmarshaller" />
</bean>
<!--
  XStream marshaller injected into XMLwriter as its "marshaller". Despite the
  bean id, it is used here for marshalling. The alias maps the element name
  "Task" to com.example.model.MyModel.
-->
<bean id="taskUnmarshaller"
      class="org.springframework.oxm.xstream.XStreamMarshaller">
  <property name="aliases">
    <map>
      <entry key="Task" value="com.example.model.MyModel" />
    </map>
  </property>
</bean>
<!-- txt format -->
<!--
  Reader for step1: reads the original input file "file:../original.txt"
  line by line using the shared lineMapper.
  NOTE(review): strict=false presumably makes a missing input file non-fatal;
  confirm against the FlatFileItemReader documentation.
-->
<bean id="fileReader"
      class="org.springframework.batch.item.file.FlatFileItemReader">
  <property name="resource" value="file:../original.txt" />
  <property name="lineMapper" ref="lineMapper" />
  <property name="strict" value="false" />
  <!-- <property name="linesToSkip" value="1"/> -->
</bean>
<!--
  Line mapper shared by fileReader and fileReaderstep2: lineTokenizer splits
  each line into fields and fieldSetMapper turns those fields into an object.
-->
<bean id="lineMapper"
      class="org.springframework.batch.item.file.mapping.DefaultLineMapper">
  <property name="lineTokenizer" ref="lineTokenizer" />
  <property name="fieldSetMapper" ref="fieldSetMapper" />
</bean>
<!--
  Splits each input line on the pound sign into the fields ID, TYPE, NAME,
  DATESTARTED and DATEENDED, the same layout flatFileItemWriter produces.
  NOTE(review): strict=false presumably tolerates lines whose token count
  differs from the number of names; confirm against the tokenizer docs.
-->
<bean id="lineTokenizer"
      class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer">
  <property name="delimiter" value="£" />
  <property name="names" value="ID,TYPE,NAME,DATESTARTED,DATEENDED" />
  <property name="strict" value="false" />
</bean>
<!-- Maps the tokenized fields of a line onto a com.example.model.MyModel instance. -->
<bean id="fieldSetMapper"
      class="org.springframework.batch.item.file.mapping.BeanWrapperFieldSetMapper">
  <property name="targetType" value="com.example.model.MyModel" />
</bean>
</beans>
有没有办法提高性能?您的配置使用的是SyncTaskExecutor,这意味着分区是逐个依次执行的(而不是并行执行)。我注意到ThreadPoolTaskExecutor被注释掉了,你试过吗?当我使用ThreadPoolTaskExecutor时,作业在处理完第一个txt后就崩溃了,并报错:分区处理程序返回了一个不成功的步骤。我是否应该添加transactionManager和JobRepository的配置?我看到的一个小问题可能与你使用ThreadPoolTaskExecutor时遇到的问题有关……请把读取器设置为步骤作用域(step scope)。现在它们是单例,而FlatFileItemReader不是线程安全的。如果它们是步骤作用域,那么每个分区(因为它是一个完整的步骤实例)都会获得自己的实例。分区步骤的读取器(fileReaderstep2)已经是步骤作用域了,因此我需要查看不成功步骤返回的异常。无论哪种情况,使用SyncTaskExecutor都不会带来更好的性能。