Proper usage of Spring Integration Java DSL plus AmazonS3InboundSynchronizationMessageSource


I am using an AmazonS3InboundSynchronizationMessageSource to read in what may amount to millions of files scattered across an S3 bucket's subdirectories, organized by type >> year >> month >> day >> hour >> {filename}-{uniqueid}.gz. Ideally, I would like to poll and write, and have the synchronizer remember the last spot I read from so that a subsequent poll retrieves the next batch. However, the above MessageSource is not designed that way.

In any case, I can work around that by picking a range and reading in the contents.
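
For illustration only (the prefix, helper name, and values are made up, not from the original setup), "picking a range" could be as simple as narrowing the remote directory to a single hour's prefix before a run:

// Hypothetical helper: point the message source at one hour's worth of objects by
// narrowing the remote directory prefix (layout: type/year/month/day/hour).
private void pointAtHour(AmazonS3InboundSynchronizationMessageSource messageSource,
        int year, int month, int day, int hour) {
    String prefix = String.format("type/%d/%02d/%02d/%02d", year, month, day, hour);
    messageSource.setRemoteDirectory(prefix);
}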

Aside from that, if I take a simple approach and read in files from one directory on the first poll, I would like to shut down (System.exit) after that (effectively after doing some processing noted in the comments below).

So, similar to the question asked here:

I just want to poll once and exit after the first poll. (Perhaps there's a different way to go about it? I'm open to suggestions.)
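
A minimal sketch of the shutdown piece (class and method names are mine, not from the post): once the one-off batch has been handled, the Spring context can be closed before the hard exit:

import org.springframework.context.ConfigurableApplicationContext;

// Rough sketch, assuming it is invoked at the very end of the flow once the single
// batch has been persisted; ShutdownAfterBatch is a made-up name for illustration.
public class ShutdownAfterBatch {

    private final ConfigurableApplicationContext context;

    public ShutdownAfterBatch(ConfigurableApplicationContext context) {
        this.context = context;
    }

    public void shutdown() {
        context.close();   // stop pollers and release Spring resources cleanly
        System.exit(0);    // the hard exit the question mentions
    }
}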

App bootstrapping

@SpringBootApplication
@EnableIntegration
@IntegrationComponentScan
public class DataMigrationApp extends SpringBootServletInitializer {

@Override
protected SpringApplicationBuilder configure(SpringApplicationBuilder application) {
    return application.sources(DataMigrationApp.class);
}

public static void main(String[] args) {
    SpringApplication.run(DataMigrationApp.class, args);
}

}

UPDATE (2015-09-06)

Code sample

@Configuration
public class DataMigrationModule {

private final Logger log = LoggerFactory.getLogger(getClass());

@Value("${cloud.aws.credentials.accessKey}")
private String accessKey;

@Value("${cloud.aws.credentials.secretKey}")
private String secretKey;

@Value("${cloud.aws.s3.bucket}")
private String bucket;

@Value("${cloud.aws.s3.max-objects-per-batch:1024}")
private int maxObjectsPerBatch;

@Value("${cloud.aws.s3.accept-subfolders:false}")
private String acceptSubFolders;

@Value("${cloud.aws.s3.remote-directory}")
private String remoteDirectory;

@Value("${cloud.aws.s3.local-directory:target/s3-dump}")
private String localDirectory;

@Value("${cloud.aws.s3.filename-wildcard:}")
private String fileNameWildcard;

@Value("${app.persistent-type:}")
private String persistentType;

@Value("${app.repository-type:}")
private String repositoryType;

@Value("${app.persistence-batch-size:2500}")
private int persistenceBatchSize;

@Autowired
private ListableBeanFactory beanFactory;

private final AtomicBoolean invoked = new AtomicBoolean();

public Date nextExecutionTime(TriggerContext triggerContext) {
    return this.invoked.getAndSet(true) ? null : new Date();
}

private FileToInputStreamTransformer unzipTransformer() {
    FileToInputStreamTransformer transformer = new FileToInputStreamTransformer();
    transformer.setDeleteFiles(true);
    return transformer;
}

private Class<?> repositoryType() {
    try {
        return Class.forName(repositoryType);
    } catch (ClassNotFoundException cnfe) {
        log.error("DataMigrationModule.failure -- (Unknown repository implementation!)", cnfe);
        System.exit(0);
    }
    return null;
}

private Class<?> persistentType() {
    try {
        return Class.forName(persistentType);
    } catch (ClassNotFoundException cnfe) {
        log.error("DataMigrationModule.failure -- (Unsupported type!)", cnfe);
        System.exit(0);
    }
    return null;
}

@Bean
public MessageSource<?> amazonS3InboundSynchronizationMessageSource() {
    AWSCredentials credentials = new BasicAWSCredentials(this.accessKey, this.secretKey);
    AmazonS3InboundSynchronizationMessageSource messageSource = new AmazonS3InboundSynchronizationMessageSource();
    messageSource.setCredentials(credentials);
    messageSource.setBucket(bucket);
    messageSource.setMaxObjectsPerBatch(maxObjectsPerBatch);
    messageSource.setAcceptSubFolders(Boolean.valueOf(acceptSubFolders));
    messageSource.setRemoteDirectory(remoteDirectory);
    if (!fileNameWildcard.isEmpty()) {
        messageSource.setFileNameWildcard(fileNameWildcard);
    }
    String directory = System.getProperty("java.io.tmpdir");
    if (!localDirectory.startsWith("/")) {
        localDirectory = "/" + localDirectory;
    }
    if (!localDirectory.endsWith("/")) {
        localDirectory = localDirectory + "/";
    }
    directory = directory + localDirectory;
    FileUtils.mkdir(directory);
    messageSource.setDirectory(new LiteralExpression(directory));
    return messageSource;
}

@Bean
DirectChannel inputChannel() {
    return new DirectChannel();
}

@Bean 
JdbcRepositoryHandler jdbcRepositoryHandler() {
    return new JdbcRepositoryHandler(repositoryType(), beanFactory);
}

@Bean
public IntegrationFlow flow() {
    // formatter:off
    return IntegrationFlows
            .from(
                    this.amazonS3InboundSynchronizationMessageSource(),
                    e -> e.poller(p -> p.trigger(this::nextExecutionTime))
            )
            .transform(unzipTransformer())
            // TODO add advised PollableChannel to deal with possible decompression issues

            .split(f -> new FileSplitter())
            .channel(MessageChannels.executor(Executors.newCachedThreadPool()))
            .transform(Transformers.fromJson(persistentType()))
            // TODO add advised PollableChannel to deal with possible transform issues

            // @see http://docs.spring.io/spring-integration/reference/html/messaging-routing-chapter.html#agg-and-group-to
            .aggregate(a -> 
                            a.releaseStrategy(g -> g.size() == persistenceBatchSize)
                            .expireGroupsUponCompletion(true)
                            .sendPartialResultOnExpiry(true)
                            .groupTimeoutExpression("size() ge 2 ? 10000 : -1")
                            , null
            )
            .handle(jdbcRepositoryHandler())
            // TODO add advised PollableChannel to deal with possible persistence issue and retry with partial batch
            .get();
    // formatter:on
}

public class JdbcRepositoryHandler extends AbstractReplyProducingMessageHandler {

    private final Logger log = LoggerFactory.getLogger(getClass());

    @SuppressWarnings("rawtypes")
    private Insertable repository;

    public JdbcRepositoryHandler(Class<?> repositoryClass, ListableBeanFactory beanFactory) {
        repository = (Insertable<?>) beanFactory.getBean(repositoryClass);
    }

    @Override
    protected Object handleRequestMessage(Message<?> message) {
        List<?> result = null;
        try {
            result = repository.insert((List<?>) message.getPayload());
        } catch (TransactionSystemException | DataAccessException e) {
            // TODO Quite a bit more work to add retry capability for records that didn't cause failure
            log.error("DataMigrationModule.failure -- (Could not persist batch!)", ExceptionUtils.getStackTrace(e));
        }
        return result;
    }

}

public class FileToInputStreamTransformer extends AbstractFilePayloadTransformer<InputStream> {

    @Override
    protected InputStream transformFile(File payload) throws Exception {
        return new GZIPInputStream(new FileInputStream(payload));
    }
}

}
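
One thing the post never shows is the Insertable contract that JdbcRepositoryHandler looks up by class. A minimal sketch of what it is assumed to look like, inferred only from how it is called above:

import java.util.List;

// Assumed shape of the Insertable contract used by JdbcRepositoryHandler; the actual
// interface is not part of the original post, and the meaning of the returned list
// (e.g. the records that were inserted) is an assumption.
public interface Insertable<T> {
    List<T> insert(List<T> batch);
}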

For reference, the poll-once behavior comes down to a trigger that fires immediately on the first call and then returns null on every later call, so the poller never runs again:

private final AtomicBoolean invoked = new AtomicBoolean();

public Date nextExecutionTime(TriggerContext triggerContext) {
    return this.invoked.getAndSet(true) ? null : new Date();
}

...

e -> e.poller(p -> p.trigger(this::nextExecutionTime))
.<File, InputStream>transform(p -> new GZIPInputStream(new FileInputStream(p)))
.split(new FileSplitter())