Proper use of Spring Integration Java DSL plus AmazonS3InboundSynchronizationMessageSource
java, spring, amazon-s3, spring-integration, dsl

I am using an AmazonS3InboundSynchronizationMessageSource to read what may amount to millions of files scattered across an S3 bucket's sub-directories, organized as type >> year >> month >> day >> hour >> {filename}-{uniqueid}.gz. Ideally, I would like to poll and write away, and have the synchronizer remember the last position read on each subsequent poll so it can retrieve the next batch. That, however, is not how the above MessageSource was designed.

In any event, I can get around that by picking a range and reading in the contents.

Beyond that, if I take a naive approach and read from one directory on the first poll, I want to shut down (System.exit) after that (actually after a bit of processing, per the comments below).

So, similar to what is asked here:

I only want to poll once and exit after the first poll. (Perhaps there is a different way to go about this? I am open to suggestions.)
Bootstrapping of the app:
@SpringBootApplication
@EnableIntegration
@IntegrationComponentScan
public class DataMigrationApp extends SpringBootServletInitializer {

    @Override
    protected SpringApplicationBuilder configure(SpringApplicationBuilder application) {
        return application.sources(DataMigrationApp.class);
    }

    public static void main(String[] args) {
        SpringApplication.run(DataMigrationApp.class, args);
    }
}
UPDATE (2015-09-06)

Code sample:
@Configuration
public class DataMigrationModule {

    private final Logger log = LoggerFactory.getLogger(getClass());

    @Value("${cloud.aws.credentials.accessKey}")
    private String accessKey;

    @Value("${cloud.aws.credentials.secretKey}")
    private String secretKey;

    @Value("${cloud.aws.s3.bucket}")
    private String bucket;

    @Value("${cloud.aws.s3.max-objects-per-batch:1024}")
    private int maxObjectsPerBatch;

    @Value("${cloud.aws.s3.accept-subfolders:false}")
    private String acceptSubFolders;

    @Value("${cloud.aws.s3.remote-directory}")
    private String remoteDirectory;

    @Value("${cloud.aws.s3.local-directory:target/s3-dump}")
    private String localDirectory;

    @Value("${cloud.aws.s3.filename-wildcard:}")
    private String fileNameWildcard;

    @Value("${app.persistent-type:}")
    private String persistentType;

    @Value("${app.repository-type:}")
    private String repositoryType;

    @Value("${app.persistence-batch-size:2500}")
    private int persistenceBatchSize;

    @Autowired
    private ListableBeanFactory beanFactory;
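    // One-shot trigger state: the first poll flips the flag, and every later call
    // returns null so the poller never fires again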
    private final AtomicBoolean invoked = new AtomicBoolean();

    public Date nextExecutionTime(TriggerContext triggerContext) {
        return this.invoked.getAndSet(true) ? null : new Date();
    }

    private FileToInputStreamTransformer unzipTransformer() {
        FileToInputStreamTransformer transformer = new FileToInputStreamTransformer();
        transformer.setDeleteFiles(true);
        return transformer;
    }

    private Class<?> repositoryType() {
        try {
            return Class.forName(repositoryType);
        } catch (ClassNotFoundException cnfe) {
            log.error("DataMigrationModule.failure -- (Unknown repository implementation!)", cnfe);
            System.exit(0);
        }
        return null;
    }

    private Class<?> persistentType() {
        try {
            return Class.forName(persistentType);
        } catch (ClassNotFoundException cnfe) {
            log.error("DataMigrationModule.failure -- (Unsupported type!)", cnfe);
            System.exit(0);
        }
        return null;
    }

    @Bean
    public MessageSource<?> amazonS3InboundSynchronizationMessageSource() {
        AWSCredentials credentials = new BasicAWSCredentials(this.accessKey, this.secretKey);
        AmazonS3InboundSynchronizationMessageSource messageSource = new AmazonS3InboundSynchronizationMessageSource();
        messageSource.setCredentials(credentials);
        messageSource.setBucket(bucket);
        messageSource.setMaxObjectsPerBatch(maxObjectsPerBatch);
        messageSource.setAcceptSubFolders(Boolean.valueOf(acceptSubFolders));
        messageSource.setRemoteDirectory(remoteDirectory);
        if (!fileNameWildcard.isEmpty()) {
            messageSource.setFileNameWildcard(fileNameWildcard);
        }
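        // Mirror the remote objects into a working directory under java.io.tmpdir,
        // normalizing the configured local path first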
        String directory = System.getProperty("java.io.tmpdir");
        if (!localDirectory.startsWith("/")) {
            localDirectory = "/" + localDirectory;
        }
        if (!localDirectory.endsWith("/")) {
            localDirectory = localDirectory + "/";
        }
        directory = directory + localDirectory;
        FileUtils.mkdir(directory);
        messageSource.setDirectory(new LiteralExpression(directory));
        return messageSource;
    }

    @Bean
    DirectChannel inputChannel() {
        return new DirectChannel();
    }

    @Bean
    JdbcRepositoryHandler jdbcRepositoryHandler() {
        return new JdbcRepositoryHandler(repositoryType(), beanFactory);
    }
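    // Pipeline: sync a batch of S3 objects to local disk -> gunzip -> split into lines ->
    // fan out to a thread pool -> JSON to domain objects -> aggregate into batches ->
    // persist via the repository handler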
    @Bean
    public IntegrationFlow flow() {
        // @formatter:off
        return IntegrationFlows
                .from(
                        this.amazonS3InboundSynchronizationMessageSource(),
                        e -> e.poller(p -> p.trigger(this::nextExecutionTime))
                )
                .transform(unzipTransformer())
                // TODO add advised PollableChannel to deal with possible decompression issues
                .split(f -> new FileSplitter())
                .channel(MessageChannels.executor(Executors.newCachedThreadPool()))
                .transform(Transformers.fromJson(persistentType()))
                // TODO add advised PollableChannel to deal with possible transform issues
                // @see http://docs.spring.io/spring-integration/reference/html/messaging-routing-chapter.html#agg-and-group-to
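                // Release when a group reaches persistenceBatchSize; any group holding at
                // least 2 records times out after 10s and is flushed as a partial batch
                // (sendPartialResultOnExpiry)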
                .aggregate(a ->
                        a.releaseStrategy(g -> g.size() == persistenceBatchSize)
                                .expireGroupsUponCompletion(true)
                                .sendPartialResultOnExpiry(true)
                                .groupTimeoutExpression("size() ge 2 ? 10000 : -1")
                        , null
                )
                .handle(jdbcRepositoryHandler())
                // TODO add advised PollableChannel to deal with possible persistence issue and retry with partial batch
                .get();
        // @formatter:on
    }

    public class JdbcRepositoryHandler extends AbstractReplyProducingMessageHandler {

        private final Logger log = LoggerFactory.getLogger(getClass());

        @SuppressWarnings("rawtypes")
        private Insertable repository;

        public JdbcRepositoryHandler(Class<?> repositoryClass, ListableBeanFactory beanFactory) {
            repository = (Insertable<?>) beanFactory.getBean(repositoryClass);
        }

        @Override
        protected Object handleRequestMessage(Message<?> message) {
            List<?> result = null;
            try {
                result = repository.insert((List<?>) message.getPayload());
            } catch (TransactionSystemException | DataAccessException e) {
                // TODO Quite a bit more work to add retry capability for records that didn't cause failure
                log.error("DataMigrationModule.failure -- (Could not persist batch!)", ExceptionUtils.getStackTrace(e));
            }
            return result;
        }
    }

    public class FileToInputStreamTransformer extends AbstractFilePayloadTransformer<InputStream> {

        @Override
        protected InputStream transformFile(File payload) throws Exception {
            return new GZIPInputStream(new FileInputStream(payload));
        }
    }
}
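For reference, the @Value placeholders above expect externalized configuration along these lines. A minimal application.properties sketch, where the bucket, directory, and class names are illustrative placeholders rather than values from the original post (keys with defaults, such as max-objects-per-batch, can be omitted):

cloud.aws.credentials.accessKey=...
cloud.aws.credentials.secretKey=...
cloud.aws.s3.bucket=my-bucket
cloud.aws.s3.remote-directory=type/year/month/day/hour
cloud.aws.s3.filename-wildcard=*.gz
app.persistent-type=com.example.MyRecord
app.repository-type=com.example.MyRecordRepository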
For the poll-once requirement, the one-shot trigger shown in the question does the job: it fires immediately on the first poll and then returns null, so the poller never runs again.

private final AtomicBoolean invoked = new AtomicBoolean();

public Date nextExecutionTime(TriggerContext triggerContext) {
    return this.invoked.getAndSet(true) ? null : new Date();
}
...
e -> e.poller(p -> p.trigger(this::nextExecutionTime))

The flow itself can be simplified as well: the custom FileToInputStreamTransformer is unnecessary, since the unzip step can be written as an inline transform, and the FileSplitter should be passed to .split() directly rather than constructed inside a lambda:

.<File, InputStream>transform(p -> new GZIPInputStream(new FileInputStream(p)))
.split(new FileSplitter())
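One caveat with the inline transform: FileInputStream's constructor throws the checked FileNotFoundException, which a transformer lambda cannot rethrow as-is, so in practice the call may need a small try/catch wrapper. A minimal sketch of that adjustment (not part of the original answer):

.<File, InputStream>transform(p -> {
    try {
        return new GZIPInputStream(new FileInputStream(p));
    } catch (IOException e) {
        // rethrow unchecked so the lambda fits the transformer's signature
        throw new UncheckedIOException(e);
    }
})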