Proper use of Spring Integration Java DSL plus AmazonS3InboundSynchronizationMessageSource
java, spring, amazon-s3, spring-integration, dsl

I am using an AmazonS3InboundSynchronizationMessageSource to read what may amount to millions of files scattered across an S3 bucket's sub-directories, organized as type >> year >> month >> day >> hour >> {filename}-{uniqueid}.gz. Ideally, I would like to poll and write away, and have the synchronizer remember the last position read on each subsequent poll so it can retrieve the next batch. That, however, is not how the above MessageSource was designed.

In any event, I can get around that by picking a range and reading in the contents.

Beyond that, if I take a naive approach and read from one directory on the first poll, I want to shut down (System.exit) after that (actually after a bit of processing, per the comments below).

So, similar to what is asked here:

I only want to poll once and exit after the first poll. (Perhaps there is a different way to go about this? I am open to suggestions.)
Bootstrapping of the app:
@SpringBootApplication
@EnableIntegration
@IntegrationComponentScan
public class DataMigrationApp extends SpringBootServletInitializer {

    @Override
    protected SpringApplicationBuilder configure(SpringApplicationBuilder application) {
        return application.sources(DataMigrationApp.class);
    }

    public static void main(String[] args) {
        SpringApplication.run(DataMigrationApp.class, args);
    }
}
UPDATE (2015-09-06)

Code sample:
@Configuration
public class DataMigrationModule {

    private final Logger log = LoggerFactory.getLogger(getClass());

    @Value("${cloud.aws.credentials.accessKey}")
    private String accessKey;

    @Value("${cloud.aws.credentials.secretKey}")
    private String secretKey;

    @Value("${cloud.aws.s3.bucket}")
    private String bucket;

    @Value("${cloud.aws.s3.max-objects-per-batch:1024}")
    private int maxObjectsPerBatch;

    @Value("${cloud.aws.s3.accept-subfolders:false}")
    private String acceptSubFolders;

    @Value("${cloud.aws.s3.remote-directory}")
    private String remoteDirectory;

    @Value("${cloud.aws.s3.local-directory:target/s3-dump}")
    private String localDirectory;

    @Value("${cloud.aws.s3.filename-wildcard:}")
    private String fileNameWildcard;

    @Value("${app.persistent-type:}")
    private String persistentType;

    @Value("${app.repository-type:}")
    private String repositoryType;

    @Value("${app.persistence-batch-size:2500}")
    private int persistenceBatchSize;

    @Autowired
    private ListableBeanFactory beanFactory;
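    // One-shot trigger state: the first poll flips the flag, and every later call
    // returns null so the poller never fires again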
    private final AtomicBoolean invoked = new AtomicBoolean();

    public Date nextExecutionTime(TriggerContext triggerContext) {
        return this.invoked.getAndSet(true) ? null : new Date();
    }

    private FileToInputStreamTransformer unzipTransformer() {
        FileToInputStreamTransformer transformer = new FileToInputStreamTransformer();
        transformer.setDeleteFiles(true);
        return transformer;
    }

    private Class<?> repositoryType() {
        try {
            return Class.forName(repositoryType);
        } catch (ClassNotFoundException cnfe) {
            log.error("DataMigrationModule.failure -- (Unknown repository implementation!)", cnfe);
            System.exit(0);
        }
        return null;
    }

    private Class<?> persistentType() {
        try {
            return Class.forName(persistentType);
        } catch (ClassNotFoundException cnfe) {
            log.error("DataMigrationModule.failure -- (Unsupported type!)", cnfe);
            System.exit(0);
        }
        return null;
    }

    @Bean
    public MessageSource<?> amazonS3InboundSynchronizationMessageSource() {
        AWSCredentials credentials = new BasicAWSCredentials(this.accessKey, this.secretKey);
        AmazonS3InboundSynchronizationMessageSource messageSource = new AmazonS3InboundSynchronizationMessageSource();
        messageSource.setCredentials(credentials);
        messageSource.setBucket(bucket);
        messageSource.setMaxObjectsPerBatch(maxObjectsPerBatch);
        messageSource.setAcceptSubFolders(Boolean.valueOf(acceptSubFolders));
        messageSource.setRemoteDirectory(remoteDirectory);
        if (!fileNameWildcard.isEmpty()) {
            messageSource.setFileNameWildcard(fileNameWildcard);
        }
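        // Mirror the remote objects into a working directory under java.io.tmpdir,
        // normalizing the configured local path first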
        String directory = System.getProperty("java.io.tmpdir");
        if (!localDirectory.startsWith("/")) {
            localDirectory = "/" + localDirectory;
        }
        if (!localDirectory.endsWith("/")) {
            localDirectory = localDirectory + "/";
        }
        directory = directory + localDirectory;
        FileUtils.mkdir(directory);
        messageSource.setDirectory(new LiteralExpression(directory));
        return messageSource;
    }

    @Bean
    DirectChannel inputChannel() {
        return new DirectChannel();
    }

    @Bean
    JdbcRepositoryHandler jdbcRepositoryHandler() {
        return new JdbcRepositoryHandler(repositoryType(), beanFactory);
    }
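    // Pipeline: sync a batch of S3 objects to local disk -> gunzip -> split into lines ->
    // fan out to a thread pool -> JSON to domain objects -> aggregate into batches ->
    // persist via the repository handler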
    @Bean
    public IntegrationFlow flow() {
        // @formatter:off
        return IntegrationFlows
                .from(
                        this.amazonS3InboundSynchronizationMessageSource(),
                        e -> e.poller(p -> p.trigger(this::nextExecutionTime))
                )
                .transform(unzipTransformer())
                // TODO add advised PollableChannel to deal with possible decompression issues
                .split(f -> new FileSplitter())
                .channel(MessageChannels.executor(Executors.newCachedThreadPool()))
                .transform(Transformers.fromJson(persistentType()))
                // TODO add advised PollableChannel to deal with possible transform issues
                // @see http://docs.spring.io/spring-integration/reference/html/messaging-routing-chapter.html#agg-and-group-to
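                // Release when a group reaches persistenceBatchSize; any group holding at
                // least 2 records times out after 10s and is flushed as a partial batch
                // (sendPartialResultOnExpiry)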
                .aggregate(a ->
                        a.releaseStrategy(g -> g.size() == persistenceBatchSize)
                                .expireGroupsUponCompletion(true)
                                .sendPartialResultOnExpiry(true)
                                .groupTimeoutExpression("size() ge 2 ? 10000 : -1")
                        , null
                )
                .handle(jdbcRepositoryHandler())
                // TODO add advised PollableChannel to deal with possible persistence issue and retry with partial batch
                .get();
        // @formatter:on
    }

    public class JdbcRepositoryHandler extends AbstractReplyProducingMessageHandler {

        private final Logger log = LoggerFactory.getLogger(getClass());

        @SuppressWarnings("rawtypes")
        private Insertable repository;

        public JdbcRepositoryHandler(Class<?> repositoryClass, ListableBeanFactory beanFactory) {
            repository = (Insertable<?>) beanFactory.getBean(repositoryClass);
        }

        @Override
        protected Object handleRequestMessage(Message<?> message) {
            List<?> result = null;
            try {
                result = repository.insert((List<?>) message.getPayload());
            } catch (TransactionSystemException | DataAccessException e) {
                // TODO Quite a bit more work to add retry capability for records that didn't cause failure
                log.error("DataMigrationModule.failure -- (Could not persist batch!)", ExceptionUtils.getStackTrace(e));
            }
            return result;
        }
    }

    public class FileToInputStreamTransformer extends AbstractFilePayloadTransformer<InputStream> {

        @Override
        protected InputStream transformFile(File payload) throws Exception {
            return new GZIPInputStream(new FileInputStream(payload));
        }
    }
}
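For reference, the @Value placeholders above expect externalized configuration along these lines. A minimal application.properties sketch, where the bucket, directory, and class names are illustrative placeholders rather than values from the original post (keys with defaults, such as max-objects-per-batch, can be omitted):

cloud.aws.credentials.accessKey=...
cloud.aws.credentials.secretKey=...
cloud.aws.s3.bucket=my-bucket
cloud.aws.s3.remote-directory=type/year/month/day/hour
cloud.aws.s3.filename-wildcard=*.gz
app.persistent-type=com.example.MyRecord
app.repository-type=com.example.MyRecordRepository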
For the poll-once requirement, the one-shot trigger shown in the question does the job: it fires immediately on the first poll and then returns null, so the poller never runs again.

private final AtomicBoolean invoked = new AtomicBoolean();

public Date nextExecutionTime(TriggerContext triggerContext) {
    return this.invoked.getAndSet(true) ? null : new Date();
}
...
e -> e.poller(p -> p.trigger(this::nextExecutionTime))

The flow itself can be simplified as well: the custom FileToInputStreamTransformer is unnecessary, since the unzip step can be written as an inline transform, and the FileSplitter should be passed to .split() directly rather than constructed inside a lambda:

.<File, InputStream>transform(p -> new GZIPInputStream(new FileInputStream(p)))
.split(new FileSplitter())
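One caveat with the inline transform: FileInputStream's constructor throws the checked FileNotFoundException, which a transformer lambda cannot rethrow as-is, so in practice the call may need a small try/catch wrapper. A minimal sketch of that adjustment (not part of the original answer):

.<File, InputStream>transform(p -> {
    try {
        return new GZIPInputStream(new FileInputStream(p));
    } catch (IOException e) {
        // rethrow unchecked so the lambda fits the transformer's signature
        throw new UncheckedIOException(e);
    }
})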