Uploading a ZipOutputStream to S3 with the AWS S3 Java SDK, without temporarily saving the (large) zip file to disk

I need to download photos (they are not in the same directory) from S3, zip them, and upload the archive back to S3 using the AWS S3 Java SDK. The zip file size can be in the order of gigabytes. I am currently using AWS Lambda, whose temporary storage is limited to about 500 MB, so I don't want to save the ZIP file to disk. Instead, I want to stream the ZIP file (created on the fly from the photos downloaded from S3) directly to S3. I need to do this with the AWS S3 Java SDK.

The basic idea is to use streaming operations. This way you don't wait until the ZIP is generated on the filesystem, but start uploading as soon as the ZIP algorithm produces any data. Obviously, some data will be buffered in memory, but there is still no need to wait for the whole ZIP file to be generated on disk. We will also use stream composition and a PipedInputStream/PipedOutputStream in two threads: one to read the data, and one to zip the contents.
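
For reference, here is a minimal, self-contained sketch of the PipedOutputStream/PipedInputStream pattern in isolation (no S3 involved; the entry name and data are made up for illustration):

import java.io.IOException;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

public class PipedZipSketch {
    public static void main(String[] args) throws Exception {
        final PipedOutputStream pipedOut = new PipedOutputStream();
        final PipedInputStream pipedIn = new PipedInputStream(pipedOut);

        // Producer: writes ZIP data into the pipe as it is generated.
        final Thread producer = new Thread(() -> {
            try (ZipOutputStream zip = new ZipOutputStream(pipedOut)) {
                zip.putNextEntry(new ZipEntry("hello.txt"));
                zip.write("hello".getBytes());
                zip.closeEntry();
            } catch (IOException e) {
                e.printStackTrace();
            }
        });

        // Consumer: reads the zipped bytes as soon as they become available (here it just counts them).
        final Thread consumer = new Thread(() -> {
            try {
                long total = 0;
                int read;
                final byte[] buffer = new byte[8192];
                while ((read = pipedIn.read(buffer)) != -1) {
                    total += read;
                }
                System.out.println("Consumed " + total + " zipped bytes");
            } catch (IOException e) {
                e.printStackTrace();
            }
        });

        producer.start();
        consumer.start();
        producer.join();
        consumer.join();
    }
}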

Here is a version using the AWS SDK for Java 1.x:

final AmazonS3 client = AmazonS3ClientBuilder.defaultClient();

final PipedOutputStream pipedOutputStream = new PipedOutputStream();
final PipedInputStream pipedInputStream = new PipedInputStream(pipedOutputStream);

final Thread s3In = new Thread(() -> {
    try (final ZipOutputStream zipOutputStream = new ZipOutputStream(pipedOutputStream)) {
        S3Objects
                // This is just a convenient way to list all the objects. Replace with your own logic.
                .inBucket(client, "bucket")
                .forEach((S3ObjectSummary objectSummary) -> {
                    try {
                        if (objectSummary.getKey().endsWith(".png")) {
                            System.out.println("Processing " + objectSummary.getKey());

                            final ZipEntry entry = new ZipEntry(
                                    UUID.randomUUID().toString() + ".png" // I was too lazy to extract a file name from the object summary
                            );
                            zipOutputStream.putNextEntry(entry);
                            IOUtils.copy(
                                    client.getObject(
                                            objectSummary.getBucketName(),
                                            objectSummary.getKey()
                                    ).getObjectContent(),
                                    zipOutputStream
                            );
                            zipOutputStream.closeEntry();
                        }
                    } catch (final Exception all) {
                        all.printStackTrace();
                    }
                });
    } catch (final Exception all) {
        all.printStackTrace();
    }
});
final Thread s3Out = new Thread(() -> {
    try {
        client.putObject(
                "another-bucket",
                "previews.zip",
                pipedInputStream,
                new ObjectMetadata()
        );
        pipedInputStream.close();
    } catch (final Exception all) {
        all.printStackTrace();
    }
});

s3In.start();
s3Out.start();

s3In.join();
s3Out.join();
However, note that it will print a warning:

WARNING: No content length specified for stream data.  Stream contents will be buffered in memory and could result in out of memory errors.
This is because S3 needs to know the size of the data before the upload, and it is not possible to know the size of the resulting ZIP in advance. You can probably try your luck with multipart uploads, but the code will be trickier. The idea is similar, though: one thread should read the data and send the content through a ZIP stream, and another thread should read the zipped output and upload it in parts. After all the parts are uploaded, the multipart upload should be completed.
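
For illustration, a condensed sketch of the bare multipart calls with the AWS SDK for Java 1.x. The helper method, the buffer size, and the fill() routine below are my own assumptions and not part of the original answer:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadResult;
import com.amazonaws.services.s3.model.PartETag;
import com.amazonaws.services.s3.model.UploadPartRequest;
import com.amazonaws.services.s3.model.UploadPartResult;

public class MultipartSketch {

    // Reads the (zipped) stream in ~10 MB chunks and uploads each chunk as one part.
    static void uploadStreamAsMultipart(AmazonS3 s3, String bucket, String key, InputStream in) throws IOException {
        final InitiateMultipartUploadResult init =
                s3.initiateMultipartUpload(new InitiateMultipartUploadRequest(bucket, key));
        final List<PartETag> etags = new ArrayList<>();
        final byte[] buffer = new byte[10 * 1024 * 1024]; // every part except the last must be at least 5 MB
        int filled;
        while ((filled = fill(in, buffer)) > 0) {
            final UploadPartResult result = s3.uploadPart(new UploadPartRequest()
                    .withBucketName(bucket)
                    .withKey(key)
                    .withUploadId(init.getUploadId())
                    .withPartNumber(etags.size() + 1)
                    .withInputStream(new ByteArrayInputStream(buffer, 0, filled))
                    .withPartSize(filled));
            etags.add(result.getPartETag());
        }
        s3.completeMultipartUpload(
                new CompleteMultipartUploadRequest(bucket, key, init.getUploadId(), etags));
    }

    // Fills the buffer as far as possible; returns the number of bytes read, 0 at end of stream.
    private static int fill(InputStream in, byte[] buffer) throws IOException {
        int total = 0;
        int read;
        while (total < buffer.length && (read = in.read(buffer, total, buffer.length - total)) != -1) {
            total += read;
        }
        return total;
    }
}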

Here is an example of the same approach using the AWS SDK for Java 2.x:

final S3Client client = S3Client.create();

final PipedOutputStream pipedOutputStream = new PipedOutputStream();
final PipedInputStream pipedInputStream = new PipedInputStream(pipedOutputStream);

final Thread s3In = new Thread(() -> {
    try (final ZipOutputStream zipOutputStream = new ZipOutputStream(pipedOutputStream)) {
        client.listObjectsV2Paginator(
                ListObjectsV2Request
                        .builder()
                        .bucket("bucket")
                        .build()
        )
                .contents()
                .forEach((S3Object object) -> {
                    try {
                        if (object.key().endsWith(".png")) {
                            System.out.println("Processing " + object.key());

                            final ZipEntry entry = new ZipEntry(
                                    UUID.randomUUID().toString() + ".png" // I was too lazy to extract a file name from the object
                            );
                            zipOutputStream.putNextEntry(entry);
                            client.getObject(
                                    GetObjectRequest
                                            .builder()
                                            .bucket("bucket")
                                            .key(object.key())
                                            .build(),
                                    ResponseTransformer.toOutputStream(zipOutputStream)
                            );
                            zipOutputStream.closeEntry();
                        }
                    } catch (final Exception all) {
                        all.printStackTrace();
                    }
                });
    } catch (final Exception all) {
        all.printStackTrace();
    }
});
final Thread s3Out = new Thread(() -> {
    try {
        client.putObject(
                PutObjectRequest
                        .builder()
                        .bucket("another-bucket")
                        .key("previews.zip")
                        .build(),
                RequestBody.fromBytes(
                        IOUtils.toByteArray(pipedInputStream)
                )
        );
    } catch (final Exception all) {
        all.printStackTrace();
    }
});

s3In.start();
s3Out.start();

s3In.join();
s3Out.join();

Note that RequestBody.fromBytes(IOUtils.toByteArray(pipedInputStream)) still reads the whole zipped stream into memory before uploading. To avoid buffering the entire archive, the upload side can instead be wrapped in an OutputStream that performs a multipart upload behind the scenes. Here is such an S3OutputStream implementation for the AWS SDK for Java 1.x:

import java.io.ByteArrayInputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.AbortMultipartUploadRequest;
import com.amazonaws.services.s3.model.CannedAccessControlList;
import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadResult;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PartETag;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.UploadPartRequest;
import com.amazonaws.services.s3.model.UploadPartResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class S3OutputStream extends OutputStream {

    private static final Logger LOG = LoggerFactory.getLogger(S3OutputStream.class);

    /** Default chunk size is 10MB */
    protected static final int BUFFER_SIZE = 10000000;

    /** The bucket-name on Amazon S3 */
    private final String bucket;

    /** The path (key) name within the bucket */
    private final String path;

    /** The temporary buffer used for storing the chunks */
    private final byte[] buf;

    /** The position in the buffer */
    private int position;

    /** Amazon S3 client. TODO: support KMS */
    private final AmazonS3 s3Client;

    /** The unique id for this upload */
    private String uploadId;

    /** Collection of the etags for the parts that have been uploaded */
    private final List<PartETag> etags;

    /** indicates whether the stream is still open / valid */
    private boolean open;

    /**
     * Creates a new S3 OutputStream
     * @param s3Client the AmazonS3 client
     * @param bucket name of the bucket
     * @param path path within the bucket
     */
    public S3OutputStream(AmazonS3 s3Client, String bucket, String path) {
        this.s3Client = s3Client;
        this.bucket = bucket;
        this.path = path;
        this.buf = new byte[BUFFER_SIZE];
        this.position = 0;
        this.etags = new ArrayList<>();
        this.open = true;
    }

    /**
     * Write an array to the S3 output stream.
     *
     * @param b the byte-array to append
     */
    @Override
    public void write(byte[] b) {
        write(b,0,b.length);
    }

    /**
     * Writes an array to the S3 Output Stream
     *
     * @param byteArray the array to write
     * @param o the offset into the array
     * @param l the number of bytes to write
     */
    @Override
    public void write(final byte[] byteArray, final int o, final int l) {
        this.assertOpen();
        int ofs = o, len = l;
        int size;
        while (len > (size = this.buf.length - position)) {
            System.arraycopy(byteArray, ofs, this.buf, this.position, size);
            this.position += size;
            flushBufferAndRewind();
            ofs += size;
            len -= size;
        }
        System.arraycopy(byteArray, ofs, this.buf, this.position, len);
        this.position += len;
    }

    /**
     * Flushing is not required for this stream: a part is uploaded automatically whenever
     * the internal buffer fills up, and the remaining data is uploaded on {@code close()}.
     */
    @Override
    public synchronized void flush() {
        this.assertOpen();
        LOG.debug("Flush was called");
    }

    protected void flushBufferAndRewind() {
        if (uploadId == null) {
            LOG.debug("Starting a multipart upload for {}/{}",this.bucket,this.path);
            final InitiateMultipartUploadRequest request = new InitiateMultipartUploadRequest(this.bucket, this.path)
                    .withCannedACL(CannedAccessControlList.BucketOwnerFullControl);
            InitiateMultipartUploadResult initResponse = s3Client.initiateMultipartUpload(request);
            this.uploadId = initResponse.getUploadId();
        }
        uploadPart();
        this.position = 0;
    }

    protected void uploadPart() {
        LOG.debug("Uploading part {}",this.etags.size());
        UploadPartResult uploadResult = this.s3Client.uploadPart(new UploadPartRequest()
                .withBucketName(this.bucket)
                .withKey(this.path)
                .withUploadId(this.uploadId)
                .withInputStream(new ByteArrayInputStream(buf,0,this.position))
                .withPartNumber(this.etags.size() + 1)
                .withPartSize(this.position));
        this.etags.add(uploadResult.getPartETag());
    }

    @Override
    public void close() {
        if (this.open) {
            this.open = false;
            if (this.uploadId != null) {
                if (this.position > 0) {
                    uploadPart();
                }
                LOG.debug("Completing multipart");
                this.s3Client.completeMultipartUpload(new CompleteMultipartUploadRequest(bucket, path, uploadId, etags));
            }
            else {
                LOG.debug("Uploading object at once to {}/{}",this.bucket,this.path);
                final ObjectMetadata metadata = new ObjectMetadata();
                metadata.setContentLength(this.position);
                final PutObjectRequest request = new PutObjectRequest(this.bucket, this.path, new ByteArrayInputStream(this.buf, 0, this.position), metadata)
                        .withCannedAcl(CannedAccessControlList.BucketOwnerFullControl);
                this.s3Client.putObject(request);
            }
        }
    }

    public void cancel() {
        this.open = false;
        if (this.uploadId != null) {
            LOG.debug("Aborting multipart upload");
            this.s3Client.abortMultipartUpload(new AbortMultipartUploadRequest(this.bucket, this.path, this.uploadId));
        }
    }

    @Override
    public void write(int b) {
        this.assertOpen();
        if (position >= this.buf.length) {
            flushBufferAndRewind();
        }
        this.buf[position++] = (byte)b;
    }

    private void assertOpen() {
        if (!this.open) {
            throw new IllegalStateException("Closed");
        }
    }
}
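
A possible way to wire this S3OutputStream together with a ZipOutputStream, sketched under the assumption of the same placeholder bucket and key names as in the examples above:

import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.iterable.S3Objects;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.amazonaws.util.IOUtils;

public class ZipToS3 {
    public static void main(String[] args) throws Exception {
        final AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();

        // Every write to the ZipOutputStream is buffered by S3OutputStream and uploaded
        // as a multipart part once 10 MB have accumulated; close() completes the upload.
        try (ZipOutputStream zip = new ZipOutputStream(new S3OutputStream(s3, "another-bucket", "previews.zip"))) {
            for (S3ObjectSummary summary : S3Objects.inBucket(s3, "bucket")) {
                if (!summary.getKey().endsWith(".png")) {
                    continue;
                }
                zip.putNextEntry(new ZipEntry(summary.getKey()));
                IOUtils.copy(
                        s3.getObject(summary.getBucketName(), summary.getKey()).getObjectContent(),
                        zip
                );
                zip.closeEntry();
            }
        }
    }
}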