Uploading a ZipOutputStream to S3 with the AWS S3 Java SDK, without temporarily saving the (large) zip file to disk

I need to download photos (they are not in the same directory) from S3, zip them, and upload the archive back to S3 using the AWS S3 Java SDK. The zip file size can be in the order of gigabytes. I am currently using AWS Lambda, whose temporary storage is limited to about 500 MB, so I don't want to save the ZIP file to disk. Instead, I want to stream the ZIP file (created on the fly from the photos downloaded from S3) directly to S3. I need to do this with the AWS S3 Java SDK.

The basic idea is to use streaming operations. This way you don't wait until the ZIP is generated on the filesystem, but start uploading as soon as the ZIP algorithm produces any data. Obviously, some data will be buffered in memory, but there is still no need to wait for the whole ZIP file to be generated on disk. We will also use stream composition and a PipedInputStream/PipedOutputStream in two threads: one to read the data, and one to zip the contents.
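
For reference, here is a minimal, self-contained sketch of the PipedOutputStream/PipedInputStream pattern in isolation (no S3 involved; the entry name and data are made up for illustration):

import java.io.IOException;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

public class PipedZipSketch {
    public static void main(String[] args) throws Exception {
        final PipedOutputStream pipedOut = new PipedOutputStream();
        final PipedInputStream pipedIn = new PipedInputStream(pipedOut);

        // Producer: writes ZIP data into the pipe as it is generated.
        final Thread producer = new Thread(() -> {
            try (ZipOutputStream zip = new ZipOutputStream(pipedOut)) {
                zip.putNextEntry(new ZipEntry("hello.txt"));
                zip.write("hello".getBytes());
                zip.closeEntry();
            } catch (IOException e) {
                e.printStackTrace();
            }
        });

        // Consumer: reads the zipped bytes as soon as they become available (here it just counts them).
        final Thread consumer = new Thread(() -> {
            try {
                long total = 0;
                int read;
                final byte[] buffer = new byte[8192];
                while ((read = pipedIn.read(buffer)) != -1) {
                    total += read;
                }
                System.out.println("Consumed " + total + " zipped bytes");
            } catch (IOException e) {
                e.printStackTrace();
            }
        });

        producer.start();
        consumer.start();
        producer.join();
        consumer.join();
    }
}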

Here is a version using the AWS SDK for Java 1.x:

final AmazonS3 client = AmazonS3ClientBuilder.defaultClient();

final PipedOutputStream pipedOutputStream = new PipedOutputStream();
final PipedInputStream pipedInputStream = new PipedInputStream(pipedOutputStream);

final Thread s3In = new Thread(() -> {
    try (final ZipOutputStream zipOutputStream = new ZipOutputStream(pipedOutputStream)) {
        S3Objects
                // This is just a convenient way to list all the objects. Replace with your own logic.
                .inBucket(client, "bucket")
                .forEach((S3ObjectSummary objectSummary) -> {
                    try {
                        if (objectSummary.getKey().endsWith(".png")) {
                            System.out.println("Processing " + objectSummary.getKey());

                            final ZipEntry entry = new ZipEntry(
                                    UUID.randomUUID().toString() + ".png" // I was too lazy to extract a file name from the object summary
                            );
                            zipOutputStream.putNextEntry(entry);
                            IOUtils.copy(
                                    client.getObject(
                                            objectSummary.getBucketName(),
                                            objectSummary.getKey()
                                    ).getObjectContent(),
                                    zipOutputStream
                            );
                            zipOutputStream.closeEntry();
                        }
                    } catch (final Exception all) {
                        all.printStackTrace();
                    }
                });
    } catch (final Exception all) {
        all.printStackTrace();
    }
});
final Thread s3Out = new Thread(() -> {
    try {
        client.putObject(
                "another-bucket",
                "previews.zip",
                pipedInputStream,
                new ObjectMetadata()
        );
        pipedInputStream.close();
    } catch (final Exception all) {
        all.printStackTrace();
    }
});

s3In.start();
s3Out.start();

s3In.join();
s3Out.join();
However, note that it will print a warning:

WARNING: No content length specified for stream data.  Stream contents will be buffered in memory and could result in out of memory errors.
This is because S3 needs to know the size of the data before the upload, and it is not possible to know the size of the resulting ZIP in advance. You can probably try your luck with multipart uploads, but the code will be trickier. The idea is similar, though: one thread should read the data and send the content through a ZIP stream, and another thread should read the zipped output and upload it in parts. After all the parts are uploaded, the multipart upload should be completed.
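
For illustration, a condensed sketch of the bare multipart calls with the AWS SDK for Java 1.x. The helper method, the buffer size, and the fill() routine below are my own assumptions and not part of the original answer:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadResult;
import com.amazonaws.services.s3.model.PartETag;
import com.amazonaws.services.s3.model.UploadPartRequest;
import com.amazonaws.services.s3.model.UploadPartResult;

public class MultipartSketch {

    // Reads the (zipped) stream in ~10 MB chunks and uploads each chunk as one part.
    static void uploadStreamAsMultipart(AmazonS3 s3, String bucket, String key, InputStream in) throws IOException {
        final InitiateMultipartUploadResult init =
                s3.initiateMultipartUpload(new InitiateMultipartUploadRequest(bucket, key));
        final List<PartETag> etags = new ArrayList<>();
        final byte[] buffer = new byte[10 * 1024 * 1024]; // every part except the last must be at least 5 MB
        int filled;
        while ((filled = fill(in, buffer)) > 0) {
            final UploadPartResult result = s3.uploadPart(new UploadPartRequest()
                    .withBucketName(bucket)
                    .withKey(key)
                    .withUploadId(init.getUploadId())
                    .withPartNumber(etags.size() + 1)
                    .withInputStream(new ByteArrayInputStream(buffer, 0, filled))
                    .withPartSize(filled));
            etags.add(result.getPartETag());
        }
        s3.completeMultipartUpload(
                new CompleteMultipartUploadRequest(bucket, key, init.getUploadId(), etags));
    }

    // Fills the buffer as far as possible; returns the number of bytes read, 0 at end of stream.
    private static int fill(InputStream in, byte[] buffer) throws IOException {
        int total = 0;
        int read;
        while (total < buffer.length && (read = in.read(buffer, total, buffer.length - total)) != -1) {
            total += read;
        }
        return total;
    }
}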

Here is an example of the same approach using the AWS SDK for Java 2.x:

final S3Client client = S3Client.create();

final PipedOutputStream pipedOutputStream = new PipedOutputStream();
final PipedInputStream pipedInputStream = new PipedInputStream(pipedOutputStream);

final Thread s3In = new Thread(() -> {
    try (final ZipOutputStream zipOutputStream = new ZipOutputStream(pipedOutputStream)) {
        client.listObjectsV2Paginator(
                ListObjectsV2Request
                        .builder()
                        .bucket("bucket")
                        .build()
        )
                .contents()
                .forEach((S3Object object) -> {
                    try {
                        if (object.key().endsWith(".png")) {
                            System.out.println("Processing " + object.key());

                            final ZipEntry entry = new ZipEntry(
                                    UUID.randomUUID().toString() + ".png" // I was too lazy to extract a file name from the object
                            );
                            zipOutputStream.putNextEntry(entry);
                            client.getObject(
                                    GetObjectRequest
                                            .builder()
                                            .bucket("bucket")
                                            .key(object.key())
                                            .build(),
                                    ResponseTransformer.toOutputStream(zipOutputStream)
                            );
                            zipOutputStream.closeEntry();
                        }
                    } catch (final Exception all) {
                        all.printStackTrace();
                    }
                });
    } catch (final Exception all) {
        all.printStackTrace();
    }
});
final Thread s3Out = new Thread(() -> {
    try {
        client.putObject(
                PutObjectRequest
                        .builder()
                        .bucket("another-bucket")
                        .key("previews.zip")
                        .build(),
                RequestBody.fromBytes(
                        IOUtils.toByteArray(pipedInputStream)
                )
        );
    } catch (final Exception all) {
        all.printStackTrace();
    }
});

s3In.start();
s3Out.start();

s3In.join();
s3Out.join();

Note that RequestBody.fromBytes(IOUtils.toByteArray(pipedInputStream)) still reads the whole zipped stream into memory before uploading. To avoid buffering the entire archive, the upload side can instead be wrapped in an OutputStream that performs a multipart upload behind the scenes. Here is such an S3OutputStream implementation for the AWS SDK for Java 1.x:

import java.io.ByteArrayInputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.AbortMultipartUploadRequest;
import com.amazonaws.services.s3.model.CannedAccessControlList;
import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadResult;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PartETag;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.UploadPartRequest;
import com.amazonaws.services.s3.model.UploadPartResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class S3OutputStream extends OutputStream {

    private static final Logger LOG = LoggerFactory.getLogger(S3OutputStream.class);

    /** Default chunk size is 10MB */
    protected static final int BUFFER_SIZE = 10000000;

    /** The bucket-name on Amazon S3 */
    private final String bucket;

    /** The path (key) name within the bucket */
    private final String path;

    /** The temporary buffer used for storing the chunks */
    private final byte[] buf;

    /** The position in the buffer */
    private int position;

    /** Amazon S3 client. TODO: support KMS */
    private final AmazonS3 s3Client;

    /** The unique id for this upload */
    private String uploadId;

    /** Collection of the etags for the parts that have been uploaded */
    private final List<PartETag> etags;

    /** indicates whether the stream is still open / valid */
    private boolean open;

    /**
     * Creates a new S3 OutputStream
     * @param s3Client the AmazonS3 client
     * @param bucket name of the bucket
     * @param path path within the bucket
     */
    public S3OutputStream(AmazonS3 s3Client, String bucket, String path) {
        this.s3Client = s3Client;
        this.bucket = bucket;
        this.path = path;
        this.buf = new byte[BUFFER_SIZE];
        this.position = 0;
        this.etags = new ArrayList<>();
        this.open = true;
    }

    /**
     * Write an array to the S3 output stream.
     *
     * @param b the byte-array to append
     */
    @Override
    public void write(byte[] b) {
        write(b,0,b.length);
    }

    /**
     * Writes an array to the S3 Output Stream
     *
     * @param byteArray the array to write
     * @param o the offset into the array
     * @param l the number of bytes to write
     */
    @Override
    public void write(final byte[] byteArray, final int o, final int l) {
        this.assertOpen();
        int ofs = o, len = l;
        int size;
        while (len > (size = this.buf.length - position)) {
            System.arraycopy(byteArray, ofs, this.buf, this.position, size);
            this.position += size;
            flushBufferAndRewind();
            ofs += size;
            len -= size;
        }
        System.arraycopy(byteArray, ofs, this.buf, this.position, len);
        this.position += len;
    }

    /**
     * Flushing is not required for this stream: a part is uploaded automatically whenever
     * the internal buffer fills up, and the remaining data is uploaded on {@code close()}.
     */
    @Override
    public synchronized void flush() {
        this.assertOpen();
        LOG.debug("Flush was called");
    }

    protected void flushBufferAndRewind() {
        if (uploadId == null) {
            LOG.debug("Starting a multipart upload for {}/{}",this.bucket,this.path);
            final InitiateMultipartUploadRequest request = new InitiateMultipartUploadRequest(this.bucket, this.path)
                    .withCannedACL(CannedAccessControlList.BucketOwnerFullControl);
            InitiateMultipartUploadResult initResponse = s3Client.initiateMultipartUpload(request);
            this.uploadId = initResponse.getUploadId();
        }
        uploadPart();
        this.position = 0;
    }

    protected void uploadPart() {
        LOG.debug("Uploading part {}",this.etags.size());
        UploadPartResult uploadResult = this.s3Client.uploadPart(new UploadPartRequest()
                .withBucketName(this.bucket)
                .withKey(this.path)
                .withUploadId(this.uploadId)
                .withInputStream(new ByteArrayInputStream(buf,0,this.position))
                .withPartNumber(this.etags.size() + 1)
                .withPartSize(this.position));
        this.etags.add(uploadResult.getPartETag());
    }

    @Override
    public void close() {
        if (this.open) {
            this.open = false;
            if (this.uploadId != null) {
                if (this.position > 0) {
                    uploadPart();
                }
                LOG.debug("Completing multipart");
                this.s3Client.completeMultipartUpload(new CompleteMultipartUploadRequest(bucket, path, uploadId, etags));
            }
            else {
                LOG.debug("Uploading object at once to {}/{}",this.bucket,this.path);
                final ObjectMetadata metadata = new ObjectMetadata();
                metadata.setContentLength(this.position);
                final PutObjectRequest request = new PutObjectRequest(this.bucket, this.path, new ByteArrayInputStream(this.buf, 0, this.position), metadata)
                        .withCannedAcl(CannedAccessControlList.BucketOwnerFullControl);
                this.s3Client.putObject(request);
            }
        }
    }

    public void cancel() {
        this.open = false;
        if (this.uploadId != null) {
            LOG.debug("Aborting multipart upload");
            this.s3Client.abortMultipartUpload(new AbortMultipartUploadRequest(this.bucket, this.path, this.uploadId));
        }
    }

    @Override
    public void write(int b) {
        this.assertOpen();
        if (position >= this.buf.length) {
            flushBufferAndRewind();
        }
        this.buf[position++] = (byte)b;
    }

    private void assertOpen() {
        if (!this.open) {
            throw new IllegalStateException("Closed");
        }
    }
}
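
A possible way to wire this S3OutputStream together with a ZipOutputStream, sketched under the assumption of the same placeholder bucket and key names as in the examples above:

import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.iterable.S3Objects;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.amazonaws.util.IOUtils;

public class ZipToS3 {
    public static void main(String[] args) throws Exception {
        final AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();

        // Every write to the ZipOutputStream is buffered by S3OutputStream and uploaded
        // as a multipart part once 10 MB have accumulated; close() completes the upload.
        try (ZipOutputStream zip = new ZipOutputStream(new S3OutputStream(s3, "another-bucket", "previews.zip"))) {
            for (S3ObjectSummary summary : S3Objects.inBucket(s3, "bucket")) {
                if (!summary.getKey().endsWith(".png")) {
                    continue;
                }
                zip.putNextEntry(new ZipEntry(summary.getKey()));
                IOUtils.copy(
                        s3.getObject(summary.getBucketName(), summary.getKey()).getObjectContent(),
                        zip
                );
                zip.closeEntry();
            }
        }
    }
}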