DynamoDB并行扫描-Java同步

DynamoDB并行扫描-Java同步,java,amazon-dynamodb,Java,Amazon Dynamodb,我尝试使用DynamoDB并行扫描示例: http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/LowLevelJavaScanning.html 我有200000件物品,我进行了顺序代码扫描,并根据我的使用情况对其进行了轻微修改: Map<String, AttributeValue> lastKeyEvaluated = null; do { ScanRequest scanRequest = ne

我尝试使用DynamoDB并行扫描示例:

http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/LowLevelJavaScanning.html

我有200000件物品,我进行了顺序代码扫描,并根据我的使用情况对其进行了轻微修改:

Map<String, AttributeValue> lastKeyEvaluated = null;
do
{
    ScanRequest scanRequest = new ScanRequest()
    .withTableName(tableName)
    .withExclusiveStartKey(lastKeyEvaluated);

    ScanResult result = client.scan(scanRequest);


    double counter = 0;
    for(Map<String, AttributeValue> item : result.getItems())
    {
        itemSerialize.add("Set:"+counter);
        for (Map.Entry<String, AttributeValue> getItem : item.entrySet()) 
        {
            String attributeName = getItem.getKey();
            AttributeValue value = getItem.getValue();

            itemSerialize.add(attributeName
                    + (value.getS() == null ? "" : ":" + value.getS())
                    + (value.getN() == null ? "" : ":" + value.getN())
                    + (value.getB() == null ? "" : ":" + value.getB())
                    + (value.getSS() == null ? "" : ":" + value.getSS())
                    + (value.getNS() == null ? "" : ":" + value.getNS())
                    + (value.getBS() == null ? "" : ":" + value.getBS()));
        }
        counter += 1;
    }

    lastKeyEvaluated = result.getLastEvaluatedKey();
}
while(lastKeyEvaluated != null);
但是,每次运行这段代码时,项目的数量都会发生变化(总共变化60000个,每个线程6000个,创建了10个线程)。删除同步也不会改变结果

同步或AmazonAWSAPI是否存在缺陷

谢谢大家

编辑:

新函数调用:

ScanSegmentTask task = null;
ArrayList<String> list = new ArrayList<String>();

try
{
    ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads);
    int totalSegments = numberOfThreads;

    for (int segment = 0; segment < totalSegments; segment++) 
    {
        // Runnable task that will only scan one segment
        task = new ScanSegmentTask(tableName, itemLimit, totalSegments, segment);

        // Execute the task
        Future<ArrayList<String>> future = executor.submit(task);

        list.addAll(future.get());
    }
    shutDownExecutorService(executor);
}
ScanSegmentTask任务=null;
ArrayList=新建ArrayList();
尝试
{
ExecutorService executor=Executors.newFixedThreadPool(线程数);
int totalSegments=线程数;
对于(int段=0;段<总段;段++)
{
//仅扫描一个段的可运行任务
任务=新的扫描分段任务(表名、项限制、总分段、分段);
//执行任务
未来=执行者提交(任务);
list.addAll(future.get());
}
关闭执行人服务(执行人);
}
新类别:

// Runnable task for scanning a single segment of a DynamoDB table
private static class ScanSegmentTask implements Callable<ArrayList<String>>
{

    // DynamoDB table to scan
    private String tableName;

    // number of items each scan request should return
    private int itemLimit;

    // Total number of segments
    // Equals to total number of threads scanning the table in parallel
    private int totalSegments;

    // Segment that will be scanned with by this task
    private int segment;

    ArrayList<String> list_2 = new ArrayList<String>();

    static int counter = 0;

    public ScanSegmentTask(String tableName, int itemLimit, int totalSegments, int segment)
    {
        this.tableName = tableName;
        this.itemLimit = itemLimit;
        this.totalSegments = totalSegments;
        this.segment = segment;
    }

    @SuppressWarnings("finally")
    public ArrayList<String> call() 
    {
        System.out.println("Scanning " + tableName + " segment " + segment + " out of " + totalSegments + " segments " + itemLimit + " items at a time...");
        Map<String, AttributeValue> exclusiveStartKey = null;

        try 
        {
            while(true) 
            {
                ScanRequest scanRequest = new ScanRequest()
                    .withTableName(tableName)
                    .withLimit(itemLimit)
                    .withExclusiveStartKey(exclusiveStartKey)
                    .withTotalSegments(totalSegments)
                    .withSegment(segment);

                ScanResult result = client.scan(scanRequest);

                for(Map<String, AttributeValue> item : result.getItems())
                {
                    list_2.add("Set:"+counter);
                    for (Map.Entry<String, AttributeValue> getItem : item.entrySet()) 
                    {
                        String attributeName = getItem.getKey();
                        AttributeValue value = getItem.getValue();

                        list_2.add(attributeName
                                + (value.getS() == null ? "" : ":" + value.getS())
                                + (value.getN() == null ? "" : ":" + value.getN())
                                + (value.getB() == null ? "" : ":" + value.getB())
                                + (value.getSS() == null ? "" : ":" + value.getSS())
                                + (value.getNS() == null ? "" : ":" + value.getNS())
                                + (value.getBS() == null ? "" : ":" + value.getBS()));
                    }
                    counter += 1;
                }

                exclusiveStartKey = result.getLastEvaluatedKey();
                if (exclusiveStartKey == null) 
                {
                    break;
                }
            }
        } 
        catch (AmazonServiceException ase) 
        {
            System.err.println(ase.getMessage());
        } 
        finally 
        {
            return list_2;
        }
    }
}
//用于扫描DynamoDB表的单个段的可运行任务
私有静态类ScanSegmentTask实现可调用
{
//要扫描的DynamoDB表
私有字符串表名;
//每个扫描请求应返回的项目数
私人投资限额;
//分段总数
//等于并行扫描表的线程总数
私人部门;
//此任务将使用扫描的段
私有int段;
ArrayList_2=新的ArrayList();
静态整数计数器=0;
公共扫描段任务(字符串表名、int itemLimit、int totalSegments、int段)
{
this.tableName=tableName;
this.itemLimit=itemLimit;
this.totalSegments=totalSegments;
本段=段;
}
@抑制警告(“最终”)
公共ArrayList调用()
{
System.out.println(“一次扫描“+表名+”段“+段+”出段“+总段+”段“+项限制+”项…”);
Map exclusiveStartKey=null;
尝试
{
while(true)
{
ScanRequest ScanRequest=新的ScanRequest()
.withTableName(tableName)
.withLimit(itemLimit)
.使用exclusiveStartKey(exclusiveStartKey)
.withTotalSegments(totalSegments)
.带分段(分段);
ScanResult结果=client.scan(scanRequest);
对于(映射项:result.getItems())
{
列表2.添加(“设置:+计数器”);
对于(Map.Entry getItem:item.entrySet())
{
字符串attributeName=getItem.getKey();
AttributeValue=getItem.getValue();
列表2.添加(属性名称)
+(value.getS()==null?“:”:“+value.getS())
+(value.getN()==null?“:”:“+value.getN())
+(value.getB()==null?“:”:“+value.getB())
+(value.getSS()==null?“:”:“+value.getSS())
+(value.getNS()==null?“:”:“+value.getNS())
+(value.getBS()==null?“:”:“+value.getBS());
}
计数器+=1;
}
exclusiveStartKey=result.getLastEvaluatedKey();
if(exclusiveStartKey==null)
{
打破
}
}
} 
捕获(AmazonServiceException ase)
{
System.err.println(ase.getMessage());
} 
最后
{
返回列表2;
}
}
}
最终编辑:

函数调用:

ScanSegmentTask task = null;
ArrayList<String> list = new ArrayList<String>();
try
{
    ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads);
    int totalSegments = numberOfThreads;

    for (int segment = 0; segment < totalSegments; segment++) 
    {
        // Runnable task that will only scan one segment
        task = new ScanSegmentTask(tableName, itemLimit, totalSegments, segment, list);

        // Execute the task
        executor.execute(task);
    }
    shutDownExecutorService(executor);
}
.......Catches something if error
return list;
ScanSegmentTask task = null;
ArrayList<String> list = new ArrayList<String>();
ArrayList<Future<ArrayList<String>>> holdFuture = new ArrayList<Future<ArrayList<String>>>();

try
{
    ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads);
    int totalSegments = numberOfThreads;

    for (int segment = 0; segment < totalSegments; segment++) 
    {
        // Runnable task that will only scan one segment
        task = new ScanSegmentTask(tableName, itemLimit, totalSegments, segment);

        // Execute the task
        Future<ArrayList<String>> future = executor.submit(task);
        holdFuture.add(future);
    }

    for (int i = 0 ; i < holdFuture.size(); i++)
    {
        boolean flag = false;
        while(flag == false)
        {
            Thread.sleep(1000);
            if(holdFuture.get(i).isDone())
            {
                list.addAll(holdFuture.get(i).get());
                flag = true;
            }
        }
    }
    shutDownExecutorService(executor);
}
ScanSegmentTask任务=null;
ArrayList=新建ArrayList();
ArrayList holdFuture=新的ArrayList();
尝试
{
ExecutorService executor=Executors.newFixedThreadPool(线程数);
int totalSegments=线程数;
对于(int段=0;段<总段;段++)
{
//仅扫描一个段的可运行任务
任务=新的扫描分段任务(表名、项限制、总分段、分段);
//执行任务
未来=执行者提交(任务);
holdFuture.add(future);
}
for(int i=0;i
类别: 私有静态类ScanSegmentTask实现可调用> {

//要扫描的DynamoDB表
私有字符串表名;
//每个扫描请求应返回的项目数
私人投资限额;
//分段总数
//等于并行扫描表的线程总数
私人部门;
//此任务将使用扫描的段
私有int段;
ArrayList_2=新的ArrayList();
静态AtomicInteger计数器=新的AtomicInteger(0);
公共扫描段任务(字符串表名、int itemLimit、int totalSegments、int段)
{
this.tableName=tableName;
this.itemLimit=itemLimit;
this.totalSegments=totalSegments;
本段=段;
}
@抑制警告(“最终”)
公共ArrayList调用()
{
系统输出打印项次(“扫描”+ta
// Runnable task for scanning a single segment of a DynamoDB table
private static class ScanSegmentTask implements Callable<ArrayList<String>>
{

    // DynamoDB table to scan
    private String tableName;

    // number of items each scan request should return
    private int itemLimit;

    // Total number of segments
    // Equals to total number of threads scanning the table in parallel
    private int totalSegments;

    // Segment that will be scanned with by this task
    private int segment;

    ArrayList<String> list_2 = new ArrayList<String>();

    static int counter = 0;

    public ScanSegmentTask(String tableName, int itemLimit, int totalSegments, int segment)
    {
        this.tableName = tableName;
        this.itemLimit = itemLimit;
        this.totalSegments = totalSegments;
        this.segment = segment;
    }

    @SuppressWarnings("finally")
    public ArrayList<String> call() 
    {
        System.out.println("Scanning " + tableName + " segment " + segment + " out of " + totalSegments + " segments " + itemLimit + " items at a time...");
        Map<String, AttributeValue> exclusiveStartKey = null;

        try 
        {
            while(true) 
            {
                ScanRequest scanRequest = new ScanRequest()
                    .withTableName(tableName)
                    .withLimit(itemLimit)
                    .withExclusiveStartKey(exclusiveStartKey)
                    .withTotalSegments(totalSegments)
                    .withSegment(segment);

                ScanResult result = client.scan(scanRequest);

                for(Map<String, AttributeValue> item : result.getItems())
                {
                    list_2.add("Set:"+counter);
                    for (Map.Entry<String, AttributeValue> getItem : item.entrySet()) 
                    {
                        String attributeName = getItem.getKey();
                        AttributeValue value = getItem.getValue();

                        list_2.add(attributeName
                                + (value.getS() == null ? "" : ":" + value.getS())
                                + (value.getN() == null ? "" : ":" + value.getN())
                                + (value.getB() == null ? "" : ":" + value.getB())
                                + (value.getSS() == null ? "" : ":" + value.getSS())
                                + (value.getNS() == null ? "" : ":" + value.getNS())
                                + (value.getBS() == null ? "" : ":" + value.getBS()));
                    }
                    counter += 1;
                }

                exclusiveStartKey = result.getLastEvaluatedKey();
                if (exclusiveStartKey == null) 
                {
                    break;
                }
            }
        } 
        catch (AmazonServiceException ase) 
        {
            System.err.println(ase.getMessage());
        } 
        finally 
        {
            return list_2;
        }
    }
}
ScanSegmentTask task = null;
ArrayList<String> list = new ArrayList<String>();
ArrayList<Future<ArrayList<String>>> holdFuture = new ArrayList<Future<ArrayList<String>>>();

try
{
    ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads);
    int totalSegments = numberOfThreads;

    for (int segment = 0; segment < totalSegments; segment++) 
    {
        // Runnable task that will only scan one segment
        task = new ScanSegmentTask(tableName, itemLimit, totalSegments, segment);

        // Execute the task
        Future<ArrayList<String>> future = executor.submit(task);
        holdFuture.add(future);
    }

    for (int i = 0 ; i < holdFuture.size(); i++)
    {
        boolean flag = false;
        while(flag == false)
        {
            Thread.sleep(1000);
            if(holdFuture.get(i).isDone())
            {
                list.addAll(holdFuture.get(i).get());
                flag = true;
            }
        }
    }
    shutDownExecutorService(executor);
}
    // DynamoDB table to scan
    private String tableName;

    // number of items each scan request should return
    private int itemLimit;

    // Total number of segments
    // Equals to total number of threads scanning the table in parallel
    private int totalSegments;

    // Segment that will be scanned with by this task
    private int segment;

    ArrayList<String> list_2 = new ArrayList<String>();

    static AtomicInteger counter = new AtomicInteger(0);

    public ScanSegmentTask(String tableName, int itemLimit, int totalSegments, int segment)
    {
        this.tableName = tableName;
        this.itemLimit = itemLimit;
        this.totalSegments = totalSegments;
        this.segment = segment;
    }

    @SuppressWarnings("finally")
    public ArrayList<String> call() 
    {
        System.out.println("Scanning " + tableName + " segment " + segment + " out of " + totalSegments + " segments " + itemLimit + " items at a time...");
        Map<String, AttributeValue> exclusiveStartKey = null;

        try 
        {
            while(true) 
            {
                ScanRequest scanRequest = new ScanRequest()
                    .withTableName(tableName)
                    .withLimit(itemLimit)
                    .withExclusiveStartKey(exclusiveStartKey)
                    .withTotalSegments(totalSegments)
                    .withSegment(segment);

                ScanResult result = client.scan(scanRequest);

                for(Map<String, AttributeValue> item : result.getItems())
                {
                    list_2.add("Set:"+counter);
                    for (Map.Entry<String, AttributeValue> getItem : item.entrySet()) 
                    {
                        String attributeName = getItem.getKey();
                        AttributeValue value = getItem.getValue();

                        list_2.add(attributeName
                                + (value.getS() == null ? "" : ":" + value.getS())
                                + (value.getN() == null ? "" : ":" + value.getN())
                                + (value.getB() == null ? "" : ":" + value.getB())
                                + (value.getSS() == null ? "" : ":" + value.getSS())
                                + (value.getNS() == null ? "" : ":" + value.getNS())
                                + (value.getBS() == null ? "" : ":" + value.getBS()));
                    }
                    counter.addAndGet(1);
                }

                exclusiveStartKey = result.getLastEvaluatedKey();
                if (exclusiveStartKey == null) 
                {
                    break;
                }
            }
        } 
        catch (AmazonServiceException ase) 
        {
            System.err.println(ase.getMessage());
        } 
        finally 
        {
            return list_2;
        }
    }
}