DynamoDB并行扫描-Java同步
我尝试使用DynamoDB并行扫描示例:DynamoDB并行扫描-Java同步,java,amazon-dynamodb,Java,Amazon Dynamodb,我尝试使用DynamoDB并行扫描示例: http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/LowLevelJavaScanning.html 我有200000件物品,我进行了顺序代码扫描,并根据我的使用情况对其进行了轻微修改: Map<String, AttributeValue> lastKeyEvaluated = null; do { ScanRequest scanRequest = ne
http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/LowLevelJavaScanning.html
我有200000件物品,我进行了顺序代码扫描,并根据我的使用情况对其进行了轻微修改:
Map<String, AttributeValue> lastKeyEvaluated = null;
do
{
ScanRequest scanRequest = new ScanRequest()
.withTableName(tableName)
.withExclusiveStartKey(lastKeyEvaluated);
ScanResult result = client.scan(scanRequest);
double counter = 0;
for(Map<String, AttributeValue> item : result.getItems())
{
itemSerialize.add("Set:"+counter);
for (Map.Entry<String, AttributeValue> getItem : item.entrySet())
{
String attributeName = getItem.getKey();
AttributeValue value = getItem.getValue();
itemSerialize.add(attributeName
+ (value.getS() == null ? "" : ":" + value.getS())
+ (value.getN() == null ? "" : ":" + value.getN())
+ (value.getB() == null ? "" : ":" + value.getB())
+ (value.getSS() == null ? "" : ":" + value.getSS())
+ (value.getNS() == null ? "" : ":" + value.getNS())
+ (value.getBS() == null ? "" : ":" + value.getBS()));
}
counter += 1;
}
lastKeyEvaluated = result.getLastEvaluatedKey();
}
while(lastKeyEvaluated != null);
但是,每次运行这段代码时,项目的数量都会发生变化(总共变化60000个,每个线程6000个,创建了10个线程)。删除同步也不会改变结果
同步或AmazonAWSAPI是否存在缺陷
谢谢大家
编辑:
新函数调用:
ScanSegmentTask task = null;
ArrayList<String> list = new ArrayList<String>();
try
{
ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads);
int totalSegments = numberOfThreads;
for (int segment = 0; segment < totalSegments; segment++)
{
// Runnable task that will only scan one segment
task = new ScanSegmentTask(tableName, itemLimit, totalSegments, segment);
// Execute the task
Future<ArrayList<String>> future = executor.submit(task);
list.addAll(future.get());
}
shutDownExecutorService(executor);
}
ScanSegmentTask任务=null;
ArrayList=新建ArrayList();
尝试
{
ExecutorService executor=Executors.newFixedThreadPool(线程数);
int totalSegments=线程数;
对于(int段=0;段<总段;段++)
{
//仅扫描一个段的可运行任务
任务=新的扫描分段任务(表名、项限制、总分段、分段);
//执行任务
未来=执行者提交(任务);
list.addAll(future.get());
}
关闭执行人服务(执行人);
}
新类别:
// Runnable task for scanning a single segment of a DynamoDB table
private static class ScanSegmentTask implements Callable<ArrayList<String>>
{
// DynamoDB table to scan
private String tableName;
// number of items each scan request should return
private int itemLimit;
// Total number of segments
// Equals to total number of threads scanning the table in parallel
private int totalSegments;
// Segment that will be scanned with by this task
private int segment;
ArrayList<String> list_2 = new ArrayList<String>();
static int counter = 0;
public ScanSegmentTask(String tableName, int itemLimit, int totalSegments, int segment)
{
this.tableName = tableName;
this.itemLimit = itemLimit;
this.totalSegments = totalSegments;
this.segment = segment;
}
@SuppressWarnings("finally")
public ArrayList<String> call()
{
System.out.println("Scanning " + tableName + " segment " + segment + " out of " + totalSegments + " segments " + itemLimit + " items at a time...");
Map<String, AttributeValue> exclusiveStartKey = null;
try
{
while(true)
{
ScanRequest scanRequest = new ScanRequest()
.withTableName(tableName)
.withLimit(itemLimit)
.withExclusiveStartKey(exclusiveStartKey)
.withTotalSegments(totalSegments)
.withSegment(segment);
ScanResult result = client.scan(scanRequest);
for(Map<String, AttributeValue> item : result.getItems())
{
list_2.add("Set:"+counter);
for (Map.Entry<String, AttributeValue> getItem : item.entrySet())
{
String attributeName = getItem.getKey();
AttributeValue value = getItem.getValue();
list_2.add(attributeName
+ (value.getS() == null ? "" : ":" + value.getS())
+ (value.getN() == null ? "" : ":" + value.getN())
+ (value.getB() == null ? "" : ":" + value.getB())
+ (value.getSS() == null ? "" : ":" + value.getSS())
+ (value.getNS() == null ? "" : ":" + value.getNS())
+ (value.getBS() == null ? "" : ":" + value.getBS()));
}
counter += 1;
}
exclusiveStartKey = result.getLastEvaluatedKey();
if (exclusiveStartKey == null)
{
break;
}
}
}
catch (AmazonServiceException ase)
{
System.err.println(ase.getMessage());
}
finally
{
return list_2;
}
}
}
//用于扫描DynamoDB表的单个段的可运行任务
私有静态类ScanSegmentTask实现可调用
{
//要扫描的DynamoDB表
私有字符串表名;
//每个扫描请求应返回的项目数
私人投资限额;
//分段总数
//等于并行扫描表的线程总数
私人部门;
//此任务将使用扫描的段
私有int段;
ArrayList_2=新的ArrayList();
静态整数计数器=0;
公共扫描段任务(字符串表名、int itemLimit、int totalSegments、int段)
{
this.tableName=tableName;
this.itemLimit=itemLimit;
this.totalSegments=totalSegments;
本段=段;
}
@抑制警告(“最终”)
公共ArrayList调用()
{
System.out.println(“一次扫描“+表名+”段“+段+”出段“+总段+”段“+项限制+”项…”);
Map exclusiveStartKey=null;
尝试
{
while(true)
{
ScanRequest ScanRequest=新的ScanRequest()
.withTableName(tableName)
.withLimit(itemLimit)
.使用exclusiveStartKey(exclusiveStartKey)
.withTotalSegments(totalSegments)
.带分段(分段);
ScanResult结果=client.scan(scanRequest);
对于(映射项:result.getItems())
{
列表2.添加(“设置:+计数器”);
对于(Map.Entry getItem:item.entrySet())
{
字符串attributeName=getItem.getKey();
AttributeValue=getItem.getValue();
列表2.添加(属性名称)
+(value.getS()==null?“:”:“+value.getS())
+(value.getN()==null?“:”:“+value.getN())
+(value.getB()==null?“:”:“+value.getB())
+(value.getSS()==null?“:”:“+value.getSS())
+(value.getNS()==null?“:”:“+value.getNS())
+(value.getBS()==null?“:”:“+value.getBS());
}
计数器+=1;
}
exclusiveStartKey=result.getLastEvaluatedKey();
if(exclusiveStartKey==null)
{
打破
}
}
}
捕获(AmazonServiceException ase)
{
System.err.println(ase.getMessage());
}
最后
{
返回列表2;
}
}
}
最终编辑:
函数调用:
ScanSegmentTask task = null;
ArrayList<String> list = new ArrayList<String>();
try
{
ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads);
int totalSegments = numberOfThreads;
for (int segment = 0; segment < totalSegments; segment++)
{
// Runnable task that will only scan one segment
task = new ScanSegmentTask(tableName, itemLimit, totalSegments, segment, list);
// Execute the task
executor.execute(task);
}
shutDownExecutorService(executor);
}
.......Catches something if error
return list;
ScanSegmentTask task = null;
ArrayList<String> list = new ArrayList<String>();
ArrayList<Future<ArrayList<String>>> holdFuture = new ArrayList<Future<ArrayList<String>>>();
try
{
ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads);
int totalSegments = numberOfThreads;
for (int segment = 0; segment < totalSegments; segment++)
{
// Runnable task that will only scan one segment
task = new ScanSegmentTask(tableName, itemLimit, totalSegments, segment);
// Execute the task
Future<ArrayList<String>> future = executor.submit(task);
holdFuture.add(future);
}
for (int i = 0 ; i < holdFuture.size(); i++)
{
boolean flag = false;
while(flag == false)
{
Thread.sleep(1000);
if(holdFuture.get(i).isDone())
{
list.addAll(holdFuture.get(i).get());
flag = true;
}
}
}
shutDownExecutorService(executor);
}
ScanSegmentTask任务=null;
ArrayList=新建ArrayList();
ArrayList holdFuture=新的ArrayList();
尝试
{
ExecutorService executor=Executors.newFixedThreadPool(线程数);
int totalSegments=线程数;
对于(int段=0;段<总段;段++)
{
//仅扫描一个段的可运行任务
任务=新的扫描分段任务(表名、项限制、总分段、分段);
//执行任务
未来=执行者提交(任务);
holdFuture.add(future);
}
for(int i=0;i
类别:
私有静态类ScanSegmentTask实现可调用>
{
//要扫描的DynamoDB表
私有字符串表名;
//每个扫描请求应返回的项目数
私人投资限额;
//分段总数
//等于并行扫描表的线程总数
私人部门;
//此任务将使用扫描的段
私有int段;
ArrayList_2=新的ArrayList();
静态AtomicInteger计数器=新的AtomicInteger(0);
公共扫描段任务(字符串表名、int itemLimit、int totalSegments、int段)
{
this.tableName=tableName;
this.itemLimit=itemLimit;
this.totalSegments=totalSegments;
本段=段;
}
@抑制警告(“最终”)
公共ArrayList调用()
{
系统输出打印项次(“扫描”+ta
// Runnable task for scanning a single segment of a DynamoDB table
private static class ScanSegmentTask implements Callable<ArrayList<String>>
{
// DynamoDB table to scan
private String tableName;
// number of items each scan request should return
private int itemLimit;
// Total number of segments
// Equals to total number of threads scanning the table in parallel
private int totalSegments;
// Segment that will be scanned with by this task
private int segment;
ArrayList<String> list_2 = new ArrayList<String>();
static int counter = 0;
public ScanSegmentTask(String tableName, int itemLimit, int totalSegments, int segment)
{
this.tableName = tableName;
this.itemLimit = itemLimit;
this.totalSegments = totalSegments;
this.segment = segment;
}
@SuppressWarnings("finally")
public ArrayList<String> call()
{
System.out.println("Scanning " + tableName + " segment " + segment + " out of " + totalSegments + " segments " + itemLimit + " items at a time...");
Map<String, AttributeValue> exclusiveStartKey = null;
try
{
while(true)
{
ScanRequest scanRequest = new ScanRequest()
.withTableName(tableName)
.withLimit(itemLimit)
.withExclusiveStartKey(exclusiveStartKey)
.withTotalSegments(totalSegments)
.withSegment(segment);
ScanResult result = client.scan(scanRequest);
for(Map<String, AttributeValue> item : result.getItems())
{
list_2.add("Set:"+counter);
for (Map.Entry<String, AttributeValue> getItem : item.entrySet())
{
String attributeName = getItem.getKey();
AttributeValue value = getItem.getValue();
list_2.add(attributeName
+ (value.getS() == null ? "" : ":" + value.getS())
+ (value.getN() == null ? "" : ":" + value.getN())
+ (value.getB() == null ? "" : ":" + value.getB())
+ (value.getSS() == null ? "" : ":" + value.getSS())
+ (value.getNS() == null ? "" : ":" + value.getNS())
+ (value.getBS() == null ? "" : ":" + value.getBS()));
}
counter += 1;
}
exclusiveStartKey = result.getLastEvaluatedKey();
if (exclusiveStartKey == null)
{
break;
}
}
}
catch (AmazonServiceException ase)
{
System.err.println(ase.getMessage());
}
finally
{
return list_2;
}
}
}
ScanSegmentTask task = null;
ArrayList<String> list = new ArrayList<String>();
ArrayList<Future<ArrayList<String>>> holdFuture = new ArrayList<Future<ArrayList<String>>>();
try
{
ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads);
int totalSegments = numberOfThreads;
for (int segment = 0; segment < totalSegments; segment++)
{
// Runnable task that will only scan one segment
task = new ScanSegmentTask(tableName, itemLimit, totalSegments, segment);
// Execute the task
Future<ArrayList<String>> future = executor.submit(task);
holdFuture.add(future);
}
for (int i = 0 ; i < holdFuture.size(); i++)
{
boolean flag = false;
while(flag == false)
{
Thread.sleep(1000);
if(holdFuture.get(i).isDone())
{
list.addAll(holdFuture.get(i).get());
flag = true;
}
}
}
shutDownExecutorService(executor);
}
// DynamoDB table to scan
private String tableName;
// number of items each scan request should return
private int itemLimit;
// Total number of segments
// Equals to total number of threads scanning the table in parallel
private int totalSegments;
// Segment that will be scanned with by this task
private int segment;
ArrayList<String> list_2 = new ArrayList<String>();
static AtomicInteger counter = new AtomicInteger(0);
public ScanSegmentTask(String tableName, int itemLimit, int totalSegments, int segment)
{
this.tableName = tableName;
this.itemLimit = itemLimit;
this.totalSegments = totalSegments;
this.segment = segment;
}
@SuppressWarnings("finally")
public ArrayList<String> call()
{
System.out.println("Scanning " + tableName + " segment " + segment + " out of " + totalSegments + " segments " + itemLimit + " items at a time...");
Map<String, AttributeValue> exclusiveStartKey = null;
try
{
while(true)
{
ScanRequest scanRequest = new ScanRequest()
.withTableName(tableName)
.withLimit(itemLimit)
.withExclusiveStartKey(exclusiveStartKey)
.withTotalSegments(totalSegments)
.withSegment(segment);
ScanResult result = client.scan(scanRequest);
for(Map<String, AttributeValue> item : result.getItems())
{
list_2.add("Set:"+counter);
for (Map.Entry<String, AttributeValue> getItem : item.entrySet())
{
String attributeName = getItem.getKey();
AttributeValue value = getItem.getValue();
list_2.add(attributeName
+ (value.getS() == null ? "" : ":" + value.getS())
+ (value.getN() == null ? "" : ":" + value.getN())
+ (value.getB() == null ? "" : ":" + value.getB())
+ (value.getSS() == null ? "" : ":" + value.getSS())
+ (value.getNS() == null ? "" : ":" + value.getNS())
+ (value.getBS() == null ? "" : ":" + value.getBS()));
}
counter.addAndGet(1);
}
exclusiveStartKey = result.getLastEvaluatedKey();
if (exclusiveStartKey == null)
{
break;
}
}
}
catch (AmazonServiceException ase)
{
System.err.println(ase.getMessage());
}
finally
{
return list_2;
}
}
}