Amazon web services AWS CloudSearch导出/下载数据
问题:我在 AWS CloudSearch 索引中有大约 150 万个文档。这项服务的费用太高,我希望迁出该服务,但我找不到从索引中下载或导出文档的方法。这可能吗?
回答:亚马逊(仍然)没有提供从 CloudSearch 域导出全部数据的方法,不过自己编写一个实用程序来实现这一点并不困难。出于类似的需要,我不得不遍历整个 CloudSearch 域(超过 10000 条的限制)来生成一个文件。我使用了一个 Node.js 脚本来处理这个问题,如下所示:
标签:amazon-web-services,amazon-cloudsearch
var AWS = require('aws-sdk');
var fs = require('fs');
// SDK-wide configuration. Every value below is a placeholder that must be
// replaced before the script is run.
// NOTE(review): credentials are hard-coded here; avoid committing real keys.
AWS.config.update({
  accessKeyId: '<yourAccessKey>', secretAccessKey: '<yourSecretAccessKey>',
  region: '<yourRegion>', endpoint: '<YourSearchDomainEndPoint>'
});
var batchSize = 5000; // Number of items requested on every search (max: 10000)
var compteur = 0; // Running count of hits retrieved so far
var result = []; // Hits accumulated across all pages
// Last search response; launchSearch assigns it, so declare it here instead of
// letting that assignment create an implicit global.
var resultMessage = null;
var params = {query: ""};
var cloudsearchdomain = new AWS.CloudSearchDomain(params);
/**
 * Requests one page of hits from the CloudSearch domain, appends them to the
 * module-level `result` array and chains to the next page. When a page comes
 * back empty, the accumulated hits are handed to writeTheFile().
 *
 * If a search request fails, the error and the request parameters are logged,
 * the process exit code is set to 1 and the chain stops (nothing is written).
 *
 * @param {Object} [theContext] - Paging state taken from the previous response
 *   (`cursor`, `start`, `found`, `retrived`). Omit (or pass null) for the
 *   first request.
 */
function launchSearch(theContext) {
  process.stdout.write('Launch AWS.CloudSearch ');
  if (theContext == null) {
    process.stdout.write('initial request ... ');
  } else {
    // Progress indicator derived from the previous response's start/found.
    var current = (theContext.start / batchSize) + 2;
    var totalRun = (Math.ceil(theContext.found / batchSize * 10) / 10) + 1;
    process.stdout.write('( ' + current + ' / ' + totalRun + ' ) ... ');
  }

  // NOTE(review): a negated term that no document should contain, presumably
  // so that every document matches; confirm against the domain's query parser.
  var searchParams = {
    query: "-aQueryStringImpossibleToFind",
    cursor: (theContext == null) ? "initial" : theContext.cursor,
    size: batchSize
  };
  params = searchParams;

  // Reuse the module-level client rather than building a new one per page
  // with the search parameters passed as client configuration.
  cloudsearchdomain.search(searchParams, function (err, data) {
    if (err) {
      console.log("Failed with params :");
      console.log(searchParams);
      console.log(err);
      // Without a response there is no cursor to continue from; stop here
      // instead of dereferencing a missing or stale response.
      process.exitCode = 1;
      return;
    }

    var hits = data.hits.hit;
    compteur = compteur + hits.length;
    for (var i = 0; i < hits.length; i++) {
      result.push(hits[i]);
    }

    process.stdout.write(hits.length + ' hits found.');
    if (hits.length === 0) {
      // An empty page marks the end of the result set.
      process.stdout.write(' Done.\n\nLet\'s create thte file...\n');
      writeTheFile(result);
      return;
    }

    process.stdout.write('\n');
    launchSearch({
      cursor: data.hits.cursor,
      start: data.hits.start,
      found: data.hits.found,
      retrived: hits.length
    });
  });
}
/**
 * Serializes the collected hits as JSON into the file named by the first
 * command-line argument (process.argv[2]).
 *
 * The "DONE" message is printed only after the write has completed
 * successfully; on failure the error is logged and the exit code is set to 1.
 *
 * @param {Array} myResult - Hits to serialize.
 */
function writeTheFile(myResult) {
  var outputFile = process.argv[2];
  fs.writeFile(outputFile, JSON.stringify(myResult), function (err) {
    if (err) {
      process.exitCode = 1;
      return console.log(err);
    }
    process.stdout.write("DONE : File '" + outputFile + "' generated ( " + compteur + " elements ).\n");
  });
}
/* Check parameters: the output filename is required as the first argument. */
if (!process.argv[2]) {
  process.stdout.write('ERROR : the output filename is expected as argumment.\n');
  // Exit with a non-zero status so callers can detect the usage error.
  process.exit(1);
} else {
  launchSearch();
}
var AWS=require('AWS-sdk');
var fs=需要('fs');
AWS.config.update({
accessKeyId:“”,secretAccessKey:“”,
区域:“”,终结点:“”
});
var batchSize=5000//每次搜索的项目数。。。最多:10000
var compteur=0;
var结果=[];
var params={query::};
var cloudsearchdomain=new AWS.cloudsearchdomain(params);
函数启动搜索(上下文){
process.stdout.write('Launch AWS.CloudSearch');
if(上下文==null){
process.stdout.write('initialrequest…');
}否则{
var current=(context.start/batchSize)+2;
var totalRun=(Math.ceil(context.found/batchSize*10)/10)+1;
process.stdout.write('('+current+'/'+totalRun+')…);
}
参数={
查询:“-aQueryStringImpossibleToFind”,
游标:(theContext==null)?“initial”:theContext.cursor,
大小:批量大小
};
var forCursor=new AWS.CloudSearchDomain(params);
搜索(参数,函数(错误,数据){
如果(错误){
log(“使用参数失败:”);
控制台日志(err);
}否则{
结果消息=数据;
compteur=compteur+data.hits.hit.length;
对于(var i=0;i
我只修正了两处问题,功劳完全归于 @Nek 的回答。
前提条件:Node.js + aws-sdk 模块
export-all.js
请注意:要通过 return: "_all_fields" 参数获得完整转储,各字段必须在架构的索引选项中启用 return 标志。
var AWS = require('aws-sdk');
var fs = require('fs');
// Shared state used by launchSearch() and writeTheFile().
var result = [];          // hits accumulated across all pages
var resultMessage = [];   // replaced by the latest search response
var compteur = 0;         // running count of retrieved hits
var batchSize = 10000;    // page size requested on every search

// SDK-wide settings; the 'xx' / 'xxx' values are placeholders.
AWS.config.update({
  endpoint: 'xxx',
  region: 'xx',
  secretAccessKey: 'xx',
  accessKeyId: 'xx'
});

// Created after the configuration update above.
var params = { query: "" };
var cloudsearchdomain = new AWS.CloudSearchDomain(params);
/**
 * Requests one page of hits from the CloudSearch domain, appends them to the
 * module-level `result` array and chains to the next page. When a page comes
 * back empty, the accumulated hits are handed to writeTheFile().
 *
 * If a search request fails, the error and the request parameters are logged,
 * the process exit code is set to 1 and the chain stops (nothing is written).
 *
 * @param {Object} [theContext] - Paging state taken from the previous response
 *   (`cursor`, `start`, `found`, `retrived`). Omit (or pass null) for the
 *   first request.
 */
function launchSearch(theContext) {
  process.stdout.write('Launch AWS.CloudSearch ');
  if (theContext == null) {
    process.stdout.write('initial request ... ');
  } else {
    // Progress indicator derived from the previous response's start/found.
    var current = (theContext.start / batchSize) + 2;
    var totalRun = (Math.ceil(theContext.found / batchSize * 10) / 10) + 1;
    process.stdout.write('( ' + current + ' / ' + totalRun + ' ) ... ');
  }

  // https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/CloudSearchDomain.html#search-property
  // Structured-parser "matchall" query, requesting all returnable fields.
  var searchParams = {
    query: "matchall",
    cursor: (theContext == null) ? "initial" : theContext.cursor,
    size: batchSize,
    queryParser: "structured",
    return: "_all_fields"
  };
  params = searchParams;

  // Reuse the module-level client rather than building a new one per page
  // with the search parameters passed as client configuration.
  cloudsearchdomain.search(searchParams, function (err, data) {
    if (err) {
      console.log("Failed with params :");
      console.log(searchParams);
      console.log(err);
      // Without a response there is no cursor to continue from; stop here
      // instead of dereferencing a missing or stale response.
      process.exitCode = 1;
      return;
    }

    resultMessage = data;
    var hits = data.hits.hit;
    compteur = compteur + hits.length;
    for (var i = 0; i < hits.length; i++) {
      result.push(hits[i]);
    }

    process.stdout.write(hits.length + ' hits found.');
    if (hits.length === 0) {
      // An empty page marks the end of the result set.
      process.stdout.write(' Done.\n\nLet\'s create thte file...\n');
      writeTheFile(result);
      return;
    }

    process.stdout.write('\n');
    launchSearch({
      cursor: data.hits.cursor,
      start: data.hits.start,
      found: data.hits.found,
      retrived: hits.length
    });
  });
}
/**
 * Serializes the collected hits as JSON into the file named by the first
 * command-line argument (process.argv[2]).
 *
 * The "DONE" message is printed only after the write has completed
 * successfully; on failure the error is logged and the exit code is set to 1.
 *
 * @param {Array} myResult - Hits to serialize.
 */
function writeTheFile(myResult) {
  var outputFile = process.argv[2];
  fs.writeFile(outputFile, JSON.stringify(myResult), function (err) {
    if (err) {
      process.exitCode = 1;
      return console.log(err);
    }
    process.stdout.write("DONE : File '" + outputFile + "' generated ( " + compteur + " elements ).\n");
  });
}
/* Check parameters: the output filename is required as the first argument. */
if (!process.argv[2]) {
  process.stdout.write('ERROR : the output filename is expected as argument.\n');
  // Exit with a non-zero status so callers can detect the usage error.
  process.exit(1);
} else {
  launchSearch();
}
这是什么版本的API?
var AWS = require('aws-sdk');
var fs = require('fs');
// Shared state used by launchSearch() and writeTheFile().
var result = [];          // hits accumulated across all pages
var resultMessage = [];   // replaced by the latest search response
var compteur = 0;         // running count of retrieved hits
var batchSize = 10000;    // page size requested on every search

// SDK-wide settings; the 'xx' / 'xxx' values are placeholders.
AWS.config.update({
  endpoint: 'xxx',
  region: 'xx',
  secretAccessKey: 'xx',
  accessKeyId: 'xx'
});

// Created after the configuration update above.
var params = { query: "" };
var cloudsearchdomain = new AWS.CloudSearchDomain(params);
/**
 * Requests one page of hits from the CloudSearch domain, appends them to the
 * module-level `result` array and chains to the next page. When a page comes
 * back empty, the accumulated hits are handed to writeTheFile().
 *
 * If a search request fails, the error and the request parameters are logged,
 * the process exit code is set to 1 and the chain stops (nothing is written).
 *
 * @param {Object} [theContext] - Paging state taken from the previous response
 *   (`cursor`, `start`, `found`, `retrived`). Omit (or pass null) for the
 *   first request.
 */
function launchSearch(theContext) {
  process.stdout.write('Launch AWS.CloudSearch ');
  if (theContext == null) {
    process.stdout.write('initial request ... ');
  } else {
    // Progress indicator derived from the previous response's start/found.
    var current = (theContext.start / batchSize) + 2;
    var totalRun = (Math.ceil(theContext.found / batchSize * 10) / 10) + 1;
    process.stdout.write('( ' + current + ' / ' + totalRun + ' ) ... ');
  }

  // https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/CloudSearchDomain.html#search-property
  // Structured-parser "matchall" query, requesting all returnable fields.
  var searchParams = {
    query: "matchall",
    cursor: (theContext == null) ? "initial" : theContext.cursor,
    size: batchSize,
    queryParser: "structured",
    return: "_all_fields"
  };
  params = searchParams;

  // Reuse the module-level client rather than building a new one per page
  // with the search parameters passed as client configuration.
  cloudsearchdomain.search(searchParams, function (err, data) {
    if (err) {
      console.log("Failed with params :");
      console.log(searchParams);
      console.log(err);
      // Without a response there is no cursor to continue from; stop here
      // instead of dereferencing a missing or stale response.
      process.exitCode = 1;
      return;
    }

    resultMessage = data;
    var hits = data.hits.hit;
    compteur = compteur + hits.length;
    for (var i = 0; i < hits.length; i++) {
      result.push(hits[i]);
    }

    process.stdout.write(hits.length + ' hits found.');
    if (hits.length === 0) {
      // An empty page marks the end of the result set.
      process.stdout.write(' Done.\n\nLet\'s create thte file...\n');
      writeTheFile(result);
      return;
    }

    process.stdout.write('\n');
    launchSearch({
      cursor: data.hits.cursor,
      start: data.hits.start,
      found: data.hits.found,
      retrived: hits.length
    });
  });
}
/**
 * Serializes the collected hits as JSON into the file named by the first
 * command-line argument (process.argv[2]).
 *
 * The "DONE" message is printed only after the write has completed
 * successfully; on failure the error is logged and the exit code is set to 1.
 *
 * @param {Array} myResult - Hits to serialize.
 */
function writeTheFile(myResult) {
  var outputFile = process.argv[2];
  fs.writeFile(outputFile, JSON.stringify(myResult), function (err) {
    if (err) {
      process.exitCode = 1;
      return console.log(err);
    }
    process.stdout.write("DONE : File '" + outputFile + "' generated ( " + compteur + " elements ).\n");
  });
}
/* Check parameters: the output filename is required as the first argument. */
if (!process.argv[2]) {
  process.stdout.write('ERROR : the output filename is expected as argument.\n');
  // Exit with a non-zero status so callers can detect the usage error.
  process.exit(1);
} else {
  launchSearch();
}
$ node export-all.js all-data.json