Javascript MongoDB到Elasticsearch索引
标签:javascript, node.js, mongodb, elasticsearch, search
我卡在了把MongoDB集合中的数据索引到elasticsearch这一步。下面是我尝试索引mongo数据的代码:
// Script from the question: pings Elasticsearch, connects to MongoDB, walks every
// document of one collection and attempts to push the documents to Elasticsearch
// through the bulk API. The NOTE(review) comments below mark the spots where the
// visible code does not do what the surrounding comments say it should.
const elasticsearch = require('elasticsearch');
// The Elasticsearch client itself is instantiated further down, after the settings.
// NOTE(review): `bulk` is declared here and again after the settings; the second
// `var bulk = []` just re-initialises the same variable.
var bulk = [];
var MongoClient = require('mongodb').MongoClient;
// NOTE(review): ObjectID is not referenced anywhere else in this script.
var ObjectID = require('mongodb').ObjectID;
var mongoDBName = 'mydb'; // Name of mongodb goes here
var mongoCollectionName = 'mycollection'; // Collection name of mongodb goes here
var connectionString = 'mongodb://127.0.0.1:27017/'; // put username and password for mongo here
var esIndexName = 'new-collection'; // Elasticsearch index name will go here
var bulk = [];
const client = new elasticsearch.Client({
hosts: [ 'http://localhost:9200']
});
// ping the client to be sure Elasticsearch is up
// NOTE(review): the outcome is only logged; MongoClient.connect below is issued at
// top level, so the import starts whether or not the ping succeeds.
client.ping({
requestTimeout: 30000,
}, function(error) {
// An error here means Elasticsearch is down; please check your Elasticsearch service
if (error) {
console.error('Elasticsearch cluster is down!');
} else {
console.log('Everything is ok');
}
});
// Connect to `connectionString + mongoDBName` and iterate over the collection.
MongoClient.connect(connectionString+mongoDBName, function(err, db) {
if(err) throw err;
// for each object in a collection
var collection = db.collection(mongoCollectionName);
// Number of callback invocations so far; only used for the progress log below.
var counter = 0;
// NOTE(review): `err` is never checked inside this callback, and `response` / `status`
// are not used by the body.
collection.find().each(function(err, item, response, status) {
console.log(item)
// NOTE(review): `item` is presumably a single plain document object. Array.from() on an
// object that is neither iterable nor array-like yields [], so this loop body would never
// run and nothing would be pushed onto `bulk`. If the driver signals the end of the cursor
// with a null item (the `item != null` check below suggests so), Array.from(null) throws.
Array.from(item).forEach(itemdata => {
// Bulk API format: one action line followed by one document line.
bulk.push({index:{
_index: esIndexName,
_type: mongoCollectionName,
}
})
bulk.push(itemdata)
})
//perform bulk indexing of the data passed
// NOTE(review): this runs inside the per-document callback, i.e. one bulk request per
// document, and `bulk` is never reset, so each request re-sends whatever has accumulated.
client.bulk({body:bulk}, function( err, response ){
if( err ){
// NOTE(review): `.red` / `.green` are not standard String properties; presumably a
// colouring library (e.g. `colors`) was intended, but none is loaded in this script.
console.log("Failed Bulk operation".red, err)
} else {
// NOTE(review): this logs the length of the collection *name* string, not a document count.
console.log("Successfully imported %s".green, mongoCollectionName.length);
}
console.log(response);
});
if(item != null) {
// Progress log every 100th callback invocation.
if(counter % 100 == 0) console.log( "Syncing object id: "+ item['_id'] + " #: " + counter);
// NOTE(review): index creation is requested for every non-null document, and only after
// the bulk request above has already been issued.
client.indices.create(
{ index: esIndexName },
function(error, response) {
if (error) {
console.log(error);
} else {
console.log("created a new index", response);
}
}
);
}
counter += 1;
});
});
在这里,我试图将数据索引到elasticsearch中:我能够创建索引,但无法把数据插入到elasticsearch的这个索引中。有人能帮我吗?
我错在哪里?我在这里犯了什么错误?
我在这里使用nodejs,目前只是用简单的函数来测试,稍后会添加lambda函数来同步更新/删除等变更。

首先,我建议整理一下您的代码;现在很难看出这些代码块是如何嵌套的。
现在,您的代码有几个问题:
1. 为什么要执行 Array.from(item).forEach(itemdata => { ?item 是来自Mongo的文档对象,因此对其执行 Array.from 没有任何效果。
2. 您是在 .each 回调中调用批量(bulk)API的,这意味着您会对每个文档都执行一次API调用。我认为这不是您想要的。
3. ping 调用很好,但是如果集群已关闭,它并不会阻止其余代码继续运行。
当累积到 n 个文档时,再调用 bulk API 并重置请求体(body)。

您可以使用logstash将数据从mongo db导入elasticsearch。请参考下面的配置:
# Logstash pipeline: read documents from a MongoDB collection and index them into
# Elasticsearch, echoing every event to stdout for debugging.
# The `mongodb` input is not bundled with Logstash; it presumably comes from the
# logstash-input-mongodb plugin, which must be installed separately.
# All strings use plain ASCII quotes; typographic quotes are not valid delimiters
# in a Logstash configuration file.
input {
  mongodb {
    codec => "json"
    # Connection URI including the database name.
    uri => 'mongodb://localhost:27017/NewDb'
    # Directory and file name of the local database the input keeps its state in
    # (presumably the read position within the collection; confirm in the plugin docs).
    placeholder_db_dir => '/home/devbrt.shukla/Desktop/scalaoutput/ELK/logstash-6.4.1/db_dir'
    placeholder_db_name => 'Employee_sqlite.db'
    # Collection to read from.
    collection => 'Employee'
    batch_size => 5000
    generateId => 'true'
    parse_method => "simple"
  }
}
filter {
  mutate {
    # Drop the MongoDB `_id` field from the event before it is sent to Elasticsearch
    # (presumably because `_id` is a reserved metadata field there).
    remove_field => [ "_id" ]
  }
}
output {
  elasticsearch {
    hosts => ["localhost:9200"]
    # One index per day, named after the event timestamp.
    index => "employee-%{+YYYY.MM.dd}"
  }
  # Also print every event to the console.
  stdout { codec => rubydebug }
}
Logstash的配置分为三个部分:输入(input)、过滤(filter)和输出(output)。
输入:从sql、mongodb、mysql等数据源获取数据。过滤:在本节中,我们可以构造自定义的json,再将其索引到elasticsearch中。
输出:在本节中,我们指定索引名、文档类型以及elasticsearch的IP地址。

这是您正在寻找的解决方案:
index.js
// MongoDB client config
const { MongoClient } = require('mongodb');
const mongoDBName = 'mydb'; // Name of mongodb goes here
const mongoCollectionName = 'mycollection'; // Collection name of mongodb goes here
const connectionString = 'mongodb://127.0.0.1:27017/'; // put username and password for mongo here

// Elasticsearch client config
const { Client } = require('@elastic/elasticsearch');
const esClient = new Client({ node: 'http://localhost:9200' });
const esIndexName = 'new-collection'; // Elasticsearch index name will go here

// Number of documents sent per bulk request. Sending in batches keeps memory use
// bounded instead of buffering the whole collection before the first request.
const BULK_BATCH_SIZE = 1000;

/**
 * Sends one batch of bulk operations to Elasticsearch.
 *
 * @param {object[]} operations - Alternating action / document entries, in the
 *   order expected by the bulk API.
 * @returns {Promise<object[]>} One entry per failed operation; empty when the
 *   batch was empty or every operation succeeded.
 */
async function sendBulk(operations) {
  // An empty bulk body is rejected by Elasticsearch, so skip the request entirely.
  if (operations.length === 0) {
    return [];
  }

  const { body: bulkResponse } = await esClient.bulk({ body: operations });
  if (!bulkResponse.errors) {
    return [];
  }

  const erroredDocuments = [];
  // The items array has the same order as the operations we just sent.
  // The presence of the `error` key indicates that the operation failed.
  bulkResponse.items.forEach((action, i) => {
    const operation = Object.keys(action)[0];
    if (action[operation].error) {
      erroredDocuments.push({
        // A status of 429 means the document can be retried; otherwise it is
        // very likely a mapping error and the document must be fixed first.
        status: action[operation].status,
        error: action[operation].error,
        operation: operations[i * 2],
        document: operations[i * 2 + 1],
      });
    }
  });
  return erroredDocuments;
}

/**
 * Copies every document of the configured MongoDB collection into the
 * configured Elasticsearch index, then logs the failed documents (if any) and
 * the resulting document count of the index.
 *
 * Errors are logged rather than thrown; the MongoDB connection is always closed.
 *
 * @returns {Promise<void>}
 */
async function indexData() {
  let client;
  try {
    client = await MongoClient.connect(connectionString, { useNewUrlParser: true });
  } catch (err) {
    console.log(err);
    return;
  }

  try {
    // Create the index once, before any document is sent. A 400 response
    // (e.g. the index already exists) is ignored so the script can be re-run.
    await esClient.indices.create({ index: esIndexName }, { ignore: [400] });

    const cursor = client.db(mongoDBName).collection(mongoCollectionName).find();
    const erroredDocuments = [];
    let bulk = [];

    while (await cursor.hasNext()) {
      // `_id` must not appear inside the document body; pass it as the
      // Elasticsearch document id instead so re-running the script overwrites
      // documents rather than duplicating them.
      const { _id, ...data } = await cursor.next();
      bulk.push({ index: { _index: esIndexName, _id: String(_id) } });
      bulk.push(data);

      // Two entries (action + document) per MongoDB document.
      if (bulk.length >= BULK_BATCH_SIZE * 2) {
        erroredDocuments.push(...(await sendBulk(bulk)));
        bulk = [];
      }
    }
    // Flush the last, possibly partial, batch.
    erroredDocuments.push(...(await sendBulk(bulk)));

    if (erroredDocuments.length > 0) {
      console.log(erroredDocuments);
    }

    // Make the new documents visible to search before counting them.
    await esClient.indices.refresh({ index: esIndexName });
    const { body: count } = await esClient.count({ index: esIndexName });
    console.log(count);
  } catch (err) {
    console.log(err);
  } finally {
    await client.close();
  }
}

indexData().catch((err) => {
  // Only reachable if closing the MongoDB connection itself fails.
  console.error(err);
  process.exitCode = 1;
});
package.json
{
"name": "elastic-node-mongo",
"version": "1.0.0",
"description": "Simple example to connect ElasticSearch, MongoDB and NodeJS",
"main": "index.js",
"dependencies": {
"@elastic/elasticsearch": "^7.3.0",
"mongodb": "^3.3.2",
"nodemon": "1.18.3"
},
"scripts": {
"dev": "nodemon",
"start": "node index.js"
},
"keywords": [
"nodejs",
"node",
"mongodb",
"elasticsearch",
"docker"
],
"author": "Sathishkumar Rakkiasmy",
"license": "ISC"
}
澄清
“我能够创建集合索引,但无法把数据插入到elasticsearch的索引中。”
上面这句话说得通,因为 bulk 变量并没有被修改。
请参阅以下链接,了解为什么 bulk 变量没有被修改。
了解有关异步编程的更多信息。
有任何错误吗?
@Subburaj 数据没有被索引,也没有报错;不过我认为在ES中索引数据还需要提供请求体(body)。
@farhan 你有机会研究一下这个问题吗?你能帮我看看吗?