Javascript 为数组的每个元素发出查询

Javascript 为数组的每个元素发出查询,javascript,node.js,mongodb,Javascript,Node.js,Mongodb,我正在查询mondo db,查找一个集合中的URL数组,该集合返回一个数组。然后,我想使用该数组遍历另一个集合,并为上一个查询返回的数组中的每个元素找到匹配的元素。在数组上使用forEach并执行单个查询是否合适? 我的代码是这样的,第一个函数getUrls非常好用。我得到的当前错误是: (节点:10754)未处理的PromisejectionWarning:未处理的承诺拒绝(拒绝id:1):TypeError:无法读取未定义的 (节点:10754)[DEP0018]弃用警告:未处理的承诺拒绝已

我正在查询 MongoDB,查找一个集合中的URL数组,该集合返回一个数组。然后,我想使用该数组遍历另一个集合,并为上一个查询返回的数组中的每个元素找到匹配的元素。在数组上使用forEach并执行单个查询是否合适? 我的代码是这样的,第一个函数getUrls非常好用。我得到的当前错误是:

(node:10754) UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 1): TypeError: Cannot read property of undefined (node:10754) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.


如前所述,问题中的代码有几个问题,大多数问题可以通过查看本响应末尾提供的完整示例列表来解决。你在这里要求的是“前N个结果”问题的一个变体,对于这个问题,有两种方法可以“实际”处理这个问题

所以从“最差”到“最佳”的排名:

聚合$slice 因此,与“循环”函数的结果不同,您可以交替地使用all将结果提供给查询。这减轻了“循环输入”的需要,但这里需要的另一件事是“每输出top-N”

MongoDB中确实还没有一个“稳定”的机制来解决这个问题,但是“如果”它在给定集合的大小上是合理的,那么实际上您可以简单地在匹配所提供参数的“不同”键上,然后将所有文档放入一个数组中,并得到以下结果:

// Single round trip: group every matching document by domain with $push,
// then trim each "docs" array down to its first 1100 entries via $slice.
const pipeline = [
  { "$match": { "email_domain": { "$in": domains } } },
  { "$group": { "_id": "$email_domain", "docs": { "$push": "$$ROOT" } } },
  { "$sort": { "_id": 1 } },
  { "$addFields": { "docs": { "$slice": [ "$docs", 0, 1100 ] } } }
];
let results = await db.collection('circleback').aggregate(pipeline).toArray();
这里“更广泛”的问题是MongoDB无法“限制”初始位置上的数组内容。事实上,这是一个长期悬而未决的问题

因此,虽然我们可以“在理论上”进行此类操作,但它通常根本不实际,因为16MB BSON限制通常会限制“初始”阵列大小,即使结果确实会保持在该上限之下

串行循环执行异步/等待 您的代码显示您正在这个环境下运行,因此我建议您实际使用它。在源代码的每个循环迭代中,只需等待:

// Serially await one find() per domain, appending each batch of up to
// 1100 matching documents onto the running result list.
let results = [];
for (const domain of domains) {
  const batch = await db.collection('circleback')
    .find({ "email_domain": domain })
    .limit(1100)
    .toArray();
  results = results.concat(batch);
}
简单的函数允许您执行此操作,例如通过
.toArray()
.find()
的标准光标结果作为数组返回,然后使用
.concat()
与以前的结果数组合并

它简单有效,但我们可能做得更好一些

异步方法的并发执行 因此,您不必对每个被调用的异步函数使用“循环”和
await
,而是可以同时执行它们(或至少“大部分”)。事实上,这是您目前遇到的问题的一部分,因为实际上没有什么东西“等待”循环迭代

我们可以使用它来有效地做到这一点,但是,如果实际上有“非常多”的承诺将同时运行,这将遇到与经验相同的问题,超过调用堆栈

为了避免这种情况,我们仍然可以使用蓝鸟承诺。它有一个“并发限制器”选项,只允许指定数量的操作同时执行:

// Bluebird's Promise.map issues the per-domain queries with at most 10
// in flight at once; the per-domain arrays are then flattened into one list.
const perDomain = await Promise.map(
  domains,
  domain =>
    db.collection('circleback')
      .find({ "email_domain": domain })
      .limit(1100)
      .toArray(),
  { concurrency: 10 }
);
let results = [].concat.apply([], perDomain);
事实上,您甚至应该能够使用诸如Bluebird promises之类的库将
.map()
功能“插入”到任何其他返回
Promise
的功能,例如返回
域列表的“源”函数。然后您可以像后面的示例中所示那样“链接”

未来MongoDB MongoDB的未来版本(来自MongoDB 3.6)实际上有一种新的“非相关”形式,允许在这里使用特殊情况。因此,回到最初的聚合示例,我们可以获得每个匹配键的“不同”值,然后使用
“pipeline”
参数,该参数将允许对结果应用 $limit

// MongoDB 3.6 "non-correlated" $lookup: first reduce to the distinct
// matching domains with $group, then self-join with a sub-pipeline that
// keeps ($$KEEP) only documents for the current domain and stops at 1100,
// so each output "docs" array stays bounded.
const stages = [
  { "$match": { "email_domain": { "$in": domains } } },
  { "$group": { "_id": "$email_domain" } },
  { "$sort": { "_id": 1 } },
  { "$lookup": {
    "from": "circleback",
    "let": { "domain": "$_id" },
    "pipeline": [
      { "$redact": {
        "$cond": {
          "if": { "$eq": [ "$email_domain", "$$domain" ] },
          "then": "$$KEEP",
          "else": "$$PRUNE"
        }
      }},
      { "$limit": 1100 }
    ],
    "as": "docs"
  }}
];
let results = await db.collection('circleback').aggregate(stages).toArray();
这将始终保持在16MB BSON限制之下,当然,假定参数允许这种情况发生

示例列表 作为一个完整的示例清单,您可以运行,并且通常可以使用,因为默认的数据集创建非常大。它演示了上面描述的所有技术以及要遵循的一些一般使用模式

const mongodb = require('mongodb'),
      Promise = require('bluebird'),
      MongoClient = mongodb.MongoClient,
      Logger = mongodb.Logger;

const uri = 'mongodb://localhost/bigpara';

// Pretty-print any value as 2-space-indented JSON on stdout.
function log(data) {
  const pretty = JSON.stringify(data, undefined, 2);
  console.log(pretty);
}

(async function() {

  // Declared outside try so the finally clause can reach it for cleanup.
  let db;

  try {
    // promiseLibrary: Promise makes the driver hand back Bluebird promises,
    // which is what supplies .map() with a { concurrency } option below.
    db = await MongoClient.connect(uri,{ promiseLibrary: Promise });

    Logger.setLevel('info');
    let source = db.collection('source');
    let data = db.collection('data');

    // Clean collections
    await Promise.all(
      [source,data].map( coll => coll.remove({}) )
    );

    // Create some data to work with:
    // 500 "source" items numbered 1..500 ...
    await source.insertMany(
      Array.apply([],Array(500)).map((e,i) => ({ item: i+1 }))
    );

    // ... and 10,000 random "data" documents, flushed in batches of 1,000.
    let ops = [];
    for (let i=1; i <= 10000; i++) {
      ops.push({
        item: Math.floor(Math.random() * 500) + 1,
        index: i,
        amount: Math.floor(Math.random() * (200 - 100 + 1)) + 100
      });
      if ( i % 1000 === 0 ) {
        await data.insertMany(ops,{ ordered: false });
        ops = [];
      }
    }

    /* Fetch 5 and 5 example
     *
     * Note that the async method to supply to $in is a simulation
     * of any real source that is returning an array
     *
     * Not the best since it means ALL documents go into the array
     * for the selection. Then you $slice off only what you need.
     */
    console.log('\nAggregate $in Example');
    await (async function(source,data) {
      let results = await data.aggregate([
        { "$match": {
          "item": {
            "$in": (await source.find().limit(5).toArray()).map(d => d.item)
          }
        }},
        { "$group": {
          "_id": "$item",
          "docs": { "$push": "$$ROOT" }
        }},
        { "$addFields": {
          "docs": { "$slice": [ "$docs", 0, 5 ] }
        }},
        { "$sort": { "_id": 1 } }
      ]).toArray();
      log(results);
    })(source,data);


    /*
     * Fetch 10 by 2 example
     *
     * Much better usage of concurrent processes and only get's
     * what is needed. But it is actually 1 request per item
     */
    console.log('\nPromise.map concurrency example');
    await (async function(source,data) {
      // .toArray() yields a Bluebird promise here, so .map() with a
      // concurrency limit is available directly on the result.
      let results = [].concat.apply([],
        await source.find().limit(10).toArray().map(d =>
          data.find({ item: d.item }).limit(2).toArray()
        ,{ concurrency: 5 })
      );
      log(results);
    })(source,data);

    /*
     * Plain loop async/await serial example
     *
     * Still one request per item, requests are serial
     * and therefore take longer to complete than concurrent
     */
    console.log('\nasync/await serial loop');
    await (async function(source,data) {
      let items = (await source.find().limit(10).toArray());
      let results = [];
      // FIX: the loop variable must be declared; the original
      // `for ( item of items )` created an implicit global and throws a
      // ReferenceError under strict mode / ES modules.
      for ( const item of items ) {
        results = results.concat(
          await data.find({ item: item.item }).limit(2).toArray()
        );
      }
      log(results);
    })(source,data);


    /*
     * Non-Correlated $lookup example
     *
     * Uses aggregate to get the "distinct" matching results and then does
     * a $lookup operation to retrive the matching documents to the
     * specified $limit
     *
     * Typically not as efficient as the concurrent example, but does
     * actually run completely on the server, and does not require
     * additional connections.
     *
     */

    let version = (await db.db('admin').command({'buildinfo': 1})).version;
    // FIX: compare the server version numerically; the original string
    // comparison (version >= "3.5") sorts lexically, so e.g. "10.0" < "3.5".
    const [major, minor] = version.split('.').map(Number);
    if ( major > 3 || ( major === 3 && minor >= 5 ) ) {
      console.log('\nNon-Correlated $lookup example $limit')
      await (async function(source,data) {
        let items = (await source.find().limit(5).toArray()).map(d => d.item);

        let results = await data.aggregate([
          { "$match": { "item": { "$in": items } } },
          { "$group": { "_id": "$item" } },
          { "$sort": { "_id": 1 } },
          { "$lookup": {
            "from": "data",
            "let": {
              "itemId": "$_id",
            },
            "pipeline": [
              { "$redact": {
                "$cond": {
                  "if": { "$eq": [ "$item", "$$itemId" ] },
                  "then": "$$KEEP",
                  "else": "$$PRUNE"
                }
              }},
              { "$limit": 5 }
            ],
            "as": "docs"
          }}
        ]).toArray();
        log(results);

      })(source,data);
    } else {
      console.log('\nSkipped Non-Correlated $lookup demo');
    }

  } catch(e) {
    console.error(e);
  } finally {
    // FIX: guard the close — when connect() rejects, db is still undefined
    // and the original unconditional db.close() raised the very
    // "TypeError: Cannot read property of undefined" the question reports.
    if (db) db.close();
  }

})();
const mongodb = require('mongodb'),
      Promise = require('bluebird'),
      MongoClient = mongodb.MongoClient,
      Logger = mongodb.Logger;

const uri = 'mongodb://localhost/bigpara';

function log(data) {
  console.log(JSON.stringify(data,undefined,2))
}

(async function() {

  let db;

  try {
    db = await MongoClient.connect(uri,{ promiseLibrary: Promise });

    Logger.setLevel('info');
    let source = db.collection('source');
    let data = db.collection('data');

    // Clean collections
    await Promise.all(
      [source,data].map( coll => coll.remove({}) )
    );

    // Create some data to work with

    await source.insertMany(
      Array.apply([],Array(500)).map((e,i) => ({ item: i+1 }))
    );

    let ops = [];
    for (let i=1; i <= 10000; i++) {
      ops.push({
        item: Math.floor(Math.random() * 500) + 1,
        index: i,
        amount: Math.floor(Math.random() * (200 - 100 + 1)) + 100
      });
      if ( i % 1000 === 0 ) {
        await data.insertMany(ops,{ ordered: false });
        ops = [];
      }
    }

    console.log('\nAggregate $in Example');
    await (async function(source,data) {
      let results = await data.aggregate([
        { "$match": {
          "item": {
            "$in": (await source.find().limit(5).toArray()).map(d => d.item)
          }
        }},
        { "$group": {
          "_id": "$item",
          "docs": { "$push": "$$ROOT" }
        }},
        { "$addFields": {
          "docs": { "$slice": [ "$docs", 0, 5 ] }
        }},
        { "$sort": { "_id": 1 } }
      ]).toArray();
      log(results);
    })(source,data);

    /*
     * Fetch 10 by 2 example
     *
     * Much better usage of concurrent processes and only get's
     * what is needed. But it is actually 1 request per item
     */
    console.log('\nPromise.map concurrency example');
    await (async function(source,data) {
      let results = [].concat.apply([],
        await source.find().limit(10).toArray().map(d =>
          data.find({ item: d.item }).limit(2).toArray()
        ,{ concurrency: 5 })
      );
const mongodb = require('mongodb'),
      Promise = require('bluebird'),
      MongoClient = mongodb.MongoClient,
      Logger = mongodb.Logger;

const uri = 'mongodb://localhost/bigpara';

// Pretty-print any value as 2-space-indented JSON on stdout.
function log(data) {
  const pretty = JSON.stringify(data, undefined, 2);
  console.log(pretty);
}

(async function() {

  // Declared outside try so the finally clause can reach it for cleanup.
  let db;

  try {
    // promiseLibrary: Promise makes the driver hand back Bluebird promises,
    // which is what supplies .map() with a { concurrency } option below.
    db = await MongoClient.connect(uri,{ promiseLibrary: Promise });

    Logger.setLevel('info');
    let source = db.collection('source');
    let data = db.collection('data');

    // Clean collections
    await Promise.all(
      [source,data].map( coll => coll.remove({}) )
    );

    // Create some data to work with:
    // 500 "source" items numbered 1..500 ...
    await source.insertMany(
      Array.apply([],Array(500)).map((e,i) => ({ item: i+1 }))
    );

    // ... and 10,000 random "data" documents, flushed in batches of 1,000.
    let ops = [];
    for (let i=1; i <= 10000; i++) {
      ops.push({
        item: Math.floor(Math.random() * 500) + 1,
        index: i,
        amount: Math.floor(Math.random() * (200 - 100 + 1)) + 100
      });
      if ( i % 1000 === 0 ) {
        await data.insertMany(ops,{ ordered: false });
        ops = [];
      }
    }

    /* Fetch 5 and 5 example
     *
     * Note that the async method to supply to $in is a simulation
     * of any real source that is returning an array
     *
     * Not the best since it means ALL documents go into the array
     * for the selection. Then you $slice off only what you need.
     */
    console.log('\nAggregate $in Example');
    await (async function(source,data) {
      let results = await data.aggregate([
        { "$match": {
          "item": {
            "$in": (await source.find().limit(5).toArray()).map(d => d.item)
          }
        }},
        { "$group": {
          "_id": "$item",
          "docs": { "$push": "$$ROOT" }
        }},
        { "$addFields": {
          "docs": { "$slice": [ "$docs", 0, 5 ] }
        }},
        { "$sort": { "_id": 1 } }
      ]).toArray();
      log(results);
    })(source,data);


    /*
     * Fetch 10 by 2 example
     *
     * Much better usage of concurrent processes and only get's
     * what is needed. But it is actually 1 request per item
     */
    console.log('\nPromise.map concurrency example');
    await (async function(source,data) {
      // .toArray() yields a Bluebird promise here, so .map() with a
      // concurrency limit is available directly on the result.
      let results = [].concat.apply([],
        await source.find().limit(10).toArray().map(d =>
          data.find({ item: d.item }).limit(2).toArray()
        ,{ concurrency: 5 })
      );
      log(results);
    })(source,data);

    /*
     * Plain loop async/await serial example
     *
     * Still one request per item, requests are serial
     * and therefore take longer to complete than concurrent
     */
    console.log('\nasync/await serial loop');
    await (async function(source,data) {
      let items = (await source.find().limit(10).toArray());
      let results = [];
      // FIX: the loop variable must be declared; the original
      // `for ( item of items )` created an implicit global and throws a
      // ReferenceError under strict mode / ES modules.
      for ( const item of items ) {
        results = results.concat(
          await data.find({ item: item.item }).limit(2).toArray()
        );
      }
      log(results);
    })(source,data);


    /*
     * Non-Correlated $lookup example
     *
     * Uses aggregate to get the "distinct" matching results and then does
     * a $lookup operation to retrive the matching documents to the
     * specified $limit
     *
     * Typically not as efficient as the concurrent example, but does
     * actually run completely on the server, and does not require
     * additional connections.
     *
     */

    let version = (await db.db('admin').command({'buildinfo': 1})).version;
    // FIX: compare the server version numerically; the original string
    // comparison (version >= "3.5") sorts lexically, so e.g. "10.0" < "3.5".
    const [major, minor] = version.split('.').map(Number);
    if ( major > 3 || ( major === 3 && minor >= 5 ) ) {
      console.log('\nNon-Correlated $lookup example $limit')
      await (async function(source,data) {
        let items = (await source.find().limit(5).toArray()).map(d => d.item);

        let results = await data.aggregate([
          { "$match": { "item": { "$in": items } } },
          { "$group": { "_id": "$item" } },
          { "$sort": { "_id": 1 } },
          { "$lookup": {
            "from": "data",
            "let": {
              "itemId": "$_id",
            },
            "pipeline": [
              { "$redact": {
                "$cond": {
                  "if": { "$eq": [ "$item", "$$itemId" ] },
                  "then": "$$KEEP",
                  "else": "$$PRUNE"
                }
              }},
              { "$limit": 5 }
            ],
            "as": "docs"
          }}
        ]).toArray();
        log(results);

      })(source,data);
    } else {
      console.log('\nSkipped Non-Correlated $lookup demo');
    }

  } catch(e) {
    console.error(e);
  } finally {
    // FIX: guard the close — when connect() rejects, db is still undefined
    // and the original unconditional db.close() raised the very
    // "TypeError: Cannot read property of undefined" the question reports.
    if (db) db.close();
  }

})();