MongoDB:无键数学运算

MongoDB:无键数学运算,mongodb,Mongodb,mongoDB集合中的输入数据如下所示: { "_id" : ObjectId("dummyObjectID"), "data" : [ { "src_ip" : "Source IP address", "dst_ip" : "Destination IP address", }, { "switchId" : 1, "egress_t

mongoDB集合中的输入数据如下所示:

{
    "_id" : ObjectId("dummyObjectID"),
    "data" : [
        {
            "src_ip" : "Source IP address",
            "dst_ip" : "Destination IP address",
        },
        {
            "switchId" : 1,
            "egress_timeStamp" : et1,
            "hop_latency" : someValue,
            "ingress_timeStamp" : it1
        },
        {
            "switchId" : 2,
            "egress_timeStamp" : et2,
            "hop_latency" : someValue,
            "ingress_timeStamp" : it2
        },
        {
            "switchId" : 3,
            "egress_timeStamp" : et3,
            "hop_latency" : 1122,
            "ingress_timeStamp" : it3
        },
        {
            "switchId" : 4,
            "egress_timeStamp" : et4,
            "hop_latency" : someValue,
            "ingress_timeStamp" : it4
        },
        {
            "switchId" : 5,
            "egress_timeStamp" : et5,
            "hop_latency" : someValue,
            "ingress_timeStamp" : it5
        }
    ],
    "time" : dummyTime
}
我的期望值如下:

{
    "_id" : ObjectId("dummyObjectID"),
    "data" : [
        {
            "src_ip" : "Source IP address",
            "dst_ip" : "Destination IP address",
        },
        {
            "switchId" : 1,
            "egress_timeStamp" : et1,
            "link_latency" : 0, # Here 0 because there is no switch before this switch
            "hop_latency" : someValue,
            "ingress_timeStamp" : it1
        },
        {
            "switchId" : 2,
            "egress_timeStamp" : et2,
            "link_latency" : it2 - et1,
            "hop_latency" : someValue,
            "ingress_timeStamp" : it2
        },
        {
            "switchId" : 3,
            "egress_timeStamp" : et3,
            "link_latency" : it3 - et2,
            "hop_latency" : someValue,
            "ingress_timeStamp" : it3
        },
        {
            "switchId" : 4,
            "egress_timeStamp" : et4,
            "link_latency" : it4 - et3,
            "hop_latency" : someValue,
            "ingress_timeStamp" : it4
        },
        {
            "switchId" : 5,
            "egress_timeStamp" : et5,
            "link_latency" : it5 - et4,
            "hop_latency" : someValue,
            "ingress_timeStamp" : it5
        }
    ],
    "time" : dummyTime
}
我想计算每个 switchId 的链路延迟,即「当前交换机的 ingress_timeStamp」减去「前一个交换机的 egress_timeStamp」。对于第一个 switchId,link_latency 必须为 0。我面临的问题是,data 数组中的交换机条目没有键名(它们只是数组元素),因此我不知道如何执行此运算。

我是 MongoDB 的新手。我可以用 PyMongo 和 pandas DataFrame 得到所需的输出,但这样做非常耗时。
我认为mongoDB本身必须有一些好的方法来获得所需的输出。

无需使用 pandas。您可以基于现有的列表项构建新的列表项;希望这样更高效(注意:它不会为第一个交换机条目添加值为 0 的 link_latency;如果确实需要,请酌情修改):

数据设置的完整示例:

import pymongo
import datetime
from random import randint

# Connect to the default MongoDB instance and select the demo database.
db = pymongo.MongoClient()['mydatabase']

# Build ten increasing timestamps: each one is 100-1000 ms after the previous.
timestamps = [datetime.datetime.utcnow()]
for _ in range(9):
    step = datetime.timedelta(milliseconds=randint(100, 1000))
    timestamps.append(timestamps[-1] + step)

# Switch k (1..5) uses timestamps[2k-2] as egress and timestamps[2k-1] as
# ingress, with a random hop latency.
switch_entries = [
    {
        "switchId": switch_id,
        "egress_timeStamp": timestamps[2 * switch_id - 2],
        "hop_latency": randint(100, 1000),
        "ingress_timeStamp": timestamps[2 * switch_id - 1],
    }
    for switch_id in range(1, 6)
]

# The first element of "data" is the address header; the switches follow.
header_entry = {
    "src_ip": "Source IP address",
    "dst_ip": "Destination IP address",
}

db.mycollection.insert_one({
    "data": [header_entry] + switch_entries,
    "time": datetime.datetime.utcnow(),
})

# For every stored document, annotate each switch entry with 'link_latency':
# (ingress_timeStamp of this entry) - (egress_timeStamp of the previous
# entry), in seconds, then write the document back.
for record in db.mycollection.find({}):
    prev_et = None
    new_data = []
    # Sort on the field name the documents actually use ('switchId'). Entries
    # without it (the src_ip/dst_ip header) get 0 so the key is always an int;
    # mixing int and str keys would raise TypeError in Python 3.
    # NOTE(review): assumes real switch ids are >= 1, as in the sample data,
    # so that the header entry sorts first.
    for x in sorted(record['data'], key=lambda o: o.get('switchId', 0)):
        ingress = x.get('ingress_timeStamp')
        # No latency for the first switch (there is no previous egress) or
        # for entries that carry no ingress timestamp.
        if prev_et is not None and ingress is not None:
            x['link_latency'] = (ingress - prev_et).total_seconds()
        # The header entry has no egress timestamp, which leaves prev_et None.
        prev_et = x.get('egress_timeStamp')
        new_data.append(x)
    record['data'] = new_data
    db.mycollection.replace_one({'_id': record['_id']}, record, upsert=True)
给出:

> db.mycollection.findOne()
{
        "_id" : ObjectId("5e06a4bf54e0e497307e43d8"),
        "data" : [
                {
                        "src_ip" : "Source IP address",
                        "dst_ip" : "Destination IP address"
                },
                {
                        "switchId" : 1,
                        "egress_timeStamp" : ISODate("2019-12-28T00:41:35.615Z"),
                        "hop_latency" : 949,
                        "ingress_timeStamp" : ISODate("2019-12-28T00:41:36.303Z")
                },
                {
                        "switchId" : 2,
                        "egress_timeStamp" : ISODate("2019-12-28T00:41:36.955Z"),
                        "hop_latency" : 953,
                        "ingress_timeStamp" : ISODate("2019-12-28T00:41:37.162Z"),
                        "link_latency" : 1.547
                },
                {
                        "switchId" : 3,
                        "egress_timeStamp" : ISODate("2019-12-28T00:41:37.475Z"),
                        "hop_latency" : 751,
                        "ingress_timeStamp" : ISODate("2019-12-28T00:41:38.032Z"),
                        "link_latency" : 1.077
                },
                {
                        "switchId" : 4,
                        "egress_timeStamp" : ISODate("2019-12-28T00:41:38.878Z"),
                        "hop_latency" : 961,
                        "ingress_timeStamp" : ISODate("2019-12-28T00:41:39.682Z"),
                        "link_latency" : 2.207
                },
                {
                        "switchId" : 5,
                        "egress_timeStamp" : ISODate("2019-12-28T00:41:40.096Z"),
                        "hop_latency" : 728,
                        "ingress_timeStamp" : ISODate("2019-12-28T00:41:40.561Z"),
                        "link_latency" : 1.683
                }
        ],
        "time" : ISODate("2019-12-28T00:41:35.615Z")
}

您的输入数据不是有效的JSON。请用准确的记录更新你的问题。对不起,我错了。更新了JSON。非常感谢。建议的逻辑对我来说很有效。:)
> db.mycollection.findOne()
{
        "_id" : ObjectId("5e06a4bf54e0e497307e43d8"),
        "data" : [
                {
                        "src_ip" : "Source IP address",
                        "dst_ip" : "Destination IP address"
                },
                {
                        "switchId" : 1,
                        "egress_timeStamp" : ISODate("2019-12-28T00:41:35.615Z"),
                        "hop_latency" : 949,
                        "ingress_timeStamp" : ISODate("2019-12-28T00:41:36.303Z")
                },
                {
                        "switchId" : 2,
                        "egress_timeStamp" : ISODate("2019-12-28T00:41:36.955Z"),
                        "hop_latency" : 953,
                        "ingress_timeStamp" : ISODate("2019-12-28T00:41:37.162Z"),
                        "link_latency" : 1.547
                },
                {
                        "switchId" : 3,
                        "egress_timeStamp" : ISODate("2019-12-28T00:41:37.475Z"),
                        "hop_latency" : 751,
                        "ingress_timeStamp" : ISODate("2019-12-28T00:41:38.032Z"),
                        "link_latency" : 1.077
                },
                {
                        "switchId" : 4,
                        "egress_timeStamp" : ISODate("2019-12-28T00:41:38.878Z"),
                        "hop_latency" : 961,
                        "ingress_timeStamp" : ISODate("2019-12-28T00:41:39.682Z"),
                        "link_latency" : 2.207
                },
                {
                        "switchId" : 5,
                        "egress_timeStamp" : ISODate("2019-12-28T00:41:40.096Z"),
                        "hop_latency" : 728,
                        "ingress_timeStamp" : ISODate("2019-12-28T00:41:40.561Z"),
                        "link_latency" : 1.683
                }
        ],
        "time" : ISODate("2019-12-28T00:41:35.615Z")
}