MongoDB:无键数学运算
mongoDB 集合中的输入数据如下所示:
{
"_id" : ObjectId("dummyObjectID"),
"data" : [
{
"src_ip" : "Source IP address",
"dst_ip" : "Destination IP address",
},
{
"switchId" : 1,
"egress_timeStamp" : et1,
"hop_latency" : someValue,
"ingress_timeStamp" : it1
},
{
"switchId" : 2,
"egress_timeStamp" : et2,
"hop_latency" : someValue,
"ingress_timeStamp" : it2
},
{
"switchId" : 3,
"egress_timeStamp" : et3,
"hop_latency" : 1122,
"ingress_timeStamp" : it3
},
{
"switchId" : 4,
"egress_timeStamp" : et4,
"hop_latency" : someValue,
"ingress_timeStamp" : it4
},
{
"switchId" : 5,
"egress_timeStamp" : et5,
"hop_latency" : someValue,
"ingress_timeStamp" : it5
}
],
"time" : dummyTime
}
我的期望值如下:
{
"_id" : ObjectId("dummyObjectID"),
"data" : [
{
"src_ip" : "Source IP address",
"dst_ip" : "Destination IP address",
},
{
"switchId" : 1,
"egress_timeStamp" : et1,
"link_latency" : 0, # Here 0 because there is no switch before this switch
"hop_latency" : someValue,
"ingress_timeStamp" : it1
},
{
"switchId" : 2,
"egress_timeStamp" : et2,
"link_latency" : it2 - et1,
"hop_latency" : someValue,
"ingress_timeStamp" : it2
},
{
"switchId" : 3,
"egress_timeStamp" : et3,
"link_latency" : it3 - et2,
"hop_latency" : someValue,
"ingress_timeStamp" : it3
},
{
"switchId" : 4,
"egress_timeStamp" : et4,
"link_latency" : it4 - et3,
"hop_latency" : someValue,
"ingress_timeStamp" : it4
},
{
"switchId" : 5,
"egress_timeStamp" : et5,
"link_latency" : it5 - et4,
"hop_latency" : someValue,
"ingress_timeStamp" : it5
}
],
"time" : dummyTime
}
我想计算每个 switchId 的链路延迟(link_latency),即:当前交换机的 ingress_timeStamp 减去上一个交换机的 egress_timeStamp。对于第一个 switchId,link_latency 必须为 0。
我面临的问题是,各交换机的数据只是数组中的元素,没有对应的键(key),因此无法直接按键引用它们来执行此运算。
我是mongodb的新手。
我能够使用 pyMongo 和 pandas DataFrame 获得所需的输出,但这非常耗时。
我认为 mongoDB 本身应该有更好的方法来获得所需的输出。 回答:无需使用 pandas。您可以基于现有的列表项构建新的列表;希望这样效率更高(注意,它不会为第一台交换机创建多余的零值 link_latency 条目;如果确实需要,请酌情修改)。包含数据设置的完整示例:
import pymongo
import datetime
from random import randint
# Open the 'mydatabase' database using MongoClient's default connection settings.
db = pymongo.MongoClient()['mydatabase']

# Ten strictly increasing timestamps: each one is 100-1000 ms after the
# previous, so every hop below gets distinct egress/ingress times.
t = [datetime.datetime.utcnow()]
for _ in range(9):
    t.append(t[-1] + datetime.timedelta(milliseconds=randint(100, 1000)))

# First array element: carries only the source/destination addresses and
# none of the per-switch fields.
endpoints = {
    "src_ip": "Source IP address",
    "dst_ip": "Destination IP address",
}

# Five switch entries; switch n uses t[2n-2] as egress and t[2n-1] as
# ingress, i.e. consecutive pairs of the timestamps generated above.
hops = [
    {
        "switchId": switch_no,
        "egress_timeStamp": t[2 * switch_no - 2],
        "hop_latency": randint(100, 1000),
        "ingress_timeStamp": t[2 * switch_no - 1],
    }
    for switch_no in range(1, 6)
]

# Store one sample document shaped like the one in the question.
db.mycollection.insert_one({
    "data": [endpoints, *hops],
    "time": datetime.datetime.utcnow(),
})
# Add a 'link_latency' field (in seconds) to every entry that follows an entry
# with an egress timestamp: current ingress_timeStamp minus the previous
# entry's egress_timeStamp. The first switch gets no 'link_latency' field.
for record in db.mycollection.find({}):
    # Order the entries by switch id. The key must match the stored field name
    # ('switchId'). Entries without it (the src/dst entry) default to 0, which
    # keeps all sort keys integers and, assuming switch ids are positive,
    # keeps that entry ahead of the switches.
    entries = sorted(record['data'], key=lambda o: o.get('switchId', 0))
    prev_et = None
    for x in entries:
        if prev_et is not None:
            x['link_latency'] = (x['ingress_timeStamp'] - prev_et).total_seconds()
        # Stays None after an entry with no egress timestamp, so the entry
        # that follows it is left without a 'link_latency'.
        prev_et = x.get('egress_timeStamp', None)
    record['data'] = entries
    # Write the whole updated document back over the stored one.
    db.mycollection.replace_one({'_id': record['_id']}, record, upsert=True)
给出:
> db.mycollection.findOne()
{
"_id" : ObjectId("5e06a4bf54e0e497307e43d8"),
"data" : [
{
"src_ip" : "Source IP address",
"dst_ip" : "Destination IP address"
},
{
"switchId" : 1,
"egress_timeStamp" : ISODate("2019-12-28T00:41:35.615Z"),
"hop_latency" : 949,
"ingress_timeStamp" : ISODate("2019-12-28T00:41:36.303Z")
},
{
"switchId" : 2,
"egress_timeStamp" : ISODate("2019-12-28T00:41:36.955Z"),
"hop_latency" : 953,
"ingress_timeStamp" : ISODate("2019-12-28T00:41:37.162Z"),
"link_latency" : 1.547
},
{
"switchId" : 3,
"egress_timeStamp" : ISODate("2019-12-28T00:41:37.475Z"),
"hop_latency" : 751,
"ingress_timeStamp" : ISODate("2019-12-28T00:41:38.032Z"),
"link_latency" : 1.077
},
{
"switchId" : 4,
"egress_timeStamp" : ISODate("2019-12-28T00:41:38.878Z"),
"hop_latency" : 961,
"ingress_timeStamp" : ISODate("2019-12-28T00:41:39.682Z"),
"link_latency" : 2.207
},
{
"switchId" : 5,
"egress_timeStamp" : ISODate("2019-12-28T00:41:40.096Z"),
"hop_latency" : 728,
"ingress_timeStamp" : ISODate("2019-12-28T00:41:40.561Z"),
"link_latency" : 1.683
}
],
"time" : ISODate("2019-12-28T00:41:35.615Z")
}
您的输入数据不是有效的JSON。请用准确的记录更新你的问题。对不起,我错了。更新了JSON。非常感谢。建议的逻辑对我来说很有效。:)
> db.mycollection.findOne()
{
"_id" : ObjectId("5e06a4bf54e0e497307e43d8"),
"data" : [
{
"src_ip" : "Source IP address",
"dst_ip" : "Destination IP address"
},
{
"switchId" : 1,
"egress_timeStamp" : ISODate("2019-12-28T00:41:35.615Z"),
"hop_latency" : 949,
"ingress_timeStamp" : ISODate("2019-12-28T00:41:36.303Z")
},
{
"switchId" : 2,
"egress_timeStamp" : ISODate("2019-12-28T00:41:36.955Z"),
"hop_latency" : 953,
"ingress_timeStamp" : ISODate("2019-12-28T00:41:37.162Z"),
"link_latency" : 1.547
},
{
"switchId" : 3,
"egress_timeStamp" : ISODate("2019-12-28T00:41:37.475Z"),
"hop_latency" : 751,
"ingress_timeStamp" : ISODate("2019-12-28T00:41:38.032Z"),
"link_latency" : 1.077
},
{
"switchId" : 4,
"egress_timeStamp" : ISODate("2019-12-28T00:41:38.878Z"),
"hop_latency" : 961,
"ingress_timeStamp" : ISODate("2019-12-28T00:41:39.682Z"),
"link_latency" : 2.207
},
{
"switchId" : 5,
"egress_timeStamp" : ISODate("2019-12-28T00:41:40.096Z"),
"hop_latency" : 728,
"ingress_timeStamp" : ISODate("2019-12-28T00:41:40.561Z"),
"link_latency" : 1.683
}
],
"time" : ISODate("2019-12-28T00:41:35.615Z")
}