Python平均嵌套dict列表的列表
标签:python, python-3.x
我有一个结构如下的列表:
# Sample input from the question: an outer list of inner lists; every record
# has an id, a point prediction and a list of confidence-interval dicts.
data = [
    [
        {
            "id": 713,
            "prediction": 4.8,
            "confidence": [{"percentile": "75", "lower": 4.8, "upper": 5.7}],
        },
        {
            "id": 714,
            "prediction": 4.93,
            "confidence": [{"percentile": "75", "lower": 4.9, "upper": 5.7}],
        },
    ],
    [
        {
            "id": 713,
            "prediction": 5.8,
            "confidence": [{"percentile": "75", "lower": 4.2, "upper": 6.7}],
        },
        {
            "id": 714,
            "prediction": 2.93,
            "confidence": [{"percentile": "75", "lower": 1.9, "upper": 3.7}],
        },
    ],
]
这里我们有一个包含两个列表的列表,但也可能包含更多的列表。每个内层列表由若干带有 id 的预测组成,每个预测的置信区间存放在另一个由 dict 组成的列表中
我需要的是合并这些列表,这样每个id就有一个dict和数值的平均值
我尝试过搜索,但没有找到与此嵌套结构匹配的答案
预期输出如下所示:
# Expected result from the question: one record per id, with every numeric
# value replaced by its average across the inner lists.
merged_data = [
    {
        "id": 713,
        "prediction": 5.3,
        "confidence": [{"percentile": "75", "lower": 4.5, "upper": 6.2}],
    },
    {
        "id": 714,
        "prediction": 3.93,
        "confidence": [{"percentile": "75", "lower": 3.4, "upper": 4.7}],
    },
]
{713: {'prediction': [4.8, 5.8], 'lower': [4.8, 4.2], 'upper': [5.7, 6.7], 'average_prediction': 5.3, 'average_lower': 4.5, 'average_upper': 6.2}, 714: {'prediction': [4.936893921359024, 2.936893921359024], 'lower': [4.9, 1.9], 'upper': [5.7, 3.7], 'average_prediction': 3.936893921359024, 'average_lower': 3.4000000000000004, 'average_upper': 4.7}}
这个问题有三个部分
def walk(avgs, new, n):
    """Fold ``new`` into the running average ``avgs`` and return the result.

    Most of this algorithm is just walking the object structure: ``avgs`` and
    ``new`` are traversed in parallel, dicts and lists are rebuilt with the
    same keys / positions, and only numeric leaves are averaged. Any other
    leaf (strings, ``None``, booleans, ...) is taken unchanged from ``avgs``.

    Args:
        avgs: Running average over the first ``n`` items.
        new: The next item; expected to have the same structure as ``avgs``
            (every dict key of ``avgs`` must exist in ``new``).
        n: Number of items already folded into ``avgs``.

    Returns:
        The average over ``n + 1`` items. Numeric leaves come back as floats,
        including leaves that were integers in the input.

    Raises:
        KeyError: If a dict in ``new`` lacks a key present in ``avgs``.
    """
    if isinstance(avgs, dict):
        return {k: walk(v, new[k], n) for k, v in avgs.items()}
    if isinstance(avgs, list):
        # zip() pairs elements by position and stops at the shorter list.
        return [walk(x, y, n) for x, y in zip(avgs, new)]
    # Accept ints as well as floats: a plain isinstance(..., float) check
    # skips integer leaves and would leave them un-averaged. bool is a
    # subclass of int, so it is excluded explicitly and treated as a flag.
    if isinstance(avgs, (int, float)) and not isinstance(avgs, bool):
        # This is the only place that averaging actually happens.
        # At the risk of some accumulated rounding error, this rebuilds the
        # total of the first n items from their average, adds the new value
        # and divides by n + 1.
        return (avgs * n + new) / (n + 1.)
    return avgs
def merge(L):
    """Average a list of identically structured objects into one object.

    The first element seeds the running average and every later element is
    folded in with ``walk``.

    Args:
        L: List of objects to average.

    Returns:
        The averaged object, the single element itself when ``L`` has length
        one, or ``None`` when ``L`` is empty.
    """
    if not L:
        # An empty group has nothing to average.
        return None
    d = L[0]
    # n counts how many elements are already folded into d.
    for n, new in enumerate(L[1:], 1):
        # The helper is named `walk`; calling `walks` raised NameError.
        d = walk(d, new, n)
    return d
# Collapse every group of records into its averaged record, keyed as in `groups`.
averaged = {key: merge(members) for key, members in groups.items()}
您可能只希望对 prediction 之类的某些键进行平均。您可以预先对分组后的对象进行过滤,也可以事后再过滤(事先过滤可能更高效):
为了便于说明,我创建了一系列中间列表,但这些并不是真正必要的。您完全可以采用滚动更新算法来节省一些内存,而不必先分组再聚合
# Rolling update: per id, keep a (running average, count) pair so that no
# intermediate per-id lists have to be built.
averaged = {}
# `data` is the outer list in your nested structure
for sublist in data:
    for record in sublist:
        key = record['id']
        # Keep only the fields that should be averaged (any desired transforms).
        fields = {s: record[s] for s in ('prediction', 'confidence')}
        if key not in averaged:
            averaged[key] = (fields, 1)
        else:
            # The running state lives in `averaged`; `groups` is never
            # defined in this snippet.
            agg, n = averaged[key]
            averaged[key] = (walk(agg, fields, n), n + 1)
# Drop the counters and keep only the averaged records.
averaged = {k: v[0] for k, v in averaged.items()}
from copy import deepcopy
# Sample input: an outer list of inner lists of prediction records.
# NOTE: the name `input` is kept from the original snippet; it shadows the
# builtin input() for the rest of the module.
input = [
    [
        {
            "id": 713,
            "prediction": 4.8,
            "confidence": [{"percentile": "75", "lower": 4.8, "upper": 5.7}],
        },
        {
            "id": 714,
            "prediction": 4.936893921359024,
            "confidence": [{"percentile": "75", "lower": 4.9, "upper": 5.7}],
        },
    ],
    [
        {
            "id": 713,
            "prediction": 5.8,
            "confidence": [{"percentile": "75", "lower": 4.2, "upper": 6.7}],
        },
        {
            "id": 714,
            "prediction": 2.936893921359024,
            "confidence": [{"percentile": "75", "lower": 1.9, "upper": 3.7}],
        },
    ],
]

# One accumulated record per id, in order of first appearance.
final_dict_list = []
# id -> accumulated record (the same object stored in final_dict_list), so a
# repeated id is found with one dict lookup instead of a list scan.
totals_by_id = {}
# id -> number of records summed so far for that id.
occurrences = {}

for item in input:
    for dict_ele in item:
        record_id = dict_ele["id"]
        final_item = totals_by_id.get(record_id)
        if final_item is None:
            # First sighting: deep-copy so the sums below never touch `input`.
            final_item = deepcopy(dict_ele)
            totals_by_id[record_id] = final_item
            occurrences[record_id] = 1
            final_dict_list.append(final_item)
            continue
        final_item["prediction"] += dict_ele["prediction"]
        final_item["confidence"][0]["lower"] += dict_ele["confidence"][0]["lower"]
        final_item["confidence"][0]["upper"] += dict_ele["confidence"][0]["upper"]
        occurrences[record_id] += 1

# Turn the sums into means. Divide by how often each id actually occurred,
# not by len(input), so an id that is missing from (or repeated within) an
# inner list is still averaged correctly.
for item in final_dict_list:
    numer_of_items = occurrences[item["id"]]
    item["prediction"] /= numer_of_items
    item["confidence"][0]["lower"] /= numer_of_items
    item["confidence"][0]["upper"] /= numer_of_items

print(final_dict_list)
输出:
[
{'confidence': [{'upper': 6.2, 'lower': 4.5, 'percentile': '75'}], 'id': 713, 'prediction': 5.3},
{'confidence': [{'upper': 4.7, 'lower': 3.4000000000000004, 'percentile': '75'}], 'id': 714, 'prediction': 3.936893921359024}]
简而言之,如果数据结构的组织方式稍有不同,可能会容易得多。嵌套的 `for` 循环就可以做到这一点。
请展示您尝试过的代码。请同时发布您期望的结果形式,因为问题描述中不够清楚。
@VishalSingh 已补充。
这里缺少 “lower” 和 “upper” 值的平均值。
抱歉说得不够清楚。其他两个变量的处理方式相同
# Rolling update: per id, keep a (running average, count) pair so that no
# intermediate per-id lists have to be built.
averaged = {}
# `data` is the outer list in your nested structure
for sublist in data:
    for record in sublist:
        key = record['id']
        # Keep only the fields that should be averaged (any desired transforms).
        fields = {s: record[s] for s in ('prediction', 'confidence')}
        if key not in averaged:
            averaged[key] = (fields, 1)
        else:
            # The running state lives in `averaged`; `groups` is never
            # defined in this snippet.
            agg, n = averaged[key]
            averaged[key] = (walk(agg, fields, n), n + 1)
# Drop the counters and keep only the averaged records.
averaged = {k: v[0] for k, v in averaged.items()}
def inline_key(d, key):
    """Store ``key`` under ``'id'`` in ``d`` and return that same dict.

    The argument is modified in place rather than copied, so this is not a
    pure function; the caller's original values are never used again.
    """
    d.update(id=key)
    return d
# Turn the id -> record mapping back into a flat list of records carrying their id.
final_result = [inline_key(record, key) for key, record in averaged.items()]
def merge_items(items, ndigits=2):
    """Average a list of records that share one id into a single record.

    Args:
        items: Records shaped like ``{'id': ..., 'prediction': number,
            'confidence': [{'percentile': ..., 'lower': number,
            'upper': number}, ...]}``.
        ndigits: Number of decimal places the averages are rounded to.

    Returns:
        A dict with the ``id`` of the first record, the mean ``prediction``
        and one averaged ``confidence`` entry per position. An empty dict is
        returned for empty input.
    """
    if not items:
        return {}

    count = len(items)

    def mean(values):
        # Every average in the record is rounded the same way.
        return round(sum(values) / count, ndigits)

    # Average every confidence entry, not just index 0, so records carrying
    # several intervals do not silently lose the later ones. Entries are
    # paired by position (assumes all records list them in the same order);
    # zip() stops at the shortest list.
    confidence = [
        {
            'percentile': entries[0]['percentile'],
            'lower': mean(entry['lower'] for entry in entries),
            'upper': mean(entry['upper'] for entry in entries),
        }
        for entries in zip(*(item['confidence'] for item in items))
    ]

    return {
        'id': items[0]['id'],
        'prediction': mean(item['prediction'] for item in items),
        'confidence': confidence,
    }
# Group the records by id in a single pass over `data`, instead of rescanning
# the whole structure once per id. A dict keeps ids in order of first
# appearance, so the output order is deterministic.
grouped = {}
for item in data:
    for sub_item in item:
        grouped.setdefault(sub_item['id'], []).append(sub_item)

ids = list(grouped)
# One merged record per id.
result = [merge_items(to_merge) for to_merge in grouped.values()]
print(result)
from copy import deepcopy
# Sample input: an outer list of inner lists of prediction records.
# NOTE: the name `input` is kept from the original snippet; it shadows the
# builtin input() for the rest of the module.
input = [
    [
        {
            "id": 713,
            "prediction": 4.8,
            "confidence": [{"percentile": "75", "lower": 4.8, "upper": 5.7}],
        },
        {
            "id": 714,
            "prediction": 4.936893921359024,
            "confidence": [{"percentile": "75", "lower": 4.9, "upper": 5.7}],
        },
    ],
    [
        {
            "id": 713,
            "prediction": 5.8,
            "confidence": [{"percentile": "75", "lower": 4.2, "upper": 6.7}],
        },
        {
            "id": 714,
            "prediction": 2.936893921359024,
            "confidence": [{"percentile": "75", "lower": 1.9, "upper": 3.7}],
        },
    ],
]

# One accumulated record per id, in order of first appearance.
final_dict_list = []
# id -> accumulated record (the same object stored in final_dict_list), so a
# repeated id is found with one dict lookup instead of a list scan.
totals_by_id = {}
# id -> number of records summed so far for that id.
occurrences = {}

for item in input:
    for dict_ele in item:
        record_id = dict_ele["id"]
        final_item = totals_by_id.get(record_id)
        if final_item is None:
            # First sighting: deep-copy so the sums below never touch `input`.
            final_item = deepcopy(dict_ele)
            totals_by_id[record_id] = final_item
            occurrences[record_id] = 1
            final_dict_list.append(final_item)
            continue
        final_item["prediction"] += dict_ele["prediction"]
        final_item["confidence"][0]["lower"] += dict_ele["confidence"][0]["lower"]
        final_item["confidence"][0]["upper"] += dict_ele["confidence"][0]["upper"]
        occurrences[record_id] += 1

# Turn the sums into means. Divide by how often each id actually occurred,
# not by len(input), so an id that is missing from (or repeated within) an
# inner list is still averaged correctly.
for item in final_dict_list:
    numer_of_items = occurrences[item["id"]]
    item["prediction"] /= numer_of_items
    item["confidence"][0]["lower"] /= numer_of_items
    item["confidence"][0]["upper"] /= numer_of_items

print(final_dict_list)
[
{'confidence': [{'upper': 6.2, 'lower': 4.5, 'percentile': '75'}], 'id': 713, 'prediction': 5.3},
{'confidence': [{'upper': 4.7, 'lower': 3.4000000000000004, 'percentile': '75'}], 'id': 714, 'prediction': 3.936893921359024}]