Warning: file_get_contents(/data/phpspider/zhask/data//catemap/1/list/4.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python 如何以较少的延迟动态格式化dict嵌套列表_Python_List_Dictionary_Time Complexity_Itertools - Fatal编程技术网

Python 如何以较少的延迟动态格式化dict嵌套列表

Python 如何以较少的延迟动态格式化dict嵌套列表,python,list,dictionary,time-complexity,itertools,Python,List,Dictionary,Time Complexity,Itertools,我需要你的专业知识来简化嵌套字典的格式设置。我有输入信号列表,需要根据分钟精度在u_id和时间戳字段上分组,并转换为各自的输出格式。我已经发布了我尝试过的格式。我需要尽可能快地格式化和处理它,因为这涉及到时间复杂性。非常感谢你的帮助 代码片段 final_output = [] sorted_signals = sorted(signals, key=lambda x: (x['u_id'], str(x['start_ts'])[0:8])) data = itertools.groupb

我需要你的专业知识来简化嵌套字典的格式设置。我有输入信号列表,需要根据分钟精度在u_id和时间戳字段上分组,并转换为各自的输出格式。我已经发布了我尝试过的格式。我需要尽可能快地格式化和处理它,因为这涉及到时间复杂性。非常感谢你的帮助

代码片段

final_output = []


def _minute_key(x):
    """Return the ``(u_id, minute start)`` grouping key for one signal.

    ``start_ts`` is an epoch timestamp in seconds; flooring it to a multiple
    of 60 gives the epoch second at which its UTC minute begins.
    """
    return x['u_id'], int(x['start_ts'] // 60) * 60


# itertools.groupby() only merges *adjacent* items, so the list has to be
# sorted with exactly the key it is grouped by; otherwise one (u_id, minute)
# group can come out split into several pieces. sorted() is stable, so
# signals inside a group keep their input order.
sorted_signals = sorted(signals, key=_minute_key)

data = itertools.groupby(sorted_signals, key=_minute_key)

def format_signals(v):
    """Flatten each signal in ``v`` into a single-level dict.

    Every produced dict holds the signal's ``start_ts`` under
    ``'timestamp_utc'`` plus one entry per item of its ``'sign'`` list,
    keyed by the part of the item's ``'name'`` before the first ``'.'``.
    Returns the dicts as a list, in iteration order.
    """
    def flatten(signal):
        row = {'timestamp_utc': signal['start_ts']}
        for reading in signal['sign']:
            row[reading['name'].split('.')[0]] = reading['val']
        return row

    return [flatten(signal) for signal in v]


# Build one output record per group. k is the group key: k[0] is the u_id
# and k[1] is the start of the group's minute in epoch seconds.
for k, v in data:

    # utcfromtimestamp() keeps the value in UTC, matching the 'minute_utc'
    # key; fromtimestamp() would convert it to the machine's local time.
    # replace(microsecond=0) truncates "now" to whole seconds directly.
    output_format = {'ui_id': k[0], 'minute_utc': datetime.utcfromtimestamp(int(k[1])), 'data': format_signals(v),
                'processing_timestamp_utc': datetime.utcnow().replace(microsecond=0)}
    final_output.append(output_format)

print(final_output)
signals = [
       {'c_id': '1234', 'u_id': 288, 'f_id': 331,
        'sign': [{'name': 'speed', 'val': 9},
                    {'name': 'pwr', 'val': 1415}], 'start_ts': 1598440244,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT123', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'},
       {'c_id': '1234', 'u_id': 288, 'f_id': 331,
        'sign': [{'name': 'speed', 'val': 10},
                    {'name': 'pwr', 'val': 1416}], 'start_ts': 1598440243,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT123', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'},
       {'c_id': '1234', 'u_id': 287, 'f_id': 331,
        'sign': [{'name': 'speed', 'val': 10},
                    {'name': 'pwr', 'val': 1417}], 'start_ts': 1598440344,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT123', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'},
       {'c_id': '1234', 'u_id': 288, 'f_id': 331,
        'sign': [{'name': 'speed.', 'val': 8.2},
                    {'name': 'pwr', 'val': 925}], 'start_ts': 1598440345,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT172', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'}
       ]
   [{
    'ui_id': 287,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440344,
        'speed': 10,
        'pwr': 1417
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 40),
    'data': [{
        'timestamp_utc': 1598440244,
        'speed': 9,
        'pwr': 1415
    }, {
        'timestamp_utc': 1598440243,
        'speed': 10,
        'pwr': 1416
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440345,
        'speed': 8.2,
        'pwr': 925
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}]
    [{
    'ui_id': 287,
    'f_id': 311,
    'c_id': 1234,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440344,
        'speed': 10,
        'pwr': 1417
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
     'f_id': 311,
    'c_id': 1234,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 40),
    'data': [{
        'timestamp_utc': 1598440244,
        'speed': 9,
        'pwr': 1415
    }, {
        'timestamp_utc': 1598440243,
        'speed': 10,
        'pwr': 1416
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
     'f_id': 311,
    'c_id': 1234,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440345,
        'speed': 8.2,
        'pwr': 925
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}]
输入

final_output = []


def _minute_key(x):
    """Return the ``(u_id, minute start)`` grouping key for one signal.

    ``start_ts`` is an epoch timestamp in seconds; flooring it to a multiple
    of 60 gives the epoch second at which its UTC minute begins.
    """
    return x['u_id'], int(x['start_ts'] // 60) * 60


# itertools.groupby() only merges *adjacent* items, so the list has to be
# sorted with exactly the key it is grouped by; otherwise one (u_id, minute)
# group can come out split into several pieces. sorted() is stable, so
# signals inside a group keep their input order.
sorted_signals = sorted(signals, key=_minute_key)

data = itertools.groupby(sorted_signals, key=_minute_key)

def format_signals(v):
    """Flatten each signal in ``v`` into a single-level dict.

    Every produced dict holds the signal's ``start_ts`` under
    ``'timestamp_utc'`` plus one entry per item of its ``'sign'`` list,
    keyed by the part of the item's ``'name'`` before the first ``'.'``.
    Returns the dicts as a list, in iteration order.
    """
    def flatten(signal):
        row = {'timestamp_utc': signal['start_ts']}
        for reading in signal['sign']:
            row[reading['name'].split('.')[0]] = reading['val']
        return row

    return [flatten(signal) for signal in v]


# Build one output record per group. k is the group key: k[0] is the u_id
# and k[1] is the start of the group's minute in epoch seconds.
for k, v in data:

    # utcfromtimestamp() keeps the value in UTC, matching the 'minute_utc'
    # key; fromtimestamp() would convert it to the machine's local time.
    # replace(microsecond=0) truncates "now" to whole seconds directly.
    output_format = {'ui_id': k[0], 'minute_utc': datetime.utcfromtimestamp(int(k[1])), 'data': format_signals(v),
                'processing_timestamp_utc': datetime.utcnow().replace(microsecond=0)}
    final_output.append(output_format)

print(final_output)
signals = [
       {'c_id': '1234', 'u_id': 288, 'f_id': 331,
        'sign': [{'name': 'speed', 'val': 9},
                    {'name': 'pwr', 'val': 1415}], 'start_ts': 1598440244,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT123', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'},
       {'c_id': '1234', 'u_id': 288, 'f_id': 331,
        'sign': [{'name': 'speed', 'val': 10},
                    {'name': 'pwr', 'val': 1416}], 'start_ts': 1598440243,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT123', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'},
       {'c_id': '1234', 'u_id': 287, 'f_id': 331,
        'sign': [{'name': 'speed', 'val': 10},
                    {'name': 'pwr', 'val': 1417}], 'start_ts': 1598440344,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT123', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'},
       {'c_id': '1234', 'u_id': 288, 'f_id': 331,
        'sign': [{'name': 'speed.', 'val': 8.2},
                    {'name': 'pwr', 'val': 925}], 'start_ts': 1598440345,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT172', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'}
       ]
   [{
    'ui_id': 287,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440344,
        'speed': 10,
        'pwr': 1417
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 40),
    'data': [{
        'timestamp_utc': 1598440244,
        'speed': 9,
        'pwr': 1415
    }, {
        'timestamp_utc': 1598440243,
        'speed': 10,
        'pwr': 1416
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440345,
        'speed': 8.2,
        'pwr': 925
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}]
    [{
    'ui_id': 287,
    'f_id': 311,
    'c_id': 1234,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440344,
        'speed': 10,
        'pwr': 1417
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
     'f_id': 311,
    'c_id': 1234,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 40),
    'data': [{
        'timestamp_utc': 1598440244,
        'speed': 9,
        'pwr': 1415
    }, {
        'timestamp_utc': 1598440243,
        'speed': 10,
        'pwr': 1416
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
     'f_id': 311,
    'c_id': 1234,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440345,
        'speed': 8.2,
        'pwr': 925
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}]
当前输出

final_output = []


def _minute_key(x):
    """Return the ``(u_id, minute start)`` grouping key for one signal.

    ``start_ts`` is an epoch timestamp in seconds; flooring it to a multiple
    of 60 gives the epoch second at which its UTC minute begins.
    """
    return x['u_id'], int(x['start_ts'] // 60) * 60


# itertools.groupby() only merges *adjacent* items, so the list has to be
# sorted with exactly the key it is grouped by; otherwise one (u_id, minute)
# group can come out split into several pieces. sorted() is stable, so
# signals inside a group keep their input order.
sorted_signals = sorted(signals, key=_minute_key)

data = itertools.groupby(sorted_signals, key=_minute_key)

def format_signals(v):
    """Flatten each signal in ``v`` into a single-level dict.

    Every produced dict holds the signal's ``start_ts`` under
    ``'timestamp_utc'`` plus one entry per item of its ``'sign'`` list,
    keyed by the part of the item's ``'name'`` before the first ``'.'``.
    Returns the dicts as a list, in iteration order.
    """
    def flatten(signal):
        row = {'timestamp_utc': signal['start_ts']}
        for reading in signal['sign']:
            row[reading['name'].split('.')[0]] = reading['val']
        return row

    return [flatten(signal) for signal in v]


# Build one output record per group. k is the group key: k[0] is the u_id
# and k[1] is the start of the group's minute in epoch seconds.
for k, v in data:

    # utcfromtimestamp() keeps the value in UTC, matching the 'minute_utc'
    # key; fromtimestamp() would convert it to the machine's local time.
    # replace(microsecond=0) truncates "now" to whole seconds directly.
    output_format = {'ui_id': k[0], 'minute_utc': datetime.utcfromtimestamp(int(k[1])), 'data': format_signals(v),
                'processing_timestamp_utc': datetime.utcnow().replace(microsecond=0)}
    final_output.append(output_format)

print(final_output)
signals = [
       {'c_id': '1234', 'u_id': 288, 'f_id': 331,
        'sign': [{'name': 'speed', 'val': 9},
                    {'name': 'pwr', 'val': 1415}], 'start_ts': 1598440244,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT123', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'},
       {'c_id': '1234', 'u_id': 288, 'f_id': 331,
        'sign': [{'name': 'speed', 'val': 10},
                    {'name': 'pwr', 'val': 1416}], 'start_ts': 1598440243,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT123', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'},
       {'c_id': '1234', 'u_id': 287, 'f_id': 331,
        'sign': [{'name': 'speed', 'val': 10},
                    {'name': 'pwr', 'val': 1417}], 'start_ts': 1598440344,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT123', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'},
       {'c_id': '1234', 'u_id': 288, 'f_id': 331,
        'sign': [{'name': 'speed.', 'val': 8.2},
                    {'name': 'pwr', 'val': 925}], 'start_ts': 1598440345,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT172', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'}
       ]
   [{
    'ui_id': 287,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440344,
        'speed': 10,
        'pwr': 1417
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 40),
    'data': [{
        'timestamp_utc': 1598440244,
        'speed': 9,
        'pwr': 1415
    }, {
        'timestamp_utc': 1598440243,
        'speed': 10,
        'pwr': 1416
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440345,
        'speed': 8.2,
        'pwr': 925
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}]
    [{
    'ui_id': 287,
    'f_id': 311,
    'c_id': 1234,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440344,
        'speed': 10,
        'pwr': 1417
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
     'f_id': 311,
    'c_id': 1234,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 40),
    'data': [{
        'timestamp_utc': 1598440244,
        'speed': 9,
        'pwr': 1415
    }, {
        'timestamp_utc': 1598440243,
        'speed': 10,
        'pwr': 1416
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
     'f_id': 311,
    'c_id': 1234,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440345,
        'speed': 8.2,
        'pwr': 925
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}]
所需输出

final_output = []


def _minute_key(x):
    """Return the ``(u_id, minute start)`` grouping key for one signal.

    ``start_ts`` is an epoch timestamp in seconds; flooring it to a multiple
    of 60 gives the epoch second at which its UTC minute begins.
    """
    return x['u_id'], int(x['start_ts'] // 60) * 60


# itertools.groupby() only merges *adjacent* items, so the list has to be
# sorted with exactly the key it is grouped by; otherwise one (u_id, minute)
# group can come out split into several pieces. sorted() is stable, so
# signals inside a group keep their input order.
sorted_signals = sorted(signals, key=_minute_key)

data = itertools.groupby(sorted_signals, key=_minute_key)

def format_signals(v):
    """Flatten each signal in ``v`` into a single-level dict.

    Every produced dict holds the signal's ``start_ts`` under
    ``'timestamp_utc'`` plus one entry per item of its ``'sign'`` list,
    keyed by the part of the item's ``'name'`` before the first ``'.'``.
    Returns the dicts as a list, in iteration order.
    """
    def flatten(signal):
        row = {'timestamp_utc': signal['start_ts']}
        for reading in signal['sign']:
            row[reading['name'].split('.')[0]] = reading['val']
        return row

    return [flatten(signal) for signal in v]


# Build one output record per group. k is the group key: k[0] is the u_id
# and k[1] is the start of the group's minute in epoch seconds.
for k, v in data:

    # utcfromtimestamp() keeps the value in UTC, matching the 'minute_utc'
    # key; fromtimestamp() would convert it to the machine's local time.
    # replace(microsecond=0) truncates "now" to whole seconds directly.
    output_format = {'ui_id': k[0], 'minute_utc': datetime.utcfromtimestamp(int(k[1])), 'data': format_signals(v),
                'processing_timestamp_utc': datetime.utcnow().replace(microsecond=0)}
    final_output.append(output_format)

print(final_output)
signals = [
       {'c_id': '1234', 'u_id': 288, 'f_id': 331,
        'sign': [{'name': 'speed', 'val': 9},
                    {'name': 'pwr', 'val': 1415}], 'start_ts': 1598440244,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT123', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'},
       {'c_id': '1234', 'u_id': 288, 'f_id': 331,
        'sign': [{'name': 'speed', 'val': 10},
                    {'name': 'pwr', 'val': 1416}], 'start_ts': 1598440243,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT123', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'},
       {'c_id': '1234', 'u_id': 287, 'f_id': 331,
        'sign': [{'name': 'speed', 'val': 10},
                    {'name': 'pwr', 'val': 1417}], 'start_ts': 1598440344,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT123', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'},
       {'c_id': '1234', 'u_id': 288, 'f_id': 331,
        'sign': [{'name': 'speed.', 'val': 8.2},
                    {'name': 'pwr', 'val': 925}], 'start_ts': 1598440345,
        'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT172', 'c_n': 'demo',
        'msg_cnt': 2, 'window': 'na', 'type': 'na'}
       ]
   [{
    'ui_id': 287,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440344,
        'speed': 10,
        'pwr': 1417
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 40),
    'data': [{
        'timestamp_utc': 1598440244,
        'speed': 9,
        'pwr': 1415
    }, {
        'timestamp_utc': 1598440243,
        'speed': 10,
        'pwr': 1416
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440345,
        'speed': 8.2,
        'pwr': 925
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}]
    [{
    'ui_id': 287,
    'f_id': 311,
    'c_id': 1234,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440344,
        'speed': 10,
        'pwr': 1417
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
     'f_id': 311,
    'c_id': 1234,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 40),
    'data': [{
        'timestamp_utc': 1598440244,
        'speed': 9,
        'pwr': 1415
    }, {
        'timestamp_utc': 1598440243,
        'speed': 10,
        'pwr': 1416
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
    'ui_id': 288,
     'f_id': 311,
    'c_id': 1234,
    'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
    'data': [{
        'timestamp_utc': 1598440345,
        'speed': 8.2,
        'pwr': 925
    }],
    'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}]

也许这可以帮助您以更直接的方式编写代码。如果只需要遍历一次信号并在循环中把它们归类,也许就不需要开销更大的排序和分组。

当您希望按 u_id 收集信号时,可以用字典为每个 u_id 保存一个条目。下面的代码就是这样做的,您只需在此基础上添加根据已归类的信号生成输出记录的逻辑:

# Bucket every signal under its u_id; within a bucket the signals keep the
# order in which they appear in the input list.
organized = {}

for signal in signals:
    organized.setdefault(signal['u_id'], []).append(signal)

pprint.pprint(organized)
上面的代码是可执行的,输出粘贴在下面


因此,让我们定义一个简单的函数,它将从分组所需的每个对象键中提取:

def extract(obj):
    """Build the grouping key for a single signal record.

    The key is the tuple ``(u_id, f_id, c_id, minute)``, where ``minute``
    is ``start_ts`` rounded down to a multiple of 60.
    """
    id_fields = tuple(obj[name] for name in ('u_id', 'f_id', 'c_id'))
    return id_fields + (obj['start_ts'] // 60 * 60,)
注意:为了实现“分钟精度”,我将时间戳整除 60 以去掉秒数,再乘以 60,得到对应整分钟的有效时间戳

然后,让我们将对象分组并形成最终列表:

from itertools import groupby
from datetime import datetime
...
# Build one output record per (u_id, f_id, c_id, minute) group. The list is
# sorted with the same key function it is grouped by, which groupby() needs
# in order to yield each group exactly once.
final_output = []
for (uid, fid, cid, ts), ss in groupby(sorted(signals, key=extract), extract):
    final_output.append({
        'ui_id': uid,
        'f_id': fid,
        'c_id': int(cid),
        'minute_utc': datetime.utcfromtimestamp(ts),
        'data': [
            {
                'timestamp_utc': s['start_ts'],
                # Keep only the part of the name before the first '.', so
                # a name such as 'speed.' is stored under 'speed'.
                **{i['name'].split('.')[0]: i['val'] for i in s['sign']},
            }
            for s in ss
        ],
        'processing_timestamp_utc': datetime.utcnow(),
    })
要以可读形式打印 final_output,我们可以使用 pprint:

from pprint import pprint
...
pprint(final_output, sort_dicts=False)

不清楚您如何在所需的输出中对项目进行分组。我在输入中没有看到具有相同 u_id 和 start_ts 的项目,但您不知何故将具有相同 u_id 的两个项目分组到一个对象中,并将第三个项目放入另一个对象中。
@OlvinRoght 项目应根据 u_id 和时间戳的分钟进行分组。如果您仔细看,u_id 288 的 1598440244、1598440243 属于同一分钟。
您是否分析过时间花在哪里?我可以试着写一个简单的循环,遍历输入并根据 id 存储信号——可能排序是不必要的,而且需要花费很多时间。
@basic,groupby(signals, lambda x: (x['u_id'], x['start_ts'] // 60 * 60)) 可以工作。
@basic,你没有对你得到的任何答案做出反应,问题解决了吗?
他希望信号按 'u_id'、'f_id'、'c_id' 以及 'start_ts' 的时间戳进行分组,精确到分钟。
不过他说他只想按 u_id 分组。我不理解时间戳的逻辑,但也许可以先按 u_id 分组,然后再处理时间戳,我不确定。
我已经在评论中问过了。
@Anton,谢谢你的回复。