Python 将嵌套JSON解析为数据帧
标签:python, json, pandas
我试图将 JSON 字符串解析为最细粒度的 pandas 数据帧(DataFrame)。
尝试
首先,我尝试用以下方式读取:
jsonData = pd.read_json(apiRequest)
但大量数据仍然嵌套在networkRank下
然后我尝试了 json_normalize,但是这次我丢失了更高层级的数据,比如纬度和经度
result = json_normalize(json_data['networkRank'])
我还尝试将嵌套结构“解析为”并从头构造数据帧,但这段代码导致错误:
result_nested = json_normalize(json_data, 'networkRank', ['longitude', 'latitude', ['networkRank', 'type3G', 'downloadSpeed']])
目标
将JSON数据解析到一个包含所有字段的平面表中,这意味着图中的每一行数据都附加了纬度、经度和距离数据
JSON字符串
1) 将JSON字符串解析为python结构
2) 遍历 “networkRank” 字典列表,并把需要补充的每个键写入其中的每个字典(哈希)中
# Copy the top-level fields onto every entry of the 'networkRank' list so
# that each row carries them once the list is turned into a DataFrame.
for data_row in deserialized_json['networkRank']:
    data_row['latitude'] = deserialized_json['latitude']
    # etc
(三)
1) 将JSON字符串解析为python结构
2) 遍历 “networkRank” 字典列表,并把需要补充的每个键写入其中的每个字典(哈希)中
# Copy the top-level fields onto every entry of the 'networkRank' list so
# that each row carries them once the list is turned into a DataFrame.
for data_row in deserialized_json['networkRank']:
    data_row['latitude'] = deserialized_json['latitude']
    # etc
(三)
此函数递归地调用自身以展平字典和列表
from collections import OrderedDict
def flatten(json_object, container=None, name=''):
    """Recursively flatten nested dicts and lists into one flat mapping.

    Each leaf value is stored under a key built from the path that leads to
    it: dict keys and 1-based list positions joined with ``'_'`` and
    prefixed with ``name``. Leaf values are converted with ``str()``.
    Empty dicts and empty lists contribute no entries.

    Args:
        json_object: The value to flatten (dict, list, or a scalar leaf).
        container: Mapping that receives the flattened entries. A new
            ``OrderedDict`` is created when omitted; recursive calls pass
            the same mapping along.
        name: Prefix for the generated keys. It is concatenated directly
            with the first path component (no separator is inserted).

    Returns:
        The ``container`` holding the flattened ``str -> str`` entries.
    """
    if container is None:
        container = OrderedDict()
    if isinstance(json_object, dict):
        for key, value in json_object.items():
            # str(key) allows non-string keys (e.g. ints) in the path.
            flatten(value, container=container, name=name + str(key) + '_')
    elif isinstance(json_object, list):
        for n, item in enumerate(json_object, 1):
            flatten(item, container=container, name=name + str(n) + '_')
    else:
        # Recursive calls append a trailing '_' separator; drop it only when
        # it is actually there so that a top-level scalar keeps its full name.
        leaf_key = name[:-1] if name.endswith('_') else name
        container[str(leaf_key)] = str(json_object)
    return container
示例:
flatten([1, 2, 3])
OrderedDict([('1', '1'), ('2', '2'), ('3', '3')])
flatten([1, 2, 3], name='x')
OrderedDict([('x1', '1'), ('x2', '2'), ('x3', '3')])
flatten({'a': [1, 2, 3], 'b': 4, 'c': {'d': [5, 6], 'e': 7}}, name='x')
OrderedDict([('xa_1', '1'),
('xa_2', '2'),
('xa_3', '3'),
('xc_e', '7'),
('xc_d_1', '5'),
('xc_d_2', '6'),
('xb', '4')])
答复:
# j = the parsed JSON object (e.g. json.loads(json_string)), not the raw string
>>> pd.DataFrame(flatten(j), index=[0]).T
0
perMinuteLimit 10
distance 10
perMonthCurrent 0
longitude 35.751607
perMonthLimit 2000
latitude -6.162959
perMinuteCurrent 0
networkRank_1_networkId 6402
networkRank_1_type3G_sampleSizeSpeed 29
networkRank_1_type3G_averageRssiAsu 9.5429091136
networkRank_1_type3G_pingTime 320.9600
networkRank_1_type3G_networkType 3
networkRank_1_type3G_averageRssiDb -69.5664329624972
networkRank_1_type3G_networkName Vodacom
networkRank_1_type3G_networkId 6402
networkRank_1_type3G_downloadSpeed 1508.1304
networkRank_1_type3G_uploadSpeed 893.7692
networkRank_1_type3G_reliability 0.804236452826138
networkRank_1_type3G_sampleSizeRSSI 948
networkRank_1_networkName Vodacom
networkRank_2_networkId 6400
networkRank_2_type3G_sampleSizeSpeed 21
networkRank_2_type3G_averageRssiAsu 15.3537142857
networkRank_2_type3G_pingTime 259.0000
networkRank_2_type3G_networkType 3
networkRank_2_type3G_averageRssiDb -61.4563389583101
networkRank_2_type3G_networkName tiGO
networkRank_2_type3G_networkId 6400
networkRank_2_type3G_downloadSpeed 516.0000
networkRank_2_type3G_uploadSpeed 320.4211
networkRank_2_type3G_reliability 0.911904765537807
networkRank_2_type3G_sampleSizeRSSI 935
networkRank_2_networkName tiGO
networkRank_3_networkId 6403
networkRank_3_type3G_sampleSizeSpeed 21
networkRank_3_type3G_averageRssiAsu 13.2729999375
networkRank_3_type3G_pingTime 194.5556
networkRank_3_type3G_networkType 3
networkRank_3_type3G_averageRssiDb -58.1521092977699
networkRank_3_type3G_networkName Airtel
networkRank_3_type3G_networkId 6403
networkRank_3_type3G_downloadSpeed 1080.2500
networkRank_3_type3G_uploadSpeed 572.1579
networkRank_3_type3G_reliability 0.554680264185345
networkRank_3_type3G_sampleSizeRSSI 587
networkRank_3_networkName Airtel
network_type None
apiVersion 2
此函数递归地调用自身以展平字典和列表
from collections import OrderedDict
def flatten(json_object, container=None, name=''):
    """Recursively flatten nested dicts and lists into one flat mapping.

    Each leaf value is stored under a key built from the path that leads to
    it: dict keys and 1-based list positions joined with ``'_'`` and
    prefixed with ``name``. Leaf values are converted with ``str()``.
    Empty dicts and empty lists contribute no entries.

    Args:
        json_object: The value to flatten (dict, list, or a scalar leaf).
        container: Mapping that receives the flattened entries. A new
            ``OrderedDict`` is created when omitted; recursive calls pass
            the same mapping along.
        name: Prefix for the generated keys. It is concatenated directly
            with the first path component (no separator is inserted).

    Returns:
        The ``container`` holding the flattened ``str -> str`` entries.
    """
    if container is None:
        container = OrderedDict()
    if isinstance(json_object, dict):
        for key, value in json_object.items():
            # str(key) allows non-string keys (e.g. ints) in the path.
            flatten(value, container=container, name=name + str(key) + '_')
    elif isinstance(json_object, list):
        for n, item in enumerate(json_object, 1):
            flatten(item, container=container, name=name + str(n) + '_')
    else:
        # Recursive calls append a trailing '_' separator; drop it only when
        # it is actually there so that a top-level scalar keeps its full name.
        leaf_key = name[:-1] if name.endswith('_') else name
        container[str(leaf_key)] = str(json_object)
    return container
示例:
flatten([1, 2, 3])
OrderedDict([('1', '1'), ('2', '2'), ('3', '3')])
flatten([1, 2, 3], name='x')
OrderedDict([('x1', '1'), ('x2', '2'), ('x3', '3')])
flatten({'a': [1, 2, 3], 'b': 4, 'c': {'d': [5, 6], 'e': 7}}, name='x')
OrderedDict([('xa_1', '1'),
('xa_2', '2'),
('xa_3', '3'),
('xc_e', '7'),
('xc_d_1', '5'),
('xc_d_2', '6'),
('xb', '4')])
答复:
# j = the parsed JSON object (e.g. json.loads(json_string)), not the raw string
>>> pd.DataFrame(flatten(j), index=[0]).T
0
perMinuteLimit 10
distance 10
perMonthCurrent 0
longitude 35.751607
perMonthLimit 2000
latitude -6.162959
perMinuteCurrent 0
networkRank_1_networkId 6402
networkRank_1_type3G_sampleSizeSpeed 29
networkRank_1_type3G_averageRssiAsu 9.5429091136
networkRank_1_type3G_pingTime 320.9600
networkRank_1_type3G_networkType 3
networkRank_1_type3G_averageRssiDb -69.5664329624972
networkRank_1_type3G_networkName Vodacom
networkRank_1_type3G_networkId 6402
networkRank_1_type3G_downloadSpeed 1508.1304
networkRank_1_type3G_uploadSpeed 893.7692
networkRank_1_type3G_reliability 0.804236452826138
networkRank_1_type3G_sampleSizeRSSI 948
networkRank_1_networkName Vodacom
networkRank_2_networkId 6400
networkRank_2_type3G_sampleSizeSpeed 21
networkRank_2_type3G_averageRssiAsu 15.3537142857
networkRank_2_type3G_pingTime 259.0000
networkRank_2_type3G_networkType 3
networkRank_2_type3G_averageRssiDb -61.4563389583101
networkRank_2_type3G_networkName tiGO
networkRank_2_type3G_networkId 6400
networkRank_2_type3G_downloadSpeed 516.0000
networkRank_2_type3G_uploadSpeed 320.4211
networkRank_2_type3G_reliability 0.911904765537807
networkRank_2_type3G_sampleSizeRSSI 935
networkRank_2_networkName tiGO
networkRank_3_networkId 6403
networkRank_3_type3G_sampleSizeSpeed 21
networkRank_3_type3G_averageRssiAsu 13.2729999375
networkRank_3_type3G_pingTime 194.5556
networkRank_3_type3G_networkType 3
networkRank_3_type3G_averageRssiDb -58.1521092977699
networkRank_3_type3G_networkName Airtel
networkRank_3_type3G_networkId 6403
networkRank_3_type3G_downloadSpeed 1080.2500
networkRank_3_type3G_uploadSpeed 572.1579
networkRank_3_type3G_reliability 0.554680264185345
networkRank_3_type3G_sampleSizeRSSI 587
networkRank_3_networkName Airtel
network_type None
apiVersion 2
这就是你想要的吗?
In [22]: df = json_normalize(json_data['networkRank'])
In [23]: df['distance'] = json_data['distance']
In [24]: df['latitude'] = json_data['latitude']
In [25]: df['longitude'] = json_data['longitude']
In [26]: df
Out[26]:
networkId networkName type3G.averageRssiAsu type3G.averageRssiDb \
0 6402 Vodacom 9.5429091136 -69.5664329624972
1 6400 tiGO 15.3537142857 -61.4563389583101
2 6403 Airtel 13.2729999375 -58.1521092977699
type3G.downloadSpeed type3G.networkId type3G.networkName type3G.networkType \
0 1508.1304 6402 Vodacom 3
1 516.0000 6400 tiGO 3
2 1080.2500 6403 Airtel 3
type3G.pingTime type3G.reliability type3G.sampleSizeRSSI \
0 320.9600 0.804236452826138 948
1 259.0000 0.911904765537807 935
2 194.5556 0.554680264185345 587
type3G.sampleSizeSpeed type3G.uploadSpeed distance latitude longitude
0 29 893.7692 10 -6.162959 35.751607
1 21 320.4211 10 -6.162959 35.751607
2 21 572.1579 10 -6.162959 35.751607
这就是你想要的吗?
In [22]: df = json_normalize(json_data['networkRank'])
In [23]: df['distance'] = json_data['distance']
In [24]: df['latitude'] = json_data['latitude']
In [25]: df['longitude'] = json_data['longitude']
In [26]: df
Out[26]:
networkId networkName type3G.averageRssiAsu type3G.averageRssiDb \
0 6402 Vodacom 9.5429091136 -69.5664329624972
1 6400 tiGO 15.3537142857 -61.4563389583101
2 6403 Airtel 13.2729999375 -58.1521092977699
type3G.downloadSpeed type3G.networkId type3G.networkName type3G.networkType \
0 1508.1304 6402 Vodacom 3
1 516.0000 6400 tiGO 3
2 1080.2500 6403 Airtel 3
type3G.pingTime type3G.reliability type3G.sampleSizeRSSI \
0 320.9600 0.804236452826138 948
1 259.0000 0.911904765537807 935
2 194.5556 0.554680264185345 587
type3G.sampleSizeSpeed type3G.uploadSpeed distance latitude longitude
0 29 893.7692 10 -6.162959 35.751607
1 21 320.4211 10 -6.162959 35.751607
2 21 572.1579 10 -6.162959 35.751607