Python 映射和组合多个json文件
标签:python, json, dictionary, directory
我有两个包含多个 json 文件的文件夹。第一个文件夹是
/Users/aus10/Desktop/MLB_Data/Clean_Team_Data
第二个文件夹是/Users/aus10/Desktop/MLB_Data/Slate_Logs
第一个文件夹中有30个类似这样的json文件
{
"Team": "ARI",
"Games": [
{
"Date": "2019-03-28",
"Opponent": "@ LA Dodgers",
"Results": "L",
"Score": "12-5",
"Line": 150,
"Over_Under": "O",
"Total": 7,
"Opponent_Score": 12,
"Team_Score": 5,
"Total_Score": 17,
"Home_Away": "A",
"players": []
},
{
"Date": "2019-03-29",
"Opponent": "@ LA Dodgers",
"Results": "W",
"Score": "5-4",
"Line": 155,
"Over_Under": "O",
"Total": 7,
"Team_Score": 5,
"Opponent_Score": 4,
"Total_Score": 9,
"Home_Away": "A",
"players": []
}]
}
[
{
"StatID": 2593242,
"TeamID": 4,
"PlayerID": 10002075,
"SeasonType": 1,
"Season": 2019,
"Name": "Colin Moran",
"Team": "PIT",
"Position": "3B",
"PositionCategory": "IF",
"Started": 1,
"InjuryStatus": null,
"GameID": 54207,
"OpponentID": 31,
"Opponent": "STL",
"Day": "2019-04-01T00:00:00",
"DateTime": "2019-04-01T13:05:00",
"HomeOrAway": "HOME",
"Games": 1,
"FantasyPoints": 12,
"AtBats": 3,
"Runs": 1,
"Hits": 2,
"Singles": 0,
"Doubles": 1,
"Triples": 0,
"HomeRuns": 1,
"RunsBattedIn": 3,
"BattingAverage": 0.667,
"Outs": 1,
"Strikeouts": 0,
"Walks": 2,
"HitByPitch": 0,
"Sacrifices": 0,
"SacrificeFlies": 0,
"GroundIntoDoublePlay": 0,
"StolenBases": 0,
"CaughtStealing": 0,
"OnBasePercentage": 0.8,
"SluggingPercentage": 2,
"OnBasePlusSlugging": 2.8,
"Wins": 0,
"Losses": 0,
"Saves": 0,
"InningsPitchedDecimal": 0,
"TotalOutsPitched": 0,
"InningsPitchedFull": 0,
"InningsPitchedOuts": 0,
"EarnedRunAverage": 0,
"PitchingHits": 0,
"PitchingRuns": 0,
"PitchingEarnedRuns": 0,
"PitchingWalks": 0,
"PitchingStrikeouts": 0,
"PitchingHomeRuns": 0,
"PitchesThrown": 0,
"PitchesThrownStrikes": 0,
"WalksHitsPerInningsPitched": 0,
"PitchingBattingAverageAgainst": 0,
"FantasyPointsFanDuel": 37.7,
"FantasyPointsDraftKings": 27,
"WeightedOnBasePercentage": 0.8,
"PitchingCompleteGames": 0,
"PitchingShutOuts": 0,
"PitchingOnBasePercentage": 0,
"PitchingSluggingPercentage": 0,
"PitchingOnBasePlusSlugging": 0,
"PitchingStrikeoutsPerNineInnings": 0,
"PitchingWalksPerNineInnings": 0,
"PitchingWeightedOnBasePercentage": 0
}]
第二个文件夹包含218个类似这样的json文件
{
"Team": "ARI",
"Games": [
{
"Date": "2019-03-28",
"Opponent": "@ LA Dodgers",
"Results": "L",
"Score": "12-5",
"Line": 150,
"Over_Under": "O",
"Total": 7,
"Opponent_Score": 12,
"Team_Score": 5,
"Total_Score": 17,
"Home_Away": "A",
"players": []
},
{
"Date": "2019-03-29",
"Opponent": "@ LA Dodgers",
"Results": "W",
"Score": "5-4",
"Line": 155,
"Over_Under": "O",
"Total": 7,
"Team_Score": 5,
"Opponent_Score": 4,
"Total_Score": 9,
"Home_Away": "A",
"players": []
}]
}
[
{
"StatID": 2593242,
"TeamID": 4,
"PlayerID": 10002075,
"SeasonType": 1,
"Season": 2019,
"Name": "Colin Moran",
"Team": "PIT",
"Position": "3B",
"PositionCategory": "IF",
"Started": 1,
"InjuryStatus": null,
"GameID": 54207,
"OpponentID": 31,
"Opponent": "STL",
"Day": "2019-04-01T00:00:00",
"DateTime": "2019-04-01T13:05:00",
"HomeOrAway": "HOME",
"Games": 1,
"FantasyPoints": 12,
"AtBats": 3,
"Runs": 1,
"Hits": 2,
"Singles": 0,
"Doubles": 1,
"Triples": 0,
"HomeRuns": 1,
"RunsBattedIn": 3,
"BattingAverage": 0.667,
"Outs": 1,
"Strikeouts": 0,
"Walks": 2,
"HitByPitch": 0,
"Sacrifices": 0,
"SacrificeFlies": 0,
"GroundIntoDoublePlay": 0,
"StolenBases": 0,
"CaughtStealing": 0,
"OnBasePercentage": 0.8,
"SluggingPercentage": 2,
"OnBasePlusSlugging": 2.8,
"Wins": 0,
"Losses": 0,
"Saves": 0,
"InningsPitchedDecimal": 0,
"TotalOutsPitched": 0,
"InningsPitchedFull": 0,
"InningsPitchedOuts": 0,
"EarnedRunAverage": 0,
"PitchingHits": 0,
"PitchingRuns": 0,
"PitchingEarnedRuns": 0,
"PitchingWalks": 0,
"PitchingStrikeouts": 0,
"PitchingHomeRuns": 0,
"PitchesThrown": 0,
"PitchesThrownStrikes": 0,
"WalksHitsPerInningsPitched": 0,
"PitchingBattingAverageAgainst": 0,
"FantasyPointsFanDuel": 37.7,
"FantasyPointsDraftKings": 27,
"WeightedOnBasePercentage": 0.8,
"PitchingCompleteGames": 0,
"PitchingShutOuts": 0,
"PitchingOnBasePercentage": 0,
"PitchingSluggingPercentage": 0,
"PitchingOnBasePlusSlugging": 0,
"PitchingStrikeoutsPerNineInnings": 0,
"PitchingWalksPerNineInnings": 0,
"PitchingWeightedOnBasePercentage": 0
}]
我需要遍历第一个文件夹中的每个文件:如果某场比赛对象中的日期(Date)和球队(Team)与第二个文件夹任一文件中某个 dict 的日期(Day)和 "Team" 相匹配,就把该 dict 追加到第一个文件夹中对应比赛 dict 的 players 列表里,依此类推,直到处理完第一个文件夹中的每个文件。我使用了嵌套的 for 循环,但它只匹配到一个日期 2019-08-18,我不知道为什么。我知道这不是最高效的解决方案,所以欢迎提出更高效的方法。
这是代码:
import json
import pandas as pd
import os
# Collect the names of the .json files in the team game-log folder.
path_to_json = '/Users/aus10/Desktop/MLB_Data/Clean_Team_Data'
Game_logs_json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('.json')]
# path_to_json is rebound here: now collect the .json names in the slate-log folder.
path_to_json = '/Users/aus10/Desktop/MLB_Data/Slate_Logs'
FPTS_json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('.json')]
# NOTE(review): indentation was lost in this copy, so the nesting of the
# statements below cannot be read off the text. Presumably everything down to
# the final json.dump sits inside the inner loop -- TODO confirm. If so,
# team_data is re-read from disk for every slate file, which throws away the
# players appended on earlier iterations, and the output file is overwritten
# each time; only matches from the last slate file would survive.
for file in Game_logs_json_files:
for file_1 in FPTS_json_files:
with open('/Users/aus10/Desktop/MLB_Data/Clean_Team_Data/'+file+'') as json_file:
team_data = json.load(json_file)
with open('/Users/aus10/Desktop/MLB_Data/Slate_Logs/'+file_1+'') as json_file:
fantasy_data = json.load(json_file)
for obj in team_data['Games']:
for player in fantasy_data:
# A player matches a game when the date part of "Day" equals the game's
# "Date" and the player's "Team" equals the team file's "Team".
if player['Day'].split('T')[0] == obj['Date'] and player['Team'] == team_data['Team']:
obj['players'].append(player)
# The merged result is written under the same file name in a separate folder.
with open('/Users/aus10/Desktop/MLB_Data/Game_Logs_With_Player_Data/'+file+'', 'w') as my_file:
json.dump(team_data, my_file)
注意:我还没有考虑创建日期检查所需的日期格式(我假设您将相应地更改代码)
这只是解决问题的有效方法
创建字典。
# Maps a combined "<date>%<team name>" key to the list of player records for it.
dict_players={}
遍历所有包含球员数据的文件。
遍历其中的球员,对每个球员执行以下操作:
for player in players:
    # Group players under a combined "<date>%<team_name>" key. `date` and
    # `team_name` are placeholders in this sketch -- presumably taken from each
    # player record (e.g. its "Day" and "Team" fields); adjust to the real data.
    k = date + '%' + team_name
    # dict.has_key() no longer exists in Python 3. setdefault() creates the
    # list the first time a key is seen and returns the existing one afterwards.
    dict_players.setdefault(k, []).append(player)
现在 dict_players 字典中,每个“日期 + 球队名称”组合(键为 date + '%' + team_name)都对应一个球员列表。这正是我们在查看团队数据文件时所需要的。
现在我们将浏览游戏数据,但对于每一场游戏的团队和日期组合,我们已经在字典中列出了玩家列表(dict_players)。我们需要做的就是访问它
for game in games:
    # Fetch the players grouped under this game's "<date>%<team>" key.
    # .get(..., []) gives a game with no grouped players an empty list instead
    # of raising KeyError.
    # NOTE(review): 'date' and `team` are the answer's placeholders; the sample
    # team files shown in the question spell the key "Date" -- adjust to the
    # real data.
    game['players'] = dict_players.get(game['date'] + '%' + team, [])
这样,您只需在每个文件上迭代一次。与嵌套循环相比,这大大减少了所需的时间。
注意:我没有考虑日期比较所需的日期格式(我假设您会相应地修改代码)。
这只是解决问题的有效方法
创建字典。
# Maps a combined "<date>%<team name>" key to the list of player records for it.
dict_players={}
遍历所有包含球员数据的文件。
遍历其中的球员,对每个球员执行以下操作:
for player in players:
    # Group players under a combined "<date>%<team_name>" key. `date` and
    # `team_name` are placeholders in this sketch -- presumably taken from each
    # player record (e.g. its "Day" and "Team" fields); adjust to the real data.
    k = date + '%' + team_name
    # dict.has_key() no longer exists in Python 3. setdefault() creates the
    # list the first time a key is seen and returns the existing one afterwards.
    dict_players.setdefault(k, []).append(player)
现在 dict_players 字典中,每个“日期 + 球队名称”组合(键为 date + '%' + team_name)都对应一个球员列表。这正是我们在查看团队数据文件时所需要的。
现在我们将浏览游戏数据,但对于每一场游戏的团队和日期组合,我们已经在字典中列出了玩家列表(dict_players)。我们需要做的就是访问它
for game in games:
    # Fetch the players grouped under this game's "<date>%<team>" key.
    # .get(..., []) gives a game with no grouped players an empty list instead
    # of raising KeyError.
    # NOTE(review): 'date' and `team` are the answer's placeholders; the sample
    # team files shown in the question spell the key "Date" -- adjust to the
    # real data.
    game['players'] = dict_players.get(game['date'] + '%' + team, [])
这样,您只需在每个文件上迭代一次。与嵌套循环相比,这大大减少了所花费的时间。
尝试以下方法:
import json
import pandas as pd
import os
# Input/output folders, named once so the paths below are not repeated by hand.
TEAM_DIR = '/Users/aus10/Desktop/MLB_Data/Clean_Team_Data'
SLATE_DIR = '/Users/aus10/Desktop/MLB_Data/Slate_Logs'
OUTPUT_DIR = '/Users/aus10/Desktop/MLB_Data/Game_Logs_With_Player_Data'

# Team game logs: per the samples, each file is one object with "Team" and "Games".
path_to_json = TEAM_DIR
Game_logs_json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('.json')]

# Slate logs: per the samples, each file is a list of player stat records.
path_to_json = SLATE_DIR
FPTS_json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('.json')]

# Parse every slate file once up front; the slate data does not depend on the
# team being processed, so re-reading it for every team file is wasted work.
fantasy_logs = []
for file_1 in FPTS_json_files:
    with open(os.path.join(SLATE_DIR, file_1)) as json_file:
        fantasy_logs.append(json.load(json_file))

for file in Game_logs_json_files:
    # Load the team file once per team, outside the slate loop, so players
    # appended for one slate file are not lost when the next one is scanned.
    with open(os.path.join(TEAM_DIR, file)) as json_file:
        team_data = json.load(json_file)

    for fantasy_data in fantasy_logs:
        for obj in team_data['Games']:
            for player in fantasy_data:
                # "Day" looks like "2019-04-01T00:00:00"; compare its date part
                # with the game's "Date", and require the same team.
                if player['Day'].split('T')[0] == obj['Date'] and player['Team'] == team_data['Team']:
                    obj['players'].append(player)

    # Write once per team, after all slate files have been scanned.
    with open(os.path.join(OUTPUT_DIR, file), 'w') as my_file:
        json.dump(team_data, my_file)
试试这个:
import json
import pandas as pd
import os
# Input/output folders, named once so the paths below are not repeated by hand.
TEAM_DIR = '/Users/aus10/Desktop/MLB_Data/Clean_Team_Data'
SLATE_DIR = '/Users/aus10/Desktop/MLB_Data/Slate_Logs'
OUTPUT_DIR = '/Users/aus10/Desktop/MLB_Data/Game_Logs_With_Player_Data'

# Team game logs: per the samples, each file is one object with "Team" and "Games".
path_to_json = TEAM_DIR
Game_logs_json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('.json')]

# Slate logs: per the samples, each file is a list of player stat records.
path_to_json = SLATE_DIR
FPTS_json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('.json')]

# Parse every slate file once up front; the slate data does not depend on the
# team being processed, so re-reading it for every team file is wasted work.
fantasy_logs = []
for file_1 in FPTS_json_files:
    with open(os.path.join(SLATE_DIR, file_1)) as json_file:
        fantasy_logs.append(json.load(json_file))

for file in Game_logs_json_files:
    # Load the team file once per team, outside the slate loop, so players
    # appended for one slate file are not lost when the next one is scanned.
    with open(os.path.join(TEAM_DIR, file)) as json_file:
        team_data = json.load(json_file)

    for fantasy_data in fantasy_logs:
        for obj in team_data['Games']:
            for player in fantasy_data:
                # "Day" looks like "2019-04-01T00:00:00"; compare its date part
                # with the game's "Date", and require the same team.
                if player['Day'].split('T')[0] == obj['Date'] and player['Team'] == team_data['Team']:
                    obj['players'].append(player)

    # Write once per team, after all slate files have been scanned.
    with open(os.path.join(OUTPUT_DIR, file), 'w') as my_file:
        json.dump(team_data, my_file)
该死,我以为我试过了,但我想我没有。这成功了!该死,我以为我试过了,但我想我没有。这成功了!