Python 映射和组合多个json文件

Python 映射和组合多个json文件,python,json,dictionary,directory,Python,Json,Dictionary,Directory,我有两个包含多个json文件的文件夹 第一个文件夹是/Users/aus10/Desktop/MLB_Data/Clean_Team_Data 第二个文件夹是/Users/aus10/Desktop/MLB_Data/Slate_Logs 第一个文件夹中有30个类似这样的json文件 { "Team": "ARI", "Games": [ { "Date": "2019-03-2

我有两个包含多个json文件的文件夹

第一个文件夹是
/Users/aus10/Desktop/MLB_Data/Clean_Team_Data

第二个文件夹是
/Users/aus10/Desktop/MLB_Data/Slate_Logs

第一个文件夹中有30个类似这样的json文件

{
  "Team": "ARI",
  "Games": [
    {
      "Date": "2019-03-28",
      "Opponent": "@ LA Dodgers",
      "Results": "L",
      "Score": "12-5",
      "Line": 150,
      "Over_Under": "O",
      "Total": 7,
      "Opponent_Score": 12,
      "Team_Score": 5,
      "Total_Score": 17,
      "Home_Away": "A",
      "players": []
    },
    {
      "Date": "2019-03-29",
      "Opponent": "@ LA Dodgers",
      "Results": "W",
      "Score": "5-4",
      "Line": 155,
      "Over_Under": "O",
      "Total": 7,
      "Team_Score": 5,
      "Opponent_Score": 4,
      "Total_Score": 9,
      "Home_Away": "A",
      "players": []
    }]
[
  {
    "StatID": 2593242,
    "TeamID": 4,
    "PlayerID": 10002075,
    "SeasonType": 1,
    "Season": 2019,
    "Name": "Colin Moran",
    "Team": "PIT",
    "Position": "3B",
    "PositionCategory": "IF",
    "Started": 1,
    "InjuryStatus": null,
    "GameID": 54207,
    "OpponentID": 31,
    "Opponent": "STL",
    "Day": "2019-04-01T00:00:00",
    "DateTime": "2019-04-01T13:05:00",
    "HomeOrAway": "HOME",
    "Games": 1,
    "FantasyPoints": 12,
    "AtBats": 3,
    "Runs": 1,
    "Hits": 2,
    "Singles": 0,
    "Doubles": 1,
    "Triples": 0,
    "HomeRuns": 1,
    "RunsBattedIn": 3,
    "BattingAverage": 0.667,
    "Outs": 1,
    "Strikeouts": 0,
    "Walks": 2,
    "HitByPitch": 0,
    "Sacrifices": 0,
    "SacrificeFlies": 0,
    "GroundIntoDoublePlay": 0,
    "StolenBases": 0,
    "CaughtStealing": 0,
    "OnBasePercentage": 0.8,
    "SluggingPercentage": 2,
    "OnBasePlusSlugging": 2.8,
    "Wins": 0,
    "Losses": 0,
    "Saves": 0,
    "InningsPitchedDecimal": 0,
    "TotalOutsPitched": 0,
    "InningsPitchedFull": 0,
    "InningsPitchedOuts": 0,
    "EarnedRunAverage": 0,
    "PitchingHits": 0,
    "PitchingRuns": 0,
    "PitchingEarnedRuns": 0,
    "PitchingWalks": 0,
    "PitchingStrikeouts": 0,
    "PitchingHomeRuns": 0,
    "PitchesThrown": 0,
    "PitchesThrownStrikes": 0,
    "WalksHitsPerInningsPitched": 0,
    "PitchingBattingAverageAgainst": 0,
    "FantasyPointsFanDuel": 37.7,
    "FantasyPointsDraftKings": 27,
    "WeightedOnBasePercentage": 0.8,
    "PitchingCompleteGames": 0,
    "PitchingShutOuts": 0,
    "PitchingOnBasePercentage": 0,
    "PitchingSluggingPercentage": 0,
    "PitchingOnBasePlusSlugging": 0,
    "PitchingStrikeoutsPerNineInnings": 0,
    "PitchingWalksPerNineInnings": 0,
    "PitchingWeightedOnBasePercentage": 0
  }]
第二个文件夹包含218个类似这样的json文件

{
  "Team": "ARI",
  "Games": [
    {
      "Date": "2019-03-28",
      "Opponent": "@ LA Dodgers",
      "Results": "L",
      "Score": "12-5",
      "Line": 150,
      "Over_Under": "O",
      "Total": 7,
      "Opponent_Score": 12,
      "Team_Score": 5,
      "Total_Score": 17,
      "Home_Away": "A",
      "players": []
    },
    {
      "Date": "2019-03-29",
      "Opponent": "@ LA Dodgers",
      "Results": "W",
      "Score": "5-4",
      "Line": 155,
      "Over_Under": "O",
      "Total": 7,
      "Team_Score": 5,
      "Opponent_Score": 4,
      "Total_Score": 9,
      "Home_Away": "A",
      "players": []
    }]
[
  {
    "StatID": 2593242,
    "TeamID": 4,
    "PlayerID": 10002075,
    "SeasonType": 1,
    "Season": 2019,
    "Name": "Colin Moran",
    "Team": "PIT",
    "Position": "3B",
    "PositionCategory": "IF",
    "Started": 1,
    "InjuryStatus": null,
    "GameID": 54207,
    "OpponentID": 31,
    "Opponent": "STL",
    "Day": "2019-04-01T00:00:00",
    "DateTime": "2019-04-01T13:05:00",
    "HomeOrAway": "HOME",
    "Games": 1,
    "FantasyPoints": 12,
    "AtBats": 3,
    "Runs": 1,
    "Hits": 2,
    "Singles": 0,
    "Doubles": 1,
    "Triples": 0,
    "HomeRuns": 1,
    "RunsBattedIn": 3,
    "BattingAverage": 0.667,
    "Outs": 1,
    "Strikeouts": 0,
    "Walks": 2,
    "HitByPitch": 0,
    "Sacrifices": 0,
    "SacrificeFlies": 0,
    "GroundIntoDoublePlay": 0,
    "StolenBases": 0,
    "CaughtStealing": 0,
    "OnBasePercentage": 0.8,
    "SluggingPercentage": 2,
    "OnBasePlusSlugging": 2.8,
    "Wins": 0,
    "Losses": 0,
    "Saves": 0,
    "InningsPitchedDecimal": 0,
    "TotalOutsPitched": 0,
    "InningsPitchedFull": 0,
    "InningsPitchedOuts": 0,
    "EarnedRunAverage": 0,
    "PitchingHits": 0,
    "PitchingRuns": 0,
    "PitchingEarnedRuns": 0,
    "PitchingWalks": 0,
    "PitchingStrikeouts": 0,
    "PitchingHomeRuns": 0,
    "PitchesThrown": 0,
    "PitchesThrownStrikes": 0,
    "WalksHitsPerInningsPitched": 0,
    "PitchingBattingAverageAgainst": 0,
    "FantasyPointsFanDuel": 37.7,
    "FantasyPointsDraftKings": 27,
    "WeightedOnBasePercentage": 0.8,
    "PitchingCompleteGames": 0,
    "PitchingShutOuts": 0,
    "PitchingOnBasePercentage": 0,
    "PitchingSluggingPercentage": 0,
    "PitchingOnBasePlusSlugging": 0,
    "PitchingStrikeoutsPerNineInnings": 0,
    "PitchingWalksPerNineInnings": 0,
    "PitchingWeightedOnBasePercentage": 0
  }]
我需要迭代第一个文件夹中的每个文件:如果其中某个比赛对象的 Date 和文件的 Team 与第二个文件夹中任何文件里某个 dict 的 Day(日期部分)和 Team 匹配,我想将该 dict 附加到第一个文件夹中对应比赛对象的 players 列表中,依此类推,直到处理完第一个文件夹中的每个文件。我使用了一个嵌套的 for 循环,但它只匹配到一个日期 2019-08-18,我不知道为什么。我知道这不是最高效的解决方案,所以欢迎提出更高效的方法

这是代码

import json
import pandas as pd
import os

path_to_json = '/Users/aus10/Desktop/MLB_Data/Clean_Team_Data'
Game_logs_json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('.json')]

path_to_json = '/Users/aus10/Desktop/MLB_Data/Slate_Logs'
FPTS_json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('.json')]

# For every team game-log file, scan every player-log file and attach the
# matching player records to the corresponding game's 'players' list.
for file in Game_logs_json_files:
    for file_1 in FPTS_json_files:

        # NOTE: the team file is re-read on every inner iteration, so team_data
        # is a fresh object each time and any players appended while
        # processing an earlier file_1 are discarded.
        with open('/Users/aus10/Desktop/MLB_Data/Clean_Team_Data/'+file+'') as json_file:
            team_data = json.load(json_file)

        with open('/Users/aus10/Desktop/MLB_Data/Slate_Logs/'+file_1+'') as json_file:
            fantasy_data = json.load(json_file)

            # A player record matches a game when the text before 'T' in its
            # 'Day' equals the game's 'Date' and its 'Team' equals the team
            # file's 'Team'.
            for obj in team_data['Games']:
                for player in fantasy_data:
                    if player['Day'].split('T')[0] == obj['Date'] and player['Team'] == team_data['Team']:
                        obj['players'].append(player)

    # Written once per team file, after the inner loop has finished: team_data
    # here is the copy loaded during the last file_1 iteration, so only the
    # matches found in that last player-log file are saved.
    with open('/Users/aus10/Desktop/MLB_Data/Game_Logs_With_Player_Data/'+file+'', 'w') as my_file:
        json.dump(team_data, my_file)


    



    

注意:我还没有考虑创建日期检查所需的日期格式(我假设您将相应地更改代码)

这只是解决问题的有效方法

创建字典。 dict_players={}

迭代包含玩家数据的所有文件。 迭代其中的玩家,对每个玩家执行以下操作

# Group player records under a "date%team" key so that each game can later
# find its players with a single dictionary lookup.
# `date` and `team_name` are placeholders in this sketch; presumably they are
# derived from the current player record (e.g. its 'Day' and 'Team') - adapt
# to the real data.
for player in players:
    k = date + '%' + team_name
    # dict.has_key() was removed in Python 3. setdefault() creates the list
    # the first time a key is seen and returns the existing list afterwards.
    dict_players.setdefault(k, []).append(player)
现在 dict_players 字典中,每个日期和球队名称的组合(键为 date + '%' + team_name)都对应一个玩家列表。这正是我们在查看团队数据文件时所需要的

现在我们遍历比赛数据。对于每场比赛的球队和日期组合,字典 dict_players 中已经有对应的玩家列表,我们只需要直接访问它

# Attach the pre-grouped players to each game using the same "date%team" key
# format that was used to build dict_players.
for game in games:
    # The sample team files spell the key 'Date' (capitalised). .get() with a
    # fresh empty list keeps games that have no recorded players from raising
    # KeyError.
    game['players'] = dict_players.get(game['Date'] + '%' + team, [])

这样,您只需在每个文件上迭代一次。这大大减少了所需的时间。(与嵌套循环相比)

注意:我没有考虑创建日期检查所需的日期格式。(我假设您将相应地更改代码)

这只是解决问题的有效方法

创建字典。 dict_players={}

迭代包含玩家数据的所有文件。 迭代其中的玩家,对每个玩家执行以下操作

# Group player records under a "date%team" key so that each game can later
# find its players with a single dictionary lookup.
# `date` and `team_name` are placeholders in this sketch; presumably they are
# derived from the current player record (e.g. its 'Day' and 'Team') - adapt
# to the real data.
for player in players:
    k = date + '%' + team_name
    # dict.has_key() was removed in Python 3. setdefault() creates the list
    # the first time a key is seen and returns the existing list afterwards.
    dict_players.setdefault(k, []).append(player)
现在 dict_players 字典中,每个日期和球队名称的组合(键为 date + '%' + team_name)都对应一个玩家列表。这正是我们在查看团队数据文件时所需要的

现在我们遍历比赛数据。对于每场比赛的球队和日期组合,字典 dict_players 中已经有对应的玩家列表,我们只需要直接访问它

# Attach the pre-grouped players to each game using the same "date%team" key
# format that was used to build dict_players.
for game in games:
    # The sample team files spell the key 'Date' (capitalised). .get() with a
    # fresh empty list keeps games that have no recorded players from raising
    # KeyError.
    game['players'] = dict_players.get(game['Date'] + '%' + team, [])
这样,您只需在每个文件上迭代一次。这大大减少了所花费的时间。(与嵌套循环相比)

尝试以下方法:

import json
import pandas as pd
import os

# Input and output directories, named once so the same path is used both for
# listing and for opening files.
TEAM_DIR = '/Users/aus10/Desktop/MLB_Data/Clean_Team_Data'
SLATE_DIR = '/Users/aus10/Desktop/MLB_Data/Slate_Logs'
OUTPUT_DIR = '/Users/aus10/Desktop/MLB_Data/Game_Logs_With_Player_Data'

Game_logs_json_files = [name for name in os.listdir(TEAM_DIR) if name.endswith('.json')]
FPTS_json_files = [name for name in os.listdir(SLATE_DIR) if name.endswith('.json')]

# Read every player-log file exactly once and group its records by
# (date, team). Re-reading and re-scanning all of these files for each team
# file is unnecessary, because the player data does not depend on the team
# file being processed.
players_by_game = {}
for file_1 in FPTS_json_files:
    with open(os.path.join(SLATE_DIR, file_1)) as json_file:
        fantasy_data = json.load(json_file)
    for player in fantasy_data:
        # 'Day' looks like '2019-04-01T00:00:00'; keep only the text before
        # 'T' so it compares equal to a game's 'Date'.
        key = (player['Day'].split('T')[0], player['Team'])
        players_by_game.setdefault(key, []).append(player)

for file in Game_logs_json_files:
    # Each team file is loaded once, so the players added below are all
    # present when the file is written out.
    with open(os.path.join(TEAM_DIR, file)) as json_file:
        team_data = json.load(json_file)

    for obj in team_data['Games']:
        # Games with no matching player records keep their existing list.
        matches = players_by_game.get((obj['Date'], team_data['Team']), [])
        obj['players'].extend(matches)

    with open(os.path.join(OUTPUT_DIR, file), 'w') as my_file:
        json.dump(team_data, my_file)
试试这个:

import json
import pandas as pd
import os

# Input and output directories, named once so the same path is used both for
# listing and for opening files.
TEAM_DIR = '/Users/aus10/Desktop/MLB_Data/Clean_Team_Data'
SLATE_DIR = '/Users/aus10/Desktop/MLB_Data/Slate_Logs'
OUTPUT_DIR = '/Users/aus10/Desktop/MLB_Data/Game_Logs_With_Player_Data'

Game_logs_json_files = [name for name in os.listdir(TEAM_DIR) if name.endswith('.json')]
FPTS_json_files = [name for name in os.listdir(SLATE_DIR) if name.endswith('.json')]

# Read every player-log file exactly once and group its records by
# (date, team). Re-reading and re-scanning all of these files for each team
# file is unnecessary, because the player data does not depend on the team
# file being processed.
players_by_game = {}
for file_1 in FPTS_json_files:
    with open(os.path.join(SLATE_DIR, file_1)) as json_file:
        fantasy_data = json.load(json_file)
    for player in fantasy_data:
        # 'Day' looks like '2019-04-01T00:00:00'; keep only the text before
        # 'T' so it compares equal to a game's 'Date'.
        key = (player['Day'].split('T')[0], player['Team'])
        players_by_game.setdefault(key, []).append(player)

for file in Game_logs_json_files:
    # Each team file is loaded once, so the players added below are all
    # present when the file is written out.
    with open(os.path.join(TEAM_DIR, file)) as json_file:
        team_data = json.load(json_file)

    for obj in team_data['Games']:
        # Games with no matching player records keep their existing list.
        matches = players_by_game.get((obj['Date'], team_data['Team']), [])
        obj['players'].extend(matches)

    with open(os.path.join(OUTPUT_DIR, file), 'w') as my_file:
        json.dump(team_data, my_file)

该死,我以为我试过了,但我想我没有。这成功了!