在r中添加额外行和NA的完整连接_R_Dataframe_Join_Dplyr_Merge

在r中添加额外行和NA的完整连接

r dataframe join merge

在r中添加额外行和NA的完整连接,r,dataframe,join,dplyr,merge,R,Dataframe,Join,Dplyr,Merge,我尝试使用full_join连接两个数据帧，以下是我的数据子集： df1 <- structure(list(Team = structure(c(4L, 3L, 5L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L, 14L, 13L, 15L, 15L, 16L, 16L, 17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 22L, 23L, 23L, 24L, 24L,

我尝试使用full_join连接两个数据帧，以下是我的数据子集：

# Example data `df1`: a plain data.frame with 56 rows and three columns.
#   Team         - factor with 34 levels. Most labels end with a trailing
#                  space ("Bucks "); a few teams appear both with and without
#                  it ("76ers"/"76ers ", "Bucks"/"Bucks ", "Hornets"/"Hornets ",
#                  "Pistons"/"Pistons "); the levels also contain "Bull " and
#                  "Net " next to "Bulls " and "Nets ".
#   Injury.Count - integer vector.
#   HomevsAway   - factor with levels "0" and "1".
df1 <- structure(list(Team = structure(c(4L, 3L, 5L, 6L, 7L, 7L, 8L, 
8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L, 14L, 13L, 15L, 15L, 
16L, 16L, 17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 22L, 23L, 
23L, 24L, 24L, 25L, 25L, 28L, 28L, 29L, 29L, 30L, 30L, 31L, 31L, 
32L, 32L, 33L, 33L, 34L, 34L, 2L, 1L, 26L, 27L), .Label = c("76ers", 
"76ers ", "Bucks", "Bucks ", "Bull ", "Bulls ", "Cavaliers ", 
"Celtics ", "Clippers ", "Grizzlies ", "Hawks ", "Heat ", "Hornets", 
"Hornets ", "Jazz ", "Kings ", "Knicks ", "Lakers ", "Magic ", 
"Mavericks ", "Net ", "Nets ", "Nuggets ", "Pacers ", "Pelicans ", 
"Pistons", "Pistons ", "Raptors ", "Rockets ", "Spurs ", "Thunder ", 
"Timberwolves ", "Warriors ", "Wizards "), class = "factor"), 
    Injury.Count = c(3L, 3L, 1L, 1L, 1L, 2L, 0L, 2L, 1L, 1L, 
    0L, 2L, 1L, 0L, 5L, 4L, 3L, 2L, 3L, 0L, 3L, 3L, 4L, 6L, 5L, 
    0L, 2L, 2L, 1L, 2L, 0L, 1L, 3L, 4L, 2L, 6L, 2L, 1L, 1L, 1L, 
    3L, 3L, 4L, 5L, 1L, 6L, 4L, 2L, 0L, 2L, 2L, 1L, 5L, 6L, 1L, 
    1L), HomevsAway = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L), .Label = c("0", "1"), class = "factor")), row.names = c(NA, 
-56L), class = "data.frame")

# Example data `df2`: a grouped tibble (classes grouped_df / tbl_df / tbl /
# data.frame) with 56 rows, grouped by Team.
#   Team       - factor with 28 levels; labels carry no trailing spaces.
#   HomevsAway - factor with levels "0" and "1".
#   t_1, t_3   - numeric vectors.
# The `groups` attribute lists, for each of the 28 Team levels, the row
# indices that belong to that group (`.rows`).
# NOTE(review): one t_3 value (4761) is far larger than every other entry in
# that column - possibly a typo in the posted data; confirm with the source.
df2 <- structure(list(Team = structure(c(1L, 1L, 2L, 2L, 3L, 4L, 4L, 
5L, 6L, 7L, 8L, 9L, 9L, 10L, 10L, 11L, 12L, 12L, 13L, 13L, 14L, 
15L, 15L, 16L, 16L, 17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 21L, 
22L, 22L, 23L, 23L, 24L, 24L, 25L, 25L, 26L, 26L, 27L, 28L, 28L, 
3L, 5L, 6L, 7L, 8L, 11L, 14L, 17L, 27L), .Label = c("76ers", 
"Bucks", "Bulls", "Cavaliers", "Celtics", "Clippers", "Grizzlies", 
"Hawks", "Heat", "Hornets", "Jazz", "Kings", "Knicks", "Lakers", 
"Magic", "Mavericks", "Nets", "Nuggets", "Pacers", "Pelicans", 
"Pistons", "Raptors", "Rockets", "Spurs", "Thunder", "Timberwolves", 
"Warriors", "Wizards"), class = "factor"), HomevsAway = structure(c(1L, 
2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 
2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 
1L, 1L, 2L, 2L, 2L, 1L, 1L), .Label = c("0", "1"), class = "factor"), 
    t_1 = c(55.883, 140.1, 32.2, 37.967, 29.85, 24.317, 57.316, 
    17.967, 19.05, 36.95, 16.167, 95.317, 86.533, 21.334, 52.567, 
    40.75, 28.3, 68.15, 97.067, 102.233, 26.866, 71.033, 34.467, 
    24.233, 42.033, 22.433, 59.033, 41.516, 12.7, 107.996, 6.5, 
    32.783, 0, 23.217, 13.93, 0, 54.88, 23.617, 83.834, 106.794, 
    17.56, 27.76, 85.83, 0.017, 35.183, 22.467, 25.033, 0, 0, 
    0, 0, 0, 0, 0, 0, 0), t_3 = c(197.3164, 388.6827, 126.2663, 
    111.916, 61.95, 91.55, 167.067, 104.083, 71.067, 135.383, 
    45.633, 261.317, 267.399, 114.6997, 159.2, 152.034, 84.8337, 
    204.3003, 351.449, 376.317, 86.333, 213.9, 99.767, 65.1, 
    131.767, 73.317, 126.416, 129.066, 73.383, 347.0994, 4761, 
    113.367, 0, 89.933, 59.8, 0, 188.983, 124.384, 215.666, 289.9667, 
    92, 144.2497, 254.083, 32.0333, 122.1837, 102.533, 82.817, 
    0, 0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -56L), groups = structure(list(
    Team = structure(1:28, .Label = c("76ers", "Bucks", "Bulls", 
    "Cavaliers", "Celtics", "Clippers", "Grizzlies", "Hawks", 
    "Heat", "Hornets", "Jazz", "Kings", "Knicks", "Lakers", "Magic", 
    "Mavericks", "Nets", "Nuggets", "Pacers", "Pelicans", "Pistons", 
    "Raptors", "Rockets", "Spurs", "Thunder", "Timberwolves", 
    "Warriors", "Wizards"), class = "factor"), .rows = structure(list(
        1:2, 3:4, c(5L, 48L), 6:7, c(8L, 49L), c(9L, 50L), c(10L, 
        51L), c(11L, 52L), 12:13, 14:15, c(16L, 53L), 17:18, 
        19:20, c(21L, 54L), 22:23, 24:25, c(26L, 55L), 27:28, 
        29:30, 31:32, 33:34, 35:36, 37:38, 39:40, 41:42, 43:44, 
        c(45L, 56L), 46:47), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, 28L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

问题在于，在第一个 data.frame（df1）中，球队名称后面多了一个空格。也就是说，您得到的不是字符串 "Bucks"，而是字符串 "Bucks "（末尾带一个空格）。
这两个字符串并不相等，因此在连接时无法匹配。
以下是修复数据的方法：首先把 Team 转换为字符向量，并删除球队名称末尾多余的空格（尾随空格，而不是前导空格）。之后，完全连接（full join）就能按预期工作：
library(dplyr)

# Normalise the join key `Team` in both tables before joining.
#
# In df1, Team is a factor whose labels mostly end with a space ("Bucks "),
# so convert it to character and strip surrounding whitespace. trimws() only
# touches the ends of each string, whereas sub(" +", "", x) removes the first
# run of spaces wherever it occurs (it would also glue together a multi-word
# team name).
df1_new <- df1 %>%
  as_tibble() %>%
  mutate(Team = trimws(as.character(Team)))

# df2 is a grouped_df grouped by Team. dplyr does not allow mutate() to
# overwrite a grouping column, so drop the grouping before converting.
df2_new <- df2 %>%
  ungroup() %>%
  mutate(Team = trimws(as.character(Team)))

# Full join on both keys: every row of either table is kept, and columns
# coming from the table without a matching row are filled with NA.
df1_new %>% full_join(df2_new, by = c("Team", "HomevsAway"))

# A tibble: 58 x 5
   Team      Injury.Count HomevsAway   t_1   t_3
   <chr>            <int> <fct>      <dbl> <dbl>
 1 Bucks                3 0           32.2 126. 
 2 Bucks                3 1           38.0 112. 
 3 Bull                 1 0           NA    NA  
 4 Bulls                1 1            0     0  
 5 Cavaliers            1 0           24.3  91.6
 6 Cavaliers            2 1           57.3 167. 
 7 Celtics              0 0            0     0  
 8 Celtics              2 1           18.0 104. 
 9 Clippers             1 0            0     0  
10 Clippers             1 1           19.0  71.1

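df1_new <- df1 %>% as_tibble() %>%
  mutate(Team = sub(" +", "", as.character(Team)))
df2_new <- df2 %>%
  mutate(Team = as.character(Team))
df1_new %>% full_join(df2_new, by = c("Team", "HomevsAway"))
# A tibble: 58 x 5
   Team      Injury.Count HomevsAway   t_1   t_3
   <chr>            <int> <fct>      <dbl> <dbl>
 1 Bucks                3 0           32.2 126. 
 2 Bucks                3 1           38.0 112. 
 3 Bull                 1 0           NA    NA  
 4 Bulls                1 1            0     0  
 5 Cavaliers            1 0           24.3  91.6
 6 Cavaliers            2 1           57.3 167. 
 7 Celtics              0 0            0     0  
 8 Celtics              2 1           18.0 104. 
 9 Clippers             1 0            0     0  
10 Clippers             1 1           19.0  71.1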

请注意，结果中仍然有一些 NA。这是由数据中的拼写错误造成的：df1 中写的是 "Bull" 和 "Net"，而 df2 中是 "Bulls" 和 "Nets"，因此这些行无法匹配。
@Cettt 知道为什么我的实际数据会出现这种情况吗？
@Cettt 有没有一个函数可以把数据从 R 复制/粘贴到 Stack Overflow 中？
有：dput。使用 dput(mydata)，并把输出结果贴到问题中。
@Cettt 请告诉我这样是否可行，我已在上面更新了问题。
library(dplyr)

# Normalise the join key `Team` in both tables before joining.
#
# In df1, Team is a factor whose labels mostly end with a space ("Bucks "),
# so convert it to character and strip surrounding whitespace. trimws() only
# touches the ends of each string, whereas sub(" +", "", x) removes the first
# run of spaces wherever it occurs (it would also glue together a multi-word
# team name).
df1_new <- df1 %>%
  as_tibble() %>%
  mutate(Team = trimws(as.character(Team)))

# df2 is a grouped_df grouped by Team. dplyr does not allow mutate() to
# overwrite a grouping column, so drop the grouping before converting.
df2_new <- df2 %>%
  ungroup() %>%
  mutate(Team = trimws(as.character(Team)))

# Full join on both keys: every row of either table is kept, and columns
# coming from the table without a matching row are filled with NA.
df1_new %>% full_join(df2_new, by = c("Team", "HomevsAway"))

# A tibble: 58 x 5
   Team      Injury.Count HomevsAway   t_1   t_3
   <chr>            <int> <fct>      <dbl> <dbl>
 1 Bucks                3 0           32.2 126. 
 2 Bucks                3 1           38.0 112. 
 3 Bull                 1 0           NA    NA  
 4 Bulls                1 1            0     0  
 5 Cavaliers            1 0           24.3  91.6
 6 Cavaliers            2 1           57.3 167. 
 7 Celtics              0 0            0     0  
 8 Celtics              2 1           18.0 104. 
 9 Clippers             1 0            0     0  
10 Clippers             1 1           19.0  71.1