Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/73.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
在r中添加额外行和NA的完整连接_R_Dataframe_Join_Dplyr_Merge - Fatal编程技术网

在r中添加额外行和NA的完整连接

在r中添加额外行和NA的完整连接,r,dataframe,join,dplyr,merge,R,Dataframe,Join,Dplyr,Merge,我尝试使用full_join连接两个数据帧,以下是我的数据子集: df1 <- structure(list(Team = structure(c(4L, 3L, 5L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L, 14L, 13L, 15L, 15L, 16L, 16L, 17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 22L, 23L, 23L, 24L, 24L,

我尝试使用full_join连接两个数据帧,以下是我的数据子集:

# df1: dput() reproduction of the first table, a plain 56-row data.frame with
#   Team         - factor of team names
#   Injury.Count - integer
#   HomevsAway   - factor with levels "0" / "1"
# NOTE: most Team labels carry a trailing space ("Bucks ", "Bulls ", ...),
# while a few do not ("76ers", "Bucks", "Hornets", "Pistons"), so the same
# team can appear under two distinct labels.
# NOTE(review): the labels "Bull " and "Net " exist alongside "Bulls " and
# "Nets " -- these look like typos in the source data; confirm before joining.
df1 <- structure(list(Team = structure(c(4L, 3L, 5L, 6L, 7L, 7L, 8L, 
8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L, 14L, 13L, 15L, 15L, 
16L, 16L, 17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 22L, 23L, 
23L, 24L, 24L, 25L, 25L, 28L, 28L, 29L, 29L, 30L, 30L, 31L, 31L, 
32L, 32L, 33L, 33L, 34L, 34L, 2L, 1L, 26L, 27L), .Label = c("76ers", 
"76ers ", "Bucks", "Bucks ", "Bull ", "Bulls ", "Cavaliers ", 
"Celtics ", "Clippers ", "Grizzlies ", "Hawks ", "Heat ", "Hornets", 
"Hornets ", "Jazz ", "Kings ", "Knicks ", "Lakers ", "Magic ", 
"Mavericks ", "Net ", "Nets ", "Nuggets ", "Pacers ", "Pelicans ", 
"Pistons", "Pistons ", "Raptors ", "Rockets ", "Spurs ", "Thunder ", 
"Timberwolves ", "Warriors ", "Wizards "), class = "factor"), 
    Injury.Count = c(3L, 3L, 1L, 1L, 1L, 2L, 0L, 2L, 1L, 1L, 
    0L, 2L, 1L, 0L, 5L, 4L, 3L, 2L, 3L, 0L, 3L, 3L, 4L, 6L, 5L, 
    0L, 2L, 2L, 1L, 2L, 0L, 1L, 3L, 4L, 2L, 6L, 2L, 1L, 1L, 1L, 
    3L, 3L, 4L, 5L, 1L, 6L, 4L, 2L, 0L, 2L, 2L, 1L, 5L, 6L, 1L, 
    1L), HomevsAway = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L), .Label = c("0", "1"), class = "factor")), row.names = c(NA, 
-56L), class = "data.frame")

# df2: dput() reproduction of the second table, a 56-row grouped tibble
# (class "grouped_df") with
#   Team       - factor with 28 levels, none carrying a trailing space
#   HomevsAway - factor with levels "0" / "1"
#   t_1, t_3   - numeric
# The `groups` attribute records that the table is grouped by Team (one entry
# per team, listing the row indices that belong to it).
# NOTE(review): one t_3 value is 4761 while every other t_3 value is below
# 400 -- possibly a data-entry error; verify against the original data.
df2 <- structure(list(Team = structure(c(1L, 1L, 2L, 2L, 3L, 4L, 4L, 
5L, 6L, 7L, 8L, 9L, 9L, 10L, 10L, 11L, 12L, 12L, 13L, 13L, 14L, 
15L, 15L, 16L, 16L, 17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 21L, 
22L, 22L, 23L, 23L, 24L, 24L, 25L, 25L, 26L, 26L, 27L, 28L, 28L, 
3L, 5L, 6L, 7L, 8L, 11L, 14L, 17L, 27L), .Label = c("76ers", 
"Bucks", "Bulls", "Cavaliers", "Celtics", "Clippers", "Grizzlies", 
"Hawks", "Heat", "Hornets", "Jazz", "Kings", "Knicks", "Lakers", 
"Magic", "Mavericks", "Nets", "Nuggets", "Pacers", "Pelicans", 
"Pistons", "Raptors", "Rockets", "Spurs", "Thunder", "Timberwolves", 
"Warriors", "Wizards"), class = "factor"), HomevsAway = structure(c(1L, 
2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 
2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 
1L, 1L, 2L, 2L, 2L, 1L, 1L), .Label = c("0", "1"), class = "factor"), 
    t_1 = c(55.883, 140.1, 32.2, 37.967, 29.85, 24.317, 57.316, 
    17.967, 19.05, 36.95, 16.167, 95.317, 86.533, 21.334, 52.567, 
    40.75, 28.3, 68.15, 97.067, 102.233, 26.866, 71.033, 34.467, 
    24.233, 42.033, 22.433, 59.033, 41.516, 12.7, 107.996, 6.5, 
    32.783, 0, 23.217, 13.93, 0, 54.88, 23.617, 83.834, 106.794, 
    17.56, 27.76, 85.83, 0.017, 35.183, 22.467, 25.033, 0, 0, 
    0, 0, 0, 0, 0, 0, 0), t_3 = c(197.3164, 388.6827, 126.2663, 
    111.916, 61.95, 91.55, 167.067, 104.083, 71.067, 135.383, 
    45.633, 261.317, 267.399, 114.6997, 159.2, 152.034, 84.8337, 
    204.3003, 351.449, 376.317, 86.333, 213.9, 99.767, 65.1, 
    131.767, 73.317, 126.416, 129.066, 73.383, 347.0994, 4761, 
    113.367, 0, 89.933, 59.8, 0, 188.983, 124.384, 215.666, 289.9667, 
    92, 144.2497, 254.083, 32.0333, 122.1837, 102.533, 82.817, 
    0, 0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -56L), groups = structure(list(
    Team = structure(1:28, .Label = c("76ers", "Bucks", "Bulls", 
    "Cavaliers", "Celtics", "Clippers", "Grizzlies", "Hawks", 
    "Heat", "Hornets", "Jazz", "Kings", "Knicks", "Lakers", "Magic", 
    "Mavericks", "Nets", "Nuggets", "Pacers", "Pelicans", "Pistons", 
    "Raptors", "Rockets", "Spurs", "Thunder", "Timberwolves", 
    "Warriors", "Wizards"), class = "factor"), .rows = structure(list(
        1:2, 3:4, c(5L, 48L), 6:7, c(8L, 49L), c(9L, 50L), c(10L, 
        51L), c(11L, 52L), 12:13, 14:15, c(16L, 53L), 17:18, 
        19:20, c(21L, 54L), 22:23, 24:25, c(26L, 55L), 27:28, 
        29:30, 31:32, 33:34, 35:36, 37:38, 39:40, 41:42, 43:44, 
        c(45L, 56L), 46:47), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, 28L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

问题在于,在第一个 data.frame(df1)中,球队名称后面带有一个空格。这意味着,您使用的不是字符串
"Bucks"
,而是字符串
"Bucks "
(末尾多了一个空格)。这两个字符串不相等,因此在连接时无法匹配。

以下是修复数据的方法。首先将 Team 转换为字符向量,并删除球队名称末尾的空格(例如使用 sub 或 trimws)。然后,完全连接即可按预期工作:

library(dplyr)

# Normalise the join key (Team) in both tables so that equal teams compare
# equal, then full-join on (Team, HomevsAway).

# df1: Team is a factor whose labels mostly carry a trailing space
# ("Bucks " instead of "Bucks"). trimws() strips only surrounding whitespace,
# so spaces inside a multi-word name are preserved; sub(" +", "", x) would
# instead delete the first run of spaces wherever it occurs in the string.
df1_new <- df1 %>%
  as_tibble() %>%
  mutate(Team = trimws(as.character(Team)))

# df2: the table is a grouped_df grouped by Team, and dplyr does not allow a
# grouping column to be modified inside mutate(), so drop the grouping first.
df2_new <- df2 %>%
  ungroup() %>%
  mutate(Team = as.character(Team))

# Keep every (Team, HomevsAway) pair from either table; pairs present in only
# one table get NA in the other table's columns.
df1_new %>% full_join(df2_new, by = c("Team", "HomevsAway"))

# A tibble: 58 x 5
   Team      Injury.Count HomevsAway   t_1   t_3
   <chr>            <int> <fct>      <dbl> <dbl>
 1 Bucks                3 0           32.2 126. 
 2 Bucks                3 1           38.0 112. 
 3 Bull                 1 0           NA    NA  
 4 Bulls                1 1            0     0  
 5 Cavaliers            1 0           24.3  91.6
 6 Cavaliers            2 1           57.3 167. 
 7 Celtics              0 0            0     0  
 8 Celtics              2 1           18.0 104. 
 9 Clippers             1 0            0     0  
10 Clippers             1 1           19.0  71.1
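df1_new <- df1 %>% as_tibble() %>%
  mutate(Team = sub(" +", "", as.character(Team)))
df2_new <- df2 %>%
  mutate(Team = as.character(Team))

df1_new %>% full_join(df2_new, by = c("Team", "HomevsAway"))

# A tibble: 58 x 5
   Team      Injury.Count HomevsAway   t_1   t_3
   <chr>            <int> <fct>      <dbl> <dbl>
 1 Bucks                3 0           32.2 126. 
 2 Bucks                3 1           38.0 112. 
 3 Bull                 1 0           NA    NA  
 4 Bulls                1 1            0     0  
 5 Cavaliers            1 0           24.3  91.6
 6 Cavaliers            2 1           57.3 167. 
 7 Celtics              0 0            0     0  
 8 Celtics              2 1           18.0 104. 
 9 Clippers             1 0            0     0  
10 Clippers             1 1           19.0  71.1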

请注意,结果中仍有一些 NA。这是由数据中的一些拼写错误造成的:"Bull" 与 "Bulls"、"Net" 与 "Nets"。

@Cettt 知道为什么我的实际数据会出现这种情况吗?
@Cettt 有没有一个函数可以把数据从 R 复制/粘贴到 Stack Overflow 中?
可以使用 dput:运行 dput(mydata) 并将结果发布到问题中。
@Cettt 请告诉我这样是否可行,问题已在上面更新。
library(dplyr)

# Normalise the join key (Team) in both tables so that equal teams compare
# equal, then full-join on (Team, HomevsAway).

# df1: Team is a factor whose labels mostly carry a trailing space
# ("Bucks " instead of "Bucks"). trimws() strips only surrounding whitespace,
# so spaces inside a multi-word name are preserved; sub(" +", "", x) would
# instead delete the first run of spaces wherever it occurs in the string.
df1_new <- df1 %>%
  as_tibble() %>%
  mutate(Team = trimws(as.character(Team)))

# df2: the table is a grouped_df grouped by Team, and dplyr does not allow a
# grouping column to be modified inside mutate(), so drop the grouping first.
df2_new <- df2 %>%
  ungroup() %>%
  mutate(Team = as.character(Team))

# Keep every (Team, HomevsAway) pair from either table; pairs present in only
# one table get NA in the other table's columns.
df1_new %>% full_join(df2_new, by = c("Team", "HomevsAway"))

# A tibble: 58 x 5
   Team      Injury.Count HomevsAway   t_1   t_3
   <chr>            <int> <fct>      <dbl> <dbl>
 1 Bucks                3 0           32.2 126. 
 2 Bucks                3 1           38.0 112. 
 3 Bull                 1 0           NA    NA  
 4 Bulls                1 1            0     0  
 5 Cavaliers            1 0           24.3  91.6
 6 Cavaliers            2 1           57.3 167. 
 7 Celtics              0 0            0     0  
 8 Celtics              2 1           18.0 104. 
 9 Clippers             1 0            0     0  
10 Clippers             1 1           19.0  71.1