Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/79.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
基于多个列和行条件展开R dataframe_R_Dplyr_Tidyr - Fatal编程技术网

基于多个列和行条件展开R dataframe

基于多个列和行条件展开R dataframe,r,dplyr,tidyr,R,Dplyr,Tidyr,我在R studio中有以下数据帧 DF1<-data.frame('X_F'=c(1,2,3,4,5, NA, NA, NA, 1,2,3,4,5), "X_A"=c(.1,.2,.3,.4,.5, NA, NA, NA, .2,.3,.4, .5,.6),"Y_F"=c(2,3,5,NA, 7, 1,3, 4, 1,NA,3,4,5), "Y_A"=c(.2,.3,.4,NA, .7, .1,.2,.7,.1,NA, .3,.4,.5),'ID'=c("A", "A", "A", "

我在R studio中有以下数据帧

 # Example data frame from the question: numeric columns X_F, X_A, Y_F, Y_A
 # (each containing some NA gaps) plus an ID column with 5 "A", 3 "B", 5 "C" rows.
 DF1 <- data.frame(
   X_F = c(1, 2, 3, 4, 5, NA, NA, NA, 1, 2, 3, 4, 5),
   X_A = c(0.1, 0.2, 0.3, 0.4, 0.5, NA, NA, NA, 0.2, 0.3, 0.4, 0.5, 0.6),
   Y_F = c(2, 3, 5, NA, 7, 1, 3, 4, 1, NA, 3, 4, 5),
   Y_A = c(0.2, 0.3, 0.4, NA, 0.7, 0.1, 0.2, 0.7, 0.1, NA, 0.3, 0.4, 0.5),
   ID = rep(c("A", "B", "C"), times = c(5, 3, 5))
 )
我想通过扩展上面的数据帧来得到下面的数据帧。扩展后的数据帧将多出一个名为SF的列。SF的取值来自X_F、Y_F两列按ID分组后的取值范围,并在该范围内以步长1递增。

     ID  SF   X_F  X_A   Y_F  Y_A
 1   A    1    1    0.1   1   NA
 2   A    2    2    0.2   2   0.2
 3   A    3    3    0.3   3   0.3
 4   A    4    4    0.4   4   NA
 5   A    5    5    0.5   5   0.4
 6   A    6    6    NA    6   NA
 7   A    7    7    NA    7   0.7
 8   B    1    1    NA    1   0.1
 9   B    2    2    NA    2   NA
 10  B    3    3    NA    3   0.2
 11  B    4    4    NA    4   0.7
 12  C    1    1    0.2   1   0.1
 13  C    2    2    0.3   2   NA
 14  C    3    3    0.4   3   0.3
 15  C    4    4    0.5   4   0.4
 16  C    5    5    0.6   5   0.5
我尝试过这种方法以获得所需的结果

  # Asker's original attempt (kept as posted; it does NOT yield the desired result).
  library(dplyr)
  library(tidyr)
  DF1

    # Per ID, SF is the row-wise maximum of X_F and Y_F (NA ignored).
    # full_seq(SF, 1) only fills in values between the smallest and largest SF
    # observed in each group, so no row is created below the group minimum
    # (e.g. SF = 1 is missing for ID "A", whose smallest SF is 2).
    DF2<-DF1%>%group_by(ID)%>% mutate(SF=pmax(X_F, Y_F, na.rm = T))%>%
    complete(SF=(full_seq(SF ,1)))
library(dplyr)
library(tidyr)
DF1
DF2<-DF1%>%group_by(ID)%>% mutate(SF=pmax(X_F, Y_F, na.rm = T))%>%
complete(SF=(full_seq(SF ,1)))
与上面的预期输出相比,我得到了以下输出

   ID       SF   X_F   X_A   Y_F   Y_A
  <fct>   <dbl> <dbl> <dbl> <dbl> <dbl>
   A       2     1     0.1   2     0.2
   A       3     2     0.2   3     0.3
   A       4     4     0.4   NA     NA  
   A       5     3     0.3    5    0.4
   A       6    NA     NA    NA    NA  
   A       7     5     0.5   7     0.7
   B       1    NA     NA    1     0.1
   B       2    NA     NA    NA    NA  
   B       3    NA     NA    3     0.2
   B       4    NA     NA    4     0.7
   C       1     1     0.2   1     0.1
   C       2     2     0.3   NA    NA  
   C       3     3     0.4   3     0.3
   C       4     4     0.5   4     0.4
   C       5     5     0.6   5     0.5
ID SF X_F X_A Y_F Y_A
A 2 1 0.1 2 0.2
A 3 2 0.2 3 0.3
A 4 4 0.4 NA NA
A 5 3 0.3 5 0.4
A 6 NA NA NA NA
A 7 5 0.5 7 0.7
B 1 NA NA 1 0.1
B 2 NA NA NA NA
B 3 NA NA 3 0.2
B 4 NA NA 4 0.7
C 1 1 0.2 1 0.1
C 2 2 0.3 NA NA
C 3 3 0.4 3 0.3
C 4 4 0.5 4 0.4
C 5 5 0.6 5 0.5

我请求某人帮忙,我无法解决此问题。

在 complete 中获取 SF 的 max 值,并使用 seq 而不是 full_seq,因为:

# full_seq() only spans the observed range, so the result starts at the minimum (2):
full_seq(2:4, 1)
#[1] 2 3 4
# whereas seq() on the maximum counts up from 1:
seq(max(2:4))
#[1] 1 2 3 4
所以试着去做吧

library(dplyr)
library(tidyr)

# Per ID, SF is the row-wise maximum of X_F and Y_F (NA ignored); complete()
# then adds a row for every integer from 1 up to that group's largest SF,
# leaving the other columns NA on the newly created rows.
# `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned.
DF1 %>%
  group_by(ID) %>%
  mutate(SF = pmax(X_F, Y_F, na.rm = TRUE)) %>%
  complete(SF = seq(max(SF)))


#   ID       SF   X_F   X_A   Y_F   Y_A
#   <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 A         1    NA  NA      NA  NA  
# 2 A         2     1   0.1     2   0.2
# 3 A         3     2   0.2     3   0.3
# 4 A         4     4   0.4    NA  NA  
# 5 A         5     3   0.3     5   0.4
# 6 A         6    NA  NA      NA  NA  
# 7 A         7     5   0.5     7   0.7
# 8 B         1    NA  NA       1   0.1
# 9 B         2    NA  NA      NA  NA  
#10 B         3    NA  NA       3   0.2
#11 B         4    NA  NA       4   0.7
#12 C         1     1   0.2     1   0.1
#13 C         2     2   0.3    NA  NA  
#14 C         3     3   0.4     3   0.3
#15 C         4     4   0.5     4   0.4
#16 C         5     5   0.6     5   0.5

在 complete 中获取 SF 的 max 值,并使用 seq 而不是 full_seq,因为:

# full_seq() only spans the observed range, so the result starts at the minimum (2):
full_seq(2:4, 1)
#[1] 2 3 4
# whereas seq() on the maximum counts up from 1:
seq(max(2:4))
#[1] 1 2 3 4
所以试着去做吧

library(dplyr)
library(tidyr)

# Per ID, SF is the row-wise maximum of X_F and Y_F (NA ignored); complete()
# then adds a row for every integer from 1 up to that group's largest SF,
# leaving the other columns NA on the newly created rows.
# `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned.
DF1 %>%
  group_by(ID) %>%
  mutate(SF = pmax(X_F, Y_F, na.rm = TRUE)) %>%
  complete(SF = seq(max(SF)))


#   ID       SF   X_F   X_A   Y_F   Y_A
#   <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 A         1    NA  NA      NA  NA  
# 2 A         2     1   0.1     2   0.2
# 3 A         3     2   0.2     3   0.3
# 4 A         4     4   0.4    NA  NA  
# 5 A         5     3   0.3     5   0.4
# 6 A         6    NA  NA      NA  NA  
# 7 A         7     5   0.5     7   0.7
# 8 B         1    NA  NA       1   0.1
# 9 B         2    NA  NA      NA  NA  
#10 B         3    NA  NA       3   0.2
#11 B         4    NA  NA       4   0.7
#12 C         1     1   0.2     1   0.1
#13 C         2     2   0.3    NA  NA  
#14 C         3     3   0.4     3   0.3
#15 C         4     4   0.5     4   0.4
#16 C         5     5   0.6     5   0.5

非常感谢。seq(max(SF)) 与 full_seq 有何不同?请告诉我。
@Thilagaraghavan 已用解释更新了答案。
谢谢。seq(max(SF)) 与 full_seq 有何不同?请告诉我。
@Thilagaraghavan 已用解释更新了答案。