如何根据R中的特定参数组合行_R

如何根据R中的特定参数组合行

如何根据R中的特定参数组合行,r,R,我有一个数据框，看起来像这样： ID Smoker Asthma Age Sex COPD Event_Date 1 1 0 0 65 M 0 12-2009 2 1 0 1 65 M 0 21-2009 3 1 0 1 65 M 0 23-2009 4 2 1 0 67 M 0 19-2010 5 2 1

我有一个数据框，看起来像这样：

   ID Smoker Asthma  Age Sex COPD Event_Date
1   1      0      0  65   M    0   12-2009
2   1      0      1  65   M    0   21-2009
3   1      0      1  65   M    0   23-2009
4   2      1      0  67   M    0   19-2010
5   2      1      0  67   M    0   21-2010
6   2      1      1  67   M    1   01-2011
7   2      1      1  67   M    1   02-2011
8   3      2      1  77   F    0   09-2015
9   3      2      1  77   F    1   10-2015
10  3      2      1  77   F    1   10-2015

  ID Smoker   Asthma  Age Sex  COPD  Event_Date
    1   0      1      65   M   0      12-2009
    2   1      1      67   M   1      19-2010
    3   2      1      77   F   1      09-2015

# Example data from the question: 10 rows and 7 columns covering three IDs
# (3, 4 and 3 rows respectively). Sex and Event_Date are factors, and the
# row names are the character strings "1" to "10".
d <- data.frame(
  ID         = rep(1:3, times = c(3L, 4L, 3L)),
  Smoker     = rep(0:2, times = c(3L, 4L, 3L)),
  Asthma     = c(0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L),
  Age        = rep(c(65L, 67L, 77L), times = c(3L, 4L, 3L)),
  Sex        = factor(rep(c("M", "F"), times = c(7L, 3L)),
                      levels = c("F", "M")),
  COPD       = c(0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L),
  # No explicit levels: factor() sorts the unique labels, which puts
  # "01-2011" first and "23-2009" last.
  Event_Date = factor(c("12-2009", "21-2009", "23-2009", "19-2010",
                        "21-2010", "01-2011", "02-2011", "09-2015",
                        "10-2015", "10-2015")),
  row.names  = as.character(1:10)
)

# Keep the first row of every ID: match() returns the position of the first
# occurrence of each distinct ID.
d[match(unique(d$ID), d$ID), ]

#   ID Smoker Asthma Age Sex COPD Event_Date
# 1  1      0      0  65   M    0    12-2009
# 4  2      1      0  67   M    0    19-2010
# 8  3      2      1  77   F    0    09-2015

我想知道是否有可能合并我的行，以实现如下数据集：

   ID Smoker Asthma  Age Sex COPD Event_Date
1   1      0      0  65   M    0   12-2009
2   1      0      1  65   M    0   21-2009
3   1      0      1  65   M    0   23-2009
4   2      1      0  67   M    0   19-2010
5   2      1      0  67   M    0   21-2010
6   2      1      1  67   M    1   01-2011
7   2      1      1  67   M    1   02-2011
8   3      2      1  77   F    0   09-2015
9   3      2      1  77   F    1   10-2015
10  3      2      1  77   F    1   10-2015

  ID Smoker   Asthma  Age Sex  COPD  Event_Date
    1   0      1      65   M   0      12-2009
    2   1      1      67   M   1      19-2010
    3   2      1      77   F   1      09-2015

# Example data from the question: 10 rows and 7 columns covering three IDs
# (3, 4 and 3 rows respectively). Sex and Event_Date are factors, and the
# row names are the character strings "1" to "10".
d <- data.frame(
  ID         = rep(1:3, times = c(3L, 4L, 3L)),
  Smoker     = rep(0:2, times = c(3L, 4L, 3L)),
  Asthma     = c(0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L),
  Age        = rep(c(65L, 67L, 77L), times = c(3L, 4L, 3L)),
  Sex        = factor(rep(c("M", "F"), times = c(7L, 3L)),
                      levels = c("F", "M")),
  COPD       = c(0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L),
  # No explicit levels: factor() sorts the unique labels, which puts
  # "01-2011" first and "23-2009" last.
  Event_Date = factor(c("12-2009", "21-2009", "23-2009", "19-2010",
                        "21-2010", "01-2011", "02-2011", "09-2015",
                        "10-2015", "10-2015")),
  row.names  = as.character(1:10)
)

# Keep the first row of every ID: match() returns the position of the first
# occurrence of each distinct ID.
d[match(unique(d$ID), d$ID), ]

#   ID Smoker Asthma Age Sex COPD Event_Date
# 1  1      0      0  65   M    0    12-2009
# 4  2      1      0  67   M    0    19-2010
# 8  3      2      1  77   F    0    09-2015

我尝试过使用unique函数，但是这并没有提供我想要的输出，并且重复了多行的ID

这是我尝试过的代码的一个示例

# Attempt from the question: unique() only removes elements/rows that are
# duplicated in full, so it does not collapse rows that merely share an ID.
# NOTE(review): `Data` is not defined in this snippet; presumably it is the
# data frame shown in the question. Confirm.
Data2 <- unique(Data)

Data2

如果要获取每个ID的（第一）行，可以尝试以下方法：
   ID Smoker Asthma  Age Sex COPD Event_Date
1   1      0      0  65   M    0   12-2009
2   1      0      1  65   M    0   21-2009
3   1      0      1  65   M    0   23-2009
4   2      1      0  67   M    0   19-2010
5   2      1      0  67   M    0   21-2010
6   2      1      1  67   M    1   01-2011
7   2      1      1  67   M    1   02-2011
8   3      2      1  77   F    0   09-2015
9   3      2      1  77   F    1   10-2015
10  3      2      1  77   F    1   10-2015

  ID Smoker   Asthma  Age Sex  COPD  Event_Date
    1   0      1      65   M   0      12-2009
    2   1      1      67   M   1      19-2010
    3   2      1      77   F   1      09-2015

# Example data from the question: 10 rows and 7 columns covering three IDs
# (3, 4 and 3 rows respectively). Sex and Event_Date are factors, and the
# row names are the character strings "1" to "10".
d <- data.frame(
  ID         = rep(1:3, times = c(3L, 4L, 3L)),
  Smoker     = rep(0:2, times = c(3L, 4L, 3L)),
  Asthma     = c(0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L),
  Age        = rep(c(65L, 67L, 77L), times = c(3L, 4L, 3L)),
  Sex        = factor(rep(c("M", "F"), times = c(7L, 3L)),
                      levels = c("F", "M")),
  COPD       = c(0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L),
  # No explicit levels: factor() sorts the unique labels, which puts
  # "01-2011" first and "23-2009" last.
  Event_Date = factor(c("12-2009", "21-2009", "23-2009", "19-2010",
                        "21-2010", "01-2011", "02-2011", "09-2015",
                        "10-2015", "10-2015")),
  row.names  = as.character(1:10)
)

# Keep the first row of every ID: match() returns the position of the first
# occurrence of each distinct ID.
d[match(unique(d$ID), d$ID), ]

#   ID Smoker Asthma Age Sex COPD Event_Date
# 1  1      0      0  65   M    0    12-2009
# 4  2      1      0  67   M    0    19-2010
# 8  3      2      1  77   F    0    09-2015

d

如果某一列需要取该ID后续行中出现的值（例如只要出现过1就记为1），请使用max；对于其余各列，请使用dplyr::first。下面是一个示例：
library(dplyr)

# Collapse the example data frame `d` to one row per ID.
#   * first(): the value from the ID's first row (Smoker, Age, Sex and the
#     first Event_Date).
#   * max(): 1 as soon as any row of the ID has the flag set (Asthma, COPD);
#     na.rm = TRUE ignores missing flags.
# The pipeline starts from `d`; a bare `df` would resolve to the stats::df()
# function rather than to the data.
d %>%
  group_by(ID) %>%
  summarise(
    Smoker     = first(Smoker),
    Asthma     = max(Asthma, na.rm = TRUE),
    Age        = first(Age),
    Sex        = first(Sex),
    COPD       = max(COPD, na.rm = TRUE),
    Event_Date = first(Event_Date)
  )

替代解决方案：
library(dplyr)

# One row per patient. Columns that are constant within an ID (Age, Sex,
# Smoker) are used as grouping keys so they are carried into the result
# without having to be summarised.
d %>%
  group_by(ID, Age, Sex, Smoker) %>%
  summarise(
    # 1 if the flag is set on any row of the group, 0 otherwise; `%in%`
    # never returns NA, so missing values cannot leak into the result.
    Asthma     = as.numeric(1 %in% Asthma),
    COPD       = as.numeric(1 %in% COPD),
    Event_Date = first(Event_Date)
  ) %>%
  # summarise() only peels off the last grouping level; drop the rest.
  ungroup()


# A tibble: 3 x 7
     ID   Age Sex   Smoker Asthma  COPD Event_Date
  <int> <int> <fct>  <int>  <dbl> <dbl> <fct>     
1     1    65 M          0      1     0 12-2009   
2     2    67 M          1      1     1 19-2010   
3     3    77 F          2      1     1 09-2015   


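library(dplyr)

d %>%
  group_by(ID, Age, Sex, Smoker) %>%
  summarise(Asthma = !is.na(match(1, Asthma)),
            COPD = !is.na(match(1, COPD)),
            Event_Date = first(Event_Date)) %>%
  ungroup %>%
  mutate_if(is.logical, as.numeric)

# A tibble: 3 x 7
     ID   Age Sex   Smoker Asthma  COPD Event_Date
  <int> <int> <fct>  <int>  <dbl> <dbl> <fct>
1     1    65 M          0      1     0 12-2009
2     2    67 M          1      1     1 19-2010
3     3    77 F          2      1     1 09-2015
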
我不清楚你想要什么。您肯定不是想要唯一的条目，因为数据集中的每个条目本来就是唯一的。您是想要每个ID的第一个条目吗？为什么在您想要的数据集中Smoker总是0？为什么Asthma是0、1、2？Dead又是从哪里来的？

@Julian_Hn 我不想只取第一行，因为我想把同一ID后续各行中的状态列也合并到这一行里。

@Docendiscimus 我已经编辑了我的帖子，现在应该更容易理解了。很抱歉造成混淆。

当我在一个更大的数据框（包含与上面类似的附加列）上尝试此操作时，返回了错误“Column can't be modified because it's a grouping variable”（该列是分组变量，因此无法修改）。有什么方法可以解决这个问题吗？

group_by()正是解决办法。我把每个ID内取值都相同的所有列都放进group_by()里，这样就不必在summarise()中处理它们，而它们仍然会出现在结果中。如果要在summarise()中修改某一列，就不要把该列放进group_by()语句中。