R：将数据帧中的前几行相加_R_Dplyr

R：将数据帧中的前几行相加

R：将数据帧中的前几行相加,r,dplyr,R,Dplyr,df 我需要创建一个新列，将患者分组的bi相加我的数据应该如下所示： patient.ID Index.admission. adm_date dish_date bi 1 124 FALSE 2/7/2009 2/8/2009 0 2 124 TRUE 3/5/2009 3/15/2009 1 3 124 FALSE 4/5/2011 4/7/2011

我需要创建一个新列，按患者（`patient.ID`）分组，对 `bi` 进行累加。

我的数据如下所示：

   patient.ID Index.admission.  adm_date dish_date bi
1         124            FALSE  2/7/2009  2/8/2009  0
2         124             TRUE  3/5/2009 3/15/2009  1
3         124            FALSE  4/5/2011  4/7/2011  0
4         124            FALSE 3/25/2012 3/27/2012  0
5         124             TRUE  5/5/2012 5/20/2012  1
6         124             TRUE  9/8/2013 9/15/2013  1
7         124            FALSE  1/5/2014 1/15/2014  0
8         233            FALSE  1/1/2010  1/8/2010  0
9         233            FALSE  1/1/2011  1/5/2011  0
10        233             TRUE  2/2/2011 2/25/2011  1
11        233            FALSE 1/25/2012 1/28/2012  0
12        542             TRUE  3/5/2015 3/15/2015  1
13       1243             TRUE  2/5/2009  2/8/2009  1
14       1243             TRUE  2/5/2011 2/19/2011  1

使用

dplyr

我有：

   patient.ID Index.admission.  adm_date dish_date bi  num_index_ad
1         124            FALSE  2/7/2009  2/8/2009  0  0
2         124             TRUE  3/5/2009 3/15/2009  1  1
3         124            FALSE  4/5/2011  4/7/2011  0  1
4         124            FALSE 3/25/2012 3/27/2012  0  1
5         124             TRUE  5/5/2012 5/20/2012  1  2
6         124             TRUE  9/8/2013 9/15/2013  1  3
7         124            FALSE  1/5/2014 1/15/2014  0  3
8         233            FALSE  1/1/2010  1/8/2010  0  0
9         233            FALSE  1/1/2011  1/5/2011  0  0
10        233             TRUE  2/2/2011 2/25/2011  1  1
11        233            FALSE 1/25/2012 1/28/2012  0  1
12        542             TRUE  3/5/2015 3/15/2015  1  1
13       1243             TRUE  2/5/2009  2/8/2009  1  1
14       1243             TRUE  2/5/2011 2/19/2011  1  2

试试这个：

> dput(df)
structure(list(patient.ID = c(124L, 124L, 124L, 124L, 124L, 124L, 
124L, 233L, 233L, 233L, 233L, 542L, 1243L, 1243L), Index.admission. = c(FALSE, 
TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, 
TRUE, TRUE, TRUE), adm_date = structure(c(8L, 10L, 12L, 9L, 13L, 
14L, 4L, 1L, 2L, 5L, 3L, 11L, 6L, 7L), .Label = c("1/1/2010", 
"1/1/2011", "1/25/2012", "1/5/2014", "2/2/2011", "2/5/2009", 
"2/5/2011", "2/7/2009", "3/25/2012", "3/5/2009", "3/5/2015", 
"4/5/2011", "5/5/2012", "9/8/2013"), class = "factor"), dish_date = structure(c(7L, 
8L, 11L, 10L, 12L, 13L, 1L, 4L, 3L, 6L, 2L, 9L, 7L, 5L), .Label = c("1/15/2014", 
"1/28/2012", "1/5/2011", "1/8/2010", "2/19/2011", "2/25/2011", 
"2/8/2009", "3/15/2009", "3/15/2015", "3/27/2012", "4/7/2011", 
"5/20/2012", "9/15/2013"), class = "factor"), bi = c(0, 1, 0, 
0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1)), .Names = c("patient.ID", "Index.admission.", 
"adm_date", "dish_date", "bi"), row.names = c(NA, -14L), class = "data.frame")

这给出了（我没有包括其他列）：

试试这个：

> dput(df)
structure(list(patient.ID = c(124L, 124L, 124L, 124L, 124L, 124L, 
124L, 233L, 233L, 233L, 233L, 542L, 1243L, 1243L), Index.admission. = c(FALSE, 
TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, 
TRUE, TRUE, TRUE), adm_date = structure(c(8L, 10L, 12L, 9L, 13L, 
14L, 4L, 1L, 2L, 5L, 3L, 11L, 6L, 7L), .Label = c("1/1/2010", 
"1/1/2011", "1/25/2012", "1/5/2014", "2/2/2011", "2/5/2009", 
"2/5/2011", "2/7/2009", "3/25/2012", "3/5/2009", "3/5/2015", 
"4/5/2011", "5/5/2012", "9/8/2013"), class = "factor"), dish_date = structure(c(7L, 
8L, 11L, 10L, 12L, 13L, 1L, 4L, 3L, 6L, 2L, 9L, 7L, 5L), .Label = c("1/15/2014", 
"1/28/2012", "1/5/2011", "1/8/2010", "2/19/2011", "2/25/2011", 
"2/8/2009", "3/15/2009", "3/15/2015", "3/27/2012", "4/7/2011", 
"5/20/2012", "9/15/2013"), class = "factor"), bi = c(0, 1, 0, 
0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1)), .Names = c("patient.ID", "Index.admission.", 
"adm_date", "dish_date", "bi"), row.names = c(NA, -14L), class = "data.frame")

这给出了（我没有包括其他列）：

我没有找到一个通用的重复问题（dupe），所以这里给出几种解决方案

> df
   patient.ID Index.admission bi cs
1         124           FALSE  0  0
2         124            TRUE  1  1
3         124           FALSE  0  1
4         124           FALSE  0  1
5         124            TRUE  1  2
6         124            TRUE  1  3
7         124           FALSE  0  3
8         233           FALSE  0  0
9         233           FALSE  0  0
10        233            TRUE  1  1
11        233           FALSE  0  1
12        542            TRUE  1  1
13       1243            TRUE  1  1
14       1243            TRUE  1  2

或

我没有找到一个通用的重复问题（dupe），所以这里给出几种解决方案

> df
   patient.ID Index.admission bi cs
1         124           FALSE  0  0
2         124            TRUE  1  1
3         124           FALSE  0  1
4         124           FALSE  0  1
5         124            TRUE  1  2
6         124            TRUE  1  3
7         124           FALSE  0  3
8         233           FALSE  0  0
9         233           FALSE  0  0
10        233            TRUE  1  1
11        233           FALSE  0  1
12        542            TRUE  1  1
13       1243            TRUE  1  1
14       1243            TRUE  1  2

或

（可能是重复问题。）试试 `with(df, ave(bi, patient.ID, FUN = cumsum))`；如果需要 dplyr 解决方案，可以使用 `df %>% group_by(patient.ID) %>% mutate(num_index_ad = cumsum(bi))`。或者参考 data.table 的解法。

@David，谢谢你的帮助。我仍然不确定为什么我的解决方案不起作用……但是谢谢你的帮助！

在对 `bi` 取子集时可能还有一些错误（似乎你把 `df$bi` 当作有两个维度），而且使用 `for (i in df)` 时，每次迭代中 `i` 取到的是 `df` 的每一列的值；请参见 `for (i in df) print(i)`。

（可能是重复问题。）试试 `with(df, ave(bi, patient.ID, FUN = cumsum))`；如果需要 dplyr 解决方案，可以使用 `df %>% group_by(patient.ID) %>% mutate(num_index_ad = cumsum(bi))`。或者参考 data.table 的解法。

@David，谢谢你的帮助。我仍然不确定为什么我的解决方案不起作用……但是谢谢你的帮助！

在对 `bi` 取子集时可能还有一些错误（似乎你把 `df$bi` 当作有两个维度），而且使用 `for (i in df)` 时，每次迭代中 `i` 取到的是 `df` 的每一列的值；请参见 `for (i in df) print(i)`。

可能有人认为我复制了你的答案/评论。两者相差 7 分钟，但这是我把 df 复制到 R 中并写出答案所花费的时间。我的另一个答案也发生过这种情况，当时相差不到一分钟。

这并不重要。我的评论中没有这个解决方案。无论如何 (+1)。

我认为这是一个很好的解决方案。

也许有人认为我抄袭了你的答案/评论。两者相差 7 分钟，但这是我把 df 复制到 R 中并写出答案所花费的时间。我的另一个答案也发生过这种情况，当时相差不到一分钟。

这并不重要。我的评论中没有这个解决方案。无论如何 (+1)。

我认为这是一个很好的解决办法。
# Base R: running (cumulative) total of `bi` within each `patient.ID`,
# stored in a new column `num_index_ad`. `ave()` applies `FUN` to each
# group and returns a vector aligned with the original rows.
df[["num_index_ad"]] <- ave(df[["bi"]], df[["patient.ID"]], FUN = cumsum)

library(dplyr)
# dplyr: running total of `bi` within each patient, added as `num_index_ad`.
# The result is printed, not assigned; `df` itself is left unchanged.
df %>%
  group_by(patient.ID) %>%
  mutate(num_index_ad = cumsum(bi)) %>%
  # Drop the grouping so later verbs applied to this result are not
  # silently evaluated per patient.
  ungroup()

library(data.table)
# data.table: convert `df` to a data.table by reference, then add
# `num_index_ad` in place as the per-patient running total of `bi`.
setDT(df)
df[, num_index_ad := cumsum(bi), by = "patient.ID"]