R 查找包含多个组的最后一行
在上面的代码片段中,我想从其他列创建列R 查找包含多个组的最后一行,r,dplyr,R,Dplyr,在上面的代码片段中,我想从其他列创建列valueid是一个对话,msgId是每个对话中的消息 | id | msgid | source | value | |----|-------|--------|-------| | 1 | 1 | B | 0 | | 1 | 2 | A | 1 | | 1 | 3 | B | 0 | | 2 | 1 | B | 0 | | 2 | 2
value
id
是一个对话,msgId
是每个对话中的消息
| id | msgid | source | value |
|----|-------|--------|-------|
| 1 | 1 | B | 0 |
| 1 | 2 | A | 1 |
| 1 | 3 | B | 0 |
| 2 | 1 | B | 0 |
| 2 | 2 | A | 0 |
| 2 | 3 | A | 1 |
| 2 | 4 | B | 0 |
我希望识别来自source=A
的最后一条消息的行号
我试图解决它。然而,我只能识别对话中的最后一行
| id | msgid | source | value |
|----|-------|--------|-------|
| 1 | 1 | B | 0 |
| 1 | 2 | A | 1 |
| 1 | 3 | B | 0 |
| 2 | 1 | B | 0 |
| 2 | 2 | A | 0 |
| 2 | 3 | A | 1 |
| 2 | 4 | B | 0 |
last_值%group_by(id)%%>%
切片(哪个.max(msgid))%>%
解组%>%
变异(值=cumsum(msgid))
dat$final_val我想出了以下解决方案
last_values <- dat %>% group_by(id) %>%
slice(which.max(msgid)) %>%
ungroup %>%
mutate(value = cumsum(msgid))
dat$final_val <- 0
dat[last_values$value,5] <- 1
库(tidyverse)
#首先,我们创建dataframe,因为问题中没有提供它
df%
#按id和源进行分组
分组依据(id,来源)%>%
变异(
#创建一个新列
value=max(msgid)==msgid&source==A,
#将新列转换为整数
value=as.integer(值)
)
输出:
library(tidyverse)
# first we create the dataframe as it wasn't supplied in the question
df <- tibble(
id = c(1, 1, 1, 2, 2, 2, 2),
msgid = c(1, 2, 3, 1, 2, 3, 4),
source = c("B", "A", "B", "B", "A", "A", "B")
)
df %>%
# group by both id and source
group_by(id, source) %>%
mutate(
# create a new column
value = max(msgid) == msgid & source == "A",
# convert the new column to integers
value = as.integer(value)
)
#一个tible:7 x 4
#组:id,来源[4]
id msgid源值
1 B 0
2 1 2 A 1
3113B0
4 2 1 B 0
52A20
6 2 3 A 1
724B0
我们可以通过
# A tibble: 7 x 4
# Groups: id, source [4]
id msgid source value
<dbl> <dbl> <chr> <int>
1 1 1 B 0
2 1 2 A 1
3 1 3 B 0
4 2 1 B 0
5 2 2 A 0
6 2 3 A 1
7 2 4 B 0
dat%>%
分组依据(id)%>%
mutate(value1=as.integer(source==“A”&!duplicated(source==“A”,fromLast=TRUE)))
#一个tibble:7x5
#组别:id[2]
#id msgid源值值1
#
#111B00
#211211
#3113B00
#42B1B0
#52A2A0
#623a11
#724B0
另一个dplyr解决方案:
dat %>%
group_by(id) %>%
mutate(value1 = as.integer(source == "A" & !duplicated(source == "A", fromLast = TRUE)))
# A tibble: 7 x 5
# Groups: id [2]
# id msgid source value value1
# <int> <int> <chr> <int> <int>
#1 1 1 B 0 0
#2 1 2 A 1 1
#3 1 3 B 0 0
#4 2 1 B 0 0
#5 2 2 A 0 0
#6 2 3 A 1 1
#7 2 4 B 0 0
库(dplyr)
#创建数据
df%#按id和来源分组
mutate(value=as.integer(ifelse((row_number()==n())&source==“A”,1,0)))#如果是组的最后一次出现且源为“A”,则写入1
>df
#一个tibble:7x4
#组:id,来源[4]
id msgid源值
1 B 0
2 1 2 A 1
3113B0
4 2 1 B 0
52A20
6 2 3 A 1
724B0
我使用索引标记查找A
的最终位置,并检查该数字是否与行号匹配,以便将1
分配给值
library(dplyr)
# create data
df <- data.frame(
id = c(1, 1, 1, 2, 2, 2, 2),
msgid = c(1, 2, 3, 1, 2, 3, 4),
source = c("B", "A", "B", "B", "A", "A", "B")
)
df <- df %>%
group_by(id, source) %>% # group by id and source
mutate(value = as.integer(ifelse((row_number() == n()) & source == "A", 1, 0))) # write 1 if it's the last occurence of a group and the source is "A"
> df
# A tibble: 7 x 4
# Groups: id, source [4]
id msgid source value
<dbl> <dbl> <fctr> <dbl>
1 1 1 B 0
2 1 2 A 1
3 1 3 B 0
4 2 1 B 0
5 2 2 A 0
6 2 3 A 1
7 2 4 B 0
库(dplyr)
多年筹资框架%
mutate(value=if_else(last)(grep(source,pattern=“A”))==row_number(),
1, 0)
id msgid源值
1.00 1.00 B 0
2 1.00 2.00 A 1.00
3 1.00 3.00 B 0
4 2.00 1.00 B 0
5 2.00 2.00 A 0
6 2.00 3.00 A 1.00
7.2.00 4.00 B 0