Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/77.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 查找包含多个组的最后一行_R_Dplyr - Fatal编程技术网

R 查找包含多个组的最后一行

R 查找包含多个组的最后一行,r,dplyr,R,Dplyr,在上面的代码片段中,我想从其他列创建列valueid是一个对话,msgId是每个对话中的消息 | id | msgid | source | value | |----|-------|--------|-------| | 1 | 1 | B | 0 | | 1 | 2 | A | 1 | | 1 | 3 | B | 0 | | 2 | 1 | B | 0 | | 2 | 2

在上面的代码片段中,我想根据其他列创建 `value` 列。`id` 表示一个对话,`msgid` 表示每个对话中的消息。

| id | msgid | source | value |
|----|-------|--------|-------|
| 1  | 1     | B      | 0     |
| 1  | 2     | A      | 1     |
| 1  | 3     | B      | 0     |
| 2  | 1     | B      | 0     |
| 2  | 2     | A      | 0     |
| 2  | 3     | A      | 1     |
| 2  | 4     | B      | 0     |
我希望在每个对话中,标识出来自 `source = A` 的最后一条消息所在的行。

我试图解决它。然而,我只能识别对话中的最后一行

| id | msgid | source | value |
|----|-------|--------|-------|
| 1  | 1     | B      | 0     |
| 1  | 2     | A      | 1     |
| 1  | 3     | B      | 0     |
| 2  | 1     | B      | 0     |
| 2  | 2     | A      | 0     |
| 2  | 3     | A      | 1     |
| 2  | 4     | B      | 0     |
last_values <- dat %>% group_by(id) %>%
   slice(which.max(msgid)) %>%
   ungroup %>%
   mutate(value = cumsum(msgid))

dat$final_val <- 0
dat[last_values$value,5] <- 1

我想出了以下解决方案:

# Attempt from the question: flag the last message of each conversation.
# Keep, for every id, the single row with the largest msgid.
last_values <- dat %>% group_by(id) %>% 
   slice(which.max(msgid)) %>%
   ungroup %>%
   # Running total of the per-id maximum msgid, used below as row positions.
   # NOTE(review): this equals the row index of each conversation's last row
   # only if msgid counts 1..n within every id and dat is sorted by id, msgid
   # - confirm against the real data.
   mutate(value = cumsum(msgid))

# Start with every row unflagged.
dat$final_val <- 0    
# Set column 5 to 1 on the computed rows.
# NOTE(review): presumably column 5 is final_val (dat has four columns before
# it is added) - confirm.
dat[last_values$value,5] <- 1
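library(tidyverse)

# 首先,我们创建 dataframe,因为问题中没有提供它
df <- tibble(
  id = c(1, 1, 1, 2, 2, 2, 2),
  msgid = c(1, 2, 3, 1, 2, 3, 4),
  source = c("B", "A", "B", "B", "A", "A", "B")
)

df %>%
  # 按 id 和 source 进行分组
  group_by(id, source) %>%
  mutate(
    # 创建一个新列
    value = max(msgid) == msgid & source == "A",
    # 将新列转换为整数
    value = as.integer(value)
  )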
输出:

library(tidyverse)

# Rebuild the example data by hand, since the question did not include it.
df <- tibble(
  id     = c(1, 1, 1, 2, 2, 2, 2),
  msgid  = c(1, 2, 3, 1, 2, 3, 4),
  source = c("B", "A", "B", "B", "A", "A", "B")
)

# Within every (id, source) group, a row gets value 1 when it carries the
# group's largest msgid and its source is "A"; every other row gets 0.
# The result is left grouped by id and source.
df %>%
  group_by(id, source) %>%
  mutate(value = as.integer(msgid == max(msgid) & source == "A"))
#一个tible:7 x 4
#组:id,来源[4]
id msgid源值
1 B 0
2 1 2 A 1
3113B0
4 2 1 B 0
52A20
6 2 3 A 1
724B0

我们可以通过

# A tibble: 7 x 4
# Groups:   id, source [4]
     id msgid source value
  <dbl> <dbl>  <chr> <int>
1     1     1      B     0
2     1     2      A     1
3     1     3      B     0
4     2     1      B     0
5     2     2      A     0
6     2     3      A     1
7     2     4      B     0
dat %>%
  group_by(id) %>%
  mutate(value1 = as.integer(source == "A" & !duplicated(source == "A", fromLast = TRUE)))
#一个tibble:7x5
#组别:id[2]
#id msgid源值值1
#        
#111B00
#211211
#3113B00
#42B1B0
#52A2A0
#623a11
#724B0

另一个dplyr解决方案:

# Per conversation (id): scanning from the end, duplicated(..., fromLast = TRUE)
# is FALSE only for the last row holding each result of `source == "A"`.
# Combining that with `source == "A"` leaves a single 1 on the final "A" row.
dat %>%
  group_by(id) %>%
  mutate(
    value1 = as.integer(
      !duplicated(source == "A", fromLast = TRUE) & source == "A"
    )
  )
# A tibble: 7 x 5
# Groups: id [2]
#     id msgid source value value1
#  <int> <int> <chr>  <int>  <int>
#1     1     1 B          0      0
#2     1     2 A          1      1
#3     1     3 B          0      0
#4     2     1 B          0      0
#5     2     2 A          0      0
#6     2     3 A          1      1
#7     2     4 B          0      0
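library(dplyr)

# 创建数据
df <- data.frame(
  id = c(1, 1, 1, 2, 2, 2, 2),
  msgid = c(1, 2, 3, 1, 2, 3, 4),
  source = c("B", "A", "B", "B", "A", "A", "B")
)

df <- df %>%
  group_by(id, source) %>% # 按 id 和来源分组
  mutate(value = as.integer(ifelse((row_number() == n()) & source == "A", 1, 0))) # 如果是组的最后一次出现且来源为 "A",则写入 1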
>df
#一个tibble:7x4
#组:id,来源[4]
id msgid源值
1 B 0
2 1 2 A 1
3113B0
4 2 1 B 0
52A20
6 2 3 A 1
724B0

我使用索引查找 `A` 最后一次出现的位置,并检查该位置是否与行号一致;若一致,则将 `1` 赋给 `value`。

library(dplyr)

# Example data: two conversations (id) with their messages (msgid) and sender.
df <- data.frame(
  id     = c(1, 1, 1, 2, 2, 2, 2),
  msgid  = c(1, 2, 3, 1, 2, 3, 4),
  source = c("B", "A", "B", "B", "A", "A", "B")
)

# Group by id and source, then write 1 on the last row of a group (by row
# position within the group) when that group's source is "A", and 0 elsewhere.
# The result stays grouped by id and source.
grouped <- group_by(df, id, source)
df <- grouped %>%
  mutate(value = as.integer(row_number() == n() & source == "A"))

> df
# A tibble: 7 x 4
# Groups:   id, source [4]
     id msgid source value
  <dbl> <dbl> <fctr> <dbl>
1     1     1      B     0
2     1     2      A     1
3     1     3      B     0
4     2     1      B     0
5     2     2      A     0
6     2     3      A     1
7     2     4      B     0
库(dplyr)
多年筹资框架%
mutate(value=if_else(last)(grep(source,pattern=“A”))==row_number(),
1, 0)
id msgid源值
1.00 1.00 B 0
2 1.00 2.00 A 1.00
3 1.00 3.00 B 0
4 2.00 1.00 B 0
5 2.00 2.00 A 0
6 2.00 3.00 A 1.00
7.2.00 4.00 B 0