r-检索不同的序列模式

r-检索不同的序列模式,r,distinct,sequence,R,Distinct,Sequence,我想检索序列的不同状态,去掉重复,但仍然保持顺序。让我举例说明 我的两个序列如下所示: library(reshape2) library(dplyr) dt %>% melt(id.vars = 'id') %>% arrange(id) id variable value 1 1 t1 Domestic 2 1 t2 Domestic 3 1 t3 Domes

我想检索序列的不同状态,去掉重复,但仍然保持顺序。让我举例说明

我的两个序列如下所示:

library(reshape2) 
library(dplyr)    

# Reshape the wide table to long format and order the rows by id.
dt %>%
  melt(id.vars = "id") %>%
  arrange(id)

   id variable          value
1   1       t1       Domestic
2   1       t2       Domestic
3   1       t3       Domestic
4   1       t4       Domestic
5   1       t5       Domestic
6   1       t6            Eat
7   1       t7            Eat
8   1       t8            Eat
9   1       t9            Eat
10  1      t10            Eat
11  1      t11       Domestic

12  2       t1 SocialContacts
13  2       t2         Travel
14  2       t3         Travel
15  2       t4       Domestic
16  2       t5         Travel
17  2       t6         Travel
18  2       t7       Domestic
19  2       t8       Domestic
20  2       t9       Domestic
21  2      t10         Travel
22  2      t11         Travel
我想要的是这样的输出:每个 id 内连续重复的状态只保留一条,并保持原有顺序(例如 id 1 得到 Domestic、Eat、Domestic)。

到目前为止,我只做到了这一点:

# Attempt so far: as the printed result below shows, this keeps only one row
# per (id, value) pair, so a state that comes back later in the same sequence
# (e.g. Domestic at t11 for id 1) is lost.
dt %>%
  melt(id.vars = "id") %>%
  group_by(id, value) %>%
  arrange(id) %>%
  distinct()

     id variable          value
   (int)   (fctr)          (chr)
1     1       t1       Domestic
2     1       t6            Eat
3     2       t4       Domestic
4     2       t1 SocialContacts
5     2       t2         Travel
有什么想法吗?

# Example data in wide format: one row per id, one character column per time
# step (t1 .. t11) holding the state observed at that step.
dt <- structure(
  list(
    t1 = c("Domestic", "SocialContacts"),
    t2 = c("Domestic", "Travel"),
    t3 = c("Domestic", "Travel"),
    t4 = c("Domestic", "Domestic"),
    t5 = c("Domestic", "Travel"),
    t6 = c("Eat", "Travel"),
    t7 = c("Eat", "Domestic"),
    t8 = c("Eat", "Domestic"),
    t9 = c("Eat", "Domestic"),
    t10 = c("Eat", "Travel"),
    t11 = c("Domestic", "Travel"),
    id = 1:2
  ),
  row.names = 1:2,
  class = "data.frame"
)

使用 data.table 中的 rleid:

library(data.table)
library(dplyr)
# Collapse every run of consecutive identical states to a single row per id.
# rleid() numbers the runs, so all rows inside one (id, rleid) group carry the
# same value and unique() returns exactly one of them.
dt %>%
  melt(id.vars = "id") %>%
  arrange(id) %>%
  group_by(id, rleid = rleid(value)) %>%
  summarise(value = unique(value)) %>%
  ungroup() %>% # summarise() only peels off the last grouping level (rleid)
  select(-rleid)

#    id          value
# 1   1       Domestic
# 6   1            Eat
# 11  1       Domestic
# 12  2 SocialContacts
# 13  2         Travel
# 15  2       Domestic
# 16  2         Travel
# 18  2       Domestic
# 21  2         Travel
library(data.table)
# Long format ordered by id -> tag each row with (id, run number) -> keep one
# row per run -> drop the helper column. Note that setDT() converts dt to a
# data.table by reference.
unique(
  melt(setDT(dt), id.vars = "id")[order(id)][
    , .(value), by = .(id, rleid(value))
  ]
)[, -"rleid", with = FALSE] # FALSE spelled out: F is an ordinary, reassignable variable

#    id          value
# 1:  1       Domestic
# 2:  1            Eat
# 3:  1       Domestic
# 4:  2 SocialContacts
# 5:  2         Travel
# 6:  2       Domestic
# 7:  2         Travel
# 8:  2       Domestic
# 9:  2         Travel
使用 data.table 的类似方法:

library(data.table)
library(dplyr)
# Collapse every run of consecutive identical states to a single row per id.
# rleid() numbers the runs, so all rows inside one (id, rleid) group carry the
# same value and unique() returns exactly one of them.
dt %>%
  melt(id.vars = "id") %>%
  arrange(id) %>%
  group_by(id, rleid = rleid(value)) %>%
  summarise(value = unique(value)) %>%
  ungroup() %>% # summarise() only peels off the last grouping level (rleid)
  select(-rleid)

#    id          value
# 1   1       Domestic
# 6   1            Eat
# 11  1       Domestic
# 12  2 SocialContacts
# 13  2         Travel
# 15  2       Domestic
# 16  2         Travel
# 18  2       Domestic
# 21  2         Travel
library(data.table)
# Long format ordered by id -> tag each row with (id, run number) -> keep one
# row per run -> drop the helper column. Note that setDT() converts dt to a
# data.table by reference.
unique(
  melt(setDT(dt), id.vars = "id")[order(id)][
    , .(value), by = .(id, rleid(value))
  ]
)[, -"rleid", with = FALSE] # FALSE spelled out: F is an ordinary, reassignable variable

#    id          value
# 1:  1       Domestic
# 2:  1            Eat
# 3:  1       Domestic
# 4:  2 SocialContacts
# 5:  2         Travel
# 6:  2       Domestic
# 7:  2         Travel
# 8:  2       Domestic
# 9:  2         Travel

下面是一个 data.table 解决方案:

library(data.table)    
# Expects dt in long format with columns id, variable and value (see the
# sample data); setDT() converts it to a data.table by reference.
setDT(dt)
# Number the runs of consecutive identical states. Passing id as well starts a
# new run at every id boundary, so the last state of one id is never merged
# with an identical first state of the next id (which would silently drop
# that id's first row).
dt[, id2 := rleid(id, value)]
# Keep the first row of every run, then drop the helper column from the
# result and print it.
dt[dt[, .I[1L], by = "id2"]$V1, ][, id2 := NULL][]
输出结果如下:

   id variable          value
1:  1       t1       Domestic
2:  1       t6            Eat
3:  1      t11       Domestic
4:  2       t1 SocialContacts
5:  2       t2         Travel
6:  2       t4       Domestic
7:  2       t5         Travel
8:  2       t7       Domestic
9:  2      t10         Travel
数据

# Sample data in long format: one row per (id, time step). The header names
# three columns while every data row has four fields, so read.table() uses the
# leading field as row names. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
dt <- read.table(header = TRUE, text = "   id variable          value
1   1       t1       Domestic
2   1       t2       Domestic
3   1       t3       Domestic
4   1       t4       Domestic
5   1       t5       Domestic
6   1       t6            Eat
7   1       t7            Eat
8   1       t8            Eat
9   1       t9            Eat
10  1      t10            Eat
11  1      t11       Domestic
12  2       t1 SocialContacts
13  2       t2         Travel
14  2       t3         Travel
15  2       t4       Domestic
16  2       t5         Travel
17  2       t6         Travel
18  2       t7       Domestic
19  2       t8       Domestic
20  2       t9       Domestic
21  2      t10         Travel
22  2      t11         Travel")

一个 dplyr 方案,可作为 @Psidom 答案的替代:

# Sample data in long format: 2 ids x 11 time steps (t1 .. t11), with the
# observed state in `value`. The states are spelled as runs (state, run
# length) and the row names are the character strings "1" .. "22".
input <- structure(
  list(
    id = rep(1:2, each = 11L),
    variable = rep(paste0("t", 1:11), times = 2L),
    value = rep(
      c("Domestic", "Eat", "Domestic", "SocialContacts", "Travel",
        "Domestic", "Travel", "Domestic", "Travel"),
      times = c(5L, 5L, 1L, 1L, 2L, 1L, 2L, 3L, 2L)
    )
  ),
  class = "data.frame",
  row.names = as.character(1:22)
)
# Start a new run whenever the state changes or a new id begins, then keep the
# first row of every run. Comparing id as well prevents the last state of one
# id from being merged with an identical first state of the next id.
input %>%
  mutate(grp = cumsum(
    value != lag(value, default = value[1]) |
      id != lag(id, default = id[1])
  )) %>%
  group_by(grp) %>%
  slice(1)
# Source: local data frame [9 x 4]
# Groups: grp [9]
#
#      id variable          value   grp
#   <int>    <chr>          <chr> <int>
# 1     1       t1       Domestic     0
# 2     1       t6            Eat     1
# 3     1      t11       Domestic     2
# 4     2       t1 SocialContacts     3
# 5     2       t2         Travel     4
# 6     2       t4       Domestic     5
# 7     2       t5         Travel     6
# 8     2       t7       Domestic     7
# 9     2      t10         Travel     8

这些答案中是否有一个能满足您的需要?请勾选您接受的那个。谢谢