R 新结构变量的数据表解决方案_R_Dplyr_Data.table

R 新结构变量的数据表解决方案

R 新结构变量的数据表解决方案,r,dplyr,data.table,R,Dplyr,Data.table,我有dataframe“data”sans“WANT”，这是我希望使用data.table解决方案创建的这些规则是：如果分数=1，如果分数=2，想要=1，如果分数=3，想要=2，想要=3，如果放弃=1，想要=4 如果在t=2时得分，在t+1时得分=1，这是可以的，但是如果t=3时的分数和任何后续分数的分数小于3，则将其替换和3 这意味着一系列分数：1-2-1-3-1应该是：1-2-1-3-3 data=data.frame("student"=c(1,1,1,1,2,2,2,2,3,3,

我有一个数据框“data”，其中还没有“WANT”这一列；我希望用 data.table 的方案来创建这一列。

这些规则是：

如果 score=1，则 WANT=1；如果 score=2，则 WANT=2；如果 score=3，则 WANT=3；如果 drop=1，则 WANT=4。

如果某一时刻 t 的 score=2，而 t+1 时刻的 score=1，这是可以的，但是

如果某一时刻 t 的 score=3，则此后任何小于 3 的 score 都要替换为 3。

这意味着一系列分数：

1-2-1-3-1应该是：1-2-1-3-3

# Example input: one row per student per time point.
# `WANT` holds the expected result for each row.
data <- data.frame(
  student = c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4),
  score = c(1, 2, 1, 1, 2, 3, 2, NA, 3, NA, 1, 3, 2, 1),
  drop = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0),
  WANT = c(1, 2, 1, 1, 2, 3, 3, 4, 3, 4, 1, 3, 3, 3)
)

根据每个“student”的“score”中是否出现数值 3 来构造条件之后，我们可以使用

replace

    # Extended example: same rows as `data` plus student 5, whose score has a
    # missing value after a 3. `WANT` holds the expected result for each row.
    data2 <- data.frame(
      student = c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5),
      score = c(1, 2, 1, 1, 2, 3, 2, NA, 3, NA, 1, 3, 2, 1, 1, 3, NA, 2),
      drop = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0),
      WANT = c(1, 2, 1, 1, 2, 3, 3, 4, 3, 4, 1, 3, 3, 3, 1, 3, 3, 3)
    )

library(dplyr)
data %>% 
   group_by(student) %>%
   mutate(WANT2 = replace(if(3 %in% score) replace(score, 
     (match(3, score) +1):n(), 3) else score, is.na(score) & drop == 1, 4))
# A tibble: 14 x 5
# Groups:   student [4]
#   student score  drop  WANT WANT2
#     <dbl> <dbl> <dbl> <dbl> <dbl>
# 1       1     1     0     1     1
# 2       1     2     0     2     2
# 3       1     1     0     1     1
# 4       1     1     0     1     1
# 5       2     2     0     2     2
# 6       2     3     0     3     3
# 7       2     2     0     3     3
# 8       2    NA     1     4     4
# 9       3     3     0     3     3
#10       3    NA     1     4     4
#11       4     1     0     1     1
#12       4     3     0     3     3
#13       4     2     0     3     3
#14       4     1     0     3     3

使用

data.table 的一个方案

：

library(dplyr)

# Per student: from the first score of 3 onward every row becomes 3;
# rows whose score is missing because the student dropped become 4.
data %>%
  group_by(student) %>%
  mutate(
    WANT2 = replace(
      # The range starts at the 3 itself (overwriting 3 with 3 is harmless).
      # `(match(3, score) + 1):n()` counts backwards when the 3 is the last
      # row of a group (e.g. 4:3), which makes replace() lengthen the vector
      # and mutate() fail on the size mismatch.
      if (3 %in% score) replace(score, match(3, score):n(), 3) else score,
      # Applied last so a drop row overrides the carried-forward 3.
      is.na(score) & drop == 1,
      4
    )
  )
# A tibble: 14 x 5
# Groups:   student [4]
#   student score  drop  WANT WANT2
#     <dbl> <dbl> <dbl> <dbl> <dbl>
# 1       1     1     0     1     1
# 2       1     2     0     2     2
# 3       1     1     0     1     1
# 4       1     1     0     1     1
# 5       2     2     0     2     2
# 6       2     3     0     3     3
# 7       2     2     0     3     3
# 8       2    NA     1     4     4
# 9       3     3     0     3     3
#10       3    NA     1     4     4
#11       4     1     0     1     1
#12       4     3     0     3     3
#13       4     2     0     3     3
#14       4     1     0     3     3

library(data.table)
# 如果 score=1，则 WANT=1；如果 score=2，则 WANT=2；如果 score=3，则 WANT=3
setDT(data)[, w := score]
# 如果某一时刻的 score=3，则此后任何小于 3 的 score 都替换为 3。
data[data[, .I[cummax(score)==3L & score < 3L], student]$V1, w := 3L]
# data2 增加了学生“5”，其 score 含有 NA 值，需要用前面最近的非缺失值填充
data[, w := nafill(w, "locf")]
# 如果 drop=1，则 WANT=4
data[drop==1L, w := 4L]

这太完美了，非常感谢。不过还缺少一部分，如 data2 所示。我想要的是：如果 score 为“NA”，就用它前面的值来填充，如 data2 所示。你的

data2

与你的

data

有什么关系？data2 增加了学生“5”，其 score 含有 NA 值，我希望用前面最近的非缺失值来填充

library(data.table)

# Build `w` per student in a single grouped pass:
#   * start from the raw score (1 -> 1, 2 -> 2, 3 -> 3);
#   * from the first observed 3 onward, every row of that student is 3.
#     The flag uses cumsum() on an NA-safe test: cummax(score) becomes NA
#     after the first missing score, which left later low scores unchanged
#     (e.g. scores 1, 3, NA, 2 produced 1, 3, 3, 2 instead of 1, 3, 3, 3);
#   * any remaining NA takes the previous non-missing value. Doing this
#     inside `by = student` stops a leading NA from being filled with the
#     previous student's last value.
setDT(data)
data[, w := {
  reached_three <- cumsum(!is.na(score) & score == 3) > 0
  filled <- as.numeric(score)
  filled[reached_three] <- 3
  nafill(filled, type = "locf")
}, by = student]

# if drop = 1, WANT = 4 (applied last so it overrides the filled value)
data[drop == 1, w := 4]