在R中找出各组的75%并用中位数替换_R_Dplyr_Data.table_Plyr

在R中按组求75%分位数，并将超过该分位数的值替换为中位数

在R中找出各组的75%并用中位数替换,r,dplyr,data.table,plyr,R,Dplyr,Data.table,Plyr,这些问题与我自己的主题相似有了这样的区别，我就知道了但是，在这个话题上注：计算在一个行动类别之前按14个零进行，但对所有零行动类别进行中位数替换，并对每组代码+项目执行也就是说，现在我使用所有的零，而不是14，并且不涉及返回的负值和零值通过组变量（操作-0，1）对于0category，我想通过返回变量找到75个百分点，如果值大于75个百分点，则必须用0category替换中位数。因此，存在code变量。必须对代码单独执行此过程。注：负值和零值我不接触 mydat=structure(

这些问题与我自己的主题相似

有了这样的区别，我就知道了

但在本主题中请注意：原先的计算只使用某个 action 类别之前的 14 个零行，而中位数替换则针对所有 action 为零的行进行，并且要按每个 code+item 组分别执行

也就是说，现在我使用所有的零行（而不是只用 14 行），并且不改动 return 的负值和零值

按分组变量 action（取值 0、1）处理：对于 action 为 0 的类别，我想根据 return 变量求出 75% 分位数；如果某个值大于该 75% 分位数，就必须用 action 为 0 的类别的中位数替换它。此外还有 code 变量，必须对每个 code 分别执行此过程。注：负值和零值我不作改动。

# Example data: two codes (123 and 124), each with six action == 0 rows
# followed by six action == 1 rows; every run of six rows carries the same
# integer return values, including one negative and one zero.
mydat <- data.frame(
  code = rep(c(123L, 124L), each = 12L),
  action = rep(rep(0:1, each = 6L), times = 2L),
  return = rep(c(-1L, 0L, 23L, 100L, 18L, 15L), times = 4L)
)

如何得到下面的输出？此时 75% 分位数 = 42,25，中位数 = 20,5：

 add  action   return
123   0    -1
123   0    0
123   0    23
123   0    ***20,5
123   0    18
123   0    15
123   1  -1
123   1  0
123   1  23
123   1  100
123   1  18
123   1  15
124   0    -1
124   0    0
124   0    23
124   0    ***20,5
124   0    18
124   0    15
124   1  -1
124   1  0
124   1  23
124   1  100
124   1  18
124   1  15

使用 Uwe 给出的出色解决方案时，我得到了如下错误

Error in `[.data.table`(mydat[action == 0, `:=`(output, as.double(return))],  : 
  Column(s) [action] not found in i

怎样才能不改动负值和零值？另外，为什么会出现这个错误？

library(data.table)
# 标记操作期之前的零 action 行
setDT(mydat)[, zero_before := cummax(action), by = .(code)]
# 计算每个操作期之前最后 14 行的中位数和 90% 分位数
agg <- mydat[zero_before == 0,
             quantile(tail(return), c(0.5, 0.75)) %>%
               as.list() %>%
               set_names(c("med", "q90")) %>%
               c(.(zero_before = 0)), by = .(code)]
agg
# 追加输出列
mydat[action == 0, output := as.double(return)][
  # 在更新式非等值联接中替换大于 q90 的输出值
  agg, on = .(code, action, return > q90), output := as.double(med)][
    # 删除辅助列
    , zero_before := NULL]

如果我理解正确，OP 希望基于 return 大于 0 的所有零操作行（action == 0），在各组内计算 return 的中位数和 75% 分位数。然后，零操作行中任何超过相应组 75% 分位数的 return 值都将被该组的中位数替换。

由于不必再区分操作行之前和之后的零操作行，代码可以大大简化。

下面的代码再现了预期结果：
library(data.table)
library(magrittr)
# Per (code, action) group, summarise the zero-action rows whose return is
# strictly positive: `med` is the 0.5 quantile (median) and `q75` the 0.75
# quantile of return. Negative and zero returns are excluded by the filter.
setDT(mydat)
agg <- mydat[
  action == 0 & return > 0,
  set_names(as.list(quantile(return, c(0.5, 0.75))), c("med", "q75")),
  by = .(code, action)
]

# Start `output` as a double copy of return for every row.
mydat[, output := as.double(return)]
# Non-equi update join: rows that match a group in agg on code and action and
# whose return exceeds that group's q75 get the group's median as output.
mydat[agg, on = .(code, action, return > q75), output := as.double(med)]
mydat[]

library(data.table)
# Failing attempt quoted in the question; it raises
# "Column(s) [action] not found in i" at the update join further down.
# NOTE(review): `%>%` and `set_names()` come from magrittr, which this snippet
# does not load itself -- confirm it is already attached.
#
# Mark rows per code: with 0/1 action values, cummax(action) stays 0 until the
# first action == 1 row, so zero_before == 0 flags the zero-action rows that
# precede the action period.
setDT(mydat)[, zero_before := cummax(action), by = .(code)]
# Per code, compute the 0.5 and 0.75 quantiles of tail(return), i.e. of the
# last 6 flagged rows (tail()'s default n), negatives and zeros included.
# The second value is named "q90" although the probability used is 0.75.
# The result has columns code, med, q90 and zero_before; it has no `action`.
agg <- mydat[zero_before == 0, 
             quantile(tail(return), c(0.5, 0.75)) %>% 
               as.list()  %>% 
               set_names(c("med", "q90")) %>% 
               c(.(zero_before = 0)), by = .(code)]
agg


# Create `output` as a double copy of return on zero-action rows only.
mydat[action == 0, output := as.double(return)][
  # Update non-equi join meant to overwrite output with med where return > q90.
  # This is the step that fails: `on` names `action`, but agg was grouped by
  # code only and therefore carries no `action` column.
  agg, on = .(code,action, return > q90), output := as.double(med)][
    # Drop the helper column again.
    , zero_before := NULL]

library(data.table)
library(magrittr)
# Per (code, action) group, summarise the zero-action rows whose return is
# strictly positive: `med` is the 0.5 quantile (median) and `q75` the 0.75
# quantile of return. Negative and zero returns are excluded by the filter.
setDT(mydat)
agg <- mydat[
  action == 0 & return > 0,
  set_names(as.list(quantile(return, c(0.5, 0.75))), c("med", "q75")),
  by = .(code, action)
]

# Start `output` as a double copy of return for every row.
mydat[, output := as.double(return)]
# Non-equi update join: rows that match a group in agg on code and action and
# whose return exceeds that group's q75 get the group's median as output.
mydat[agg, on = .(code, action, return > q75), output := as.double(med)]
mydat[]

    code action return output
 1:  123      0     -1   -1.0
 2:  123      0      0    0.0
 3:  123      0     23   23.0
 4:  123      0    100   20.5
 5:  123      0     18   18.0
 6:  123      0     15   15.0
 7:  123      1     -1   -1.0
 8:  123      1      0    0.0
 9:  123      1     23   23.0
10:  123      1    100  100.0
11:  123      1     18   18.0
12:  123      1     15   15.0
13:  124      0     -1   -1.0
14:  124      0      0    0.0
15:  124      0     23   23.0
16:  124      0    100   20.5
17:  124      0     18   18.0
18:  124      0     15   15.0
19:  124      1     -1   -1.0
20:  124      1      0    0.0
21:  124      1     23   23.0
22:  124      1    100  100.0
23:  124      1     18   18.0
24:  124      1     15   15.0
    code action return output