R 优化代码以根据data.table的条件更新组中的以下行

R 优化代码以根据data.table的条件更新组中的以下行,r,data.table,R,Data.table,以下是一些数据,以澄清所需的输入/输出 dt = structure(list(ticker = c("FTNT", "FTNT", "FTNT", "FTNT", "FTNT", "FTNT", "MDB", "MDB", "MDB", "MDB", "MDB", "

以下是一些数据,以澄清所需的输入/输出

# Sample input for the examples below: 18 rows, six for each of the three
# tickers "FTNT", "MDB" and "ZS".
#   ticker - ticker symbol (character)
#   date   - Date-classed numeric day counts (18631 ... 18638)
#   R      - numeric value that the snippets below conditionally zero out
#   R_acum - numeric value compared against the threshold
#            NOTE(review): presumably the accumulated R per ticker - confirm.
# The object is assembled dput-style with structure(): the classes
# "data.table" / "data.frame" are attached directly to a plain list rather
# than being created through data.table().
dt = structure(list(ticker = c("FTNT", "FTNT", "FTNT", "FTNT", "FTNT", "FTNT", "MDB", "MDB", "MDB", "MDB", "MDB", "MDB", "ZS", "ZS", "ZS", "ZS", "ZS", "ZS"), 
                    date = structure(c(18631, 18632, 18633, 18634, 18635, 18638, 18631, 18632, 18633, 18634, 18635, 18638, 18631, 18632, 18633, 18634, 18635, 18638), tzone = "UTC", tclass = "Date", class = "Date"), 
                    R = c(-0.0199959672793103, -0.0262434257521769, -0.025610299646878,  0.0372167038069893, 0.0341361186736473, 0.000202504389108293, -0.0240363323965936, 0.0146970518570011, -0.0569805661884384,  0.0488220267994761, 0.0232887982973971, 0.00444619037040206,  -0.0181263375550329, -0.0133102149688453, -0.0468265360104722,  0.0467953860405097, 0.022377580589833, 0.00673862306172723),
                    R_acum = c(-0.0199959672793103, -0.0457146303488496, -0.0701541646142473,  -0.0355483675725332, -0.00262573219299644, -0.00242375952618179, -0.0240363323965936, -0.00969254376327744, -0.066120823320278,  -0.0205269491289479, 0.00228380119052418, 0.00674014577578741, -0.0181263375550329, -0.0311952870744229, -0.0765610558513475, -0.0333483739750707, -0.0117170493114047, -0.00505738302838277
                    )), row.names = c(NA, -18L), class = c("data.table", "data.frame"))
数据样本:

目标:对于每个股票代码(ticker),一旦 `R_acum` 满足某个逻辑条件,我想把该行之后各行的 `R` 值设置为零。
在本例中,如果 `R_acum` 低于 -0.03,我希望把其后各行的 `R` 值设置为零;首次满足条件的那一行本身除外(它保留原来的 `R` 值)。

这是本例所需的输出:

这段代码生成所需的输出,但我确信有更好/更快的方法来实现它

# Cut-off for R_acum: once R_acum falls below this value inside a ticker,
# R is zeroed on every later row of that ticker.
threshold <- -0.03

# Handle each ticker separately. The result is a named list holding one
# data.table per ticker.
ls_dt <- lapply(split(dt, dt$ticker), function(d) {
  # Row positions (within this ticker) where R_acum is below the threshold.
  # which() ignores NA comparisons, so a missing R_acum no longer propagates
  # through ifelse()/min() and turns the whole R column into NA.
  breach_rows <- which(d$R_acum < threshold)
  if (length(breach_rows) > 0) {
    # which() returns ascending positions, so the first one is the minimum.
    first_breach <- breach_rows[1L]
    # Multiplier is 1 up to and including the first breach row, 0 afterwards;
    # no temporary helper column is needed.
    d[, R := R * (.I <= first_breach)]
  }
  d
})
ls_dt
# Cut-off for R_acum: once R_acum falls below this value inside a ticker,
# R is zeroed on every later row of that ticker.
threshold <- -0.03

# Handle each ticker separately. The result is a named list holding one
# data.table per ticker.
ls_dt <- lapply(split(dt, dt$ticker), function(d) {
  # Row positions (within this ticker) where R_acum is below the threshold.
  # which() ignores NA comparisons, so a missing R_acum cannot turn the
  # whole R column into NA.
  breach_rows <- which(d$R_acum < threshold)
  if (length(breach_rows) > 0) {
    # which() returns ascending positions, so the first one is the minimum.
    first_breach <- breach_rows[1L]
    # Multiplier is 1 up to and including the first breach row, 0 afterwards.
    d[, R := R * (.I <= first_breach)]
  }
  d
})
ls_dt
您可以使用

# Zero out R on every row that follows the first threshold breach of a ticker.
#
# `flag` is 1 on the rows before the first R_acum < -0.03 and 2 from that row
# onward. A run-length id (rleid) gives the same split only when the ticker
# starts above the threshold; if the very first row already breaches it, the
# breach run gets id 1, is skipped by `flag > 1`, and a later non-breach row
# is kept instead. The cumulative count does not have that blind spot.
dt[, flag := 1L + (cumsum(R_acum < -0.03) > 0L), by = ticker]
# Among the flagged rows of each ticker, keep R on the breach row itself
# (the first flagged row) and set R to zero on all rows after it.
dt[flag > 1L, R := c(R[1L], rep(0, .N - 1L)), by = ticker]
您可以使用

# Zero out R on every row that follows the first threshold breach of a ticker.
#
# `flag` is 1 on the rows before the first R_acum < -0.03 and 2 from that row
# onward. A run-length id (rleid) gives the same split only when the ticker
# starts above the threshold; if the very first row already breaches it, the
# breach run gets id 1, is skipped by `flag > 1`, and a later non-breach row
# is kept instead. The cumulative count does not have that blind spot.
dt[, flag := 1L + (cumsum(R_acum < -0.03) > 0L), by = ticker]
# Among the flagged rows of each ticker, keep R on the breach row itself
# (the first flagged row) and set R to zero on all rows after it.
dt[flag > 1L, R := c(R[1L], rep(0, .N - 1L)), by = ticker]
data.table 我不知道你对better是什么意思,但我建议

> threshold <- -0.03
> row.idxs <- df[, .I[cumsum(cumsum(R_acum < threshold)) > 1L], by = ticker]$V1
> df[row.idxs, R := 0]
> df
    ticker       date           R       R_acum
 1:   FTNT 2021-01-04 -0.01999597 -0.019995967
 2:   FTNT 2021-01-05 -0.02624343 -0.045714630
 3:   FTNT 2021-01-06  0.00000000 -0.070154165
 4:   FTNT 2021-01-07  0.00000000 -0.035548368
 5:   FTNT 2021-01-08  0.00000000 -0.002625732
 6:   FTNT 2021-01-11  0.00000000 -0.002423760
 7:    MDB 2021-01-04 -0.02403633 -0.024036332
 8:    MDB 2021-01-05  0.01469705 -0.009692544
 9:    MDB 2021-01-06 -0.05698057 -0.066120823
10:    MDB 2021-01-07  0.00000000 -0.020526949
11:    MDB 2021-01-08  0.00000000  0.002283801
12:    MDB 2021-01-11  0.00000000  0.006740146
13:     ZS 2021-01-04 -0.01812634 -0.018126338
14:     ZS 2021-01-05 -0.01331021 -0.031195287
15:     ZS 2021-01-06  0.00000000 -0.076561056
16:     ZS 2021-01-07  0.00000000 -0.033348374
17:     ZS 2021-01-08  0.00000000 -0.011717049
18:     ZS 2021-01-11  0.00000000 -0.005057383
> dput(df)
structure(list(ticker = c("FTNT", "FTNT", "FTNT", "FTNT", "FTNT", 
"FTNT", "MDB", "MDB", "MDB", "MDB", "MDB", "MDB", "ZS", "ZS", 
"ZS", "ZS", "ZS", "ZS"), date = structure(c(18631, 18632, 18633, 
18634, 18635, 18638, 18631, 18632, 18633, 18634, 18635, 18638, 
18631, 18632, 18633, 18634, 18635, 18638), tzone = "UTC", tclass = "Date", class = "Date"), 
R = c(-0.0199959672793103, -0.0262434257521769, 0, 0, 0, 
0, -0.0240363323965936, 0.0146970518570011, -0.0569805661884384, 
0, 0, 0, -0.0181263375550329, -0.0133102149688453, 0, 0, 
0, 0), R_acum = c(-0.0199959672793103, -0.0457146303488496, 
-0.0701541646142473, -0.0355483675725332, -0.00262573219299644, 
-0.00242375952618179, -0.0240363323965936, -0.00969254376327744, 
-0.066120823320278, -0.0205269491289479, 0.00228380119052418, 
0.00674014577578741, -0.0181263375550329, -0.0311952870744229, 
-0.0765610558513475, -0.0333483739750707, -0.0117170493114047, 
-0.00505738302838277)), row.names = c(NA, -18L), class = c("data.table", 
"data.frame"))
这样,您的问题与和相关

基尔 这是一个只使用
基本R
的版本

df data.table
我不知道你对better是什么意思,但我建议

> threshold <- -0.03
> row.idxs <- df[, .I[cumsum(cumsum(R_acum < threshold)) > 1L], by = ticker]$V1
> df[row.idxs, R := 0]
> df
    ticker       date           R       R_acum
 1:   FTNT 2021-01-04 -0.01999597 -0.019995967
 2:   FTNT 2021-01-05 -0.02624343 -0.045714630
 3:   FTNT 2021-01-06  0.00000000 -0.070154165
 4:   FTNT 2021-01-07  0.00000000 -0.035548368
 5:   FTNT 2021-01-08  0.00000000 -0.002625732
 6:   FTNT 2021-01-11  0.00000000 -0.002423760
 7:    MDB 2021-01-04 -0.02403633 -0.024036332
 8:    MDB 2021-01-05  0.01469705 -0.009692544
 9:    MDB 2021-01-06 -0.05698057 -0.066120823
10:    MDB 2021-01-07  0.00000000 -0.020526949
11:    MDB 2021-01-08  0.00000000  0.002283801
12:    MDB 2021-01-11  0.00000000  0.006740146
13:     ZS 2021-01-04 -0.01812634 -0.018126338
14:     ZS 2021-01-05 -0.01331021 -0.031195287
15:     ZS 2021-01-06  0.00000000 -0.076561056
16:     ZS 2021-01-07  0.00000000 -0.033348374
17:     ZS 2021-01-08  0.00000000 -0.011717049
18:     ZS 2021-01-11  0.00000000 -0.005057383
> dput(df)
structure(list(ticker = c("FTNT", "FTNT", "FTNT", "FTNT", "FTNT", 
"FTNT", "MDB", "MDB", "MDB", "MDB", "MDB", "MDB", "ZS", "ZS", 
"ZS", "ZS", "ZS", "ZS"), date = structure(c(18631, 18632, 18633, 
18634, 18635, 18638, 18631, 18632, 18633, 18634, 18635, 18638, 
18631, 18632, 18633, 18634, 18635, 18638), tzone = "UTC", tclass = "Date", class = "Date"), 
R = c(-0.0199959672793103, -0.0262434257521769, 0, 0, 0, 
0, -0.0240363323965936, 0.0146970518570011, -0.0569805661884384, 
0, 0, 0, -0.0181263375550329, -0.0133102149688453, 0, 0, 
0, 0), R_acum = c(-0.0199959672793103, -0.0457146303488496, 
-0.0701541646142473, -0.0355483675725332, -0.00262573219299644, 
-0.00242375952618179, -0.0240363323965936, -0.00969254376327744, 
-0.066120823320278, -0.0205269491289479, 0.00228380119052418, 
0.00674014577578741, -0.0181263375550329, -0.0311952870744229, 
-0.0765610558513475, -0.0333483739750707, -0.0117170493114047, 
-0.00505738302838277)), row.names = c(NA, -18L), class = c("data.table", 
"data.frame"))
这样,您的问题与和相关

基尔 这是一个只使用
基本R
的版本


dfA
数据。使用
shift
+
cumsum

# Per ticker: keep R up to and including the first row where
# R_acum < -0.03, and zero it on every later row.
dt[, R := {
  # TRUE on a row when the previous row of the same ticker breached the
  # threshold; the first row has no predecessor, hence fill = FALSE.
  breached_prev <- shift(R_acum < -0.03, fill = FALSE)
  # Running count of breaches seen strictly before each row.
  prior_breaches <- cumsum(breached_prev)
  # Multiply by TRUE (1) while no earlier breach exists, by FALSE (0) after.
  R * (prior_breaches == 0)
}, by = ticker]

使用
shift
+
cumsum

# Per ticker: keep R up to and including the first row where
# R_acum < -0.03, and zero it on every later row.
dt[, R := {
  # TRUE on a row when the previous row of the same ticker breached the
  # threshold; the first row has no predecessor, hence fill = FALSE.
  breached_prev <- shift(R_acum < -0.03, fill = FALSE)
  # Running count of breaches seen strictly before each row.
  prior_breaches <- cumsum(breached_prev)
  # Multiply by TRUE (1) while no earlier breach exists, by FALSE (0) after.
  R * (prior_breaches == 0)
}, by = ticker]

谢谢这是一个巧妙的把戏!谢谢这是一个巧妙的把戏!这