在R中按位置分配数值并对滑动窗口内的数值求和
在R中分配数字并汇总滑动窗口中的计数数,r,sliding-window,rollapply,R,Sliding Window,Rollapply,我有一个df,看起来像这样: df <- (c("P", "S", "E", "G", "R", "Q", "P", "S", "P", "S", "P", "S", "P", "T", "E", …
我有一个字符向量 df,看起来像这样:
# Input sequence: one upper-case letter per element (45 letters in total),
# written as three string segments and split into single characters.
df <- strsplit(
  paste0(
    "PSEGRQPSPSPSPTERAPA",
    "SEEEFQFLRCQQC",
    "QAEAKCPKLLPCL"
  ),
  split = ""
)[[1]]
还有一个矩阵 df1(行名为字母,列名为 1–5):
1 2 3 4 5
A 0.375 0.000 0.250 0.250 0.125
C 0.200 0.000 0.600 0.000 0.000
D 0.000 0.500 0.000 0.400 0.500
E 0.225 0.250 0.125 0.125 0.000
F 0.000 0.000 0.000 0.000 0.000
G 0.000 0.400 0.250 0.000 0.125
H 0.500 0.000 0.300 0.020 0.000
I 0.000 0.000 0.000 0.000 0.300
K 0.000 0.280 0.000 0.125 0.000
L 0.000 0.000 0.125 0.125 0.125
M 0.600 0.700 0.000 0.030 0.000
N 0.000 0.000 0.030 0.000 0.500
P 0.000 0.000 0.000 0.125 0.125
Q 0.400 0.165 0.125 0.000 0.250
R 0.030 0.000 0.125 0.500 0.125
S 0.350 0.450 0.400 0.000 0.125
T 0.000 0.000 0.000 0.125 0.000
V 0.625 0.125 0.400 0.525 0.100
W 0.400 0.300 0.000 0.000 0.000
Y 0.125 0.000 0.000 0.000 0.000
NIL NA NA NA NA NA
dput(df1)
# Reproducible definition of the score matrix `df1` (the values produced by
# dput(df1)), assigned so that snippets referring to `df1` can be run as-is.
# Rows are letters plus an all-NA "NIL" row; columns "1".."5" are the letter
# positions inside a window. Values are stored column by column.
# NOTE(review): the printed table above shows 0.525 for row V, column 4, while
# this dump has 0.125 -- confirm which value is intended.
df1 <- structure(
  c(
    # column "1"
    0.375, 0.2, 0, 0.225, 0, 0, 0.5, 0, 0, 0, 0.6, 0, 0, 0.4, 0.03, 0.35, 0,
    0.625, 0.4, 0.125, NA,
    # column "2"
    0, 0, 0.5, 0.25, 0, 0.4, 0, 0, 0.28, 0, 0.7, 0, 0, 0.165, 0, 0.45, 0,
    0.125, 0.3, 0, NA,
    # column "3"
    0.25, 0.6, 0, 0.125, 0, 0.25, 0.3, 0, 0, 0.125, 0, 0.03, 0, 0.125, 0.125,
    0.4, 0, 0.4, 0, 0, NA,
    # column "4"
    0.25, 0, 0.4, 0.125, 0, 0, 0.02, 0, 0.125, 0.125, 0.03, 0, 0.125, 0, 0.5,
    0, 0.125, 0.125, 0, 0, NA,
    # column "5"
    0.125, 0, 0.5, 0, 0, 0.125, 0, 0.3, 0, 0.125, 0, 0.5, 0.125, 0.25, 0.125,
    0.125, 0, 0.1, 0, 0, NA
  ),
  .Dim = c(21L, 5L),
  .Dimnames = list(
    c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M",
      "N", "P", "Q", "R", "S", "T", "V", "W", "Y", "NIL"),
    c("1", "2", "3", "4", "5")
  )
)
我想把 df1 中的数值分配给 df 中的字母。df1 的列号(共 5 列)表示字母在窗口内的位置。我想用宽度为 5 的滑动窗口遍历整个 df:在每个窗口内按位置从 df1 中取出对应的数值,然后对这些数值求和。
例如:
first 5 letters of `df`: PSEGR
assign numbers from `df1`: 0+0.45+0.125+0+0.125
sum of the first 5 numbers: 0.7
the next step:
letters from `df`: SEGRQ
assign numbers from `df1`: 0.35+0.25+0.25+0.5+0.25
sum: 1.6, etc.
我尝试了以下代码:
# Asker's attempt: calls rollapply() on df with an anonymous function that
# looks up df1[cbind(match(x, rownames(df1)), 1:ncol(df1))], together with
# k=5, align="left" and sum, and stores the result in sliding_window_df.
# NOTE(review): rollapply is not defined in this snippet -- presumably
# zoo::rollapply; confirm. In zoo the window size argument is `width` (`k` is
# the name used by rollmean()/rollsum()) and the function is passed as `FUN`,
# so the positional function and `sum` arguments here may not bind as
# intended -- verify against the zoo documentation.
sliding_window_df <- rollapply(df, function(x) df1[cbind(match(x, rownames(df1)), 1:ncol(df1))],k=5, align="left", sum)
您是否建议使用比 rollapply 更合适的其他函数?
回答:可以尝试用 sapply(或类型更安全的 vapply)逐个窗口计算:
# Sum position-specific scores over every complete sliding window of `df`.
#
# The window width equals the number of columns of `df1`: the letter at offset
# j inside a window is scored with column j of `df1`. Letters that have no row
# in `df1`, and NA scores, contribute nothing because of `na.rm = TRUE`.
width <- ncol(df1)
n <- seq_len(width)
# Start a window only where `width` letters remain. Running up to length(df)
# would index past the end of `df` and report sums of truncated windows.
starts <- seq_len(max(length(df) - width + 1L, 0L))
vapply(starts, function(x) {
  window <- df[x:(x + width - 1L)]
  # A two-column (row, column) index matrix picks one score per position.
  sum(df1[cbind(match(window, rownames(df1)), n)], na.rm = TRUE)
}, numeric(1))
# [1] 0.700 1.600 0.875 0.375 0.320 1.050 0.575 1.000 0.575 0.875
#[11] 0.575 0.600 0.750 0.750 0.725 0.405 0.625 0.525 1.075 0.850
#[21] 0.850 0.475 0.475 0.415 1.025 0.375 0.850 0.155 0.740 1.290
#[31] 0.775 0.865 0.775 1.000 0.350 1.380 0.250 0.450 0.655 0.250
#[41] 0.125
n
# Sum position-specific scores over every complete sliding window of `df`.
#
# The window width equals the number of columns of `df1`: the letter at offset
# j inside a window is scored with column j of `df1`. Letters that have no row
# in `df1`, and NA scores, contribute nothing because of `na.rm = TRUE`.
width <- ncol(df1)
n <- seq_len(width)
# Start a window only where `width` letters remain. Running up to length(df)
# would index past the end of `df` and report sums of truncated windows.
starts <- seq_len(max(length(df) - width + 1L, 0L))
vapply(starts, function(x) {
  window <- df[x:(x + width - 1L)]
  # A two-column (row, column) index matrix picks one score per position.
  sum(df1[cbind(match(window, rownames(df1)), n)], na.rm = TRUE)
}, numeric(1))
# [1] 0.700 1.600 0.875 0.375 0.320 1.050 0.575 1.000 0.575 0.875
#[11] 0.575 0.600 0.750 0.750 0.725 0.405 0.625 0.525 1.075 0.850
#[21] 0.850 0.475 0.475 0.415 1.025 0.375 0.850 0.155 0.740 1.290
#[31] 0.775 0.865 0.775 1.000 0.350 1.380 0.250 0.450 0.655 0.250
#[41] 0.125