在R中,如何根据相邻列中包含要匹配的值的间隔在一列中查找值?
标签:r, range, lookup, intervals, matching
在R中,我有一个包含三列的引用表(data frame)。以下是一个例子:
# Reference table: each row pairs a score interval, given by its X_lower and
# X_upper bounds, with the Percentile value assigned to that interval.
reftable <- local({
  lower_bounds <- c(0, 101, 181, 231, 280, 300, 340, 390, 500)
  upper_bounds <- c(100, 180, 230, 279, 299, 339, 389, 499, 600)
  percentiles <- c(2, 3, 4, 6, 8, 11, 15, 20, 25)
  data.frame(
    X_lower = lower_bounds,
    X_upper = upper_bounds,
    Percentile = percentiles
  )
})
# X_lower X_upper Percentile
# 0 100 2
# 101 180 3
# 181 230 4
# etc.
我尝试使用 match 或 findInterval,但找不到解决方案。我已经搜索了现有的问题;如果以前有人问过这个问题,那我一定是没有找到正确的搜索词。

您可以尝试:
# Look up the percentile for every score in scores$X.
# reftable is sorted by interval, so the first row whose upper bound is >= x
# is the interval that contains x. Using `>=` rather than `>` keeps a score
# that equals an upper bound (e.g. 100) in its own interval instead of
# pushing it into the next one. A score above the largest upper bound (or an
# NA score) yields NA. vapply() guarantees a numeric vector even when scores
# has zero rows.
scores$PercRank <- vapply(scores$X, function(x) {
  i <- which(reftable$X_upper >= x)[1]
  reftable$Percentile[i]
}, numeric(1))
> scores
X PercRank
1 58 2
2 127 3
3 175 3
4 245 6
5 300 11
6 90 2
7 405 20
8 284 8
9 330 11
由于 reftable 是有序的,因此只需检查大于 X 的第一个上限值即可。
1)sqldf:可以使用 SQL 左联接:
# Load sqldf so the SQL statement below can be run against the data frames.
library(sqldf)
# Remove the PercRank column from scores, if one is present, before joining.
scores$PercRank <- NULL
# Left-join every score to the reftable row whose X_lower..X_upper range
# contains it (SQL BETWEEN includes both bounds) and return that row's
# Percentile under the name PercRank. The result is returned, not assigned.
sqldf("select s.X, r.Percentile as PercRank
from scores as s
left join reftable as r on s.X between r.X_lower and r.X_upper")
2)findInterval:基于 base R 的另一种方案是使用 findInterval:
# For each score, findInterval(X, X_lower) gives the index of the last X_lower
# that is <= the score; that index picks the matching Percentile, which
# transform() returns as a new percRank column (scores itself is not modified).
# NOTE(review): only the lower bounds are consulted, so a score below the first
# X_lower or above the last X_upper is not flagged as out of range — confirm
# that inputs always fall inside the table.
transform(scores, percRank = with(reftable, Percentile[ findInterval(X, X_lower) ]))
1)另一种选择是使用 data.table 包中的 foverlaps:
library(data.table)
# Overlap-join scores1 (each score stored as an interval with identical lower
# and upper bounds, plus its original row number rn) against reftable, put the
# rows back into the original order via rn, and copy the matched Percentile
# onto scores. Both tables are expected to be keyed on X_lower/X_upper first.
scores$PercRank <- foverlaps(scores1, reftable)[order(rn)]$Percentile
# rn was only needed to restore the row order; drop the helper column.
scores$rn <- NULL
# Print the result.
scores
# X PercRank
#1 58 2
#2 127 3
#3 175 3
#4 245 6
#5 300 11
#6 90 2
#7 405 20
#8 284 8
#9 330 11
数据
scores 可以尝试:foverlaps(setDT(scores1), setDT(reftable), type = "within")$Percentile  # [1]28231162011
# Base R variant: index Percentile by the position of the last X_lower that is
# <= each score and return the result as a new percRank column.
transform(scores, percRank = with(reftable, Percentile[ findInterval(X, X_lower) ]))
# data.table variant: overlap-join the keyed scores1 table against reftable,
# restore the original row order through rn, and store the matched Percentile.
library(data.table)
scores$PercRank <- foverlaps(scores1, reftable)[order(rn)]$Percentile
# Drop the helper row-number column once the order has been restored.
scores$rn <- NULL
scores
# X PercRank
#1 58 2
#2 127 3
#3 175 3
#4 245 6
#5 300 11
#6 90 2
#7 405 20
#8 284 8
#9 330 11
# Non-equi update join: convert scores to a data.table by reference, match each
# score to the reftable row satisfying X_lower <= X <= X_upper, and add that
# row's Percentile to scores as PercRank in place.
setDT(scores)[reftable, PercRank := Percentile, on = .(X >= X_lower, X <= X_upper)]
# Print the updated table.
scores
# X PercRank
#1: 58 2
#2: 127 3
#3: 175 3
#4: 245 6
#5: 300 11
#6: 90 2
#7: 405 20
#8: 284 8
#9: 330 11
library(fuzzyjoin)
library(dplyr)
# Join each score to the reftable row for which X >= X_lower and X <= X_upper,
# then keep only the score and its matched Percentile.
select(
  fuzzy_left_join(
    scores,
    reftable,
    by = c("X" = "X_lower", "X" = "X_upper"),
    match_fun = list(`>=`, `<=`)
  ),
  X,
  Percentile
)
# X Percentile
#1 58 2
#2 127 3
#3 175 3
#4 245 6
#5 300 11
#6 90 2
#7 405 20
#8 284 8
#9 330 11
# Example data for the lookups: the scores to be ranked.
scores <- data.frame(
  X = c(58, 127, 175, 245, 300, 90, 405, 284, 330)
)
# Remember the original row order so it can be restored after a keyed join.
scores$rn <- seq_len(nrow(scores))
# foverlaps() works on intervals, so represent each score as an interval whose
# lower and upper bounds are both the score itself.
scores1 <- data.table(X_lower = scores$X, X_upper = scores$X, rn = scores$rn)
setkeyv(scores1, c("X_lower", "X_upper"))
# reftable is created with data.frame(); setkeyv() only accepts a data.table,
# so convert it by reference before setting the key.
setDT(reftable)
setkeyv(reftable, c("X_lower", "X_upper"))