在R中,如何根据相邻两列给出的区间(待匹配的值落在该区间内)在另一列中查找对应的值?

在R中,如何根据相邻列中包含要匹配的值的间隔在一列中查找值?,r,range,lookup,intervals,matching,R,Range,Lookup,Intervals,Matching,在R中,我有一个包含三列的引用表(dataframe)。以下是一个例子: reftable <- data.frame( X_lower = c(0, 101, 181, 231, 280, 300, 340, 390, 500), X_upper = c(100, 180, 230, 279, 299, 339, 389, 499, 600), Percentile = c(2, 3, 4, 6, 8, 11, 15, 20, 25)) # X_lower X_

在R中,我有一个包含三列的参照表(data frame)。以下是一个例子:

# Reference table: one row per score interval, described by its X_lower and
# X_upper bounds, together with the Percentile assigned to that interval.
reftable <- local({
  lower_bounds <- c(0, 101, 181, 231, 280, 300, 340, 390, 500)
  upper_bounds <- c(100, 180, 230, 279, 299, 339, 389, 499, 600)
  percentiles <- c(2, 3, 4, 6, 8, 11, 15, 20, 25)
  data.frame(
    X_lower = lower_bounds,
    X_upper = upper_bounds,
    Percentile = percentiles
  )
})

# X_lower     X_upper     Percentile
# 0           100         2
# 101         180         3
# 181         230         4
# etc.
我尝试使用match或findInterval,但找不到解决方案。我已经搜索了现有的问题。如果以前有人问过这个问题,我肯定没有找到正确的搜索词。

您可以尝试:

# Look up the percentile of every score in `scores$X`: take the first row of
# `reftable` whose interval [X_lower, X_upper] contains the score.
# Both bounds are inclusive, so a score sitting exactly on an upper bound
# (e.g. 100) stays in its own interval instead of sliding into the next one.
# Scores that fall in no interval get NA.
# vapply() keeps the result numeric even when `scores` has zero rows.
scores$PercRank <- vapply(scores$X, function(x) {
  hit <- which(x >= reftable$X_lower & x <= reftable$X_upper)[1]
  reftable$Percentile[hit]
}, numeric(1))

> scores
    X PercRank
1  58        2
2 127        3
3 175        3
4 245        6
5 300       11
6  90        2
7 405       20
8 284        8
9 330       11
由于 reftable 是有序的,因此只需找到第一个大于 X 的上限值即可。

1)sqldf:可以使用 SQL 左联接来完成:

library(sqldf)

# Remove the PercRank column from `scores` (if present) so the query below
# produces it fresh.
scores$PercRank <- NULL

# Left join: keep every score and attach the Percentile of the reference row
# whose X_lower..X_upper range contains it (SQL BETWEEN includes both ends).
# Scores with no matching range keep a NULL/NA PercRank.
sqldf("select s.X, r.Percentile as PercRank
  from scores as s
  left join reftable as r on s.X between r.X_lower and r.X_upper")
2)findInterval:base R 中的替代方案是使用 findInterval:

# Base R lookup: for each score, findInterval() gives the index of the last
# X_lower that is <= the score (X_lower must be sorted increasingly).
# An index of 0 means the score lies below the first interval; used as a raw
# subscript it would silently drop that element and misalign the result with
# the rows of `scores`, so it is turned into NA. Scores above the matched
# interval's X_upper are likewise reported as NA instead of a wrong percentile.
transform(scores, percRank = with(reftable, {
  idx <- findInterval(X, X_lower)
  idx[idx %in% 0L] <- NA_integer_
  ifelse(X <= X_upper[idx], Percentile[idx], NA)
}))
1)一个选项是使用 data.table 包中的 foverlaps:

library(data.table)
# Overlap-join the per-score intervals in `scores1` against `reftable`, put
# the matches back into the original row order via `rn`, and pull out the
# percentile column directly in `j`.
scores$PercRank <- foverlaps(scores1, reftable)[order(rn), Percentile]
# The helper row id is not part of the final result.
scores[["rn"]] <- NULL
scores
#     X PercRank
#1  58        2
#2 127        3
#3 175        3
#4 245        6
#5 300       11
#6  90        2
#7 405       20
#8 284        8
#9 330       11
数据(scores 的构造代码见下文)

也可以尝试:
# Overlap join that keeps only reference intervals containing each score
# interval. The `type` argument must use straight ASCII quotes; the
# typographic quotes in the original line do not parse as R.
# NOTE(review): the printed result was fused onto the code as
# "#[1]28231162011" and cannot be reliably split back into individual values.
foverlaps(setDT(scores1), setDT(reftable), type = "within")$Percentile
# Base R lookup: for each score, findInterval() gives the index of the last
# X_lower that is <= the score (X_lower must be sorted increasingly).
# An index of 0 means the score lies below the first interval; used as a raw
# subscript it would silently drop that element and misalign the result with
# the rows of `scores`, so it is turned into NA. Scores above the matched
# interval's X_upper are likewise reported as NA instead of a wrong percentile.
transform(scores, percRank = with(reftable, {
  idx <- findInterval(X, X_lower)
  idx[idx %in% 0L] <- NA_integer_
  ifelse(X <= X_upper[idx], Percentile[idx], NA)
}))
library(data.table)
# Overlap-join the per-score intervals in `scores1` against `reftable`, put
# the matches back into the original row order via `rn`, and pull out the
# percentile column directly in `j`.
scores$PercRank <- foverlaps(scores1, reftable)[order(rn), Percentile]
# The helper row id is not part of the final result.
scores[["rn"]] <- NULL
scores
#     X PercRank
#1  58        2
#2 127        3
#3 175        3
#4 245        6
#5 300       11
#6  90        2
#7 405       20
#8 284        8
#9 330       11
# Non-equi update join, done in two steps: first convert `scores` to a
# data.table by reference, then, for each reference row, write its Percentile
# into PercRank on the score rows with X_lower <= X <= X_upper.
setDT(scores)
scores[
  reftable,
  PercRank := Percentile,
  on = .(X >= X_lower, X <= X_upper)
]
scores
#     X PercRank
#1:  58        2
#2: 127        3
#3: 175        3
#4: 245        6
#5: 300       11
#6:  90        2
#7: 405       20
#8: 284        8
#9: 330       11
library(fuzzyjoin)
library(dplyr)
# Fuzzy left join: pair each score with the reference row satisfying
# X >= X_lower and X <= X_upper, then keep only the score and its percentile.
select(
  fuzzy_left_join(
    scores, reftable,
    by = c("X" = "X_lower", "X" = "X_upper"),
    match_fun = list(`>=`, `<=`)
  ),
  X, Percentile
)
#     X Percentile
#1  58          2
#2 127          3
#3 175          3
#4 245          6
#5 300         11
#6  90          2
#7 405         20
#8 284          8
#9 330         11
# Example scores to look up in `reftable`.
scores <- data.frame(
  X = c(58, 127, 175, 245, 300, 90, 405, 284, 330)
)
# setkeyv() below sorts scores1, so keep the original row position in `rn`
# to be able to restore the input order after the join.
scores$rn <- seq_len(nrow(scores))
# Represent each score as a zero-width interval [X, X] so it can be
# overlap-joined against the reference intervals.
scores1 <- data.table(X_lower = scores$X, X_upper = scores$X, rn = scores$rn)
setkeyv(scores1, c("X_lower", "X_upper"))
# `reftable` is created with data.frame(); setkeyv() and foverlaps() only
# accept data.tables, so convert it by reference before keying it.
setDT(reftable)
setkeyv(reftable, c("X_lower", "X_upper"))