R 比较一组条件并在IF ELSE语句中创建多个输出

R 比较一组条件并在IF ELSE语句中创建多个输出,r,R,我有一个简化的数据帧1 df1: set.seed(100) df1 = data.frame(greater_than_or_equal_to= unique(cummax(sample.int(100)))) df1$less_than = c(df1$greater_than_or_equal_to[2:nrow(df1)], df1$greater_than_or_equal_to[nrow(df1)]+3) df1$score1 = roun

我有一个简化的数据帧1 df1:

set.seed(100)  
df1 = data.frame(greater_than_or_equal_to= unique(cummax(sample.int(100))))
df1$less_than = c(df1$greater_than_or_equal_to[2:nrow(df1)], 
                  df1$greater_than_or_equal_to[nrow(df1)]+3)
df1$score1 =  round(runif(10,1,10),1)
df1$score2 =  round(runif(10,2,20),1)
df1 = df1[1:5,]

df1
greater_than_or_equal_to less_than score1 score2
                    31        55    3.9   17.1
                    55        77    4.5   16.5
                    77        79    1.4    3.5
                    79        93    4.3    6.3
                    93        94    6.1   19.4
我有另一个简化的数据帧df2:

df2 = data.frame(benchmark = runif(5,31,94))
df2 = round(df2,1)
df2
benchmark
  50.4
  47.2
  65.8
  34.6
  60.5
我想将df2中的基准与df1中前两列之间的范围进行比较,并使用df1中的score1和score2在df2中创建多个新列

示例代码和输出:

for (i in 1:nrow(df2)){
  if (df2$benchmark[i] < 55) {
    df2$cav[i] = df1$score1[1]
    df2$gold[i] = df1$score2[1]
  } else if (df2$benchmark[i] < 77) {
    df2$cav[i] = df1$score1[2]
    df2$gold[i] = df1$score2[2]
  } else if (df2$benchmark[i] < 79) {
    df2$cav[i] = df1$score1[3]
    df2$gold[i] = df1$score2[3]
  } else if (df2$benchmark[i] < 93) {
    df2$cav[i] = df1$score1[4]
    df2$gold[i] = df1$score2[4]
  } else {
    df2$cav[i] = df1$score1[5]
    df2$gold[i] = df1$score2[5]
  }
}

df2
benchmark gold cav
     50.4 17.1 3.9
     47.2 17.1 3.9
     65.8 16.5 4.5
     34.6 17.1 3.9
     60.5 16.5 4.5
这段代码生成了我想要的内容,但如果df1和df2有1000行要匹配,则效率低下。你知道我如何有效地扩展这个过程吗


提前谢谢

您可以确保正在处理的列已排序,并使用稍微不同的循环策略在那里获得一些收益。下面是一些代码,每个代码有5000行。它运行几秒钟,这对大多数人来说已经足够好了

set.seed(100)  
df1 = data.frame(greater_than_or_equal_to= sort(runif(5000, 5, 1500)))
df1$less_than = c(df1$greater_than_or_equal_to[2:nrow(df1)], 
                  df1$greater_than_or_equal_to[nrow(df1)]+3)
df1$score1 =  round(runif(5000,1,10),1)
df1$score2 =  round(runif(5000,2,20),1)
df1 = df1[1:5000,]

df2 = data.frame(benchmark = runif(5000,1,940))
df2 = round(df2,1)
这是新的循环

#init the new columns
df2$cav<-NA
df2$gold<-NA

#sort the cols we need to be sorted
df1<-df1[order(df1$less_than),]
df2<-df2[order(df2$benchmark),]

#loop through df1 and fill in the corresponding values
for(i in 1:nrow(df1)){
  val<-df1[i,]$less_than
  rows<-which(is.na(df2$cav) & df2$benchmark < val)

  #might not be any matches
  if(length(rows) > 0){
    df2[rows,]$cav<-df1[i,]$score1
    df2[rows,]$gold<-df1[i,]$score2
  }

  #we can stop when it is full
  if(all(!is.na(df2))){
    break
  }
}