R：通过匹配另一个数据帧的列，对数据帧中的值进行内部和外推_R_Interpolation_Extrapolation

R：通过匹配另一个数据帧的列，对数据帧中的值进行内部和外推

R：通过匹配另一个数据帧的列，对数据帧中的值进行内部和外推,r,interpolation,extrapolation,R,Interpolation,Extrapolation,我有两个数据帧： df1 <- data.frame(levels = c(1, 3, 5, 7, 9), values = c(2.2, 5.3, 7.9, 5.4, 8.7)) df2 <- data.frame(levels = c(1, 4, 8, 12)) # other columns not necessary df1也许，根据两个数据集的级别的联合完成，然后使用na.approx（来自zoo）和规则=2（用于外推）库（dp

我有两个数据帧：

# Reference data: `values` observed at five known `levels`.
df1 <- data.frame(
  levels = c(1, 3, 5, 7, 9),
  values = c(2.2, 5.3, 7.9, 5.4, 8.7)
)

# Levels at which `values` should be estimated (other columns not necessary).
df2 <- data.frame(levels = c(1, 4, 8, 12))

也许可以先用 `complete` 按两个数据集 `levels` 的并集补全 `df1`，然后使用 `na.approx`（来自 `zoo`）并设置 `rule = 2`（用于外推）：
library(dplyr)
library(tidyr)
library(zoo)
df1 <- df1 %>%
  complete(levels = union(levels, df2$levels)) %>%
  mutate(values = na.approx(values, maxgap = Inf, rule = 2))

-输出
df1
# A tibble: 8 x 2
#  levels values
#   <dbl>  <dbl>
#1      1   2.2 
#2      3   5.3 
#3      4   6.6 
#4      5   7.9 
#5      7   5.4 
#6      8   7.05
#7      9   8.7 
#8     12   8.7 

df1
# A tibble: 8 x 2
#  levels values
#   <dbl>  <dbl>
#1      1   2.2 
#2      3   5.3 
#3      4   6.6 
#4      5   7.9 
#5      7   5.4 
#6      8   7.05
#7      9   8.7 
#8     12   8.7 
也许可以先用 `complete` 按两个数据集 `levels` 的并集补全 `df1`，然后使用 `na.approx`（来自 `zoo`）并设置 `rule = 2`（用于外推）：
library(dplyr)
library(tidyr)
library(zoo)
df1 <- df1 %>%
  complete(levels = union(levels, df2$levels)) %>%
  mutate(values = na.approx(values, maxgap = Inf, rule = 2))

-输出
df1
# A tibble: 8 x 2
#  levels values
#   <dbl>  <dbl>
#1      1   2.2 
#2      3   5.3 
#3      4   6.6 
#4      5   7.9 
#5      7   5.4 
#6      8   7.05
#7      9   8.7 
#8     12   8.7 

df1
# A tibble: 8 x 2
#  levels values
#   <dbl>  <dbl>
#1      1   2.2 
#2      3   5.3 
#3      4   6.6 
#4      5   7.9 
#5      7   5.4 
#6      8   7.05
#7      9   8.7 
#8     12   8.7 
我确信这是可以压缩的，这是我很久以前写的一些代码，处理必须在有序向量的头/尾处进行外推：
#' Fill missing values in an ordered vector by position
#'
#' Every `NA` in `vec` is estimated from its index, treating the elements as
#' equally spaced. Three zones are handled:
#'
#' * leading `NA`s (before the first observed value) are placed on the
#'   straight line running from 0 at index 0 to the first observed value;
#' * interior `NA`s are linearly interpolated between the observed values on
#'   either side;
#' * trailing `NA`s (after the last observed value) continue from the last
#'   observed value, stepping by the mean successive difference of the
#'   series filled so far (leading ramp plus interior).
#'
#' Observed values are returned unchanged.
#'
#' @param vec A numeric vector, already sorted in the order along which the
#'   estimate should run. Must contain at least one non-`NA` value.
#' @return A numeric vector of the same length as `vec` with every `NA`
#'   replaced by its estimate.
#' @examples
#' l_estimate(c(NA, 2, NA, 4, NA))
l_estimate <- function(vec) {
  n_total <- length(vec)
  observed <- which(!is.na(vec))
  if (length(observed) == 0L) {
    stop("`vec` must contain at least one non-NA value.", call. = FALSE)
  }
  first_obs <- observed[1L]
  last_obs <- observed[length(observed)]

  # Leading gap: one ramp value per leading NA. seq_len() yields an empty ramp
  # when the vector starts with an observed value, so nothing is prepended and
  # the positions of the remaining pieces stay aligned with `vec`.
  ramp_up <- as.numeric(vec[first_obs]) / first_obs * seq_len(first_obs - 1L)

  # Interior: interpolate only the missing positions, so each estimate lands
  # on its own index and observed values are never overwritten.
  core <- as.numeric(vec[first_obs:last_obs])
  gaps <- is.na(core)
  if (any(gaps)) {
    core[gaps] <- approx(
      x = which(!gaps),
      y = core[!gaps],
      xout = which(gaps),
      method = "linear"
    )$y
  }

  filled <- c(ramp_up, core)

  n_trailing <- n_total - last_obs
  if (n_trailing == 0L) {
    return(filled)
  }

  # Trailing gap: a slope needs at least two filled points to be defined.
  if (length(filled) < 2L) {
    stop(
      "Cannot extrapolate trailing NAs from a single value.",
      call. = FALSE
    )
  }
  step <- mean(diff(filled))
  c(filled, filled[length(filled)] + step * seq_len(n_trailing))
}

# Sort the rows by `levels`, fill `values` with l_estimate(), and print the
# resulting data frame to the console.
transform(
  full_df[order(full_df$levels), ],
  values = l_estimate(values)
)

#要插值/外推的函数：l_估计=>函数（）
l_估计函数（）
.l_interp_vec整数向量
最大非整数向量
ru_下1，min_non_na-1，min_non_na）
#存储后需要插补的行数的标量
#最后一个非NA值：ru_upper=>整数向量
上数值向量
上升数值向量
y=1&最大不适用！=长度（vec））{
#创建向量插值（如果有）
#最后一个值后有多个NA值：lower_l_int=>数值向量
下整数数值向量
z1&max_non_na==长度（vec））{
#将线性插值存储在向量中：z=>numeric
z数值向量
下整数数值向量
z数值向量
z标准输出（控制台）
转换（全方位[顺序（全方位$）]，
值=l_估计值（值）
)
我确信这是可以压缩的，这是我很久以前写的一些代码，处理必须在有序向量的头/尾处进行外推：
#' Fill missing values in an ordered vector by position
#'
#' Every `NA` in `vec` is estimated from its index, treating the elements as
#' equally spaced. Three zones are handled:
#'
#' * leading `NA`s (before the first observed value) are placed on the
#'   straight line running from 0 at index 0 to the first observed value;
#' * interior `NA`s are linearly interpolated between the observed values on
#'   either side;
#' * trailing `NA`s (after the last observed value) continue from the last
#'   observed value, stepping by the mean successive difference of the
#'   series filled so far (leading ramp plus interior).
#'
#' Observed values are returned unchanged.
#'
#' @param vec A numeric vector, already sorted in the order along which the
#'   estimate should run. Must contain at least one non-`NA` value.
#' @return A numeric vector of the same length as `vec` with every `NA`
#'   replaced by its estimate.
#' @examples
#' l_estimate(c(NA, 2, NA, 4, NA))
l_estimate <- function(vec) {
  n_total <- length(vec)
  observed <- which(!is.na(vec))
  if (length(observed) == 0L) {
    stop("`vec` must contain at least one non-NA value.", call. = FALSE)
  }
  first_obs <- observed[1L]
  last_obs <- observed[length(observed)]

  # Leading gap: one ramp value per leading NA. seq_len() yields an empty ramp
  # when the vector starts with an observed value, so nothing is prepended and
  # the positions of the remaining pieces stay aligned with `vec`.
  ramp_up <- as.numeric(vec[first_obs]) / first_obs * seq_len(first_obs - 1L)

  # Interior: interpolate only the missing positions, so each estimate lands
  # on its own index and observed values are never overwritten.
  core <- as.numeric(vec[first_obs:last_obs])
  gaps <- is.na(core)
  if (any(gaps)) {
    core[gaps] <- approx(
      x = which(!gaps),
      y = core[!gaps],
      xout = which(gaps),
      method = "linear"
    )$y
  }

  filled <- c(ramp_up, core)

  n_trailing <- n_total - last_obs
  if (n_trailing == 0L) {
    return(filled)
  }

  # Trailing gap: a slope needs at least two filled points to be defined.
  if (length(filled) < 2L) {
    stop(
      "Cannot extrapolate trailing NAs from a single value.",
      call. = FALSE
    )
  }
  step <- mean(diff(filled))
  c(filled, filled[length(filled)] + step * seq_len(n_trailing))
}

# Sort the rows by `levels`, fill `values` with l_estimate(), and print the
# resulting data frame to the console.
transform(
  full_df[order(full_df$levels), ],
  values = l_estimate(values)
)

#要插值/外推的函数：l_估计=>函数（）
l_估计函数（）
.l_interp_vec整数向量
最大非整数向量
ru_下1，min_non_na-1，min_non_na）
#存储后需要插补的行数的标量
#最后一个非NA值：ru_upper=>整数向量
上数值向量
上升数值向量
y=1&最大长度（vec））{
#创建向量插值（如果有）
#最后一个值后有多个NA值：lower_l_int=>数值向量
下整数数值向量
z1&max_non_na==长度（vec））{
#将线性插值存储在向量中：z=>numeric
z数值向量
下整数数值向量
z数值向量
z标准输出（控制台）
转换（全方位[顺序（全方位$）]，
值=l_估计值（值）
)
`approxExtrap` 函数有助于点之间的插值，但是您希望如何外推到 `df1` 的范围之外？也许可以用 `Hmisc::approxExtrap`。
这看起来像是一个单行解决方案，但从文档中看，如何设置并不是很明显。
`approxExtrap` 函数将有助于点之间的插值，但您希望如何外推到 `df1` 的范围之外？也许可以用 `Hmisc::approxExtrap`。
这似乎是一个单行解决方案，但从文档中看，如何设置并不是很明显。
出现了这个错误：“错误：连接列必须存在于数据中。x `levels` 有问题。”
@B.Quaink 我检查了你的示例数据。两者都有 `levels` 列。无法重现你提到的错误。我已更新为我得到的输出。
是的，现在可以了！有没有办法保存输出？我不能把 `%>%` 之前的 `df1` 改成新的名字，这样也不会更新原始的 `df1`。
@B.Quaink 你可以赋值 `df1 <- df1 %>% ...`，或在最后执行 `-> df1`。
真棒，感谢您的帮助 :)
出现了这个错误：“错误：连接列必须存在于数据中。x `levels` 有问题。”
@B.Quaink 我检查了你的示例数据。两者都有 `levels` 列。无法重现你提到的错误。我已更新为我得到的输出。
是的，它现在可以工作了！有没有办法保存输出？我无法把 `%>%` 之前的 `df1` 改成新的名字，这样也不会更新原始的 `df1`。
@B.Quaink 您可以赋值 `df1 <- df1 %>% ...`，或在最后执行 `-> df1`。
非常棒，谢谢您的帮助 :)