R 预测包装外的分段lm_R_Lm_Predict

R：在 segmented 包之外对分段 lm 模型进行预测

R 预测包装外的分段lm,r,lm,predict,R,Lm,Predict,我有数百个分段线性模型的输出阵列（使用R中的分段包制作）。我希望能够使用predict函数在新数据上使用这些输出。明确地说，我的工作区中没有分段线性模型对象；我刚刚保存并重新导入了相关输出（例如系数和断点）。因此，我不能简单地使用分段包中的predict.segmented函数下面是一个玩具示例，基于该示例，它看起来很有希望，但与predict.segmented函数的输出不匹配 library(segmented) set.seed(12) xx <- 1:100 zz <

我有数百个分段线性模型的输出数组（使用 R 的 segmented 包生成）。我希望能够用 predict 函数把这些输出应用到新数据上。需要说明的是，我的工作区中并没有分段线性模型对象；我只是保存并重新导入了相关输出（例如系数和断点）。因此，我不能直接使用 segmented 包中的 predict.segmented 函数。

下面是一个玩具示例（改编自已有的示例代码）；这种做法看起来很有希望，但其结果与 predict.segmented 函数的输出不一致。

# Toy example: fit a segmented (piecewise-linear) regression with two
# breakpoints, then try to reproduce its predictions by hand-building the
# extra design-matrix columns and passing them to predict().
library(segmented) 
set.seed(12) 

# Simulated data: y changes slope in x at 35 and at 70, has a hinge effect of
# z at 0.5, and carries Gaussian noise with sd = 2.
xx <- 1:100 
zz <- runif(100) 
yy <- 2 + 1.5*pmax(xx-35,0) - 1.5*pmax(xx-70,0) + 
               15*pmax(zz-0.5,0) + rnorm(100,0,2) 
dati <- data.frame(x=xx,y=yy,z=zz) 

# Ordinary linear fit of y on x only (z is not in the model); this is the
# starting model handed to segmented().
out.lm<-lm(y~x,data=dati) 
o<-## segmented() on an 'lm' fit; psi gives starting values for the two breakpoints in x
     segmented(out.lm,seg.Z=~x,psi=list(x=c(30,60)), 
           control=seg.control(display=FALSE)) 

# Note that coefficients with U in the name are differences in slopes, not slopes. 
# Compare slope(o) with the cumulative sums below: coef(o)[2] is the x
# coefficient, coef(o)[3] and coef(o)[4] are the two U (slope-difference) terms.
slope(o)
coef(o)[2] + coef(o)[3]
coef(o)[2] + coef(o)[3] + coef(o)[4]

# Prediction: rebuild the model's extra columns for new x values.
# o$psi[1,2] and o$psi[2,2] are used as the two estimated breakpoints.
pred <- data.frame(x = 1:100) 
# dummy1/dummy2: hinge terms pmax(x - breakpoint, 0), one per breakpoint.
pred$dummy1 <- pmax(pred$x - o$psi[1,2], 0) 
pred$dummy2 <- pmax(pred$x - o$psi[2,2], 0) 
# dummy3/dummy4: indicator (x > breakpoint) times the cumulative slope after
# that breakpoint.
# NOTE(review): the predict.segmented-derived helper elsewhere in this file
# builds these columns as -(x > breakpoint) * <slope difference>, i.e. with a
# negative sign and the U coefficient alone. The construction here differs in
# both respects and may be the source of the mismatch -- confirm.
pred$dummy3 <- I(pred$x > o$psi[1,2]) * (coef(o)[2] + coef(o)[3])
pred$dummy4 <- I(pred$x > o$psi[2,2]) * (coef(o)[2] + coef(o)[3] + coef(o)[4]) 
# Rename the four dummy columns to the model-frame column names after the
# first two (presumably the response and x -- verify with names(model.frame(o))).
names(pred)[-1]<- names(model.frame(o))[-c(1,2)] 

# Compute the prediction with the predict() generic, requesting confidence
# intervals; the breakpoints are treated as fixed at their estimates.
# The result (including the 'fit' column used below) is appended to pred.
pred <- data.frame(pred, predict(o, newdata= pred, 
                       interval="confidence")) 

# Reference prediction from predict.segmented() on the fitting data, to compare.
test <- predict.segmented(o)
# Scatter of hand-built vs reference predictions; the red identity line marks
# perfect agreement.
plot(pred$fit, test, ylim = c(0, 100))
abline(0,1, col = "red")
# Open question: at least one segment does not appear to be predicted correctly.

library(segmented)
set.seed(12)
xx …（代码片段在此处被截断）

评论：为什么在预测中使用名为 `xx` 的列，而在原始 data.frame 中它被称为 `x`？要使 predict() 正常工作，列名必须完全匹配。

回复：谢谢 @MrFlick。我已修正 predict 中的列名，使其与原始 data.frame 一致。第一段的预测拟合现在看起来不错，但我仍在检查其他段。对于想与启发此代码的 Nabble 链接进行比较的读者：我更改了斜率的定义（因为根据文档，以“U”开头的系数是斜率的差值，而不是斜率本身）。
library(segmented)


## Define function for making matrix of dummy variables (this is based on code
## from predict.segmented())

#' Build the design-matrix columns of a segmented lm from saved estimates
#'
#' Re-creates, for a vector of x values, the columns a segmented linear model
#' with `k = length(est.psi)` breakpoints uses: the x column itself, one hinge
#' column `pmax(x - psi_j, 0)` per breakpoint and, when `psi.est` is `TRUE`,
#' one column `-(x > psi_j) * diffSlope_j` per breakpoint. Only the saved
#' numbers are needed; no fitted model object is required. Any number of
#' breakpoints (k >= 1) is supported.
#'
#' Unlike the code this is based on, the x column is always kept in the result.
#'
#' @param x.values Numeric vector of x values to build the columns for.
#' @param x_names Name (single string) given to the x column.
#' @param psi.est Single logical. `TRUE` (default) adds the breakpoint (psi)
#'   columns; `FALSE` returns only the x and hinge columns, in which case
#'   `nameV` and `diffSlope` are not used and may be omitted.
#' @param nameU Character vector of length k naming the hinge columns,
#'   e.g. `c("U1.x", "U2.x")` for two breakpoints.
#' @param nameV Character vector of length k naming the psi columns,
#'   e.g. `c("psi1.x", "psi2.x")` for two breakpoints.
#' @param diffSlope Numeric vector of length k with the slope differences
#'   (the coefficients of the hinge columns).
#' @param est.psi Numeric vector of length k with the estimated breakpoints.
#' @return A numeric matrix with `length(x.values)` rows and columns named
#'   `c(x_names, nameU, nameV)` (or `c(x_names, nameU)` when `psi.est` is
#'   `FALSE`).
dummy.matrix <- function(x.values, x_names, psi.est = TRUE, nameU, nameV,
                         diffSlope, est.psi) {
  n <- length(x.values)
  k <- length(est.psi)
  # Fail early on length mismatches: a wrong-length diffSlope would otherwise
  # be silently recycled in the single-breakpoint case.
  stopifnot(
    k >= 1L,
    length(x_names) == 1L,
    length(nameU) == k,
    length(psi.est) == 1L,
    !is.na(psi.est)
  )

  # n x k matrices: every column of x_mat is x.values; column j of psi_mat is
  # constant at est.psi[j].
  x_mat <- matrix(x.values, nrow = n, ncol = k)
  psi_mat <- matrix(rep(est.psi, each = n), nrow = n, ncol = k)

  # Hinge terms: zero up to the breakpoint, then the distance past it.
  hinge <- pmax(x_mat - psi_mat, 0)

  if (!psi.est) {
    newd <- cbind(x.values, hinge)
    colnames(newd) <- c(x_names, nameU)
    return(newd)
  }

  # nameV and diffSlope are only required (and only evaluated) on this path.
  stopifnot(length(nameV) == k, length(diffSlope) == k)

  # Psi columns: -1 past the breakpoint (0 otherwise), scaled column-wise by
  # the matching slope difference. sweep() covers k == 1 and k > 1 alike.
  past_break <- ifelse(x_mat > psi_mat, -1, 0)
  psi_cols <- sweep(past_break, 2, unname(diffSlope), "*")

  newd <- cbind(x.values, hinge, psi_cols)
  colnames(newd) <- c(x_names, nameU, nameV)
  newd
}

## Test dummy matrix function ----------------------------------------------
# Re-create the toy data: y changes slope in x at 35 and 70, has a hinge
# effect of z at 0.5, and carries Gaussian noise with sd = 2.
set.seed(12)
xx <- 1:100
zz <- runif(100)
yy <- 2 + 1.5 * pmax(xx - 35, 0) - 1.5 * pmax(xx - 70, 0) +
  15 * pmax(zz - 0.5, 0) + rnorm(100, 0, 2)
dati <- data.frame(x = xx, y = yy, z = zz)
out.lm <- lm(y ~ x, data = dati)

# One segmented variable, two breakpoints: starting values (a vector) for psi
# have to be specified.
o <- segmented(out.lm, seg.Z = ~x, psi = c(30, 60),
               control = seg.control(display = FALSE))
slope(o)
plot(o)  # the generic dispatches to the segmented plot method
summary(o)


# Test dummy matrix fn with the same dataset.
# Everything below the fit uses only numbers that could have been saved from
# the model (coefficients and breakpoints), plus the column names.
newdata <- dati
nameU1 <- c("U1.x", "U2.x")
nameV1 <- c("psi1.x", "psi2.x")
# Full component name: `o$coef` only worked through `$` partial matching.
diffSlope1 <- o$coefficients[3:4]
# Second column of o$psi is used as the estimated breakpoints.
est.psi1 <- unname(o$psi[1:2, 2])

test <- dummy.matrix(x.values = newdata$x, x_names = "x", psi.est = TRUE,
                     nameU = nameU1, nameV = nameV1,
                     diffSlope = diffSlope1, est.psi = est.psi1)


# Predict response variable using matrix multiplication.
# Prepend the intercept column so that test has one column per coefficient
# (intended to match model.matrix(o)).
test <- cbind("(Intercept)" = 1, test)
# NOTE(review): the stored coefficient vector is used rather than coef(o);
# newer segmented releases may provide a coef() method that omits the psi
# terms, which would make the product non-conformable -- confirm.
predY <- drop(test %*% o$coefficients)
plot(predY)
# The blue line should run through the points if the hand-built prediction
# agrees with the package's own prediction on the fitting data.
lines(predict(o), col = "blue")