来自Stata的xtpcse-如何在R中重写_R_Stata

来自Stata的xtpcse-如何在R中重写

r stata

来自Stata的xtpcse-如何在R中重写,r,stata,R,Stata,我目前正在学习R。我以前不了解STATA 我想重新分析一项在Stata（xtpcse线性回归和面板校正标准误差）中进行的研究。我在Stata中找不到模型或更详细的代码，也找不到任何其他关于如何在R中重写的提示。我已经为R安装了计量经济学的plm包。这就是我所能得到的 STATA中.do文件的第一行复制到下面（我刚刚看到它非常不可读。这里是指向txt文件的链接，我在其中复制了.do内容：）。我不知道如何更好地处理这件事。我尝试了谷歌ing STATA和R比较之类的方法，但没有成功我想复制的所有

我目前正在学习R。我以前不了解STATA

我想重新分析一项在Stata（xtpcse线性回归和面板校正标准误差）中进行的研究。我在Stata中找不到模型或更详细的代码，也找不到任何其他关于如何在R中重写的提示。我已经为R安装了计量经济学的plm包。这就是我所能得到的

Stata中.do文件的第一行复制到下面（我刚发现它非常难以阅读。这里是指向txt文件的链接，我把.do的内容复制到了其中：）。我不知道如何更好地处理这件事。我尝试用谷歌搜索“Stata与R的比较”之类的内容，但没有成功。

我想复制的所有研究数据如下：

更新：我刚试过Vincent的代码。我尝试了pcse2和vcovBK代码，它们都有效（尽管我不确定如何处理vcovBK产生的相关矩阵）

然而，在我重新分析的论文中，我仍然很难重现回归系数的估计值。我正在尽我所能地遵循他们的步骤，我想我唯一遗漏的一步是Stata中“自相关：普通AR（1）”的部分。我正在分析的论文说：“OLS回归使用面板校正标准误差（Beck/Katz'95），控制每个面板内的一阶相关性（Stata中的corr AR1选项）。”

如何控制R中每个面板内的一阶相关性

以下是我到目前为止对我的数据所做的：

## Fit the pooled OLS model of total_FDI on the full covariate set
res.lm <- lm(
  total_FDI ~ ciri + human_cap + worker_rts + polity_4 + lag_total +
    market_size + income + econ_growth + log_trade + fixed_xr +
    fix_dollar + xr_fluct + english + lab_growth,
  data = D
)
## Panel-corrected standard errors, indexed by c_code (cross-section) and year (time)
res.pcse <- pcse2(res.lm, groupN = "c_code", groupT = "year", pairwise = TRUE)

##运行lm
请查看pcse包，它考虑了面板校正的标准误差。当然，您必须查看Stata中的文档，找出所做的假设，并与pcse进行交叉检查。
正如Ramnath所提到的，pcse包将执行Stata的xtpcse所做的操作。或者，您可以使用plm包中的vcovBK()函数；如果您选择后一个选项，请确保使用cluster='time'选项，这是Beck & Katz（1995）文章建议的，也是Stata命令所实现的方式。
pcse包运行良好，但存在一些问题，使得许多直观的用户输入无法被接受，尤其是在数据集不平衡的情况下。您可能想试试我刚刚重写的这个函数。只需加载pcse软件包，再加载pcse2函数，然后按照pcse文档中的说明使用它。我认为，下面粘贴的函数比pcse提供的函数更干净、更灵活、更健壮。简单的基准测试还表明，我的版本可能比他们的版本快5到10倍，这对于大型数据集可能很重要。
祝你好运
library(Matrix)
## Panel-corrected standard errors (Beck & Katz 1995) for a fitted linear model.
##
## Arguments:
##   object   - a fitted model inheriting from class "lm", estimated with an
##              explicit `data=` argument that evaluates to a data frame.
##   groupN   - name (quoted or unquoted) of the column holding the
##              cross-section (unit) index.
##   groupT   - name (quoted or unquoted) of the column holding the time index.
##   pairwise - only relevant for unbalanced panels. If TRUE, each residual
##              covariance is computed from the periods two units share;
##              otherwise only the periods with the largest number of observed
##              units are used.
##
## Returns a list of class "pcse" with elements vcov, pcse, b, tstats, df,
## pval, pairwise, nobs, nmiss and call.
##
## Requires the Matrix package to be attached (for Matrix()).
pcse2 <- function(object, groupN, groupT, pairwise=TRUE){
  ## Validate the model first: everything below dereferences object$call
  if(!inherits(object, "lm")){stop("Formula object must be of class 'lm'.")}

  ## Extract basic model info
  groupT <- tail(as.character((match.call()$groupT)), 1)
  groupN <- tail(as.character((match.call()$groupN)), 1)
  data.expr <- object$call$data
  data.label <- paste(deparse(data.expr), collapse="")
  ## Evaluate the data expression where the model formula was created, so that
  ## neither local variables of this function nor a non-symbol `data=`
  ## expression can break the lookup.
  data.env <- environment(formula(object))
  if(is.null(data.env)){data.env <- parent.frame()}
  dat <- eval(data.expr, data.env)

  ## Sanity checks
  if(!is.data.frame(dat)){stop(paste('Could not retrieve a data frame from the data argument of the model call:', data.label))}
  if(!groupT %in% colnames(dat)){stop(paste(groupT, 'was not found in data', data.label))}
  if(!groupN %in% colnames(dat)){stop(paste(groupN, 'was not found in data', data.label))}
  if(anyDuplicated(paste(dat[,groupN], dat[,groupT]))>0){stop(paste('There are duplicate groupN-groupT observations in', data.label))}
  if(anyNA(dat[,groupT])){stop('There are missing time indices in the data.')}
  if(anyNA(dat[,groupN])){stop('There are missing unit indices in the data.')}

  ## Expand model frame to include groupT, groupN, resid columns.
  f <- formula(object)
  f.expanded <- update.formula(f, paste(". ~ .", groupN, groupT, sep=" + "))
  dat.pcse <- model.frame(f.expanded, dat)
  dat.pcse$e <- resid(object)

  ## Extract basic model info (part II)
  n.units <- length(unique(dat.pcse[,groupN]))
  n.periods <- length(unique(dat.pcse[,groupT]))
  nobs <- nrow(dat.pcse)
  is.balanced <- length(resid(object)) == n.units * n.periods

  ## If balanced dataset, calculate as in Beck & Katz (1995)
  if(is.balanced){
    ## Unit-major ordering, so that Omega = Sigma (x) I_T lines up with X
    dat.pcse <- dat.pcse[order(dat.pcse[,groupN], dat.pcse[,groupT]),]
    X <- model.matrix(f, dat.pcse)
    ## E is periods x units; crossprod(E)/T is the units x units covariance
    E <- t(matrix(dat.pcse$e, n.units, n.periods, byrow=TRUE))
    Omega <- kronecker((crossprod(E) / n.periods), Matrix(diag(1, n.periods)))

  }else{
    ## Rectangularize: one row per unit-period pair, with missing cells
    ## zero-filled so they drop out of every cross-product below.
    rectangle <- expand.grid(unique(dat.pcse[,groupN]), unique(dat.pcse[,groupT]))
    names(rectangle) <- c(groupN, groupT)
    rectangle <- merge(rectangle, dat.pcse, all.x=TRUE)
    rectangle <- rectangle[order(rectangle[,groupN], rectangle[,groupT]),]
    valid <- ifelse(is.na(rectangle$e),0,1)
    rectangle[is.na(rectangle)] <- 0
    X <- model.matrix(f, rectangle)
    X[valid==0,1] <- 0 # the intercept column must vanish for padded rows too

    ## If unbalanced and pairwise, calculate as in Franzese (1996)
    if(pairwise==TRUE){
      E <- crossprod(t(matrix(rectangle$e, n.units, n.periods, byrow=TRUE)))
      ## V[i,j] = number of periods in which units i and j are both observed
      V <- crossprod(t(matrix(valid, n.units, n.periods, byrow=TRUE)))
      if (length(V[V==0]) > 0){stop("Error! A CS-unit exists without any obs or without any obs in a common period with another CS-unit. You must remove that unit from the data passed to pcse().")}
      Omega <- kronecker(E/V, Matrix(diag(1, n.periods)))

    ## If unbalanced and casewise, calculate based on largest rectangular subset of data
    }else{
      ## Keep only years for which we have the max number of observations
      large.panels <- by(dat.pcse, dat.pcse[,groupT], nrow) # How many valid observations per year?
      ## NOTE(review): when this warning fires, the residual matrix below is
      ## filled from fewer than N * T.casewise values -- confirm intended.
      if(max(large.panels) < n.units){warning('There is no time period during which all units are observed. Consider using pairwise estimation.')}
      T.balanced <- names(large.panels[large.panels==max(large.panels)]) # Which years have max(valid observations)?
      T.casewise <- length(T.balanced)
      dat.balanced <- dat.pcse[dat.pcse[,groupT] %in% T.balanced,] # Extract biggest rectangular subset
      dat.balanced <- dat.balanced[order(dat.balanced[,groupN], dat.balanced[,groupT]),]

      ## Calculate pcse as in Beck & Katz (1995)
      E <- t(matrix(dat.balanced$e, n.units, T.casewise, byrow=TRUE))
      Omega <- kronecker((crossprod(E) / T.casewise), Matrix(diag(1, n.periods)))
    }
  }

  ## Finish evaluation, clean and output
  salami <- t(X) %*% Omega %*% X
  bread <- solve(crossprod(X))
  sandwich <- bread %*% salami %*% bread
  colnames(sandwich) <- names(coef(object))
  row.names(sandwich) <- names(coef(object))
  pcse <- sqrt(diag(sandwich))
  b <- coef(object)
  tstats <- b/pcse
  df <- nobs - ncol(X)
  pval <- 2*pt(abs(tstats), df, lower.tail=FALSE)
  res <- list(vcov=sandwich, pcse=pcse, b=b, tstats=tstats, df=df, pval=pval, pairwise=pairwise,
              nobs=nobs, nmiss=(n.units*n.periods)-nobs, call=match.call())
  class(res) <- "pcse"
  res
}

库（矩阵）
请给我2号。请这样做。您可能想查看stats.stackexchange.com Stata手册（如果您有Stata
library(Matrix)
## Panel-corrected standard errors (Beck & Katz 1995) for a fitted linear model.
##
## Arguments:
##   object   - a fitted model inheriting from class "lm", estimated with an
##              explicit `data=` argument that evaluates to a data frame.
##   groupN   - name (quoted or unquoted) of the column holding the
##              cross-section (unit) index.
##   groupT   - name (quoted or unquoted) of the column holding the time index.
##   pairwise - only relevant for unbalanced panels. If TRUE, each residual
##              covariance is computed from the periods two units share;
##              otherwise only the periods with the largest number of observed
##              units are used.
##
## Returns a list of class "pcse" with elements vcov, pcse, b, tstats, df,
## pval, pairwise, nobs, nmiss and call.
##
## Requires the Matrix package to be attached (for Matrix()).
pcse2 <- function(object, groupN, groupT, pairwise=TRUE){
  ## Validate the model first: everything below dereferences object$call
  if(!inherits(object, "lm")){stop("Formula object must be of class 'lm'.")}

  ## Extract basic model info
  groupT <- tail(as.character((match.call()$groupT)), 1)
  groupN <- tail(as.character((match.call()$groupN)), 1)
  data.expr <- object$call$data
  data.label <- paste(deparse(data.expr), collapse="")
  ## Evaluate the data expression where the model formula was created, so that
  ## neither local variables of this function nor a non-symbol `data=`
  ## expression can break the lookup.
  data.env <- environment(formula(object))
  if(is.null(data.env)){data.env <- parent.frame()}
  dat <- eval(data.expr, data.env)

  ## Sanity checks
  if(!is.data.frame(dat)){stop(paste('Could not retrieve a data frame from the data argument of the model call:', data.label))}
  if(!groupT %in% colnames(dat)){stop(paste(groupT, 'was not found in data', data.label))}
  if(!groupN %in% colnames(dat)){stop(paste(groupN, 'was not found in data', data.label))}
  if(anyDuplicated(paste(dat[,groupN], dat[,groupT]))>0){stop(paste('There are duplicate groupN-groupT observations in', data.label))}
  if(anyNA(dat[,groupT])){stop('There are missing time indices in the data.')}
  if(anyNA(dat[,groupN])){stop('There are missing unit indices in the data.')}

  ## Expand model frame to include groupT, groupN, resid columns.
  f <- formula(object)
  f.expanded <- update.formula(f, paste(". ~ .", groupN, groupT, sep=" + "))
  dat.pcse <- model.frame(f.expanded, dat)
  dat.pcse$e <- resid(object)

  ## Extract basic model info (part II)
  n.units <- length(unique(dat.pcse[,groupN]))
  n.periods <- length(unique(dat.pcse[,groupT]))
  nobs <- nrow(dat.pcse)
  is.balanced <- length(resid(object)) == n.units * n.periods

  ## If balanced dataset, calculate as in Beck & Katz (1995)
  if(is.balanced){
    ## Unit-major ordering, so that Omega = Sigma (x) I_T lines up with X
    dat.pcse <- dat.pcse[order(dat.pcse[,groupN], dat.pcse[,groupT]),]
    X <- model.matrix(f, dat.pcse)
    ## E is periods x units; crossprod(E)/T is the units x units covariance
    E <- t(matrix(dat.pcse$e, n.units, n.periods, byrow=TRUE))
    Omega <- kronecker((crossprod(E) / n.periods), Matrix(diag(1, n.periods)))

  }else{
    ## Rectangularize: one row per unit-period pair, with missing cells
    ## zero-filled so they drop out of every cross-product below.
    rectangle <- expand.grid(unique(dat.pcse[,groupN]), unique(dat.pcse[,groupT]))
    names(rectangle) <- c(groupN, groupT)
    rectangle <- merge(rectangle, dat.pcse, all.x=TRUE)
    rectangle <- rectangle[order(rectangle[,groupN], rectangle[,groupT]),]
    valid <- ifelse(is.na(rectangle$e),0,1)
    rectangle[is.na(rectangle)] <- 0
    X <- model.matrix(f, rectangle)
    X[valid==0,1] <- 0 # the intercept column must vanish for padded rows too

    ## If unbalanced and pairwise, calculate as in Franzese (1996)
    if(pairwise==TRUE){
      E <- crossprod(t(matrix(rectangle$e, n.units, n.periods, byrow=TRUE)))
      ## V[i,j] = number of periods in which units i and j are both observed
      V <- crossprod(t(matrix(valid, n.units, n.periods, byrow=TRUE)))
      if (length(V[V==0]) > 0){stop("Error! A CS-unit exists without any obs or without any obs in a common period with another CS-unit. You must remove that unit from the data passed to pcse().")}
      Omega <- kronecker(E/V, Matrix(diag(1, n.periods)))

    ## If unbalanced and casewise, calculate based on largest rectangular subset of data
    }else{
      ## Keep only years for which we have the max number of observations
      large.panels <- by(dat.pcse, dat.pcse[,groupT], nrow) # How many valid observations per year?
      ## NOTE(review): when this warning fires, the residual matrix below is
      ## filled from fewer than N * T.casewise values -- confirm intended.
      if(max(large.panels) < n.units){warning('There is no time period during which all units are observed. Consider using pairwise estimation.')}
      T.balanced <- names(large.panels[large.panels==max(large.panels)]) # Which years have max(valid observations)?
      T.casewise <- length(T.balanced)
      dat.balanced <- dat.pcse[dat.pcse[,groupT] %in% T.balanced,] # Extract biggest rectangular subset
      dat.balanced <- dat.balanced[order(dat.balanced[,groupN], dat.balanced[,groupT]),]

      ## Calculate pcse as in Beck & Katz (1995)
      E <- t(matrix(dat.balanced$e, n.units, T.casewise, byrow=TRUE))
      Omega <- kronecker((crossprod(E) / T.casewise), Matrix(diag(1, n.periods)))
    }
  }

  ## Finish evaluation, clean and output
  salami <- t(X) %*% Omega %*% X
  bread <- solve(crossprod(X))
  sandwich <- bread %*% salami %*% bread
  colnames(sandwich) <- names(coef(object))
  row.names(sandwich) <- names(coef(object))
  pcse <- sqrt(diag(sandwich))
  b <- coef(object)
  tstats <- b/pcse
  df <- nobs - ncol(X)
  pval <- 2*pt(abs(tstats), df, lower.tail=FALSE)
  res <- list(vcov=sandwich, pcse=pcse, b=b, tstats=tstats, df=df, pval=pval, pairwise=pairwise,
              nobs=nobs, nmiss=(n.units*n.periods)-nobs, call=match.call())
  class(res) <- "pcse"
  res
}