对不平衡面板数据在R
我有一个不平衡的面板数据集。以下数据可用于说明:对不平衡面板数据在R,r,plyr,reshape,R,Plyr,Reshape,我有一个不平衡的面板数据集。以下数据可用于说明: Id <- c(rep(1:4,3),rep(5,2)) Id <- Id[order(Id)] Year <- c(rep(2000:2002,4),c(2000,2002)) z1 <- rnorm(14) z2 <- rnorm(14) z3 <- rnorm(14) z4 <- rnorm(14) CORR <- rbind(c(1,0.6,0.5,0.2),c(0.6,1,0.7,0.
Id <- c(rep(1:4,3),rep(5,2))
Id <- Id[order(Id)]
Year <- c(rep(2000:2002,4),c(2000,2002))
z1 <- rnorm(14)
z2 <- rnorm(14)
z3 <- rnorm(14)
z4 <- rnorm(14)
CORR <- rbind(c(1,0.6,0.5,0.2),c(0.6,1,0.7,0.3),c(0.5,0.7,1,0.4),c(0.2,0.3,0.4,1))
CholCORR <- chol(CORR)
DataTest <- as.data.frame(cbind(z1,z2,z3,z4)%*%CholCORR)
names(DataTest)<-c("y","x1","x2","x3")
DataTest <- cbind(Id, Year, DataTest)
DataTest
Id Year y x1 x2 x3
1 1 2000 -0.7463355 -1.1920928 -1.2358912 -0.2527170
2 1 2001 -0.3475260 -0.1729497 -0.6252036 0.4366446
3 1 2002 0.6815678 0.1775869 0.2860103 0.8479373
4 2 2000 0.7927199 1.2830142 1.7018747 2.4475020
5 2 2001 0.5157535 0.4365100 1.1512340 1.0882581
6 2 2002 -1.2806121 0.3392759 0.5781835 0.2829615
7 3 2000 1.8741119 0.3904028 0.7984584 -1.7015025
8 3 2001 -0.4799960 0.6397883 0.4719150 0.8601328
9 3 2002 -2.2031991 0.3789198 -0.1143526 0.6771387
10 4 2000 -1.0634857 -0.6171644 -0.5732400 0.7718195
11 4 2001 0.3266059 -0.6882776 -0.2013544 -0.7242561
12 4 2002 0.2921222 -0.5149802 0.6180026 -0.4417939
13 5 2000 -2.2447755 -1.3328675 -1.6370130 0.4537451
14 5 2002 -0.9959034 -0.5958327 -0.3408927 0.2162799
我曾想过使用plm
、restrape
或plyr
软件包,但当我想在我的面板上使用lag
和diff
时,我找不到有效的方法
谢谢
我找到了一个非常有效的解决方案。它使用上面
statquant
建议的split
和lappy
,但也使用plm
包来计算不平衡面板数据集的差异
library(plm)
Id <- c(rep(1:4,3),rep(5,2))
Id <- Id[order(Id)]
Year <- c(rep(2000:2002,4),c(2000,2002))
z1 <- rnorm(14)
z2 <- rnorm(14)
z3 <- rnorm(14)
z4 <- rnorm(14)
CORR <- rbind(c(1,0.6,0.5,0.2),c(0.6,1,0.7,0.3),c(0.5,0.7,1,0.4),c(0.2,0.3,0.4,1))
CholCORR <- chol(CORR)
DataTest <- as.data.frame(cbind(z1,z2,z3,z4)%*%CholCORR)
names(DataTest)<-c("y","x1","x2","x3")
DataTest <- cbind(Id, Year, DataTest)
DataTest
Id Year y x1 x2 x3
1-2000 1 2000 -0.3837477 0.3065426 1.0646871 0.23757223
1-2001 1 2001 1.2804333 -0.2015468 -0.2769726 -0.63032551
1-2002 1 2002 -0.3242049 -1.3518821 -0.4720256 0.08556161
2-2000 2 2000 -0.3298273 -0.4354473 0.3588493 0.80121465
2-2001 2 2001 -0.5556866 1.1987959 1.6196555 1.28593473
2-2002 2 2002 0.2861269 0.2921481 1.1051309 1.66204274
3-2000 3 2000 0.9224208 0.4255198 0.8947040 1.11784735
3-2001 3 2001 -1.1052755 -1.7078627 -1.9503432 -2.82343057
3-2002 3 2002 -2.3020849 -0.8078460 -0.2692165 0.64940791
4-2000 4 2000 2.5565427 1.7034472 2.2688046 1.71329610
4-2001 4 2001 0.7015629 0.8518173 -0.2490498 0.70293713
4-2002 4 2002 0.9454791 0.8830710 0.7355299 0.51836849
5-2000 5 2000 0.4867604 0.3301825 0.7929939 0.55197991
5-2002 5 2002 -0.5859263 -0.1164737 0.5831942 -0.03307241
pDataTest <- pdata.frame(DataTest, index = c('Id','Year'))
pDataTest$yDiff <- diff(pDataTest$y)
pDataTest
Id Year y x1 x2 x3 yDiff
1-2000 1 2000 -0.3837477 0.3065426 1.0646871 0.23757223 NA
1-2001 1 2001 1.2804333 -0.2015468 -0.2769726 -0.63032551 1.6641810
1-2002 1 2002 -0.3242049 -1.3518821 -0.4720256 0.08556161 -1.6046382
2-2000 2 2000 -0.3298273 -0.4354473 0.3588493 0.80121465 NA
2-2001 2 2001 -0.5556866 1.1987959 1.6196555 1.28593473 -0.2258592
2-2002 2 2002 0.2861269 0.2921481 1.1051309 1.66204274 0.8418135
3-2000 3 2000 0.9224208 0.4255198 0.8947040 1.11784735 NA
3-2001 3 2001 -1.1052755 -1.7078627 -1.9503432 -2.82343057 -2.0276963
3-2002 3 2002 -2.3020849 -0.8078460 -0.2692165 0.64940791 -1.1968093
4-2000 4 2000 2.5565427 1.7034472 2.2688046 1.71329610 NA
4-2001 4 2001 0.7015629 0.8518173 -0.2490498 0.70293713 -1.8549798
4-2002 4 2002 0.9454791 0.8830710 0.7355299 0.51836849 0.2439162
5-2000 5 2000 0.4867604 0.3301825 0.7929939 0.55197991 NA
5-2002 5 2002 -0.5859263 -0.1164737 0.5831942 -0.03307241 NA
Temp <-split(pDataTest,'Year')
MyFormula <- formula(yDiff ~ lag(x1) + x2 + x3)
Fit <- lapply(Temp[-1], lm, formula=MyFormula)
Fit
$`2001`
Call:
FUN(formula = ..1, data = X[[1L]])
Coefficients:
(Intercept) lag(x1) x2 x3
15.27 -52.88 -1.53 39.17
$`2002`
Call:
FUN(formula = ..1, data = X[[2L]])
Coefficients:
(Intercept) lag(x1) x2 x3
-0.8738 0.1038 1.2805 0.1626
库(plm)
Id您为什么不使用split
来剪切数据。按年
帧
,然后在每个片段上使用lappy
?@statquant:这可能是一个解决方案。你能举个例子吗?我有另一个肮脏的解决方案,我使用plm
包创建一个pdata.frame
,然后使用diff
和lag
正确地延迟和差异我的面板。然后,我将结果与Id
、Year
、x2
和x3
合并,然后使用lme4
包中的lmlist
按Year
进行线性回归。不过,滞后/差异和与其他数据合并的第一步似乎没有那么有效。@AnandaMahto:我还没有找到答案。不确定如何更新问题。它有代码来生成数据,并且它说明了我想要在横截面中执行的回归公式,对于任何给定年份可用的Id
。请让我确切地知道您认为我应该说明什么以便更容易回答。现在我找到了一个答案:)
library(plm)
Id <- c(rep(1:4,3),rep(5,2))
Id <- Id[order(Id)]
Year <- c(rep(2000:2002,4),c(2000,2002))
z1 <- rnorm(14)
z2 <- rnorm(14)
z3 <- rnorm(14)
z4 <- rnorm(14)
CORR <- rbind(c(1,0.6,0.5,0.2),c(0.6,1,0.7,0.3),c(0.5,0.7,1,0.4),c(0.2,0.3,0.4,1))
CholCORR <- chol(CORR)
DataTest <- as.data.frame(cbind(z1,z2,z3,z4)%*%CholCORR)
names(DataTest)<-c("y","x1","x2","x3")
DataTest <- cbind(Id, Year, DataTest)
DataTest
Id Year y x1 x2 x3
1-2000 1 2000 -0.3837477 0.3065426 1.0646871 0.23757223
1-2001 1 2001 1.2804333 -0.2015468 -0.2769726 -0.63032551
1-2002 1 2002 -0.3242049 -1.3518821 -0.4720256 0.08556161
2-2000 2 2000 -0.3298273 -0.4354473 0.3588493 0.80121465
2-2001 2 2001 -0.5556866 1.1987959 1.6196555 1.28593473
2-2002 2 2002 0.2861269 0.2921481 1.1051309 1.66204274
3-2000 3 2000 0.9224208 0.4255198 0.8947040 1.11784735
3-2001 3 2001 -1.1052755 -1.7078627 -1.9503432 -2.82343057
3-2002 3 2002 -2.3020849 -0.8078460 -0.2692165 0.64940791
4-2000 4 2000 2.5565427 1.7034472 2.2688046 1.71329610
4-2001 4 2001 0.7015629 0.8518173 -0.2490498 0.70293713
4-2002 4 2002 0.9454791 0.8830710 0.7355299 0.51836849
5-2000 5 2000 0.4867604 0.3301825 0.7929939 0.55197991
5-2002 5 2002 -0.5859263 -0.1164737 0.5831942 -0.03307241
pDataTest <- pdata.frame(DataTest, index = c('Id','Year'))
pDataTest$yDiff <- diff(pDataTest$y)
pDataTest
Id Year y x1 x2 x3 yDiff
1-2000 1 2000 -0.3837477 0.3065426 1.0646871 0.23757223 NA
1-2001 1 2001 1.2804333 -0.2015468 -0.2769726 -0.63032551 1.6641810
1-2002 1 2002 -0.3242049 -1.3518821 -0.4720256 0.08556161 -1.6046382
2-2000 2 2000 -0.3298273 -0.4354473 0.3588493 0.80121465 NA
2-2001 2 2001 -0.5556866 1.1987959 1.6196555 1.28593473 -0.2258592
2-2002 2 2002 0.2861269 0.2921481 1.1051309 1.66204274 0.8418135
3-2000 3 2000 0.9224208 0.4255198 0.8947040 1.11784735 NA
3-2001 3 2001 -1.1052755 -1.7078627 -1.9503432 -2.82343057 -2.0276963
3-2002 3 2002 -2.3020849 -0.8078460 -0.2692165 0.64940791 -1.1968093
4-2000 4 2000 2.5565427 1.7034472 2.2688046 1.71329610 NA
4-2001 4 2001 0.7015629 0.8518173 -0.2490498 0.70293713 -1.8549798
4-2002 4 2002 0.9454791 0.8830710 0.7355299 0.51836849 0.2439162
5-2000 5 2000 0.4867604 0.3301825 0.7929939 0.55197991 NA
5-2002 5 2002 -0.5859263 -0.1164737 0.5831942 -0.03307241 NA
Temp <-split(pDataTest,'Year')
MyFormula <- formula(yDiff ~ lag(x1) + x2 + x3)
Fit <- lapply(Temp[-1], lm, formula=MyFormula)
Fit
$`2001`
Call:
FUN(formula = ..1, data = X[[1L]])
Coefficients:
(Intercept) lag(x1) x2 x3
15.27 -52.88 -1.53 39.17
$`2002`
Call:
FUN(formula = ..1, data = X[[2L]])
Coefficients:
(Intercept) lag(x1) x2 x3
-0.8738 0.1038 1.2805 0.1626