R 面板回归报错
我试着做面板回归,把因变量(不同公司的股票收益率)对 5 个自变量(因子)进行回归。(标签:r、regression、tapply)以下是自变量数据框的可复制示例:
# Reproducible sample of the regressor table: the dput() call followed by its
# printed output for the first 10 rows of `factors_1`.
# Columns: Date (integers 200002..200011 -- looks like a yyyymm code, confirm),
# Mkt.RF, SMB, HML, RMW, CMA and RF (all numeric).
dput(factors_1[1:10,])
structure(list(Date = 200002:200011, Mkt.RF = c(5.94, 0.66, -5.58,
-0.09, 0.67, -1.58, -1.61, -4.99, -2.71, -4.55), SMB = c(0.84,
-5.15, -4.62, 0.16, 0.33, -0.69, 0.68, 2.35, -6.1, -0.78), HML = c(-9.45,
3.33, 5.93, 6.17, 3.14, 3.31, -0.5, 2.64, 7.54, 11.15), RMW = c(3.55,
-2.59, -1.53, -3.38, -3.45, -0.12, -1.27, 1.63, 2.7, 0.79), CMA = c(-7.33,
4.96, 1.32, 4.94, 1.22, -0.12, 0.64, 2.16, 4.1, 8.75), RF = c(0.43,
0.47, 0.46, 0.5, 0.4, 0.48, 0.5, 0.51, 0.56, 0.51)), row.names = c(NA,
10L), class = "data.frame")
这里是股票收益表
# Reproducible sample of the stock-return table: the dput() call followed by
# its printed output for the first 10 rows and 10 columns of `xx`.
# Columns: Date (class "Date") plus one return series per company, X1..X9.
# The eighth series is named X8 here; the pasted output had "X9" twice, which
# leaves the data frame with duplicated column names.
dput(xx[1:10, 1:10])
structure(list(Date = structure(c(10990.9954886386, 11019.9953776753,
11050.9954014418, 11080.9952984982, 11111.9953776753, 11141.9951640545,
11172.995061378, 11203.9951324494, 11233.9950455918, 11264.9949982497
), class = "Date"), X1 = c(0.0954887438827963,
-0.0596463008222008, 0.071350885788402, 0.0305926490738153, 0.0408331711459304,
-0.0211402933162625, -0.00493862203119688, 0.006182173191563,
0.0032423131269943, 0.0193884936176278), X2 = c(-0.123462974283698,
0.230503533400868, -0.0272942506612435, 0.0483790669291113, -0.0595278152717571,
0.12087834022411, -0.032011380068422, -0.0813892896957779, 0.0138779835292666,
0.0726322608057619), X3 = c(-0.0682052985267971, 0.172249290323711,
-0.154888201350603, 0.0395159403332963, -0.0143942598523314,
-0.0607566985291722, -0.0310708779173386, -0.0746345858888015,
-0.151109426840925, 0.0118888362760825), X4 = c(-0.114511361380472,
0.00998441685033158, 0.192522150537581, -0.0158023343537101,
0.0374730915541921, 0.0777493327863055, -0.0016218724457906,
-0.0635452365157563, 0.0565030106039299, 0.115759209185826),
X5 = c(0.00389199996406542, -0.0212889913893688,
0.164892967212694, -0.00832469019706505, -0.00462232472270219,
-0.0070177636719938, 0.00453659662769512, 0.0528941822866427,
-0.00836737746775751, -0.0050017502848112), X6 = c(-0.10351479457366,
0.0237125822002096, 0.0101860439504515, 0.0111924296807739,
-0.0652473862813747, 2.11404059631271e-05, 0.0261396151198399,
-0.0356789492292369, -0.0706069184275196, -0.0656535040135704
), X7 = c(-0.0980023956049211, 0.102552120231041,
-0.0959174074104425, -0.0790740833989735, 0.118610740782993,
-0.100050822390369, -0.00333557692764708, -0.0368703292701125,
0.0628135821343774, 0.0471186471744018), X8 = c(-0.0304322345046196,
-0.0977595796246631, 0.138258584646108, 0.0344876873979214,
-0.000721154371596811, 0.0508635363751093, 0.0533435865577603,
-0.0506646520146184, 0.0497235991059199, 0.0284083879640369
), X9 = c(-0.159712703662352, -0.0234902492510041, 0.116858931667507,
0.00432376896685471, 0.114340108193219, 0.00235829911414087,
-0.0573195744121493, 0.095634961997471, -0.0871461890063988,
-0.0738243041819919)), row.names = c(NA, 10L), class = "data.frame")
我尝试的是:
# The asker's attempt, kept as posted because it is the call that fails.
# The left-hand side of the formula is a matrix holding every return column of
# `xx` at once rather than a single response column, and the regressors are
# written as factors_1$... even though data = factors_1 is also supplied.
# NOTE(review): plm() appears to select the estimator via `model`, not
# `method` -- confirm against the plm documentation.
p1_q1_l<-plm(as.matrix(data.frame(xx[, -1]))~factors_1$Mkt.RF+factors_1$SMB+factors_1$HML+factors_1$RMW+factors_1$CMA,data=factors_1, method="within")
我不明白发生了什么事。两个表都是观测数量相同的数据框。如何解决此问题?

错误很可能是因为您把一个矩阵用作因变量(Y),而这里需要的是一个向量。您需要长格式的数据,其中 Y 是单独的一列,ID 列和时间列用来区分不同的观测。我对这两个数据集能否匹配还有一些疑问,不过您可能希望把它们合并为一个数据集。请仔细检查如何合并原始数据,特别是 `Date`(日期)列的数据。
如果我对您的 `xx` 数据理解正确,`X*` 各列是不同的公司。现在,使用 `reshape` 将数据转换为长格式:
# Reshape the wide return table `xx` (one X* column per company) into long
# format, one row per company/date pair.
#   idvar   = "Date": keep the existing Date column as the row identifier.
#   timevar = "id":   name of the new column that receives the numeric suffix
#                     split off the X* column names (the company number);
#                     plm() is later given "id" as the individual index.
# With `timevar = 1`, reshape() writes that company number over column 1
# (Date) and fills the default `id` column with the row number, so companies
# and dates end up swapped in the long table.
xxx <- reshape(xx, idvar = "Date", timevar = "id", varying = 2:9,
               direction = "long", sep = "")
# Keep the date as a character key (presumably the format the factor table is
# merged on -- confirm against how factors_1x is built).
xxx$Date <- as.character(xxx$Date)
library(plm)
# Fixed-effects ("within") panel regression of the stacked returns X on the
# factor columns of `dat`. plm() chooses the estimator through `model`; it has
# no `method` argument, so `method = "within"` was silently absorbed by `...`
# and the fit only came out as "within" because that is the default.
# `index` names the individual (id) and time (Date) columns of `dat`.
p1_q1_l <- plm(X ~ Mkt.RF + SMB + HML + RMW + CMA + RF, model = "within",
               index = c("id", "Date"), data = dat)
# Model Formula: X ~ Mkt.RF + SMB + HML + RMW + CMA + RF
#
# Coefficients:
# Mkt.RF SMB HML RMW CMA RF
# 0.0042267 0.0054278 -0.0016806 0.0129446 0.0148160 -0.4194726
在这里合并两个数据集(注意:这一步必须在上面的 `plm()` 调用之前运行,因为 `plm()` 用到的 `dat` 是在这里创建的):
# Left-join the factor table onto the long return table. No `by` is given, so
# merge() matches on the column names the two data frames have in common;
# all.x = TRUE keeps every row of `xxx` even when no factor row matches.
dat <- merge(x = xxx, y = factors_1x, all.x = TRUE)
# Preview the first rows of the merged panel.
head(x = dat)
# Date X id Mkt.RF SMB HML RMW CMA RF
# 1 2000-02-03 0.09548874 1 5.94 0.84 -9.45 3.55 -7.33 0.43
# 2 2000-02-03 -0.05964630 2 5.94 0.84 -9.45 3.55 -7.33 0.43
# 3 2000-02-03 0.07135089 3 5.94 0.84 -9.45 3.55 -7.33 0.43
# 4 2000-02-03 0.03059265 4 5.94 0.84 -9.45 3.55 -7.33 0.43
# 5 2000-02-03 0.04083317 5 5.94 0.84 -9.45 3.55 -7.33 0.43
# 6 2000-02-03 -0.02114029 6 5.94 0.84 -9.45 3.55 -7.33 0.43
如果把数据框传给模型的 `data` 参数,公式中就应直接使用列名,例如 `lm(mpg ~ hp, mtcars)`。在公式里写取子集的表达式(例如 `factors_1$SMB`)可能会导致奇怪的结果。

谢谢。我可以问一下为什么它不适用于整个样本吗?我尝试了 xxx……

@Julia 我不知道。您可以参考这个非常活跃的问题,了解如何把数据从宽格式转换为长格式:
# Left-join the factor table onto the long return table. No `by` is given, so
# merge() matches on the column names the two data frames have in common;
# all.x = TRUE keeps every row of `xxx` even when no factor row matches.
dat <- merge(x = xxx, y = factors_1x, all.x = TRUE)
# Preview the first rows of the merged panel.
head(x = dat)
# Date X id Mkt.RF SMB HML RMW CMA RF
# 1 2000-02-03 0.09548874 1 5.94 0.84 -9.45 3.55 -7.33 0.43
# 2 2000-02-03 -0.05964630 2 5.94 0.84 -9.45 3.55 -7.33 0.43
# 3 2000-02-03 0.07135089 3 5.94 0.84 -9.45 3.55 -7.33 0.43
# 4 2000-02-03 0.03059265 4 5.94 0.84 -9.45 3.55 -7.33 0.43
# 5 2000-02-03 0.04083317 5 5.94 0.84 -9.45 3.55 -7.33 0.43
# 6 2000-02-03 -0.02114029 6 5.94 0.84 -9.45 3.55 -7.33 0.43
library(plm)
# Fixed-effects ("within") panel regression of the stacked returns X on the
# factor columns of `dat`. plm() chooses the estimator through `model`; it has
# no `method` argument, so `method = "within"` was silently absorbed by `...`
# and the fit only came out as "within" because that is the default.
# `index` names the individual (id) and time (Date) columns of `dat`.
p1_q1_l <- plm(X ~ Mkt.RF + SMB + HML + RMW + CMA + RF, model = "within",
               index = c("id", "Date"), data = dat)
# Model Formula: X ~ Mkt.RF + SMB + HML + RMW + CMA + RF
#
# Coefficients:
# Mkt.RF SMB HML RMW CMA RF
# 0.0042267 0.0054278 -0.0016806 0.0129446 0.0148160 -0.4194726