Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/codeigniter/3.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 通过重叠行名称和平均值来合并/组合两个数据集的最有效方法_R - Fatal编程技术网

R 通过重叠行名称和平均值来合并/组合两个数据集的最有效方法

R 通过重叠行名称和平均值来合并/组合两个数据集的最有效方法,r,R,我想找到最有效的方法来组合两个数据帧,并对具有不同row.name的列中的值求平均值。因此,我想从两个数据中提取jsut重叠的row.names,并将它们合并为一个。列中的值应按平均值计算。示例数据包括: mtcars <- structure(list(mpg = c(21, 21, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4, 30.4

我想找到最有效的方法来组合两个数据帧,并对具有不同row.name的列中的值求平均值。因此,我想从两个数据中只提取重叠的row.names,并将它们合并为一个。列中的值应按平均值计算。示例数据包括:

# Example data for the question: a data frame named `mtcars` with 32 rows
# (car models as row names) and 11 numeric columns, written out in full as a
# structure() literal.
# NOTE: assigning to `mtcars` in the global environment masks R's built-in
# dataset of the same name.
mtcars <- 
structure(list(mpg = c(21, 21, 22.8, 21.4, 18.7, 18.1, 14.3, 
24.4, 22.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4, 
30.4, 33.9, 21.5, 15.5, 15.2, 13.3, 19.2, 27.3, 26, 30.4, 15.8, 
19.7, 15, 21.4), cyl = c(6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 
8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, 6, 8, 4), 
    disp = c(160, 160, 108, 258, 360, 225, 360, 146.7, 140.8, 
    167.6, 167.6, 275.8, 275.8, 275.8, 472, 460, 440, 78.7, 75.7, 
    71.1, 120.1, 318, 304, 350, 400, 79, 120.3, 95.1, 351, 145, 
    301, 121), hp = c(110, 110, 93, 110, 175, 105, 245, 62, 95, 
    123, 123, 180, 180, 180, 205, 215, 230, 66, 52, 65, 97, 150, 
    150, 245, 175, 66, 91, 113, 264, 175, 335, 109), drat = c(3.9, 
    3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92, 
    3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93, 4.22, 3.7, 2.76, 
    3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22, 3.62, 3.54, 4.11
    ), wt = c(2.62, 2.875, 2.32, 3.215, 3.44, 3.46, 3.57, 3.19, 
    3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25, 5.424, 5.345, 2.2, 
    1.615, 1.835, 2.465, 3.52, 3.435, 3.84, 3.845, 1.935, 2.14, 
    1.513, 3.17, 2.77, 3.57, 2.78), qsec = c(16.46, 17.02, 18.61, 
    19.44, 17.02, 20.22, 15.84, 20, 22.9, 18.3, 18.9, 17.4, 17.6, 
    18, 17.98, 17.82, 17.42, 19.47, 18.52, 19.9, 20.01, 16.87, 
    17.3, 15.41, 17.05, 18.9, 16.7, 16.9, 14.5, 15.5, 14.6, 18.6
    ), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 
    0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1), am = c(1, 
    1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 
    0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1), gear = c(4, 4, 4, 3, 
    3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3, 3, 
    3, 3, 4, 5, 5, 5, 5, 5, 4), carb = c(4, 4, 1, 1, 2, 1, 4, 
    2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2, 2, 4, 2, 1, 
    2, 2, 4, 6, 8, 2)), .Names = c("mpg", "cyl", "disp", "hp", 
"drat", "wt", "qsec", "vs", "am", "gear", "carb"), row.names = c("Mazda RX4", 
"Mazda RX4 Wag", "Datsun 710", "Hornet 4 Drive", "Hornet Sportabout", 
"Valiant", "Duster 360", "Merc 240D", "Merc 230", "Merc 280", 
"Merc 280C", "Merc 450SE", "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood", 
"Lincoln Continental", "Chrysler Imperial", "Fiat 128", "Honda Civic", 
"Toyota Corolla", "Toyota Corona", "Dodge Challenger", "AMC Javelin", 
"Camaro Z28", "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2", 
"Lotus Europa", "Ford Pantera L", "Ferrari Dino", "Maserati Bora", 
"Volvo 142E"), class = "data.frame")

mtcars您似乎在通过行名称查找两个数据集之间的“内部联接”。我建议尝试使用
data.table
package进行合并以及以后的熔化和dcasting操作

首先,我将
mtcars
重命名为
mtcars2
,因为
mtcars
是 R 自带的内置数据集,我不想覆盖它;而且
setDT
实际上不能覆盖存储的数据集,所以假设在现实生活中,您的数据被称为
mtcars2

# data.table supplies copy() as well as the setDT/setkey/merge/melt/dcast
# calls used by this answer.
library(data.table)
# Work on a copy named mtcars2 so that the object `mtcars` itself is not
# modified by the by-reference operations that follow.
mtcars2 <- copy(mtcars)
现在,我们将在
rn
(键)上执行内部联接,同时使用
suffixes = NULL

# Join the two tables; suffixes = NULL is passed so that the overlapping value
# columns are not given the default ".x"/".y" suffixes.
Res <- merge(x = mtcars2, y = mtcars11, suffixes = NULL)

假设
d1
d2
是您的data.frames,下面是我的方法。不过,您必须使用
mget
才能工作

# Average the value columns of d1 and d2 over the row names they share.
# library() is used instead of require() so a missing package stops the script
# here rather than producing a confusing error further down.
library(data.table) # v1.9.5

# Convert both data.frames to data.tables by reference, moving the row names
# into a column called `rn`, and key on it so that d1[d2] joins on row name.
setkey(setDT(d1, keep.rownames = TRUE), rn)
setkey(setDT(d2, keep.rownames = TRUE), rn)

# Value columns of d1 (everything except the leading `rn`). Inside the join,
# the same-named columns coming from d2 are referred to with an "i." prefix.
# Assumes d2 carries the same column names as d1.
xcols <- names(d1)[-1L]
icols <- paste0("i.", xcols)

# Mean of the paired values, ignoring NAs.
foo <- function(a, b) mean(c(a, b), na.rm = TRUE)

# nomatch = 0L keeps only row names present in both tables (inner join);
# by = .EACHI evaluates j once per row of d2, so every mean is taken over a
# single matching pair. mget() looks the columns up by name.
d1[d2, Map(foo, mget(xcols), mget(icols)), by = .EACHI, nomatch = 0L]
require(data.table)#v1.9.5
setkey(setDT(d1,keep.rownames=TRUE),rn)
setkey(setDT(d2,keep.rownames=TRUE),rn)
xcols=names(d1)[-1L]
icols=paste("i.",xcols,sep="")

foo您期望的输出将是什么样子?此外,无需
dput(mtcars)
-我们已经有了它。最后,您可能可以将此问题简化为两个更小的数据集,行和列都更少。而且,
data_mt
data_mt11
是相同的,这可能不是最好的示例。
(mtcars[vec_inter,]+mtcars11[vec_inter,])/2
这种情况下的输出应该类似于这些数据之一。这只是一个例子。我可以很容易地更改列中的值,使其具有两个不同的数据集,但我只是想向您展示我想要实现的。。
# Convert each table to a data.table in place (row names become column `rn`)
# and key it on `rn`.
setDT(mtcars2, keep.rownames = TRUE)
setkey(mtcars2, rn)
setDT(mtcars11, keep.rownames = TRUE)
setkey(mtcars11, rn)

# Join on the key without adding suffixes to the overlapping column names.
Res <- merge(x = mtcars2, y = mtcars11, suffixes = NULL)

# Reshape to long form with `rn` as the id column, then cast back to one row
# per `rn`, aggregating each variable with mean.default.
dcast(
  melt(Res, id.vars = "rn"),
  formula = rn ~ variable,
  fun.aggregate = mean.default
)
#                      rn  mpg cyl  disp  hp drat    wt  qsec vs am gear carb
#  1:         AMC Javelin 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
#  2:  Cadillac Fleetwood 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
#  3:          Camaro Z28 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
#  4:   Chrysler Imperial 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
#  5:          Datsun 710 22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
#  6:    Dodge Challenger 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
#  7:          Duster 360 14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
#  8:        Ferrari Dino 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
#  9:            Fiat 128 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
# 10:         Honda Civic 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
# 11:      Hornet 4 Drive 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
# 12:   Hornet Sportabout 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
# 13: Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
# 14:        Lotus Europa 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
# 15:           Mazda RX4 21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
# 16:            Merc 230 22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
# 17:           Merc 240D 24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
# 18:            Merc 280 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
# 19:           Merc 280C 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
# 20:          Merc 450SE 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
# 21:         Merc 450SLC 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
# 22:       Porsche 914-2 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
# 23:      Toyota Corolla 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
# 24:       Toyota Corona 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
# 25:             Valiant 18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
# Average the value columns of d1 and d2 over the row names they share.
# library() is used instead of require() so a missing package stops the script
# here rather than producing a confusing error further down.
library(data.table) # v1.9.5

# Convert both data.frames to data.tables by reference, moving the row names
# into a column called `rn`, and key on it so that d1[d2] joins on row name.
setkey(setDT(d1, keep.rownames = TRUE), rn)
setkey(setDT(d2, keep.rownames = TRUE), rn)

# Value columns of d1 (everything except the leading `rn`). Inside the join,
# the same-named columns coming from d2 are referred to with an "i." prefix.
# Assumes d2 carries the same column names as d1.
xcols <- names(d1)[-1L]
icols <- paste0("i.", xcols)

# Mean of the paired values, ignoring NAs.
foo <- function(a, b) mean(c(a, b), na.rm = TRUE)

# nomatch = 0L keeps only row names present in both tables (inner join);
# by = .EACHI evaluates j once per row of d2, so every mean is taken over a
# single matching pair. mget() looks the columns up by name.
d1[d2, Map(foo, mget(xcols), mget(icols)), by = .EACHI, nomatch = 0L]