R:如何对一列中的项目进行两两组合,并将对应的值相加
标签:R
我有一个叫做mymat的矩阵。我想创建另一个矩阵,将mymat中的所有项目成对组合,并将它们的值相加,得到类似下面的结果。
# Example input: a 5 x 2 character matrix with columns "samples" and "value".
# A matrix holds a single type, so the measurements are stored as strings.
mymat <- cbind(
  samples = c(
    "AOGC-03-0122", "AOGC-05-0009", "AOGC-08-0006", "AOGC-08-0032",
    "AOGC-08-0054"
  ),
  value = c(
    "0.000971685122254438", "0.00114138129544444", "0.000779586347096811",
    "0.00132807674454652", "0.000867219894408284"
  )
)
我们可以使用 `expand.grid` 和 `merge`:
# Every ordered pair of sample names (columns Var1 and Var2).
d1 <- expand.grid(list(mymat[, "samples"], mymat[, "samples"]))

# Lookup table mapping each sample name to its numeric value.
d2 <- data.frame(
  samples = mymat[, "samples"],
  value = as.numeric(mymat[, "value"]),
  stringsAsFactors = FALSE
)

# Attach the value of Var1 first, then the value of Var2. The second merge
# sees two "value" columns and names them value.x (Var1) and value.y (Var2).
d3 <- merge(
  x = merge(x = d1, y = d2, by.x = "Var1", by.y = "samples", all.x = TRUE),
  y = d2,
  by.x = "Var2",
  by.y = "samples"
)

# After the second merge the key column Var2 comes first, so labels read
# "Var2+Var1".
res <- data.frame(
  combination = paste(d3[[1L]], d3[[2L]], sep = "+"),
  total.value = d3[["value.x"]] + d3[["value.y"]]
)
head(res, n = 3L)
# combination total.value
#1 AOGC-03-0122+AOGC-03-0122 0.001943370
#2 AOGC-03-0122+AOGC-05-0009 0.002113066
#3 AOGC-03-0122+AOGC-08-0006 0.001751271
d1
- 矩阵是同质(homogeneous)数据对象。它基本上是一个带有维度属性和 `matrix` 类的原子向量(忽略列表矩阵的情况)。在单个矩阵中不能同时存放字符串和数字。如果要存储具有异构列类型的表格数据,则应使用 data.frame。显然,`samples` 和 `value` 列的适当类型分别是字符串和数字。因此,您的输入矩阵实际上应该是 data.frame,而您的输出也应该是 data.frame,因为它只是对输入记录进行排列组合。
- 您不需要在这里调用 `merge()`,当然更不需要调用两次;向量化索引就可以完成这项工作。使用 `merge()` 会导致排列顺序取决于 `samples` 值的字典顺序,而不是它们在输入中出现的顺序,这可能并不是我们想要的。
# Every ordered pair of sample names (columns Var1 and Var2).
d1 <- expand.grid(list(mymat[, "samples"], mymat[, "samples"]))

# Lookup table mapping each sample name to its numeric value.
d2 <- data.frame(
  samples = mymat[, "samples"],
  value = as.numeric(mymat[, "value"]),
  stringsAsFactors = FALSE
)

# Attach the value of Var1 first, then the value of Var2. The second merge
# sees two "value" columns and names them value.x (Var1) and value.y (Var2).
d3 <- merge(
  x = merge(x = d1, y = d2, by.x = "Var1", by.y = "samples", all.x = TRUE),
  y = d2,
  by.x = "Var2",
  by.y = "samples"
)

# After the second merge the key column Var2 comes first, so labels read
# "Var2+Var1".
res <- data.frame(
  combination = paste(d3[[1L]], d3[[2L]], sep = "+"),
  total.value = d3[["value.x"]] + d3[["value.y"]]
)
head(res, n = 3L)
# combination total.value
#1 AOGC-03-0122+AOGC-03-0122 0.001943370
#2 AOGC-03-0122+AOGC-05-0009 0.002113066
#3 AOGC-03-0122+AOGC-08-0006 0.001751271
# Numeric copy of the "value" column (the matrix itself stores strings).
values <- as.double(mymat[, "value"])

# Build all ordered row-index pairs directly. `slow` changes once per block
# of n rows and `fast` cycles within each block, which reproduces the row
# order of expand.grid() (Var2 = slow, Var1 = fast). local() keeps the
# helper variables out of the workspace; the data frame is still printed.
local({
  n <- nrow(mymat)
  slow <- rep(seq_len(n), each = n)
  fast <- rep(seq_len(n), times = n)
  data.frame(
    combination = paste(mymat[slow, "samples"], mymat[fast, "samples"],
                        sep = "+"),
    total.value = values[slow] + values[fast]
  )
})
## combination total.value
## 1 AOGC-03-0122+AOGC-03-0122 0.001943370
## 2 AOGC-03-0122+AOGC-05-0009 0.002113066
## 3 AOGC-03-0122+AOGC-08-0006 0.001751271
## 4 AOGC-03-0122+AOGC-08-0032 0.002299762
## 5 AOGC-03-0122+AOGC-08-0054 0.001838905
## 6 AOGC-05-0009+AOGC-03-0122 0.002113066
## 7 AOGC-05-0009+AOGC-05-0009 0.002282763
## 8 AOGC-05-0009+AOGC-08-0006 0.001920968
## 9 AOGC-05-0009+AOGC-08-0032 0.002469458
## 10 AOGC-05-0009+AOGC-08-0054 0.002008601
## 11 AOGC-08-0006+AOGC-03-0122 0.001751271
## 12 AOGC-08-0006+AOGC-05-0009 0.001920968
## 13 AOGC-08-0006+AOGC-08-0006 0.001559173
## 14 AOGC-08-0006+AOGC-08-0032 0.002107663
## 15 AOGC-08-0006+AOGC-08-0054 0.001646806
## 16 AOGC-08-0032+AOGC-03-0122 0.002299762
## 17 AOGC-08-0032+AOGC-05-0009 0.002469458
## 18 AOGC-08-0032+AOGC-08-0006 0.002107663
## 19 AOGC-08-0032+AOGC-08-0032 0.002656153
## 20 AOGC-08-0032+AOGC-08-0054 0.002195297
## 21 AOGC-08-0054+AOGC-03-0122 0.001838905
## 22 AOGC-08-0054+AOGC-05-0009 0.002008601
## 23 AOGC-08-0054+AOGC-08-0006 0.001646806
## 24 AOGC-08-0054+AOGC-08-0032 0.002195297
## 25 AOGC-08-0054+AOGC-08-0054 0.001734440
# Pairwise sample combinations via vectorised indexing.
#
# mymat: character matrix with columns "samples" and "value".
# Returns a data frame with one row per ordered pair of rows of `mymat`:
#   combination - "<sample i>+<sample j>"
#   total.value - value i + value j
# Rows follow input order, with the first sample of the pair varying slowest.
bgoldst <- function(mymat) {
  values <- as.double(mymat[, "value"])
  n <- nrow(mymat)
  # `slow` changes once per block of n rows; `fast` cycles within each block.
  slow <- rep(seq_len(n), each = n)
  fast <- rep(seq_len(n), times = n)
  data.frame(
    combination = paste(mymat[slow, "samples"], mymat[fast, "samples"],
                        sep = "+"),
    total.value = values[slow] + values[fast]
  )
}
# Pairwise sample combinations via expand.grid() and two merge() calls.
#
# mymat: character matrix whose first column is named "samples" and whose
#   second column holds the values as strings.
# Returns a data frame with one row per ordered pair of samples:
#   combination - "<Var2 sample>+<Var1 sample>"
#   total.value - sum of the two samples' values
# merge() sorts on its key columns, so row order follows the sorted sample
# names rather than their order in `mymat`.
akrun <- function(mymat) {
  # Every ordered pair of sample names (columns Var1 and Var2).
  d1 <- expand.grid(rep(list(mymat[, "samples"]), 2))
  # Lookup table mapping each sample name to its numeric value.
  d2 <- data.frame(
    samples = mymat[, 1],
    value = as.numeric(mymat[, 2]),
    stringsAsFactors = FALSE
  )
  # Attach the value for Var1, then the value for Var2. The result has
  # columns Var2, Var1, value.x, value.y.
  d3 <- merge(
    merge(d1, d2, by.x = "Var1", by.y = "samples", all.x = TRUE),
    d2,
    by.x = "Var2", by.y = "samples"
  )
  # Return the data frame as the last expression so the result is visible;
  # ending on an assignment would return it invisibly.
  data.frame(
    combination = do.call(paste, c(d3[1:2], sep = "+")),
    total.value = d3[, 3] + d3[, 4]
  )
}
# Sanity check on the 5-row example: both implementations must agree exactly.
identical(bgoldst(mymat), akrun(mymat))
## [1] TRUE
# Time both implementations on the small input.
library(microbenchmark)
microbenchmark(bgoldst(mymat), akrun(mymat))
## Unit: microseconds
## expr min lq mean median uq max neval
## bgoldst(mymat) 390.875 412.685 444.4554 433.8535 457.589 662.434 100
## akrun(mymat) 1603.697 1658.009 1789.0585 1692.0075 1824.793 3227.921 100
# Larger input: N synthetic samples with random values (stored as strings,
# because c() coerces the numbers to character alongside the names).
N <- 1e3
mymat <- matrix(
  c(sprintf("sample_%d", seq_len(N)), runif(N)),
  ncol = 2L,
  dimnames = list(NULL, c("samples", "value"))
)
x <- bgoldst(mymat)
y <- akrun(mymat)
# The two results can differ in row order, factor level order and row names,
# so bring both into the same canonical form before comparing: sort by
# combination, store combination as character, renumber the rows.
do.call(identical, lapply(list(x, y), function(df) {
  df <- transform(
    df[order(df$combination), ],
    combination = as.character(combination)
  )
  structure(df, row.names = seq_len(nrow(df)))
}))
## [1] TRUE
# Only three repetitions: each call takes seconds at this size.
microbenchmark(bgoldst(mymat), akrun(mymat), times = 3L)
## Unit: seconds
## expr min lq mean median uq max neval
## bgoldst(mymat) 8.103589 8.328722 8.418285 8.553854 8.575633 8.597411 3
## akrun(mymat) 30.777301 31.152458 31.348615 31.527615 31.634272 31.740929 3