R:根据匹配的行和列名从关联矩阵中删除项
让我们看一看示例矩阵并计算相关性:R:根据匹配的行和列名从关联矩阵中删除项,r,matrix,correlation,R,Matrix,Correlation,让我们看一看示例矩阵并计算相关性: some.data <- data.frame( A1.1 = c(1,3,4,5,6), A1.2 = c(4,5,6,2,3), A1.3 = c(3,3,4,2,1), A2.1 = c(3,4,5,2,4), A2.2 = c(4,5,5,4,2), A2.3 = c(1,1,2,2,3), A3.1 = c(1,3,4,5,6), A3.2 = c(1,4,3,3,4), A3.3 = c(4,4,4,4,5
some.data <- data.frame(
A1.1 = c(1,3,4,5,6),
A1.2 = c(4,5,6,2,3),
A1.3 = c(3,3,4,2,1),
A2.1 = c(3,4,5,2,4),
A2.2 = c(4,5,5,4,2),
A2.3 = c(1,1,2,2,3),
A3.1 = c(1,3,4,5,6),
A3.2 = c(1,4,3,3,4),
A3.3 = c(4,4,4,4,5)
)
cor.mat <- cor(some.data)
在我的原始数据中,有些列是相关的,这里用前缀(A1、A2、A3)表示。由于我对这些不感兴趣,我想将具有相同前缀的相关性设置为零,如下所示: A1.1 A1.2 A1.3 A2.1 A2.2 A2.3 A3.1 A3.2 A3.3
A1.1 0 0 0 0.06839411 -0.5305954 0.9009862 1.00000000 0.7428336 0.6393620
A1.2 0 0 0 0.83205029 0.6454972 -0.3779645 -0.41099747 0.0000000 -0.3535534
A1.3 0 0 0 0.42307692 0.8951436 -0.6289709 -0.61554702 -0.3580574 -0.7844645
A2.1 0.06839411 0.8320503 0.4230769 0 0 0 0.06839411 0.3580574 0.1961161
A2.2 -0.53059545 0.6454972 0.8951436 0 0 0 -0.53059545 -0.1666667 -0.9128709
A2.3 0.90098616 -0.3779645 -0.6289709 0 0 0 0.90098616 0.4879500 0.8017837
A3.1 1.00000000 -0.4109975 -0.6155470 0.06839411 -0.5305954 0.9009862 0 0 0
A3.2 0.74283363 0.0000000 -0.3580574 0.35805744 -0.1666667 0.4879500 0 0 0
A3.3 0.63936201 -0.3535534 -0.7844645 0.19611614 -0.9128709 0.8017837 0 0 0
我可以使用for循环来实现这一点,但我认为这比那容易得多。一个选项是将数据从宽改为长,这样它将包含三列
cor.mat_long <- reshape2::melt(cor.mat)
cor.mat_long
# Var1 Var2 value
#1 A1.1 A1.1 1.00000000
#2 A1.2 A1.1 -0.41099747
#3 A1.3 A1.1 -0.61554702
#4 A2.1 A1.1 0.06839411
#5 A2.2 A1.1 -0.53059545
#6 A2.3 A1.1 0.90098616
#...
最后,重新调整为宽格式
cor.mat2 <- reshape2::dcast(cor.mat_long, Var1 ~ Var2)
cor.mat2
# Var1 A1.1 A1.2 A1.3 A2.1 A2.2 A2.3 A3.1 A3.2 A3.3
#1 A1.1 0.00000000 0.0000000 0.0000000 0.06839411 -0.5305954 0.9009862 1.00000000 0.7428336 0.6393620
#2 A1.2 0.00000000 0.0000000 0.0000000 0.83205029 0.6454972 -0.3779645 -0.41099747 0.0000000 -0.3535534
#3 A1.3 0.00000000 0.0000000 0.0000000 0.42307692 0.8951436 -0.6289709 -0.61554702 -0.3580574 -0.7844645
#4 A2.1 0.06839411 0.8320503 0.4230769 0.00000000 0.0000000 0.0000000 0.06839411 0.3580574 0.1961161
#5 A2.2 -0.53059545 0.6454972 0.8951436 0.00000000 0.0000000 0.0000000 -0.53059545 -0.1666667 -0.9128709
#6 A2.3 0.90098616 -0.3779645 -0.6289709 0.00000000 0.0000000 0.0000000 0.90098616 0.4879500 0.8017837
#7 A3.1 1.00000000 -0.4109975 -0.6155470 0.06839411 -0.5305954 0.9009862 0.00000000 0.0000000 0.0000000
#8 A3.2 0.74283363 0.0000000 -0.3580574 0.35805744 -0.1666667 0.4879500 0.00000000 0.0000000 0.0000000
#9 A3.3 0.63936201 -0.3535534 -0.7844645 0.19611614 -0.9128709 0.8017837 0.00000000 0.0000000 0.0000000
不知道这是否比循环容易得多。一个选项是将数据从宽改为长,使其包含三列
cor.mat_long <- reshape2::melt(cor.mat)
cor.mat_long
# Var1 Var2 value
#1 A1.1 A1.1 1.00000000
#2 A1.2 A1.1 -0.41099747
#3 A1.3 A1.1 -0.61554702
#4 A2.1 A1.1 0.06839411
#5 A2.2 A1.1 -0.53059545
#6 A2.3 A1.1 0.90098616
#...
最后,重新调整为宽格式
cor.mat2 <- reshape2::dcast(cor.mat_long, Var1 ~ Var2)
cor.mat2
# Var1 A1.1 A1.2 A1.3 A2.1 A2.2 A2.3 A3.1 A3.2 A3.3
#1 A1.1 0.00000000 0.0000000 0.0000000 0.06839411 -0.5305954 0.9009862 1.00000000 0.7428336 0.6393620
#2 A1.2 0.00000000 0.0000000 0.0000000 0.83205029 0.6454972 -0.3779645 -0.41099747 0.0000000 -0.3535534
#3 A1.3 0.00000000 0.0000000 0.0000000 0.42307692 0.8951436 -0.6289709 -0.61554702 -0.3580574 -0.7844645
#4 A2.1 0.06839411 0.8320503 0.4230769 0.00000000 0.0000000 0.0000000 0.06839411 0.3580574 0.1961161
#5 A2.2 -0.53059545 0.6454972 0.8951436 0.00000000 0.0000000 0.0000000 -0.53059545 -0.1666667 -0.9128709
#6 A2.3 0.90098616 -0.3779645 -0.6289709 0.00000000 0.0000000 0.0000000 0.90098616 0.4879500 0.8017837
#7 A3.1 1.00000000 -0.4109975 -0.6155470 0.06839411 -0.5305954 0.9009862 0.00000000 0.0000000 0.0000000
#8 A3.2 0.74283363 0.0000000 -0.3580574 0.35805744 -0.1666667 0.4879500 0.00000000 0.0000000 0.0000000
#9 A3.3 0.63936201 -0.3535534 -0.7844645 0.19611614 -0.9128709 0.8017837 0.00000000 0.0000000 0.0000000
不知道这是否比循环容易得多。我们可以用1的块对角矩阵相乘
library(Matrix)
as.matrix(cor.mat * !bdiag(replicate(3, matrix(1, 3, 3), simplify = FALSE)))
# A1.1 A1.2 A1.3 A2.1 A2.2 A2.3 A3.1 A3.2 A3.3
#A1.1 0.00000000 0.0000000 0.0000000 0.06839411 -0.5305954 0.9009862 1.00000000 0.7428336 0.6393620
#A1.2 0.00000000 0.0000000 0.0000000 0.83205029 0.6454972 -0.3779645 -0.41099747 0.0000000 -0.3535534
#A1.3 0.00000000 0.0000000 0.0000000 0.42307692 0.8951436 -0.6289709 -0.61554702 -0.3580574 -0.7844645
#A2.1 0.06839411 0.8320503 0.4230769 0.00000000 0.0000000 0.0000000 0.06839411 0.3580574 0.1961161
#A2.2 -0.53059545 0.6454972 0.8951436 0.00000000 0.0000000 0.0000000 -0.53059545 -0.1666667 -0.9128709
#A2.3 0.90098616 -0.3779645 -0.6289709 0.00000000 0.0000000 0.0000000 0.90098616 0.4879500 0.8017837
#A3.1 1.00000000 -0.4109975 -0.6155470 0.06839411 -0.5305954 0.9009862 0.00000000 0.0000000 0.0000000
#A3.2 0.74283363 0.0000000 -0.3580574 0.35805744 -0.1666667 0.4879500 0.00000000 0.0000000 0.0000000
#A3.3 0.63936201 -0.3535534 -0.7844645 0.19611614 -0.9128709 0.8017837 0.00000000 0.0000000 0.0000000
或者另一个选项是使用
行/列
索引
replace(cor.mat, cbind(rep(1:9, each = 3),
c(sapply(list(1:3, 4:6, 7:9), rep, 3))), 0)
或者使用
outer
构造逻辑矩阵并与cor.mat相乘
nm1 <- sub("\\.\\d+$", "", colnames(cor.mat))
cor.mat * outer(nm1, nm1, `!=`)
nm1我们可以用1的块对角矩阵相乘
library(Matrix)
as.matrix(cor.mat * !bdiag(replicate(3, matrix(1, 3, 3), simplify = FALSE)))
# A1.1 A1.2 A1.3 A2.1 A2.2 A2.3 A3.1 A3.2 A3.3
#A1.1 0.00000000 0.0000000 0.0000000 0.06839411 -0.5305954 0.9009862 1.00000000 0.7428336 0.6393620
#A1.2 0.00000000 0.0000000 0.0000000 0.83205029 0.6454972 -0.3779645 -0.41099747 0.0000000 -0.3535534
#A1.3 0.00000000 0.0000000 0.0000000 0.42307692 0.8951436 -0.6289709 -0.61554702 -0.3580574 -0.7844645
#A2.1 0.06839411 0.8320503 0.4230769 0.00000000 0.0000000 0.0000000 0.06839411 0.3580574 0.1961161
#A2.2 -0.53059545 0.6454972 0.8951436 0.00000000 0.0000000 0.0000000 -0.53059545 -0.1666667 -0.9128709
#A2.3 0.90098616 -0.3779645 -0.6289709 0.00000000 0.0000000 0.0000000 0.90098616 0.4879500 0.8017837
#A3.1 1.00000000 -0.4109975 -0.6155470 0.06839411 -0.5305954 0.9009862 0.00000000 0.0000000 0.0000000
#A3.2 0.74283363 0.0000000 -0.3580574 0.35805744 -0.1666667 0.4879500 0.00000000 0.0000000 0.0000000
#A3.3 0.63936201 -0.3535534 -0.7844645 0.19611614 -0.9128709 0.8017837 0.00000000 0.0000000 0.0000000
或者另一个选项是使用行/列
索引
replace(cor.mat, cbind(rep(1:9, each = 3),
c(sapply(list(1:3, 4:6, 7:9), rep, 3))), 0)
或者使用outer
构造逻辑矩阵并与cor.mat相乘
nm1 <- sub("\\.\\d+$", "", colnames(cor.mat))
cor.mat * outer(nm1, nm1, `!=`)
nm1仅当此示例的模式代表整个数据时,此选项才有效。如果依赖列的数量是可变的,那就不起作用(注意:我确实喜欢这个非常干净的解决方案!)很公平:)我对它投了更高的票,因为它真的很棒。我的评论是为OP做准备的,以防他的例子不能代表整个数据。只有当这个例子的模式代表整个数据时,这才有效。如果依赖列的数量是可变的,那就不起作用(注意:我确实喜欢这个非常干净的解决方案!)很公平:)我对它投了更高的票,因为它真的很棒。我的评论是为了提醒OP,以防他的例子不能代表全部数据。