R合并距离矩阵 我有以下问题,我想合并距离矩阵,考虑下面的数据: x1 <- c(2,2,2,3,1,2,4,6,1,2,4) y1 <- c(5,4,3,3,4,2,1,6,4,2,3) x2 <- c(8,2,7,3,1,2,2,2,1,2,6) y2 <- c(1,3,3,3,1,2,4,3,1,2,8) x3 <- c(4,4,1,2,4,6,3,2,4,6,9) y3 <- c(1,2,3,3,1,2,4,6,1,2,1) x4 <- c(4,4,1,2,4,6,3,2,4,6,9) y4 <- c(1,2,3,3,1,2,4,6,1,2,1) x5 <- c(4,1,2,4,6,2,3,3,6,2,9) y5 <- c(1,3,3,3,1,2,4,6,1,2,1) id1 <- c("a","a","a","a","b","b","b","b","b","b","b") dat <- data.frame(x1,y1,x2,y2,x3,y3,id1)

R合并距离矩阵 我有以下问题,我想合并距离矩阵,考虑下面的数据: x1 <- c(2,2,2,3,1,2,4,6,1,2,4) y1 <- c(5,4,3,3,4,2,1,6,4,2,3) x2 <- c(8,2,7,3,1,2,2,2,1,2,6) y2 <- c(1,3,3,3,1,2,4,3,1,2,8) x3 <- c(4,4,1,2,4,6,3,2,4,6,9) y3 <- c(1,2,3,3,1,2,4,6,1,2,1) x4 <- c(4,4,1,2,4,6,3,2,4,6,9) y4 <- c(1,2,3,3,1,2,4,6,1,2,1) x5 <- c(4,1,2,4,6,2,3,3,6,2,9) y5 <- c(1,3,3,3,1,2,4,6,1,2,1) id1 <- c("a","a","a","a","b","b","b","b","b","b","b") dat <- data.frame(x1,y1,x2,y2,x3,y3,id1),r,merge,R,Merge,请注意,实际上大约有20个组,总共大约有10000行数据。这只是一个简化版本。谢谢 以下结合了许多简单的步骤,但它们都有效: > aa = dist(dat[dat$id1=='a',], upper=T) Warning message: In dist(dat[dat$id1 == "a", ], upper = T) : NAs introduced by coercion > bb = dist(dat[dat$id1=='b',], upper=T) Warning mes

请注意,实际上大约有20个组,总共大约有10000行数据。这只是一个简化版本。谢谢

以下结合了许多简单的步骤,但它们都有效:

> aa = dist(dat[dat$id1=='a',], upper=T)
Warning message:
In dist(dat[dat$id1 == "a", ], upper = T) : NAs introduced by coercion
> bb = dist(dat[dat$id1=='b',], upper=T)
Warning message:
In dist(dat[dat$id1 == "b", ], upper = T) : NAs introduced by coercion
> 
> aa
         1        2        3        4
1          7.000000 5.066228 7.000000
2 7.000000          6.480741 3.055050
3 5.066228 6.480741          4.582576
4 7.000000 3.055050 4.582576         

> bb
           5         6         7         8         9        10        11
5             3.741657  6.658328  8.573214  0.000000  3.741657 11.276820
6   3.741657            5.066228  8.708234  3.741657  0.000000  8.841191
7   6.658328  5.066228            6.390097  6.658328  5.066228  9.721111
8   8.573214  8.708234  6.390097            8.573214  8.708234 12.220202
9   0.000000  3.741657  6.658328  8.573214            3.741657 11.276820
10  3.741657  0.000000  5.066228  8.708234  3.741657            8.841191
11 11.276820  8.841191  9.721111 12.220202 11.276820  8.841191          
> 

> aadf = data.frame(as.matrix(aa))
> aadf
        X1       X2       X3       X4
1 0.000000 7.000000 5.066228 7.000000
2 7.000000 0.000000 6.480741 3.055050
3 5.066228 6.480741 0.000000 4.582576
4 7.000000 3.055050 4.582576 0.000000
> 
> bbdf = data.frame(as.matrix(bb))
> bbdf
          X5       X6       X7        X8        X9      X10       X11
5   0.000000 3.741657 6.658328  8.573214  0.000000 3.741657 11.276820
6   3.741657 0.000000 5.066228  8.708234  3.741657 0.000000  8.841191
7   6.658328 5.066228 0.000000  6.390097  6.658328 5.066228  9.721111
8   8.573214 8.708234 6.390097  0.000000  8.573214 8.708234 12.220202
9   0.000000 3.741657 6.658328  8.573214  0.000000 3.741657 11.276820
10  3.741657 0.000000 5.066228  8.708234  3.741657 0.000000  8.841191
11 11.276820 8.841191 9.721111 12.220202 11.276820 8.841191  0.000000
> 
> 
> aadf$newcol = 1:4
> aadf
        X1       X2       X3       X4 newcol
1 0.000000 7.000000 5.066228 7.000000      1
2 7.000000 0.000000 6.480741 3.055050      2
3 5.066228 6.480741 0.000000 4.582576      3
4 7.000000 3.055050 4.582576 0.000000      4
> 
> 
> bbdf$newcol = 5:11
> bbdf
          X5       X6       X7        X8        X9      X10       X11 newcol
5   0.000000 3.741657 6.658328  8.573214  0.000000 3.741657 11.276820      5
6   3.741657 0.000000 5.066228  8.708234  3.741657 0.000000  8.841191      6
7   6.658328 5.066228 0.000000  6.390097  6.658328 5.066228  9.721111      7
8   8.573214 8.708234 6.390097  0.000000  8.573214 8.708234 12.220202      8
9   0.000000 3.741657 6.658328  8.573214  0.000000 3.741657 11.276820      9
10  3.741657 0.000000 5.066228  8.708234  3.741657 0.000000  8.841191     10
11 11.276820 8.841191 9.721111 12.220202 11.276820 8.841191  0.000000     11
> 

> dat$newcol = rownames(dat)
> dat
   x1 y1 x2 y2 x3 y3 id1 newcol
1   2  5  8  1  4  1   a      1
2   2  4  2  3  4  2   a      2
3   2  3  7  3  1  3   a      3
4   3  3  3  3  2  3   a      4
5   1  4  1  1  4  1   b      5
6   2  2  2  2  6  2   b      6
7   4  1  2  4  3  4   b      7
8   6  6  2  3  2  6   b      8
9   1  4  1  1  4  1   b      9
10  2  2  2  2  6  2   b     10
11  4  3  6  8  9  1   b     11
> 

> aa1 = merge(dat, aadf, id='newcol')
> bb1 = merge(dat, bbdf, id='newcol')
> 
> aa1
  newcol x1 y1 x2 y2 x3 y3 id1       X1       X2       X3       X4
1      1  2  5  8  1  4  1   a 0.000000 7.000000 5.066228 7.000000
2      2  2  4  2  3  4  2   a 7.000000 0.000000 6.480741 3.055050
3      3  2  3  7  3  1  3   a 5.066228 6.480741 0.000000 4.582576
4      4  3  3  3  3  2  3   a 7.000000 3.055050 4.582576 0.000000
> bb1
  newcol x1 y1 x2 y2 x3 y3 id1        X5       X6       X7        X8        X9      X10       X11
1     10  2  2  2  2  6  2   b  3.741657 0.000000 5.066228  8.708234  3.741657 0.000000  8.841191
2     11  4  3  6  8  9  1   b 11.276820 8.841191 9.721111 12.220202 11.276820 8.841191  0.000000
3      5  1  4  1  1  4  1   b  0.000000 3.741657 6.658328  8.573214  0.000000 3.741657 11.276820
4      6  2  2  2  2  6  2   b  3.741657 0.000000 5.066228  8.708234  3.741657 0.000000  8.841191
5      7  4  1  2  4  3  4   b  6.658328 5.066228 0.000000  6.390097  6.658328 5.066228  9.721111
6      8  6  6  2  3  2  6   b  8.573214 8.708234 6.390097  0.000000  8.573214 8.708234 12.220202
7      9  1  4  1  1  4  1   b  0.000000 3.741657 6.658328  8.573214  0.000000 3.741657 11.276820

> datfinal = merge(aa1, bb1, id='newcol', all=T)
> datfinal
   newcol x1 y1 x2 y2 x3 y3 id1       X1       X2       X3       X4        X5       X6       X7        X8        X9      X10
1       1  2  5  8  1  4  1   a 0.000000 7.000000 5.066228 7.000000        NA       NA       NA        NA        NA       NA
2      10  2  2  2  2  6  2   b       NA       NA       NA       NA  3.741657 0.000000 5.066228  8.708234  3.741657 0.000000
3      11  4  3  6  8  9  1   b       NA       NA       NA       NA 11.276820 8.841191 9.721111 12.220202 11.276820 8.841191
4       2  2  4  2  3  4  2   a 7.000000 0.000000 6.480741 3.055050        NA       NA       NA        NA        NA       NA
5       3  2  3  7  3  1  3   a 5.066228 6.480741 0.000000 4.582576        NA       NA       NA        NA        NA       NA
6       4  3  3  3  3  2  3   a 7.000000 3.055050 4.582576 0.000000        NA       NA       NA        NA        NA       NA
7       5  1  4  1  1  4  1   b       NA       NA       NA       NA  0.000000 3.741657 6.658328  8.573214  0.000000 3.741657
8       6  2  2  2  2  6  2   b       NA       NA       NA       NA  3.741657 0.000000 5.066228  8.708234  3.741657 0.000000
9       7  4  1  2  4  3  4   b       NA       NA       NA       NA  6.658328 5.066228 0.000000  6.390097  6.658328 5.066228
10      8  6  6  2  3  2  6   b       NA       NA       NA       NA  8.573214 8.708234 6.390097  0.000000  8.573214 8.708234
11      9  1  4  1  1  4  1   b       NA       NA       NA       NA  0.000000 3.741657 6.658328  8.573214  0.000000 3.741657
         X11
1         NA
2   8.841191
3   0.000000
4         NA
5         NA
6         NA
7  11.276820
8   8.841191
9   9.721111
10 12.220202
11 11.276820
> 
编辑:

以下内容将自动处理所有组:

# Append within-group Euclidean distance matrices to a data frame.
#
# For every distinct (non-NA) value of `dat$id1`, the pairwise distances
# between the rows of that group are computed and merged back onto `dat`
# as one column per group member (named "X<row name>"). Cells that pair a
# row with a member of a different group are NA.
#
# Only the numeric columns that `dat` has on entry (other than `id1` and
# `newcol`) enter the distance. Passing the whole data frame to dist()
# would coerce the character `id1` column to NA (with a warning); dist()
# then rescales the sum for the excluded column, which inflates every
# distance. The merge key and previously merged distance columns would
# distort the result as well. Distances therefore differ from values
# obtained by calling dist() on the full data frame.
#
# Args:
#   dat: data frame with a grouping column `id1` and row names that
#        as.numeric() can convert (the default 1..n row names qualify).
#
# Returns:
#   A data frame sorted by the numeric key column `newcol` (placed first),
#   followed by the original columns and the distance columns.
automatic <- function(dat) {
    is_num <- vapply(dat, is.numeric, logical(1))
    feature_cols <- setdiff(names(dat)[is_num], c("id1", "newcol"))

    # Snapshot features and group labels before any merge: merge() reorders
    # rows and resets row names, and the row names are the join key.
    features <- dat[, feature_cols, drop = FALSE]
    group_ids <- dat$id1

    dat$newcol <- as.numeric(rownames(dat))

    for (gp in unique(group_ids[!is.na(group_ids)])) {
        members <- which(group_ids == gp)
        dist_df <- data.frame(
            as.matrix(dist(features[members, , drop = FALSE]))
        )
        # Row names of the distance matrix are the original row names.
        dist_df$newcol <- as.numeric(rownames(dist_df))
        dat <- merge(dat, dist_df, by = "newcol", all = TRUE)
    }
    dat
}

automatic(dat)
   newcol x1 y1 x2 y2 x3 y3 id1       X1       X2       X3       X4        X5        X6        X7        X8        X9       X10
1       1  2  5  8  1  4  1   a 0.000000 7.010197 5.451081 7.634508        NA        NA        NA        NA        NA        NA
2       2  2  4  2  3  4  2   a 7.010197 0.000000 6.502747 3.703280        NA        NA        NA        NA        NA        NA
3       3  2  3  7  3  1  3   a 5.451081 6.502747 0.000000 4.659859        NA        NA        NA        NA        NA        NA
4       4  3  3  3  3  2  3   a 7.634508 3.703280 4.659859 0.000000        NA        NA        NA        NA        NA        NA
5       5  1  4  1  1  4  1   b       NA       NA       NA       NA  0.000000  4.720775  8.485281 11.109841  5.237229  7.964206
6       6  2  2  2  2  6  2   b       NA       NA       NA       NA  4.720775  0.000000  6.279217 10.875924  6.000000  5.237229
7       7  4  1  2  4  3  4   b       NA       NA       NA       NA  8.485281  6.279217  0.000000  7.855844  8.485281  7.289915
8       8  6  6  2  3  2  6   b       NA       NA       NA       NA 11.109841 10.875924  7.855844  0.000000 10.474459 10.875924
9       9  1  4  1  1  4  1   b       NA       NA       NA       NA  5.237229  6.000000  8.485281 10.474459  0.000000  4.720775
10     10  2  2  2  2  6  2   b       NA       NA       NA       NA  7.964206  5.237229  7.289915 10.875924  4.720775  0.000000
11     11  4  3  6  8  9  1   b       NA       NA       NA       NA 15.766148 12.558435 12.895182 15.325050 13.918128 10.796825
        X11
1        NA
2        NA
3        NA
4        NA
5  15.76615
6  12.55843
7  12.89518
8  15.32505
9  13.91813
10 10.79682
11  0.00000
Warning messages:
1: In dist(dat[dat$id1 == gp, ], upper = T) : NAs introduced by coercion
2: In dist(dat[dat$id1 == gp, ], upper = T) : NAs introduced by coercion

下面是另一种做法,它不依赖于数据中有多少对 x1、y1 这样的列。不过它假设只有 `a` 和 `b` 两个组,不能处理例如 `c` 这样的第三个组。

这显然与 rnso 给出的结果不同:他的结果中 `b` 组对应的部分列含有 `NA`,而我的没有。如果你想要的正是他那样的输出,就不要用这个方法。

# Stack the distance matrices of groups "a" and "b" and bind them to `dat`.
# Handles exactly these two groups. `distance` is defined outside this
# snippet; presumably a list of per-group dist objects keyed by group name.
a <- as.matrix(distance[["a"]])
b <- as.matrix(distance[["b"]])

# Pad whichever matrix is narrower with NA columns so both have the same
# width; padding only `a` fails with a negative column count when group
# "a" has more rows than group "b".
width <- max(ncol(a), ncol(b))
a_na <- matrix(NA, nrow = nrow(a), ncol = width - ncol(a))
b_na <- matrix(NA, nrow = nrow(b), ncol = width - ncol(b))
a_merged <- cbind(a, a_na)
b_merged <- cbind(b, b_na)
ab_merged <- rbind(a_merged, b_merged)

# NOTE(review): assumes the rows of `dat` are ordered with all "a" rows
# first, then all "b" rows, matching the stacked matrix - confirm.
dat2 <- cbind(dat, ab_merged)
dist_idx <- seq_len(ncol(ab_merged))
names(dat2)[ncol(dat) + dist_idx] <- paste0("Dist", dist_idx)

您希望最终输出是什么样子?为什么用 `rbind`?为什么不用 `cbind`?
我想首先需要合并矩阵(`rbind`),然后再绑定到原始数据(`cbind`)。但不知为何,我知道的方法都不管用。
有没有更“自动”的方法?原始数据集有更多的组,共约 10000 行。谢谢!此外,能否让各个矩阵都从同一个 X 变量(列)开始,这样前面就不会出现空白?
再次感谢,但是我还有多个组……大概有 100 多个。有什么建议吗?
@tobias sch:请看我上面的编辑,它会自动处理所有组。
# Append within-group Euclidean distance matrices to a data frame.
#
# For every distinct (non-NA) value of `dat$id1`, the pairwise distances
# between the rows of that group are computed and merged back onto `dat`
# as one column per group member (named "X<row name>"). Cells that pair a
# row with a member of a different group are NA.
#
# Only the numeric columns that `dat` has on entry (other than `id1` and
# `newcol`) enter the distance. Passing the whole data frame to dist()
# would coerce the character `id1` column to NA (with a warning); dist()
# then rescales the sum for the excluded column, which inflates every
# distance. The merge key and previously merged distance columns would
# distort the result as well. Distances therefore differ from values
# obtained by calling dist() on the full data frame.
#
# Args:
#   dat: data frame with a grouping column `id1` and row names that
#        as.numeric() can convert (the default 1..n row names qualify).
#
# Returns:
#   A data frame sorted by the numeric key column `newcol` (placed first),
#   followed by the original columns and the distance columns.
automatic <- function(dat) {
    is_num <- vapply(dat, is.numeric, logical(1))
    feature_cols <- setdiff(names(dat)[is_num], c("id1", "newcol"))

    # Snapshot features and group labels before any merge: merge() reorders
    # rows and resets row names, and the row names are the join key.
    features <- dat[, feature_cols, drop = FALSE]
    group_ids <- dat$id1

    dat$newcol <- as.numeric(rownames(dat))

    for (gp in unique(group_ids[!is.na(group_ids)])) {
        members <- which(group_ids == gp)
        dist_df <- data.frame(
            as.matrix(dist(features[members, , drop = FALSE]))
        )
        # Row names of the distance matrix are the original row names.
        dist_df$newcol <- as.numeric(rownames(dist_df))
        dat <- merge(dat, dist_df, by = "newcol", all = TRUE)
    }
    dat
}

automatic(dat)
   newcol x1 y1 x2 y2 x3 y3 id1       X1       X2       X3       X4        X5        X6        X7        X8        X9       X10
1       1  2  5  8  1  4  1   a 0.000000 7.010197 5.451081 7.634508        NA        NA        NA        NA        NA        NA
2       2  2  4  2  3  4  2   a 7.010197 0.000000 6.502747 3.703280        NA        NA        NA        NA        NA        NA
3       3  2  3  7  3  1  3   a 5.451081 6.502747 0.000000 4.659859        NA        NA        NA        NA        NA        NA
4       4  3  3  3  3  2  3   a 7.634508 3.703280 4.659859 0.000000        NA        NA        NA        NA        NA        NA
5       5  1  4  1  1  4  1   b       NA       NA       NA       NA  0.000000  4.720775  8.485281 11.109841  5.237229  7.964206
6       6  2  2  2  2  6  2   b       NA       NA       NA       NA  4.720775  0.000000  6.279217 10.875924  6.000000  5.237229
7       7  4  1  2  4  3  4   b       NA       NA       NA       NA  8.485281  6.279217  0.000000  7.855844  8.485281  7.289915
8       8  6  6  2  3  2  6   b       NA       NA       NA       NA 11.109841 10.875924  7.855844  0.000000 10.474459 10.875924
9       9  1  4  1  1  4  1   b       NA       NA       NA       NA  5.237229  6.000000  8.485281 10.474459  0.000000  4.720775
10     10  2  2  2  2  6  2   b       NA       NA       NA       NA  7.964206  5.237229  7.289915 10.875924  4.720775  0.000000
11     11  4  3  6  8  9  1   b       NA       NA       NA       NA 15.766148 12.558435 12.895182 15.325050 13.918128 10.796825
        X11
1        NA
2        NA
3        NA
4        NA
5  15.76615
6  12.55843
7  12.89518
8  15.32505
9  13.91813
10 10.79682
11  0.00000
Warning messages:
1: In dist(dat[dat$id1 == gp, ], upper = T) : NAs introduced by coercion
2: In dist(dat[dat$id1 == gp, ], upper = T) : NAs introduced by coercion
# Stack the distance matrices of groups "a" and "b" and bind them to `dat`.
# Handles exactly these two groups. `distance` is defined outside this
# snippet; presumably a list of per-group dist objects keyed by group name.
a <- as.matrix(distance[["a"]])
b <- as.matrix(distance[["b"]])

# Pad whichever matrix is narrower with NA columns so both have the same
# width; padding only `a` fails with a negative column count when group
# "a" has more rows than group "b".
width <- max(ncol(a), ncol(b))
a_na <- matrix(NA, nrow = nrow(a), ncol = width - ncol(a))
b_na <- matrix(NA, nrow = nrow(b), ncol = width - ncol(b))
a_merged <- cbind(a, a_na)
b_merged <- cbind(b, b_na)
ab_merged <- rbind(a_merged, b_merged)

# NOTE(review): assumes the rows of `dat` are ordered with all "a" rows
# first, then all "b" rows, matching the stacked matrix - confirm.
dat2 <- cbind(dat, ab_merged)
dist_idx <- seq_len(ncol(ab_merged))
names(dat2)[ncol(dat) + dist_idx] <- paste0("Dist", dist_idx)
   x1 y1 x2 y2 x3 y3 id1     Dist1    Dist2    Dist3     Dist4     Dist5    Dist6     Dist7
1   2  5  8  1  4  1   a  0.000000 7.000000 5.066228  7.000000        NA       NA        NA
2   2  4  2  3  4  2   a  7.000000 0.000000 6.480741  3.055050        NA       NA        NA
3   2  3  7  3  1  3   a  5.066228 6.480741 0.000000  4.582576        NA       NA        NA
4   3  3  3  3  2  3   a  7.000000 3.055050 4.582576  0.000000        NA       NA        NA
5   1  4  1  1  4  1   b  0.000000 3.741657 6.658328  8.573214  0.000000 3.741657 11.276820
6   2  2  2  2  6  2   b  3.741657 0.000000 5.066228  8.708234  3.741657 0.000000  8.841191
7   4  1  2  4  3  4   b  6.658328 5.066228 0.000000  6.390097  6.658328 5.066228  9.721111
8   6  6  2  3  2  6   b  8.573214 8.708234 6.390097  0.000000  8.573214 8.708234 12.220202
9   1  4  1  1  4  1   b  0.000000 3.741657 6.658328  8.573214  0.000000 3.741657 11.276820
10  2  2  2  2  6  2   b  3.741657 0.000000 5.066228  8.708234  3.741657 0.000000  8.841191
11  4  3  6  8  9  1   b 11.276820 8.841191 9.721111 12.220202 11.276820 8.841191  0.000000