在R中对齐字符矩阵中的类似行_R_Matrix_Character

在R中对齐字符矩阵中的类似行

r matrix

在R中对齐字符矩阵中的类似行,r,matrix,character,R,Matrix,Character,我有一个结构如下的字符矩阵： dog 1 cow 9 mouse 7 bird 10 tiger 1 gnu 2 tiger 3 deer 7 deer 27 skunk 2 rat 50 NA NA mouse 8 snake 3 NA NA cow 7 NA NA NA NA sheep 21 NA NA NA

我有一个结构如下的字符矩阵：

dog    1   cow    9     mouse  7 
bird   10  tiger  1     gnu    2
tiger  3   deer   7     deer   27
skunk  2   rat    50    NA     NA
mouse  8   snake  3     NA     NA 
cow    7   NA     NA    NA     NA
sheep  21  NA     NA    NA     NA 
gnu    5   NA     NA    NA     NA

假设这是区域设置中的动物矩阵，每个区域设置的数据由连续的列对定义。一些动物可能在不同的地区很常见，但地区也可能有独特的动物。最后，我想为这些数据制作一个热图，因此需要对该矩阵进行重新排序，使其具有一个结构，其中所有类型的动物都有一列，每个区域设置中的数字对应的连续列：

dog    1    NA    NA 
tiger  3    1     NA 
skunk  2    NA    NA
mouse  8    NA    NA
cow    7    9     NA
sheep  21   NA    NA
gnu    5    NA    2
deer   NA   7     27
rat    NA   50    NA
snake  NA   3     NA
mouse  NA   NA    7
bird   10   NA    NA

换句话说，我有

# Each locale contributes a pair of columns: animal names (A*) and their
# counts stored as strings (B*). Shorter locales are padded with NA to 8 rows.
A1 <- c("dog", "bird", "tiger", "skunk", "mouse", "cow", "sheep", "gnu")
B1 <- c("1", "10", "3", "2", "8", "7", "21", "5")
A2 <- c("cow", "tiger", "deer", "rat", "snake", rep(NA_character_, 3))
B2 <- c("9", "1", "7", "50", "3", rep(NA_character_, 3))
A3 <- c("mouse", "gnu", "deer", rep(NA_character_, 5))
B3 <- c("7", "2", "27", rep(NA_character_, 5))

# Bind column-wise into an 8 x 6 character matrix with columns A1, B1, ..., B3.
TheMatrix <- cbind(A1 = A1, B1 = B1, A2 = A2, B2 = B2, A3 = A3, B3 = B3)

A1以下是我的看法：
> x <- read.table(text = "
+ dog    1   cow    9     mouse  7 
+ bird   10  tiger  1     gnu    2
+ tiger  3   deer   7     deer   27
+ skunk  2   rat    50    NA     NA
+ mouse  8   snake  3     NA     NA 
+ cow    7   NA     NA    NA     NA
+ sheep  21  NA     NA    NA     NA 
+ gnu    5   NA     NA    NA     NA ")

C. 最后，使用 reshape2 软件包中的 dcast：
> library(reshape2)
> dcast(x, animal ~ locale, fun.aggregate = sum, value.var = "count")
   animal  1  2  3
1    bird 10  0  0
2     cow  7  9  0
3    deer  0  7 27
4     dog  1  0  0
5     gnu  5  0  2
6   mouse  8  0  7
7     rat  0 50  0
8   sheep 21  0  0
9   skunk  2  0  0
10  snake  0  3  0
11  tiger  3  1  0
12   <NA>  0 NA NA

> library(reshape2)
> dcast(x, animal ~ locale, fun.aggregate = sum, value.var = "count")
   animal  1  2  3
1    bird 10  0  0
2     cow  7  9  0
3    deer  0  7 27
4     dog  1  0  0
5     gnu  5  0  2
6   mouse  8  0  7
7     rat  0 50  0
8   sheep 21  0  0
9   skunk  2  0  0
10  snake  0  3  0
11  tiger  3  1  0
12   <NA>  0 NA NA

D. 最后一步——清理输出并将 0 替换为 NA——留给读者作为练习 :)。
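library(reshape2)

ncols = ncol(TheMatrix)
nrows = nrow(TheMatrix)

dcast(as.data.frame(na.omit(cbind(c(TheMatrix[,seq(1,ncols,2)]),
                                  c(TheMatrix[,seq(2,ncols,2)]),
                                  rep(colnames(TheMatrix)[seq(2,ncols,2)],
                                      each = nrows)))),
      V1 ~ V3, value.var = 'V2')
#      V1   B1   B2   B3
#1   bird   10 <NA> <NA>
#2    cow    7    9 <NA>
#3   deer <NA>    7   27
#4    dog    1 <NA> <NA>
#5    gnu    5 <NA>    2
#6  mouse    8 <NA>    7
#7    rat <NA>   50 <NA>
#8  sheep   21 <NA> <NA>
#9  skunk    2 <NA> <NA>
#10 snake <NA>    3 <NA>
#11 tiger    3    1 <NA>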

这里发生了很多事情（每个都很简单），要理解这些事情，只需自己运行每一点（从内部和外部开始）。
下面是一个使用 Reduce 的解决方案：

# Number of locales; each locale occupies two adjacent columns of TheMatrix.
max_locale <- 3
# Column indices grouped per locale: columns 1-2 under "1", 3-4 under "2", ...
split_list <- split(
  seq_len(2 * max_locale),
  rep(seq_len(max_locale), each = 2)
)

# Full outer join of two data frames, keyed on the first column of each.
# Used as the binary function for Reduce(), which cannot forward extra
# arguments such as by.x / by.y to merge() itself.
#   x, y: data frames (or objects merge() can coerce) to join.
#   Returns the merged data frame; rows unmatched on either side are kept.
my_locale_merge <- function(x, y) {
  key_x <- colnames(x)[1]
  key_y <- colnames(y)[1]
  merge(x, y, by.x = key_x, by.y = key_y, all = TRUE)
}

# Merge the per-locale data frames on the animal column, then drop the rows
# whose animal name (column A1 after merging) is NA. Wrapped in local() so the
# intermediate objects do not leak into the calling environment.
local({
  # One two-column data frame per entry of split_list.
  locale_frames <- lapply(split_list, function(cols) {
    as.data.frame(TheMatrix[, cols, drop = FALSE], stringsAsFactors = FALSE)
  })
  # Fold the frames together pairwise with my_locale_merge.
  merged <- Reduce(my_locale_merge, locale_frames)
  merged[!is.na(merged[["A1"]]), , drop = FALSE]
})

据我所知，Reduce 不允许用户传递额外的函数参数，如 by.x。因此，我定义了一个新函数 my_locale_merge，用于处理这些参数。
谢谢，我从未使用过 reshape2，这就成功了！
谢谢，你的评论很有帮助。我已将 NA 删除 :)
> for (i in 1:(ncol(x)/2)) names(ll[[i]])[1:2] <- c("animal", "count")
> x <- Reduce(rbind, ll)
   animal count locale
1     dog     1      1
2    bird    10      1
3   tiger     3      1
4   skunk     2      1
5   mouse     8      1
6     cow     7      1
7   sheep    21      1
8     gnu     5      1
9     cow     9      2
10  tiger     1      2
11   deer     7      2
12    rat    50      2
13  snake     3      2
14   <NA>    NA      2
15   <NA>    NA      2
16   <NA>    NA      2
17  mouse     7      3
18    gnu     2      3
19   deer    27      3
20   <NA>    NA      3
21   <NA>    NA      3
22   <NA>    NA      3
23   <NA>    NA      3
24   <NA>    NA      3

> library(reshape2)
> dcast(x, animal ~ locale, fun.aggregate = sum, value.var = "count")
   animal  1  2  3
1    bird 10  0  0
2     cow  7  9  0
3    deer  0  7 27
4     dog  1  0  0
5     gnu  5  0  2
6   mouse  8  0  7
7     rat  0 50  0
8   sheep 21  0  0
9   skunk  2  0  0
10  snake  0  3  0
11  tiger  3  1  0
12   <NA>  0 NA NA

library(reshape2)

ncols <- ncol(TheMatrix)
nrows <- nrow(TheMatrix)

# Stack the matrix into long form with three unnamed columns, which
# as.data.frame() names V1 (odd columns of TheMatrix), V2 (even columns) and
# V3 (name of the even column each value came from). Rows containing NA are
# dropped, then the result is cast wide with one column per V3 value.
dcast(
  as.data.frame(
    na.omit(
      cbind(
        c(TheMatrix[, seq(1, ncols, 2)]),
        c(TheMatrix[, seq(2, ncols, 2)]),
        rep(colnames(TheMatrix)[seq(2, ncols, 2)], each = nrows)
      )
    )
  ),
  V1 ~ V3,
  value.var = "V2"
)
#      V1   B1   B2   B3
#1   bird   10 <NA> <NA>
#2    cow    7    9 <NA>
#3   deer <NA>    7   27
#4    dog    1 <NA> <NA>
#5    gnu    5 <NA>    2
#6  mouse    8 <NA>    7
#7    rat <NA>   50 <NA>
#8  sheep   21 <NA> <NA>
#9  skunk    2 <NA> <NA>
#10 snake <NA>    3 <NA>
#11 tiger    3    1 <NA>

# Number of locales; each locale occupies two adjacent columns of TheMatrix.
max_locale <- 3
# Column indices grouped per locale: columns 1-2 under "1", 3-4 under "2", ...
split_list <- split(
  seq_len(2 * max_locale),
  rep(seq_len(max_locale), each = 2)
)

# Full outer join of two data frames, keyed on the first column of each.
# Used as the binary function for Reduce(), which cannot forward extra
# arguments such as by.x / by.y to merge() itself.
#   x, y: data frames (or objects merge() can coerce) to join.
#   Returns the merged data frame; rows unmatched on either side are kept.
my_locale_merge <- function(x, y) {
  key_x <- colnames(x)[1]
  key_y <- colnames(y)[1]
  merge(x, y, by.x = key_x, by.y = key_y, all = TRUE)
}

# Merge the per-locale data frames on the animal column, then drop the rows
# whose animal name (column A1 after merging) is NA. Wrapped in local() so the
# intermediate objects do not leak into the calling environment.
local({
  # One two-column data frame per entry of split_list.
  locale_frames <- lapply(split_list, function(cols) {
    as.data.frame(TheMatrix[, cols, drop = FALSE], stringsAsFactors = FALSE)
  })
  # Fold the frames together pairwise with my_locale_merge.
  merged <- Reduce(my_locale_merge, locale_frames)
  merged[!is.na(merged[["A1"]]), , drop = FALSE]
})