在R中对齐字符矩阵中的类似行

在R中对齐字符矩阵中的类似行,r,matrix,character,R,Matrix,Character,我有一个结构如下的字符矩阵: dog 1 cow 9 mouse 7 bird 10 tiger 1 gnu 2 tiger 3 deer 7 deer 27 skunk 2 rat 50 NA NA mouse 8 snake 3 NA NA cow 7 NA NA NA NA sheep 21 NA NA NA

我有一个结构如下的字符矩阵:

dog    1   cow    9     mouse  7 
bird   10  tiger  1     gnu    2
tiger  3   deer   7     deer   27
skunk  2   rat    50    NA     NA
mouse  8   snake  3     NA     NA 
cow    7   NA     NA    NA     NA
sheep  21  NA     NA    NA     NA 
gnu    5   NA     NA    NA     NA 
假设这是区域设置中的动物矩阵,每个区域设置的数据由连续的列对定义。一些动物可能在不同的地区很常见,但地区也可能有独特的动物。最后,我想为这些数据制作一个热图,因此需要对该矩阵进行重新排序,使其具有一个结构,其中所有类型的动物都有一列,每个区域设置中的数字对应的连续列:

dog    1    NA    NA 
tiger  3    1     NA 
skunk  2    NA    NA
mouse  8    NA    NA
cow    7    9     NA
sheep  21   NA    NA
gnu    5    NA    2
deer   NA   7     27
rat    NA   50    NA
snake  NA   3     NA
mouse  NA   NA    7
bird   10   NA    NA
换句话说,我有

# Example input: one (animal, count) column pair per locale. Shorter locales
# are padded with NA so that every column has eight entries. Counts are kept
# as strings because the result is a character matrix.
A1 <- c("dog", "bird", "tiger", "skunk", "mouse", "cow", "sheep", "gnu")
B1 <- c("1", "10", "3", "2", "8", "7", "21", "5")
A2 <- c("cow", "tiger", "deer", "rat", "snake", rep(NA_character_, 3))
B2 <- c("9", "1", "7", "50", "3", rep(NA_character_, 3))
A3 <- c("mouse", "gnu", "deer", rep(NA_character_, 5))
B3 <- c("7", "2", "27", rep(NA_character_, 5))
# cbind() takes the column names (A1, B1, ...) from the variable names.
TheMatrix <- cbind(A1, B1, A2, B2, A3, B3)
以下是我的看法:

A. 读入数据:

# Console transcript: parse the whitespace-separated table given inline via
# `text =` into the data frame `x`. No header is supplied, and the NA fields
# are read as missing values (read.table's default na.strings).
> x <- read.table(text = "
+ dog    1   cow    9     mouse  7 
+ bird   10  tiger  1     gnu    2
+ tiger  3   deer   7     deer   27
+ skunk  2   rat    50    NA     NA
+ mouse  8   snake  3     NA     NA 
+ cow    7   NA     NA    NA     NA
+ sheep  21  NA     NA    NA     NA 
+ gnu    5   NA     NA    NA     NA ")
C. 最后,使用 `reshape2` 软件包中的 `dcast`:

# Console transcript: cast `x` to wide form with one row per animal and one
# column per locale. The formula and value.var require `x` to have the columns
# animal, locale and count.
# As the printed result shows, fun.aggregate = sum yields 0 for animal/locale
# combinations that have no data, and the NA padding rows end up in a single
# <NA> row.
> library(reshape2)
> dcast(x, animal ~ locale, fun.aggregate = sum, value.var = "count")
   animal  1  2  3
1    bird 10  0  0
2     cow  7  9  0
3    deer  0  7 27
4     dog  1  0  0
5     gnu  5  0  2
6   mouse  8  0  7
7     rat  0 50  0
8   sheep 21  0  0
9   skunk  2  0  0
10  snake  0  3  0
11  tiger  3  1  0
12   <NA>  0 NA NA
> library(reshape2)
> dcast(x, animal ~ locale, fun.aggregate = sum, value.var = "count")
   animal  1  2  3
1    bird 10  0  0
2     cow  7  9  0
3    deer  0  7 27
4     dog  1  0  0
5     gnu  5  0  2
6   mouse  8  0  7
7     rat  0 50  0
8   sheep 21  0  0
9   skunk  2  0  0
10  snake  0  3  0
11  tiger  3  1  0
12   <NA>  0 NA NA
D. 清理输出并用 `NA` 替换 `0` 的最后一步留给读者作为练习 :)。

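library(reshape2)

ncols = ncol(TheMatrix)
nrows = nrow(TheMatrix)

dcast(as.data.frame(na.omit(cbind(c(TheMatrix[,seq(1,ncols,2)]),
                                  c(TheMatrix[,seq(2,ncols,2)]),
                                  rep(colnames(TheMatrix)[seq(2,ncols,2)],
                                      each = nrows)))),
      V1 ~ V3, value.var = 'V2')
#      V1   B1   B2   B3
#1   bird   10 <NA> <NA>
#2    cow    7    9 <NA>
#3   deer <NA>    7   27
#4    dog    1 <NA> <NA>
#5    gnu    5 <NA>    2
#6  mouse    8 <NA>    7
#7    rat <NA>   50 <NA>
#8  sheep   21 <NA> <NA>
#9  skunk    2 <NA> <NA>
#10 snake <NA>    3 <NA>
#11 tiger    3    1 <NA>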

这里发生了很多事情(每一件都很简单),要理解它们,只需自己逐段运行每个部分(由内向外)。

这里是一个使用 `Reduce` 的解决方案:

# Number of locales, i.e. (animal, count) column pairs, in TheMatrix.
max_locale <- 3
# Column indices of TheMatrix grouped per locale: element "1" holds columns
# 1 and 2, element "2" holds columns 3 and 4, and so on.
split_list <- split(
  seq_len(2 * max_locale),
  rep(seq_len(max_locale), each = 2)
)

# Full outer join of two data frames, keyed on the first column of each.
# Reduce() only hands a binary function its two operands, so the join options
# are fixed here instead of being passed as extra arguments.
my_locale_merge <- function(x, y) {
  key_x <- colnames(x)[1]
  key_y <- colnames(y)[1]
  merge(x, y, by.x = key_x, by.y = key_y, all = TRUE)
}

# Merge the per-locale (animal, count) column pairs of TheMatrix into one wide
# data frame: one row per animal, the animal name in the first column and one
# count column per locale.
#
# Rows whose animal is NA are only padding. They are dropped from every locale
# before merging because merge() treats NA keys as equal to each other, so
# padding rows left in place would be cross-joined (3 x 5 = 15 extra rows for
# the example data) only to be thrown away afterwards. Filtering on the first
# column also avoids hard-coding the key column name.
Reduce(
  # Reduce folds my_locale_merge over the list of per-locale data frames.
  my_locale_merge,
  # lapply builds one data frame per locale from its pair of columns.
  lapply(split_list, function(cols) {
    locale <- as.data.frame(
      TheMatrix[, cols, drop = FALSE],
      stringsAsFactors = FALSE
    )
    locale[!is.na(locale[[1]]), , drop = FALSE]
  })
)

据我所知,`Reduce` 不允许用户传递额外的函数参数,如 `by.x`。因此,我定义了一个新函数 `my_locale_merge`,用于处理这些参数。

谢谢,我从未使用过 reshape2,这就成功了!谢谢,你的评论很有帮助。我会把 NA 删除 :)
# NOTE(review): `ll` is not defined anywhere in the visible text. Judging by
# the printed result below, it is presumably a list with one data frame per
# locale (animal, count and a `locale` column) - confirm against the original
# answer.
# Give the first two columns of every list element the same names so that the
# pieces can be stacked.
> for (i in 1:(ncol(x)/2)) names(ll[[i]])[1:2] <- c("animal", "count")
# Stack all list elements into one long table by folding rbind over the list.
> x <- Reduce(rbind, ll)
   animal count locale
1     dog     1      1
2    bird    10      1
3   tiger     3      1
4   skunk     2      1
5   mouse     8      1
6     cow     7      1
7   sheep    21      1
8     gnu     5      1
9     cow     9      2
10  tiger     1      2
11   deer     7      2
12    rat    50      2
13  snake     3      2
14   <NA>    NA      2
15   <NA>    NA      2
16   <NA>    NA      2
17  mouse     7      3
18    gnu     2      3
19   deer    27      3
20   <NA>    NA      3
21   <NA>    NA      3
22   <NA>    NA      3
23   <NA>    NA      3
24   <NA>    NA      3
# Cast the long table `x` (columns animal, count, locale) to wide form with
# one row per animal and one column per locale.
# As the printed result shows, fun.aggregate = sum yields 0 for animal/locale
# combinations that have no data, and the NA padding rows end up in a single
# <NA> row.
> library(reshape2)
> dcast(x, animal ~ locale, fun.aggregate = sum, value.var = "count")
   animal  1  2  3
1    bird 10  0  0
2     cow  7  9  0
3    deer  0  7 27
4     dog  1  0  0
5     gnu  5  0  2
6   mouse  8  0  7
7     rat  0 50  0
8   sheep 21  0  0
9   skunk  2  0  0
10  snake  0  3  0
11  tiger  3  1  0
12   <NA>  0 NA NA
library(reshape2)

# Dimensions of the input matrix; columns come in (animal, count) pairs.
ncols <- ncol(TheMatrix)
nrows <- nrow(TheMatrix)

# Stack all animal columns, all count columns and the matching count-column
# names into a three-column long table, drop the NA padding rows, then cast it
# to one row per animal and one column per locale.
dcast(
  local({
    animal_cols <- seq(1, ncols, by = 2)
    count_cols <- seq(2, ncols, by = 2)
    # cbind() is given calls, not bare variables, so the columns stay unnamed
    # and as.data.frame() labels them V1, V2, V3.
    as.data.frame(na.omit(cbind(
      c(TheMatrix[, animal_cols]),
      c(TheMatrix[, count_cols]),
      rep(colnames(TheMatrix)[count_cols], each = nrows)
    )))
  }),
  V1 ~ V3,
  value.var = "V2"
)
#      V1   B1   B2   B3
#1   bird   10 <NA> <NA>
#2    cow    7    9 <NA>
#3   deer <NA>    7   27
#4    dog    1 <NA> <NA>
#5    gnu    5 <NA>    2
#6  mouse    8 <NA>    7
#7    rat <NA>   50 <NA>
#8  sheep   21 <NA> <NA>
#9  skunk    2 <NA> <NA>
#10 snake <NA>    3 <NA>
#11 tiger    3    1 <NA>
# How many locales, i.e. (animal, count) column pairs, TheMatrix contains.
max_locale <- 3
# Group the column numbers of TheMatrix two by two: columns 1-2 belong to
# locale "1", columns 3-4 to locale "2", and so on.
split_list <- split(
  seq_len(2 * max_locale),
  ceiling(seq_len(2 * max_locale) / 2)
)

# Join two data frames on their respective first columns, keeping the rows of
# both sides that have no match. This is the two-argument function that
# Reduce() folds over the per-locale pieces.
my_locale_merge <- function(x, y) {
  merge(
    x, y,
    by.x = colnames(x)[1],
    by.y = colnames(y)[1],
    all.x = TRUE,
    all.y = TRUE
  )
}

# Build the wide result: one row per animal, with the animal name in the first
# column followed by one count column per locale.
#
# The NA padding rows are removed from each locale before the merge rather
# than from the merged result. merge() matches NA keys with one another, so
# leaving them in would multiply the padding rows of the locales together
# (3 x 5 = 15 for the example data) before they are discarded. Testing the
# first column of each piece also removes the dependency on the name `A1`.
Reduce(
  # Fold the pairwise outer join over the per-locale data frames.
  my_locale_merge,
  # One data frame per locale, built from that locale's two columns.
  lapply(split_list, function(cols) {
    piece <- as.data.frame(
      TheMatrix[, cols, drop = FALSE],
      stringsAsFactors = FALSE
    )
    has_animal <- !is.na(piece[[1]])
    piece[has_animal, , drop = FALSE]
  })
)