R 映射矩阵_R_Matrix_Mapping - Fatal编程技术网

R 映射矩阵

r matrix mapping

R 映射矩阵,r,matrix,mapping,R,Matrix,Mapping,我有一个csv文件包含10000行这种类型参考号以及从一些文本文件中提取的矩阵，其中R为该格式（包含60到100行）资料我想根据NA值用ref矩阵中的值映射数据矩阵，我的意思是用等效值替换每个NA 我的预期产出是 ref smbole name r2 kn knife r3 fr door r1 ts table_spoon r3 fr door 我试过这个

我有一个包含 10000 行的 csv 文件，其格式如下：

ref

另外还有一个在 R 中从若干文本文件提取得到的矩阵，其格式如下（包含 60 到 100 行）：

data

我想根据 NA 值，用 ref 矩阵中的值来映射 data 矩阵，也就是把每个 NA 替换为 ref 中对应的值。
我的预期输出是：
ref     smbole      name
r2      kn         knife
r3      fr           door
r1      ts          table_spoon
r3      fr          door

我试过这个代码，但它没有改变任何事情
# Asker's attempt, kept as posted. Reads the reference table from a
# tab-separated file and converts it to a matrix.
ref <- as.matrix(read.delim("name.csv", sep = "\t"))

# fun: meant to fill the NA entries of one row (`rowi`) from a reference row.
# NOTE(review): the second parameter `r` is never used in the body; the body
# reads the globals `ref`, `data` and `genemap` instead.
fun <- function(rowi,r) {
  # For every row of `ref`, count how many non-NA values of `rowi` occur in it.
  res <- apply(as.data.frame(ref),1,function(x) {length(na.omit(match(na.omit(rowi),x)))})
  # NOTE(review): `res` is not used below; the index is computed from `data`
  # instead -- presumably `which.max(res)` was intended. TODO confirm.
  IND <- which(  max(data) == data  )[1]

  # Overwrite the NA positions of `rowi` with the values found at the same
  # positions of row IND of `genemap`.
  # NOTE(review): `genemap` is not defined in this snippet -- presumably `ref`
  # was meant. TODO confirm.
  rowi[is.na(rowi)] <- unlist(genemap[IND,])[is.na(rowi)]
  return(rowi)
}

# Apply `fun` to each row of `data` (with `ref` passed as `r`) and transpose
# back to one row per record. The result is not assigned to any variable, so
# `data` itself is left unchanged.
as.data.frame(t(apply(data, 1, fun, ref))
)

这个方法有点绕，但应该可行。
假设数据如下所示：
# Reference table: complete rows; in this example no value repeats within a
# column.
ref <- data.frame(
  ref = c("r1", "r2", "r3"),
  smbole = c("ts", "kn", "fr"),
  name = c("table_spoon", "knife", "door"),
  stringsAsFactors = FALSE
)

# Table to complete: NA marks the cells that should be filled in from `ref`.
data <- data.frame(
  ref = c("r2", "r3", NA, NA),
  smbole = c("kn", NA, NA, NA),
  name = c(NA, "door", "table_spoon", "door"),
  stringsAsFactors = FALSE
)

您可以遍历 dat 的每一行，在 ref 中找到与之匹配的行，所有匹配行合在一起就是结果：
# For each row of `dat`, look up the matching row of `ref` and return that
# row, so every NA cell ends up holding the corresponding reference value.
# `dat` and `ref` are expected to be character matrices with the same columns.
t(
  apply(dat, 1, function(x) {
    ind <- which.max(!is.na(x)) # index of the first non-NA column
    # match() yields exactly one row index, or NA when the row is entirely NA
    # or its value is absent from `ref`. Indexing with NA gives an all-NA row
    # of the right length, so apply() always receives equal-length vectors
    # (logical indexing with `==` could return zero or several rows instead).
    ref[match(x[ind], ref[, ind]), ]
  })
)

#      ref  smbole name         
# [1,] "r2" "kn"   "knife"      
# [2,] "r3" "fr"   "door"       
# [3,] "r1" "ts"   "table_spoon"
# [4,] "r3" "fr"   "door" 

使用 data.table 更新连接（update join）的解决方案：
library(data.table)

# Convert both inputs to data.table, going through as.data.frame() first.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
ref <- as.data.frame(ref, stringsAsFactors = FALSE)
setDT(ref)
data <- as.data.frame(data, stringsAsFactors = FALSE)
setDT(data)

# Update join: use each column of `ref` in turn as the join key (`oncol`) and,
# for the rows of `data` that match on that key, fill the NA entries of every
# other column (`scol`) with the value from `ref` (referred to as `i.<scol>`).
for (oncol in colnames(ref)) {
  for (scol in setdiff(colnames(ref), oncol)) {
    rcol <- paste0("i.", scol)
    data[
      ref,
      (scol) := ifelse(is.na(get(scol)), get(rcol), get(scol)),
      on = oncol
    ]
  }
}

# > data
#    ref smbole        name
# 1:  r2     kn       knife
# 2:  r3     fr        door
# 3:  r1     ts table spoon
# 4:  r3     fr        door

ref 的每一列中的值都是唯一的吗（即 ref 的任何一列中都没有值出现两次）？
ref 中的值是唯一的，但 data 中的值可能出现两次或更多次。@mt1022
我无法假设数据的样子，因为数据量很大（1000 行），必须从 CSV 中读取。
我使用的数据就是你在问题中发布的数据，只是 table_spoon 中带有一个下划线。只需在真实数据上运行 Reduce(… 这一行，看看它是否有效。
@rachidrachid，ref 和 data 应先转换为 data.frame（使用 as.data.frame），再转换为 data.table（使用 setDT），才能使用上述代码。
# Map(): for every column, look up the values of `data` in the same column of
# `ref` and take the corresponding full rows of `ref` (all-NA where unmatched).
# Reduce(): walk those candidates left to right, filling only the rows that
# are still entirely NA in the accumulated result.
Reduce(
  function(acc, nxt) {
    unresolved <- rowSums(!is.na(acc)) == 0
    acc[unresolved, ] <- nxt[unresolved, ]
    acc
  },
  Map(function(ref_col, data_col) ref[match(data_col, ref_col), ], ref, data)
)

#     ref smbole        name
#2     r2     kn       knife
#3     r3     fr        door
#NA    r1     ts table_spoon
#NA.1  r3     fr        door

# For each row of `dat`, look up the matching row of `ref` and return that
# row, so every NA cell ends up holding the corresponding reference value.
# `dat` and `ref` are expected to be character matrices with the same columns.
t(
  apply(dat, 1, function(x) {
    ind <- which.max(!is.na(x)) # index of the first non-NA column
    # match() yields exactly one row index, or NA when the row is entirely NA
    # or its value is absent from `ref`. Indexing with NA gives an all-NA row
    # of the right length, so apply() always receives equal-length vectors
    # (logical indexing with `==` could return zero or several rows instead).
    ref[match(x[ind], ref[, ind]), ]
  })
)

#      ref  smbole name         
# [1,] "r2" "kn"   "knife"      
# [2,] "r3" "fr"   "door"       
# [3,] "r1" "ts"   "table_spoon"
# [4,] "r3" "fr"   "door" 

# Reference lookup as a 3 x 3 character matrix, built column by column.
ref <- cbind(
  ref = c("r1", "r2", "r3"),
  smbole = c("ts", "kn", "fr"),
  name = c("table_spoon", "knife", "door")
)

#      ref  smbole name         
# [1,] "r1" "ts"   "table_spoon"
# [2,] "r2" "kn"   "knife"      
# [3,] "r3" "fr"   "door" 

# Incomplete records as a 4 x 3 character matrix, built column by column;
# NA marks the cells to be filled in from `ref`.
dat <- cbind(
  ref = c("r2", "r3", NA, NA),
  smbole = c("kn", NA, NA, NA),
  name = c(NA, "door", "table_spoon", "door")
)

#      ref  smbole name         
# [1,] "r2" "kn"   NA           
# [2,] "r3" NA     "door"       
# [3,] NA   NA     "table_spoon"
# [4,] NA   NA     "door"  

library(data.table)

# Convert both inputs to data.table, going through as.data.frame() first.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
ref <- as.data.frame(ref, stringsAsFactors = FALSE)
setDT(ref)
data <- as.data.frame(data, stringsAsFactors = FALSE)
setDT(data)

# Update join: use each column of `ref` in turn as the join key (`oncol`) and,
# for the rows of `data` that match on that key, fill the NA entries of every
# other column (`scol`) with the value from `ref` (referred to as `i.<scol>`).
for (oncol in colnames(ref)) {
  for (scol in setdiff(colnames(ref), oncol)) {
    rcol <- paste0("i.", scol)
    data[
      ref,
      (scol) := ifelse(is.na(get(scol)), get(rcol), get(scol)),
      on = oncol
    ]
  }
}

# > data
#    ref smbole        name
# 1:  r2     kn       knife
# 2:  r3     fr        door
# 3:  r1     ts table spoon
# 4:  r3     fr        door

# Example inputs for the data.table answer, written as structure() literals
# whose class vector is c("data.table", "data.frame").
# Note that the value "table spoon" is spelled with a space here.
# NOTE(review): objects built this way presumably still need setDT() before
# `:=` can be used on them -- TODO confirm.

# `data`: the records to complete; NA marks the missing cells.
data <- structure(list(ref = c("r2", "r3", NA, NA), smbole = c("kn", 
    NA, NA, NA), name = c(NA, "door", "table spoon", "door")), row.names = c(NA, 
        -4L), class = c("data.table", "data.frame"))

# `ref`: the reference table with one complete row per item.
ref <- structure(list(ref = c("r1", "r2", "r3"), smbole = c("ts", "kn", 
    "fr"), name = c("table spoon", "knife", "door")), row.names = c(NA, 
        -3L), class = c("data.table", "data.frame"))