Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/77.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
替换多个列中的值R_R_Database_Replace - Fatal编程技术网

替换多个列中的值R

替换多个列中的值R,r,database,replace,R,Database,Replace,我正在为R中的某些值的替换而挣扎,我想听听您对如何进行的建议。在resume中,我有一个带有索引的数据,我需要用另一个索引替换该索引的值。考虑到我在这两个索引上都有5564个案例,我想知道如何在整个数据中创建它 假设这是我的数据。第1帧: a<-sample(1:10, 10);b<- sample(1:10,10); c<- sample(1:10,10) d<-sample(1:10, 10) df1<- cbind(a, b, c, d) a

我正在为 R 中某些值的替换而苦恼,想听听您对如何处理的建议。简而言之,我有一份包含索引值的数据,需要把这些索引值替换为另一个索引的值。考虑到这两个索引各有 5564 个取值,我想知道如何对整份数据完成这种替换。

假设这是我的数据。第1帧:

# Example data: four independent random permutations of 1..10, bound
# column-wise into a 10 x 4 matrix named df1. The draws are unseeded, so the
# values differ from run to run.
# Note: `c` is used as a data vector here; calls such as c(1, 2) still resolve
# to the base function.
a <- sample(1:10, size = 10)
b <- sample(1:10, size = 10)
c <- sample(1:10, size = 10)
d <- sample(1:10, size = 10)
df1 <- cbind(a, b, c, d)
       a  b  c  d
 [1,]  3  4  7  5
 [2,]  2  2  1  7
 [3,]  9  8 10  8
 [4,]  1  1  3  2
 [5,]  7  5  2  9
 [6,] 10  7  4  3
 [7,]  8  3  8  6
 [8,]  6  6  5  1
 [9,]  5 10  9 10
 [10,]  4  9  6  4

不确定这是否是您想要的:

# Replace every value of df1 that appears in df2[, "index1"] with the index2
# value from the same row of df2.
#
# All lookups are resolved against the original cell values before anything
# is written back. A row-by-row loop that overwrites df1 in place can map a
# cell twice when a newly written index2 value equals an index1 key handled
# in a later iteration (e.g. 1 -> 3 followed by 3 -> 500).
cells <- as.matrix(df1)
lookup <- match(cells, df2[, "index1"])

# Logical mask with the shape of df1, so the assignment works whether df1 is
# a matrix or a data frame; cells without a matching index1 stay unchanged.
hit <- array(!is.na(lookup), dim = dim(cells))
df1[hit] <- df2[, "index2"][lookup[hit]]

df1
for (i in seq_len(nrow(df2))) {

df1[df1 == df2[, "index1"][i]] <- df2[, "index2"][i]
}

我会使用 match() 函数。长话短说:


df1[which(df1 %in% df2[, "index1"])] <- df2[, "index2"][match(df1, df2[, "index1"])]

使用 dplyr 和 tidyr 的解决方案,df1_new 是最终输出:

# Set seed for reproducibility
set.seed(123)

# Create df1: four random permutations of 1..10 as columns of a data frame
a <- sample(1:10, 10)
b <- sample(1:10, 10)
c <- sample(1:10, 10)
d <- sample(1:10, 10)
df1 <- as.data.frame(cbind(a, b, c, d))

# Create df2: the mapping table (index1 = value to look up,
# index2 = value to put in its place)
index1 <- c(1:10)
index2 <- sample(1:1000, 10)
df2 <- as.data.frame(cbind(index1, index2))

# Load packages
library(dplyr)
library(tidyr)

# Replace every value of df1 by its index2 counterpart:
#   1. tag each row with an ID so the wide layout can be rebuilt afterwards,
#   2. stack all columns into long format (one row per cell),
#   3. look each cell value up in df2 (cells without a match become NA),
#   4. drop the old value and go back to one column per original column.
# pivot_longer()/pivot_wider() are used instead of the superseded
# gather()/spread(); row_number() is safe for zero-row input, unlike 1:n().
df1_new <- df1 %>%
  mutate(ID = row_number()) %>%
  pivot_longer(-ID, names_to = "Column", values_to = "index1") %>%
  left_join(df2, by = "index1") %>%
  select(-index1) %>%
  pivot_wider(names_from = Column, values_from = index2) %>%
  select(-ID) %>%
  # pivot_*() return tibbles; convert so df1_new stays a plain data frame
  as.data.frame()
# 为可重复性设置种子
set.seed(123)
# 创建 df1
a <- sample(1:10, 10)

这个问题可以通过结合两种方法来解决:

  • 通过将“映射”表 df2 与 df1 连接,将 index1 值转换为 index2
  • 重新调整 df1 的形状,以避免单独处理每个列

  对于后者,melt() 用于从宽格式重塑为长格式,dcast() 用于从长格式重塑为宽格式。

    library(data.table)
    # coerce both inputs to data.table
    DT1 <- data.table(df1)
    DT2 <- data.table(df2)
    # step 1: add a row counter (rn) to DT1 and melt it from wide to long format
    long <- melt(DT1[, rn := .I], id.vars = "rn")
    # step 2: join with the mapping table, matching index1 to the cell value
    mapped <- DT2[long, on = .(index1 = value)]
    # step 3: reshape from long back to wide format, one column per variable
    wide <- dcast(mapped, rn ~ variable, value.var = "index2")
    # finally, remove the row counter and print the result
    wide[, rn := NULL][]
    

    df1 没有 index1。你能显示你想要的输出吗?
    嗨,埃里克!谢谢你的回答。df1 中的值本身就是索引:这些值与 df2 的 index1 相同,我需要用 df2 的 index2 中对应的值替换它们。
    你能让你的代码可重现吗?Sample 应该是 sample,你还需要使用 set.seed(1),以确保每次抽取“随机”样本时得到相同的结果。是否确实要使用 cbind(…)?另外,你真实的 df1 和 df2 各自是矩阵(matrix)还是数据框(data.frame)?这将影响答案。
    很抱歉,sample 函数中有输入错误,不过我已经按照下面的建议解决了!
    感谢你的解决方案!我会在这里查看细节。因为我仍然需要用这些数据规划后续的一些步骤,我相信你的完整解释会对我有很大帮助。
    # Replace each value of df1 that occurs in df2[, "index1"] with the index2
    # value from the same row of df2. df1 is used as a matrix here (see the
    # printed output); %in% and match() do not work cell-wise on a data frame.

    # Linear positions of the cells whose value occurs in index1.
    pos <- which(df1 %in% df2[,"index1"])
    # In this example every cell has a match, so df1[pos] is all of df1.
    df1[pos] == df1 # it's the same thing
    #         a    b    c    d
    # [1,] TRUE TRUE TRUE TRUE
    # [2,] TRUE TRUE TRUE TRUE
    # [3,] TRUE TRUE TRUE TRUE
    # [4,] TRUE TRUE TRUE TRUE
    # [5,] TRUE TRUE TRUE TRUE
    # [6,] TRUE TRUE TRUE TRUE
    # [7,] TRUE TRUE TRUE TRUE
    # [8,] TRUE TRUE TRUE TRUE
    # [9,] TRUE TRUE TRUE TRUE
    #[10,] TRUE TRUE TRUE TRUE
    
    # Look up only the matched cells, so tomatch lines up one-to-one with pos
    # and contains no NA even when some cells of df1 are absent from index1.
    tomatch <- match(df1[pos], df2[,"index1"]) # all of them.
    tomatch
    # [1]  1  2  3  7  8 10  6  9  5  4  1  6  2  9  7 10  5  4  3  8  8  1  2  9 10
    #[26]  3  6  4  7  5  6  9  4  8  5  2 10  3  7  1
    df2[,"index2"][tomatch] # what we want to replace them with
    # [1] 829 568 836 717 693  92 645 222 767 107 829 645 568 222 717  92 767 107
    #[19] 836 693 693 829 568 222  92 836 645 107 717 767 645 222 107 693 767 568
    #[37]  92 836 717 829
    # pos and tomatch have the same length, so each cell gets its own value.
    df1[pos] <- df2[,"index2"][tomatch]
    df1
    #        a   b   c   d
    # [1,] 829 829 693 645
    # [2,] 568 645 829 222
    # [3,] 836 568 568 107
    # [4,] 717 222 222 693
    # [5,] 693 717  92 767
    # [6,]  92  92 836 568
    # [7,] 645 767 645  92
    # [8,] 222 107 107 836
    # [9,] 767 836 717 717
    #[10,] 107 693 767 829
    
    # Set seed for reproducibility
    set.seed(123)
    
    # Create df1: four random permutations of 1..10 as columns of a data frame
    a <- sample(1:10, 10)
    b <- sample(1:10, 10)
    c <- sample(1:10, 10)
    d <- sample(1:10, 10)
    df1 <- as.data.frame(cbind(a, b, c, d))
    
    # Create df2: the mapping table (index1 = value to look up,
    # index2 = value to put in its place)
    index1 <- c(1:10)
    index2 <- sample(1:1000, 10)
    df2 <- as.data.frame(cbind(index1, index2))
    
    # Load packages
    library(dplyr)
    library(tidyr)
    
    # Replace every value of df1 by its index2 counterpart:
    #   1. tag each row with an ID so the wide layout can be rebuilt afterwards,
    #   2. stack all columns into long format (one row per cell),
    #   3. look each cell value up in df2 (cells without a match become NA),
    #   4. drop the old value and go back to one column per original column.
    # pivot_longer()/pivot_wider() are used instead of the superseded
    # gather()/spread(); row_number() is safe for zero-row input, unlike 1:n().
    df1_new <- df1 %>%
      mutate(ID = row_number()) %>%
      pivot_longer(-ID, names_to = "Column", values_to = "index1") %>%
      left_join(df2, by = "index1") %>%
      select(-index1) %>%
      pivot_wider(names_from = Column, values_from = index2) %>%
      select(-ID) %>%
      # pivot_*() return tibbles; convert so df1_new stays a plain data frame
      as.data.frame()
    
    library(data.table)
    # coerce both inputs to data.table
    DT1 <- data.table(df1)
    DT2 <- data.table(df2)
    # step 1: add a row counter (rn) to DT1 and melt it from wide to long format
    long <- melt(DT1[, rn := .I], id.vars = "rn")
    # step 2: join with the mapping table, matching index1 to the cell value
    mapped <- DT2[long, on = .(index1 = value)]
    # step 3: reshape from long back to wide format, one column per variable
    wide <- dcast(mapped, rn ~ variable, value.var = "index2")
    # finally, remove the row counter and print the result
    wide[, rn := NULL][]
    
          a   b   c   d
     1: 924 197 852 405
     2: 241 241 877 852
     3: 421 772 646 772
     4: 877 877 924 241
     5: 852 405 241 421
     6: 646 852 197 924
     7: 772 924 772 854
     8: 854 854 405 877
     9: 405 646 421 646
    10: 197 421 854 197