替换多个列中的值R_R_Database_Replace

替换多个列中的值R

r database replace

替换多个列中的值R,r,database,replace,R,Database,Replace,我正在为R中的某些值的替换而挣扎，我想听听您对如何进行的建议。在resume中，我有一个带有索引的数据，我需要用另一个索引替换该索引的值。考虑到我在这两个索引上都有5564个案例，我想知道如何在整个数据中创建它假设这是我的数据。第1帧： a<-sample(1:10, 10);b<- sample(1:10,10); c<- sample(1:10,10) d<-sample(1:10, 10) df1<- cbind(a, b, c, d) a

我正在为R中某些值的替换而苦恼，想听听您对如何处理的建议。简而言之，我有一份带有索引的数据，需要把该索引的值替换为另一个索引的值。考虑到这两个索引各有5564个案例，我想知道如何对整个数据完成这种替换。

假设这是我的数据，数据框 1（df1）：

# Example data: four columns, each an independent random permutation of 1..10.
# No seed is set here, so the values differ from run to run.
a <- sample(1:10, size = 10)
b <- sample(1:10, size = 10)
c <- sample(1:10, size = 10)
d <- sample(1:10, size = 10)
# cbind() on plain vectors yields a 10 x 4 matrix (not a data frame).
df1 <- cbind(a = a, b = b, c = c, d = d)
       a  b  c  d
 [1,]  3  4  7  5
 [2,]  2  2  1  7
 [3,]  9  8 10  8
 [4,]  1  1  3  2
 [5,]  7  5  2  9
 [6,] 10  7  4  3
 [7,]  8  3  8  6
 [8,]  6  6  5  1
 [9,]  5 10  9 10
 [10,]  4  9  6  4

不确定这是否是您想要的：
# Replace every value of df1 that occurs in df2[, "index1"] with the value in
# the same row of df2[, "index2"].
#
# The comparison is made against an untouched snapshot of df1. Comparing
# against df1 itself while it is being overwritten would replace a cell a
# second time whenever a freshly written index2 value happens to equal an
# index1 value handled in a later iteration.
df1_before <- df1
for (i in seq_len(nrow(df2))) {
  df1[df1_before == df2[i, "index1"]] <- df2[i, "index2"]
}
df1

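for (i in seq_len(nrow(df2))) {
  df1[df1 == df2[, "index1"][i]] <- df2[, "index2"][i]
}

我会使用 match 函数。长话短说：

df1[which(df1 %in% df2[, "index1"])] <- df2[, "index2"][match(df1, df2[, "index1"])]

（这种写法假设 df1 中的每个值都能在 index1 中找到。）

下面是使用 dplyr 和 tidyr 的解决方案，df1_new 是最终输出：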
# Set seed for reproducibility
set.seed(123)

# Create df1: four columns, each a random permutation of 1..10
a <- sample(1:10, 10)
b <- sample(1:10, 10)
c <- sample(1:10, 10)
d <- sample(1:10, 10)
df1 <- as.data.frame(cbind(a, b, c, d))

# Create df2: lookup table mapping each index1 value to its index2 replacement
index1 <- c(1:10)
index2 <- sample(1:1000, 10)
df2 <- as.data.frame(cbind(index1, index2))

# Load packages
library(dplyr)
library(tidyr)

# Replace every value of df1 by its index2 counterpart in one pass:
#   1. tag each row with an ID so the wide layout can be rebuilt,
#   2. reshape to long format (one row per cell),
#   3. look up index2 by joining on index1,
#   4. reshape back to wide format and drop the helper ID.
# pivot_longer()/pivot_wider() are used instead of the superseded
# gather()/spread(); row_number() is used instead of 1:n(), which yields
# c(1, 0) for a zero-row input.
df1_new <- df1 %>%
  mutate(ID = row_number()) %>%
  pivot_longer(-ID, names_to = "Column", values_to = "index1") %>%
  left_join(df2, by = "index1") %>%
  select(-index1) %>%
  pivot_wider(names_from = Column, values_from = index2) %>%
  select(-ID) %>%
  as.data.frame() # pivot_*() return tibbles; keep a plain data frame

# 为可重复性设置种子
set.seed(123)
# 创建 df1
这个问题可以通过结合两种方法来解决：

1. 通过将“映射”表 df2 与 df1 连接，将 index1 值转换为 index2 值；
2. 重新调整 df1 的形状，以避免单独处理每个列。

对于后者，可以使用 melt() 和 dcast()：melt() 用于从宽格式重塑为长格式，dcast() 用于从长格式重塑为宽格式。
library(data.table)

# Coerce both inputs to data.table.
DT1 <- data.table(df1)
DT2 <- data.table(df2)

# Tag each row of DT1 with its position (added by reference) so the original
# row layout can be rebuilt after the lookup.
DT1[, rn := .I]

# Wide -> long, then look up index2 for every cell by joining the mapping
# table on index1 == value, then long -> wide again. Finally drop the helper
# row counter and print the result.
dcast(
  DT2[melt(DT1, id.vars = "rn"), on = c(index1 = "value")],
  formula = rn ~ variable,
  value.var = "index2"
)[, rn := NULL][]

df1 中没有 index1。你能展示一下你想要的输出吗？

嗨，埃里克！谢谢你的回答。df1 中的“索引”就是它的值。这些值与 df2 的 index1 相同，我需要用 df2 中 index2 对应的值来替换它们。

你能让你的代码可重现吗？代码里 sample 的拼写有误，应该是 sample；你还需要使用 set.seed(1)，以确保每次抽取“随机”样本时得到相同的结果。另外，你确定要用 cbind(…) 吗——你真实的 df1 和 df2 是矩阵，还是数据框？这将影响答案。

很抱歉 sample 函数中出现了拼写错误，不过我已经按下面的建议解决了！感谢您的解决方案！我会在这里查看详细信息。因为我仍然需要用这些数据规划未来的一些步骤，我相信您的完整解释会对我很有帮助。
# Vectorised lookup with match(). df1 is searched and indexed like a plain
# vector here, which works when it is a matrix (e.g. built with cbind()).
# NOTE(review): the printed results below come from one particular random
# draw of df1/df2 in which every cell of df1 occurs in index1.

# Linear positions of the df1 cells whose value occurs in index1.
pos <- which(df1 %in% df2[, "index1"])
df1[pos] == df1 # it's the same thing
#         a    b    c    d
# [1,] TRUE TRUE TRUE TRUE
# [2,] TRUE TRUE TRUE TRUE
# [3,] TRUE TRUE TRUE TRUE
# [4,] TRUE TRUE TRUE TRUE
# [5,] TRUE TRUE TRUE TRUE
# [6,] TRUE TRUE TRUE TRUE
# [7,] TRUE TRUE TRUE TRUE
# [8,] TRUE TRUE TRUE TRUE
# [9,] TRUE TRUE TRUE TRUE
#[10,] TRUE TRUE TRUE TRUE

# For every cell of df1, the row of df2 whose index1 equals it (NA if none).
tomatch <- match(df1, df2[, "index1"]) # all of them.
tomatch
# [1]  1  2  3  7  8 10  6  9  5  4  1  6  2  9  7 10  5  4  3  8  8  1  2  9 10
#[26]  3  6  4  7  5  6  9  4  8  5  2 10  3  7  1
df2[, "index2"][tomatch] # what we want to replace them with
# [1] 829 568 836 717 693  92 645 222 767 107 829 645 568 222 717  92 767 107
#[19] 836 693 693 829 568 222  92 836 645 107 717 767 645 222 107 693 767 568
#[37]  92 836 717 829

# Index the replacements with tomatch[pos] rather than all of tomatch: a cell
# without a partner in index1 gives NA in tomatch and is absent from pos, so
# the two sides would otherwise differ in length and be misaligned.
df1[pos] <- df2[, "index2"][tomatch[pos]]
df1
#        a   b   c   d
# [1,] 829 829 693 645
# [2,] 568 645 829 222
# [3,] 836 568 568 107
# [4,] 717 222 222 693
# [5,] 693 717  92 767
# [6,]  92  92 836 568
# [7,] 645 767 645  92
# [8,] 222 107 107 836
# [9,] 767 836 717 717
#[10,] 107 693 767 829

# Set seed for reproducibility
set.seed(123)

# Create df1: four columns, each a random permutation of 1..10
a <- sample(1:10, 10)
b <- sample(1:10, 10)
c <- sample(1:10, 10)
d <- sample(1:10, 10)
df1 <- as.data.frame(cbind(a, b, c, d))

# Create df2: lookup table mapping each index1 value to its index2 replacement
index1 <- c(1:10)
index2 <- sample(1:1000, 10)
df2 <- as.data.frame(cbind(index1, index2))

# Load packages
library(dplyr)
library(tidyr)

# Replace every value of df1 by its index2 counterpart in one pass:
#   1. tag each row with an ID so the wide layout can be rebuilt,
#   2. reshape to long format (one row per cell),
#   3. look up index2 by joining on index1,
#   4. reshape back to wide format and drop the helper ID.
# pivot_longer()/pivot_wider() are used instead of the superseded
# gather()/spread(); row_number() is used instead of 1:n(), which yields
# c(1, 0) for a zero-row input.
df1_new <- df1 %>%
  mutate(ID = row_number()) %>%
  pivot_longer(-ID, names_to = "Column", values_to = "index1") %>%
  left_join(df2, by = "index1") %>%
  select(-index1) %>%
  pivot_wider(names_from = Column, values_from = index2) %>%
  select(-ID) %>%
  as.data.frame() # pivot_*() return tibbles; keep a plain data frame

library(data.table)

# Coerce both inputs to data.table.
DT1 <- data.table(df1)
DT2 <- data.table(df2)

# Add a row counter to DT1 by reference; it identifies each original row
# while the table is in long format.
DT1[, rn := .I]

# Reshape wide -> long, join with the mapping table (index1 == value) to pick
# up index2, reshape long -> wide, then remove the row counter and print.
dcast(
  DT2[melt(DT1, id.vars = "rn"), on = c(index1 = "value")],
  formula = rn ~ variable,
  value.var = "index2"
)[, rn := NULL][]

      a   b   c   d
 1: 924 197 852 405
 2: 241 241 877 852
 3: 421 772 646 772
 4: 877 877 924 241
 5: 852 405 241 421
 6: 646 852 197 924
 7: 772 924 772 854
 8: 854 854 405 877
 9: 405 646 421 646
10: 197 421 854 197