使用dplyr条件替换数据帧中的元素

使用dplyr条件替换数据帧中的元素,r,dplyr,R,Dplyr,在这些行下面的某个地方,我有一行带有WT1..WTn。尝试仅更改标签列中的少数元素,而不更改其他元素。生成一个名为old的变量,该变量保存需要替换/重命名的元素的名称 代码 期望输出 我遗漏了什么?用str\u替换fromstringrstr_replace_all采用命名向量进行匹配和替换,其中名称是要匹配的模式,值是替换^和$正则表达式元字符被包装到每个模式中,以确保它们完全匹配: genotype.og label genotype label_new MT1 MT1 MT M

在这些行下面的某个位置,还有若干行的标签为 `WT1..WTn`。我想只更改 `label` 列中的少数元素,而不更改其他元素。为此我生成了一个名为 `old` 的变量,用来保存需要替换/重命名的元素的名称。

代码 期望输出
我遗漏了什么?

使用 `stringr` 中的 `str_replace_all`:`str_replace_all` 采用命名向量进行匹配和替换,其中名称是要匹配的模式,值是替换内容。`^` 和 `$` 这两个正则表达式元字符被加在每个模式的两端,以确保它们完全匹配:

genotype.og label   genotype    label_new
MT1 MT1 MT  MT1
MT2 MT2 MT  MT2
MT3 MT3 MT  MT3
MT4 MT4 MT  MT4
MT5 MT5 MT  MT5
MT6 MT6 MT  MT6
WT1 WT1 WT  WS1
WT4 WT4 WT  WS4
WT11    WT11    WT  WS11
WT13    WT13    WT  WS13
WT27    WT27    WT  WS27
WT28    WT28    WT  WS28
WT74    WT74    WT  WS74
WT53    WT53    WT  WS53
WT68    WT68    WT  WS68
WT84    WT84    WT  WS84
WT92    WT92    WT  WS92
WT95    WT95    WT  WS95
查找字符串变为:

library(stringr)
library(dplyr)

# Add a label_new column next to the untouched label column.
# The pattern argument is a named vector: each name is an entry of `old`
# wrapped in ^...$ (so only whole-string matches are replaced) and each
# value is the corresponding entry of `new`.
mutate(
  df,
  label_new = str_replace_all(
    string = label,
    pattern = setNames(new, paste0('^', old, '$'))
  )
)
或者使用基础 R:

> setNames(new, paste0('^', old, '$'))
 ^WT1$  ^WT4$ ^WT11$ ^WT13$ ^WT27$ ^WT28$ ^WT74$ ^WT53$ ^WT68$ ^WT84$ ^WT92$ ^WT95$ 
 "WS1"  "WS4" "WS11" "WS13" "WS27" "WS28" "WS74" "WS53" "WS68" "WS84" "WS92" "WS95"
数据:

# Position of each label within `old`; NA marks labels with no replacement.
label_match <- match(df$label, old)
has_replacement <- !is.na(label_match)

# Start label_new as a copy of label, then overwrite only the matched rows
# with the entry of `new` found at the matched position.
df$label_new <- df$label
df$label_new[has_replacement] <- new[label_match[has_replacement]]
   genotype.og label genotype label_new
1          MT1   MT1       MT       MT1
2          MT2   MT2       MT       MT2
3          MT3   MT3       MT       MT3
4          MT4   MT4       MT       MT4
5          MT5   MT5       MT       MT5
6          MT6   MT6       MT       MT6
7          WT1   WT1       WT       WS1
8          WT4   WT4       WT       WS4
9         WT11  WT11       WT      WS11
10        WT13  WT13       WT      WS13
11        WT27  WT27       WT      WS27
12        WT28  WT28       WT      WS28
13        WT74  WT74       WT      WS74
14        WT53  WT53       WT      WS53
15        WT68  WT68       WT      WS68
16        WT84  WT84       WT      WS84
17        WT92  WT92       WT      WS92
18        WT95  WT95       WT      WS95

我更喜欢 @avid_useR 的解决方案,但下面这种方法可以把映射过程逐步展示出来:

# Example data: six "MT"-prefixed and twelve "WT"-prefixed ids.
# genotype.og and label start out identical; genotype is the two-letter
# prefix of each id. All three columns are character vectors.
df <- local({
  sample_ids <- c(
    paste0("MT", 1:6),
    paste0("WT", c(1, 4, 11, 13, 27, 28, 74, 53, 68, 84, 92, 95))
  )
  data.frame(
    genotype.og = sample_ids,
    label = sample_ids,
    genotype = substr(sample_ids, 1, 2),
    stringsAsFactors = FALSE
  )
})
library(dplyr)

# Replace entries of `label` in place by joining against an old -> new
# lookup table; rows whose label is not listed in `old` keep their label.
df <- df %>%
  # Join with the replacement strings; `new` is NA where no replacement exists
  left_join(tibble(old, new), by = c("label" = "old")) %>%
  # Use the replacement where one is available, otherwise keep the label
  mutate(label = coalesce(new, label)) %>%
  # Drop the helper column added by the join
  select(-new)

@avid_useR 这是就地替换吗?不会添加新的列吗?啊,这就说得通了,我完全用错了函数。

@sbradbio 不过创建一个新列也很简单。请参阅我的编辑。

@sbradbio 这确实是一个问题,因为 `str_replace` 使用正则表达式进行字符串替换。我的解决方案是在 `old` 的每个元素两端加上 `^` 和 `$`,使它们完全匹配。即使在这种情况下,基础 R 的解决方案也同样有效。请参阅我的更新。

@sbradbio 它们都是正则表达式元字符:`^` 匹配字符串的开头,而 `$` 匹配字符串的结尾。因此,例如 `^WT1$` 只匹配以 `WT1` 开头并以其结尾的字符串;`WT10` 在这种情况下不匹配,因为它不以 `1` 结尾。
# Position of each label within `old`; NA marks labels with no replacement.
label_match <- match(df$label, old)
has_replacement <- !is.na(label_match)

# Start label_new as a copy of label, then overwrite only the matched rows
# with the entry of `new` found at the matched position.
df$label_new <- df$label
df$label_new[has_replacement] <- new[label_match[has_replacement]]
   genotype.og label genotype label_new
1          MT1   MT1       MT       MT1
2          MT2   MT2       MT       MT2
3          MT3   MT3       MT       MT3
4          MT4   MT4       MT       MT4
5          MT5   MT5       MT       MT5
6          MT6   MT6       MT       MT6
7          WT1   WT1       WT       WS1
8          WT4   WT4       WT       WS4
9         WT11  WT11       WT      WS11
10        WT13  WT13       WT      WS13
11        WT27  WT27       WT      WS27
12        WT28  WT28       WT      WS28
13        WT74  WT74       WT      WS74
14        WT53  WT53       WT      WS53
15        WT68  WT68       WT      WS68
16        WT84  WT84       WT      WS84
17        WT92  WT92       WT      WS92
18        WT95  WT95       WT      WS95
# Example data: six "MT"-prefixed and twelve "WT"-prefixed ids.
# genotype.og and label start out identical; genotype is the two-letter
# prefix of each id. All three columns are character vectors.
df <- local({
  sample_ids <- c(
    paste0("MT", 1:6),
    paste0("WT", c(1, 4, 11, 13, 27, 28, 74, 53, 68, 84, 92, 95))
  )
  data.frame(
    genotype.og = sample_ids,
    label = sample_ids,
    genotype = substr(sample_ids, 1, 2),
    stringsAsFactors = FALSE
  )
})
library(dplyr)
# Replace entries of `label` in place by joining against an old -> new
# lookup table; rows whose label is not listed in `old` keep their label.
df <- df %>%
  # Join with the replacement strings; `new` is NA where no replacement exists
  left_join(tibble(old, new), by = c("label" = "old")) %>%
  # Use the replacement where one is available, otherwise keep the label
  mutate(label = coalesce(new, label)) %>%
  # Drop the helper column added by the join
  select(-new)