Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/73.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
删除R中的重复子字符串_R_String_Duplicates_Tidyverse - Fatal编程技术网

删除R中的重复子字符串

删除R中的重复子字符串,r,string,duplicates,tidyverse,R,String,Duplicates,Tidyverse,我有“BY”、“SN”、“SY”和“BN”的字符串序列 如下表所示,有多个实例。 我想把“SNBY”改为“SNBY”,把“snsnsnsnsnsnsnsnsnsn”改为“SNBY” SNo条 1 SNBY 2斯比 3比森 4斯比 5斯比 6斯比 拜比森斯7号 8 SNBY 9比森 10比10 11比森 12 SNBY 13 SNBY 14亿新元 15 15 SNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSN

我有一列由“BY”、“SN”、“SY”和“BN”这些两字符子串组成的字符串,如下表所示,同一个子串可能连续重复出现多次。 我想去掉重复的子串,例如把“SNSNSNBY”改为“SNBY”,把由许多个“SN”后接“BYSN”构成的长字符串也改为“SNBY”。

SNo条
1 SNBY
2斯比
3比森
4斯比
5斯比
6斯比
拜比森斯7号
8 SNBY
9比森
10比10
11比森
12 SNBY
13 SNBY
14亿新元
15 15 SNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSN
16SYBN
1717亿新加坡元
18.18亿SYBNSYBNBNBNBNBNBN
19 19 SNBYSNSNSNSNSNSN
20比20

一种可能的方法是每两个字符拆分一次,然后找出唯一的子字符串,再用 paste0() 把它们拼接起来。

代码

# Split every bar string into consecutive 2-character codes: insert a space
# after each pair of characters, then split on that space.
tmp1 <- strsplit(gsub("(.{2})", "\\1 ", df$Bars), " ")

# Keep only the first occurrence of each code (original order preserved) and
# glue the survivors back into one string per row.
# vapply() guarantees a character vector with one element per row, whereas
# sapply() returns an empty list for zero-row input.
df$Bars <- vapply(tmp1, function(codes) {
  # A missing input stays missing instead of becoming the literal text "NA".
  if (anyNA(codes)) {
    return(NA_character_)
  }
  paste0(unique(codes), collapse = "")
}, character(1))

df

#    SNo     Bars
# 1    1     SNBY
# 2    2     SNBY
# 3    3     BYSN
# 4    4     SNBY
# 5    5     SNBY
# 6    6     SNBY
# 7    7     BYSN
# 8    8     SNBY
# 9    9     BYSN
# 10  10     BYSN
# 11  11     BYSN
# 12  12     SNBY
# 13  13     SNBY
# 14  14     BNSY
# 15  15     SNBY
# 16  16     SYBN
# 17  17     BNSY
# 18  18 BYSYBNSN
# 19  19     SNBY
# 20  20     BYSN
#每2个字符添加一个空格,然后拆分为子字符串

实现这一点的一种可能方法是,每两个字符拆分一次,然后找出唯一的子字符串,再用 paste0() 把它们拼接起来。

代码

# Split every bar string into consecutive 2-character codes: insert a space
# after each pair of characters, then split on that space.
tmp1 <- strsplit(gsub("(.{2})", "\\1 ", df$Bars), " ")

# Keep only the first occurrence of each code (original order preserved) and
# glue the survivors back into one string per row.
# vapply() guarantees a character vector with one element per row, whereas
# sapply() returns an empty list for zero-row input.
df$Bars <- vapply(tmp1, function(codes) {
  # A missing input stays missing instead of becoming the literal text "NA".
  if (anyNA(codes)) {
    return(NA_character_)
  }
  paste0(unique(codes), collapse = "")
}, character(1))

df

#    SNo     Bars
# 1    1     SNBY
# 2    2     SNBY
# 3    3     BYSN
# 4    4     SNBY
# 5    5     SNBY
# 6    6     SNBY
# 7    7     BYSN
# 8    8     SNBY
# 9    9     BYSN
# 10  10     BYSN
# 11  11     BYSN
# 12  12     SNBY
# 13  13     SNBY
# 14  14     BNSY
# 15  15     SNBY
# 16  16     SYBN
# 17  17     BNSY
# 18  18 BYSYBNSN
# 19  19     SNBY
# 20  20     BYSN
#每2个字符添加一个空格,然后拆分为子字符串

一个方便的解决方案是使用 tidyverse 核心库 stringr 中的 str_replace_all() 函数:

# Collapse the two known repeated-bar strings to "SNBY"; every other value of
# `Bars` is left untouched.
# Both patterns are anchored with ^ and $ because str_replace_all() applies a
# named vector of replacements one after another: unanchored, "SNSNSNBY" would
# first rewrite the tail of the long string, after which the second pattern
# could no longer match it.
table <- table %>%
  mutate(Bars = str_replace_all(Bars, c(
    "^SNSNSNBY$" = "SNBY",
    "^SNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNBYSN$" = "SNBY"
  )))
表格%
变异(条=str\u替换所有条(条,c(“SNBY”=“SNBY”,
“snsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsn

一个简便的解决方案是使用tidyverse核心库stringr中的str_replace_all()函数:

# Collapse the two known repeated-bar strings to "SNBY"; every other value of
# `Bars` is left untouched.
# Both patterns are anchored with ^ and $ because str_replace_all() applies a
# named vector of replacements one after another: unanchored, "SNSNSNBY" would
# first rewrite the tail of the long string, after which the second pattern
# could no longer match it.
table <- table %>%
  mutate(Bars = str_replace_all(Bars, c(
    "^SNSNSNBY$" = "SNBY",
    "^SNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNBYSN$" = "SNBY"
  )))
表格%
变异(条=str\u替换所有条(条,c(“SNBY”=“SNBY”,
“snsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsnsn

您可以使用 case_when():

库(tidyverse)
df1%
当(V3==“SNBY”~“SNBY”,
V3==“SNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSN,
真~V3)
)

您可以使用 case_when():

库(tidyverse)
df1%
当(V3==“SNBY”~“SNBY”,
V3==“SNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSN,
真~V3)
)

最终输出应为字符串“BNSY”或“SNBY”或“BYSN”或“SYBN”。最终输出应为字符串“BNSY”或“SNBY”或“BYSN”或“SYBN”。最终输出应为字符串“BNSY”或“SNBY”或“BYSN”或“SYBN”@Shoonya这对条目18没有意义,因为您指定了4个不同的子字符串,并且此条目包含所有子字符串。请更新您的问题,为您的模拟
data.frame()
。编辑了数据-第18行中出现了一些错误。最终输出应为字符串“BNSY”或“SNBY”或“BYSN”或“SYBN”@Shoonya这对条目18没有意义,因为您指定了4个不同的子字符串,并且此条目包含所有子字符串。请更新您的问题,为您的mock
data.frame()
。编辑了数据-第18行出现了一些错误。最终输出应为字符串“BNSY”或“SNBY”或“BYSN”或“SYBN”最终输出应为字符串“BNSY”或“SNBY”或“BYSN”或“SYBN”
library(tidyverse)
# Build df1 from df, recoding V3: the two known repeated-bar strings become
# "SNBY"; every other value (including NA) is passed through unchanged.
df1 <- mutate(
  df,
  V3 = case_when(
    V3 %in% c(
      "SNSNSNBY",
      "SNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNSNBYSN"
    ) ~ "SNBY",
    TRUE ~ V3
  )
)