将R中的第一个数字字符替换为sub和if
我想替换以5到1开头的3个字符数字。我尝试使用sub if条件,但失败了 不连接:将R中的第一个数字字符替换为sub和if,r,if-statement,replace,conditional-statements,R,If Statement,Replace,Conditional Statements,我想替换以5到1开头的3个字符数字。我尝试使用sub if条件,但失败了 不连接: DTNASC AGE 1 3031997 520 2 9022017 0 3 13071933 83 4 6022002 515 5 2061966 50 6 28121946 70 7 4121955 61 8 3101943 73 9 6022017 20 10 14012017 0 11 20071931 8 if((nchar(DO_co
DTNASC AGE
1 3031997 520
2 9022017 0
3 13071933 83
4 6022002 515
5 2061966 50
6 28121946 70
7 4121955 61
8 3101943 73
9 6022017 20
10 14012017 0
11 20071931 8
if((nchar(DO_concatenated$AGE) == 3)&(funcaoidade(DO_concatenated$AGE) == 5)){
DO_concatenated$IDADE = sub(pattern = 5, replacement = 1, DO_concatenated$AGE)
}
如果它起作用,输出将如下所示:
DTNASC AGE
1 3031997 120
2 9022017 0
3 13071933 83
4 6022002 115
5 2061966 50
6 28121946 70
7 4121955 61
8 3101943 73
9 6022017 20
10 14012017 0
11 20071931 8
我之前这样做是为了删除以4开头的变量,代码如下:
if((nchar(DO_concatenated$IDADE) == 3)&(funcaoidade(DO_concatenated$IDADE) == 4)){
DO_concatenated$IDADE = sub(pattern = 4, replacement = "", DO_concatenated$IDADE)
}
成功了
“funcaoidade”查找数字的第一个字符
funcaoidade = function(x){
substr(x, start = 1, stop = 1)
}
那么,有什么区别呢?
提前谢谢 下面是一种使用stringr软件包的方法
library(dplyr)
library(stringr)
data <-
data.frame(
DTNASC = c(3031997, 9022017, 13071933, 6022002, 2061966, 28121946, 4121955,
3101943, 6022017, 14012017, 20071931),
AGE = c(520, 0, 83, 515, 50, 70, 61, 73, 20, 0, 8)
)
data %>%
mutate(# Replacement of Age
# To convert it into character to make it easier
AGE = as.character(AGE),
# Here 5 is the character we are checking in first character
# str_sub(AGE, 1, 1) -> Checks first character
# nchar(AGE) == 3 -> Checks if the length of AGE is 3
# str_replace(AGE, "5", "1") -> Replaces 5 with 1
# as.numeric() -> To convert to a number
AGE = ifelse(str_sub(AGE, 1, 1) == "5" & nchar(AGE) == 3,
as.numeric(str_replace(AGE, "5", "1")),as.numeric(AGE)),
# Replacement of DTNASC
# To convert it into character to make it easier
DTNASC = as.character(DTNASC),
# Here 4 is the character we are checking in first character
# str_sub(DTNASC, 1, 1) -> Checks first character
# nchar(DTNASC) == 7 -> Checks if the length of DTNASC is 7
# str_replace(DTNASC, "4", "") -> Replaces 4 with null
# as.numeric() -> To convert to a number
DTNASC = ifelse(str_sub(DTNASC, 1, 1) == "4" & nchar(DTNASC) == 7,
as.numeric(str_replace(DTNASC, "4", "")),as.numeric(DTNASC)))
# DTNASC AGE
# 3031997 120
# 9022017 0
# 13071933 83
# 6022002 115
# 2061966 50
# 28121946 70
# 121955 61
# 3101943 73
# 6022017 20
# 14012017 0
# 20071931 8
库(dplyr)
图书馆(stringr)
数据%
突变(#年龄的替代
#将其转换为字符以使其更容易
年龄=作为字符(年龄),
#这里5是我们要签入的第一个字符
#str_sub(年龄,1,1)->检查第一个字符
#nchar(年龄)==3->检查年龄长度是否为3
#str_replace(年龄,“5”,“1”)->将5替换为1
#as.numeric()->转换为数字
年龄=ifelse(str_sub(年龄,1,1)=“5”&nchar(年龄)=3,
as.numeric(str_替换(年龄,“5”,“1”)),as.numeric(年龄)),
#DTNASC的更换
#将其转换为字符以使其更容易
DTNASC=作为字符(DTNASC),
#这里4是我们要签入的第一个字符
#str_sub(DTNASC,1,1)->检查第一个字符
#nchar(DTNASC)==7->检查DTNASC的长度是否为7
#str_replace(DTNASC,“4”和“”)->将4替换为null
#as.numeric()->转换为数字
DTNASC=ifelse(str_sub(DTNASC,1,1)=“4”&nchar(DTNASC)==7,
as.numeric(str_replace(DTNASC,“4”,“”),as.numeric(DTNASC)))
#DTNASC年龄
# 3031997 120
# 9022017 0
# 13071933 83
# 6022002 115
# 2061966 50
# 28121946 70
# 121955 61
# 3101943 73
# 6022017 20
# 14012017 0
# 20071931 8
您可以使用正则表达式执行以下操作:
df$AGE1 <- as.integer(sub("^5(..)", "1\\1", df$AGE))
df
# DTNASC AGE AGE1
#1 3031997 520 120
#2 9022017 0 0
#3 13071933 83 83
#4 6022002 515 115
#5 2061966 50 50
#6 28121946 70 70
#7 4121955 61 61
#8 3101943 73 73
#9 6022017 20 20
#10 14012017 0 0
#11 20071931 8 8
请使用与您之前发布的解决方案相同的方法
i1谢谢,但是您知道如何解释为什么第一个解决方案有效,然后不再有效吗?它有一些关于变量类的信息:if/else
的输出是一个TRUE/FALSE,它期望输入一个TRUE/FALSE。如果该输出值为单个TRUE,则它将对整个列执行子
,如果为FALSE,则不会(nchar(do_concatated$AGE)==3)和(funcaoidade(do_concatated$AGE)==5)[1]TRUE FALSE
,第一个值是TRUE
,当您执行if/else
时,需要第一个if((nchar(do_concatated$AGE)==3)和(funcaoidade(do_concatated$AGE)==5)打印(“hello”)[1]“hello”
好的,我会检查!谢谢
df <- structure(list(DTNASC = c(3031997, 9022017, 13071933, 6022002,
2061966, 28121946, 4121955, 3101943, 6022017, 14012017, 20071931
), AGE = c(520, 0, 83, 515, 50, 70, 61, 73, 20, 0, 8)), class = "data.frame",
row.names = c(NA, -11L))