将R中的第一个数字字符替换为sub和if_R_If Statement_Replace_Conditional Statements

将R中的第一个数字字符替换为sub和if

r if-statement replace

将R中的第一个数字字符替换为sub和if,r,if-statement,replace,conditional-statements,R,If Statement,Replace,Conditional Statements,我想替换以5到1开头的3个字符数字。我尝试使用sub if条件，但失败了不连接： DTNASC AGE 1 3031997 520 2 9022017 0 3 13071933 83 4 6022002 515 5 2061966 50 6 28121946 70 7 4121955 61 8 3101943 73 9 6022017 20 10 14012017 0 11 20071931 8 if((nchar(DO_co

我想替换以5到1开头的3个字符数字。我尝试使用sub if条件，但失败了

不连接：

    DTNASC   AGE
1   3031997  520
2   9022017  0
3   13071933 83
4   6022002  515
5   2061966  50
6   28121946 70
7   4121955  61
8   3101943  73
9   6022017  20
10  14012017 0
11  20071931 8

if((nchar(DO_concatenated$AGE) == 3)&(funcaoidade(DO_concatenated$AGE) == 5)){
  DO_concatenated$IDADE = sub(pattern = 5, replacement = 1, DO_concatenated$AGE) 
}

如果它起作用，输出将如下所示：

    DTNASC   AGE
1   3031997  120
2   9022017  0
3   13071933 83
4   6022002  115
5   2061966  50
6   28121946 70
7   4121955  61
8   3101943  73
9   6022017  20
10  14012017 0
11  20071931 8

我之前这样做是为了删除以4开头的变量，代码如下：

if((nchar(DO_concatenated$IDADE) == 3)&(funcaoidade(DO_concatenated$IDADE) == 4)){
  DO_concatenated$IDADE = sub(pattern = 4, replacement = "", DO_concatenated$IDADE) 
}

成功了

“funcaoidade”查找数字的第一个字符

funcaoidade = function(x){
  substr(x, start = 1, stop = 1)
}

那么，有什么区别呢？

提前谢谢

下面是一种使用stringr软件包的方法

library(dplyr)
library(stringr)

data <-
  data.frame(
    DTNASC = c(3031997, 9022017, 13071933, 6022002, 2061966, 28121946, 4121955, 
               3101943, 6022017, 14012017, 20071931),
    AGE = c(520, 0, 83, 515, 50, 70, 61, 73, 20, 0, 8)
  )

data %>%
  mutate(# Replacement of Age
    # To convert it into character to make it easier
    AGE = as.character(AGE),
    # Here 5 is the character we are checking in first character
    # str_sub(AGE, 1, 1) -> Checks first character
    # nchar(AGE) == 3 -> Checks if the length of AGE is 3
    # str_replace(AGE, "5", "1") -> Replaces 5 with 1
    # as.numeric() -> To convert to a number
    AGE = ifelse(str_sub(AGE, 1, 1) == "5" & nchar(AGE) == 3,
                 as.numeric(str_replace(AGE, "5", "1")),as.numeric(AGE)),

    # Replacement of DTNASC
    # To convert it into character to make it easier
    DTNASC = as.character(DTNASC),
    # Here 4 is the character we are checking in first character
    # str_sub(DTNASC, 1, 1) -> Checks first character
    # nchar(DTNASC) == 7 -> Checks if the length of DTNASC is 7
    # str_replace(DTNASC, "4", "") -> Replaces 4 with null
    # as.numeric() -> To convert to a number
    DTNASC = ifelse(str_sub(DTNASC, 1, 1) == "4" & nchar(DTNASC) == 7,
                 as.numeric(str_replace(DTNASC, "4", "")),as.numeric(DTNASC)))

# DTNASC AGE
# 3031997 120
# 9022017   0
# 13071933  83
# 6022002 115
# 2061966  50
# 28121946  70
# 121955  61
# 3101943  73
# 6022017  20
# 14012017   0
# 20071931   8

库（dplyr）
图书馆（stringr）
数据%
突变（#年龄的替代
#将其转换为字符以使其更容易
年龄=作为字符（年龄），
#这里5是我们要签入的第一个字符
#str_sub（年龄，1，1）->检查第一个字符
#nchar（年龄）==3->检查年龄长度是否为3
#str_replace（年龄，“5”，“1”）->将5替换为1
#as.numeric（）->转换为数字
年龄=ifelse（str_sub（年龄，1,1）=“5”&nchar（年龄）=3，
as.numeric（str_替换（年龄，“5”，“1”）），as.numeric（年龄）），
#DTNASC的更换
#将其转换为字符以使其更容易
DTNASC=作为字符（DTNASC），
#这里4是我们要签入的第一个字符
#str_sub（DTNASC，1，1）->检查第一个字符
#nchar（DTNASC）==7->检查DTNASC的长度是否为7
#str_replace（DTNASC，“4”和“”）->将4替换为null
#as.numeric（）->转换为数字
DTNASC=ifelse（str_sub（DTNASC，1，1）=“4”&nchar（DTNASC）==7，
as.numeric（str_replace（DTNASC，“4”，“”），as.numeric（DTNASC）））
#DTNASC年龄
# 3031997 120
# 9022017   0
# 13071933  83
# 6022002 115
# 2061966  50
# 28121946  70
# 121955  61
# 3101943  73
# 6022017  20
# 14012017   0
# 20071931   8

您可以使用正则表达式执行以下操作：

df$AGE1 <- as.integer(sub("^5(..)", "1\\1", df$AGE))
df

#     DTNASC AGE AGE1
#1   3031997 520  120
#2   9022017   0    0
#3  13071933  83   83
#4   6022002 515  115
#5   2061966  50   50
#6  28121946  70   70
#7   4121955  61   61
#8   3101943  73   73
#9   6022017  20   20
#10 14012017   0    0
#11 20071931   8    8

请使用与您之前发布的解决方案相同的方法

i1谢谢，但是您知道如何解释为什么第一个解决方案有效，然后不再有效吗？它有一些关于变量类的信息：if/else
的输出是一个TRUE/FALSE，它期望输入一个TRUE/FALSE。如果该输出值为单个TRUE，则它将对整个列执行子
，如果为FALSE，则不会（nchar（do_concatated$AGE）==3）和（funcaoidade（do_concatated$AGE）==5）[1]TRUE FALSE
，第一个值是TRUE
，当您执行if/else
时，需要第一个if（（nchar（do_concatated$AGE）==3）和（funcaoidade（do_concatated$AGE）==5）打印（“hello”）[1]“hello”好的，我会检查！谢谢
df <- structure(list(DTNASC = c(3031997, 9022017, 13071933, 6022002, 
2061966, 28121946, 4121955, 3101943, 6022017, 14012017, 20071931
), AGE = c(520, 0, 83, 515, 50, 70, 61, 73, 20, 0, 8)), class = "data.frame", 
row.names = c(NA, -11L))