Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/69.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
基于条件匹配R中多列中的值_R_If Statement_String Matching - Fatal编程技术网

基于条件匹配R中多列中的值

基于条件匹配R中多列中的值,r,if-statement,string-matching,R,If Statement,String Matching,假设我有一个数据源 我想根据以下参数向这个数据帧添加一个列df$response,我想我需要一组嵌套的ifelse,但我很难正确执行它: 对于给定的X行,如果df$match=1 在df$响应中打印1,如果: df$match=0的df$match中的任何行在df$caseID、df$faculty和df$phase中的内容与第X行相同。否则,打印0 所以输出应该是这样的: response 1 0 0 1 0 0 因为只有第一行和第四行包含df$caseID、df$faculty和df$p

假设我有一个数据源

我想根据以下参数向这个数据帧添加一个列df$response,我想我需要一组嵌套的ifelse,但我很难正确执行它:

对于给定的X行,如果df$match=1

在df$response中填入1,如果:

存在任意一行满足df$match=0,并且该行在df$caseID、df$faculty和df$phase中的取值与第X行完全相同。否则,填入0

所以输出应该是这样的:

response

1
0
0
1
0
0
因为只有第一行和第四行包含df$caseID、df$faculty和df$phase中匹配的值,其中df$match=1的行和df$match=0的行都有匹配的值。

在您的机器上使用[]进行索引速度更快,成本更低

# Example data: one row per submitted evaluation.
df <- data.frame(
  "resident" = c("george","george","george","jane","jane","jane"),
  "faculty" = c("sally","sally","sally","carl","carl","carl"),
  "submittedBy" = c("george","sally","george","jane","carl","carl"),
  "match" = c(1,0,1,1,0,0),
  "caseID" = c("george_1","george_1","george_1","jane_1","jane_1","jane_1"),
  "phase" = c("pre","pre","intra","pre","pre","intra"),
  stringsAsFactors = FALSE
  )

# Composite key for each row's (caseID, faculty, phase) combination. A
# control-character separator keeps different triples from collapsing into
# the same string, which plain concatenation with sep = "" allows
# (e.g. "a" + "bc" vs "ab" + "c").
key <- paste(df$caseID, df$faculty, df$phase, sep = "\x1f")

# Keys carried by at least one row with match == 0. which() turns the
# logical mask into positions so NA comparisons select nothing.
zero_keys <- key[which(df$match == 0)]

# response is 1 when the row itself has a non-zero match AND some match == 0
# row shares its key; otherwise 0. Computed for all rows at once, so an empty
# data frame yields numeric(0) instead of looping over 1:0.
response <- as.numeric(df$match != 0 & key %in% zero_keys)

response
[1] 1 0 0 1 0 0
在您的机器上,使用[]进行索引速度更快,成本更低

# Example data: one row per submitted evaluation.
df <- data.frame(
  "resident" = c("george","george","george","jane","jane","jane"),
  "faculty" = c("sally","sally","sally","carl","carl","carl"),
  "submittedBy" = c("george","sally","george","jane","carl","carl"),
  "match" = c(1,0,1,1,0,0),
  "caseID" = c("george_1","george_1","george_1","jane_1","jane_1","jane_1"),
  "phase" = c("pre","pre","intra","pre","pre","intra"),
  stringsAsFactors = FALSE
  )

# Composite key for each row's (caseID, faculty, phase) combination. A
# control-character separator keeps different triples from collapsing into
# the same string, which plain concatenation with sep = "" allows
# (e.g. "a" + "bc" vs "ab" + "c").
key <- paste(df$caseID, df$faculty, df$phase, sep = "\x1f")

# Keys carried by at least one row with match == 0. which() turns the
# logical mask into positions so NA comparisons select nothing.
zero_keys <- key[which(df$match == 0)]

# response is 1 when the row itself has a non-zero match AND some match == 0
# row shares its key; otherwise 0. Computed for all rows at once, so an empty
# data frame yields numeric(0) instead of looping over 1:0.
response <- as.numeric(df$match != 0 & key %in% zero_keys)

response
[1] 1 0 0 1 0 0
这是我该怎么做的

# read the data
test <- read.table(text = 'resident    faculty    submittedBy    match    caseID    phase
                   george      sally      george         1        george_1  pre
                   george      sally      sally          0        george_1  pre
                   george      sally      george         1        george_1  intra
                   jane        carl       jane           1        jane_1    pre
                   jane        carl       carl           0        jane_1    pre
                   jane        carl       carl           0        jane_1    intra', header = TRUE)

# Composite key per row built from faculty, caseID and phase. The
# control-character separator prevents two different triples from producing
# the same string.
key <- paste(test$faculty, test$caseID, test$phase, sep = "\x1f")

# Keys that appear on at least one match == 0 row (which() skips NA results).
zero_keys <- key[which(test$match == 0)]

# create the response: 1 when the row has a non-zero match and its key is
# shared with a match == 0 row, 0 otherwise. Only match == 0 rows count as
# partners; a plain duplicated() check would also flag two match == 1 rows
# that merely duplicate each other.
resp <- as.numeric(test$match != 0 & key %in% zero_keys)
这是我该怎么做的

# read the data
test <- read.table(text = 'resident    faculty    submittedBy    match    caseID    phase
                   george      sally      george         1        george_1  pre
                   george      sally      sally          0        george_1  pre
                   george      sally      george         1        george_1  intra
                   jane        carl       jane           1        jane_1    pre
                   jane        carl       carl           0        jane_1    pre
                   jane        carl       carl           0        jane_1    intra', header = TRUE)

# Composite key per row built from faculty, caseID and phase. The
# control-character separator prevents two different triples from producing
# the same string.
key <- paste(test$faculty, test$caseID, test$phase, sep = "\x1f")

# Keys that appear on at least one match == 0 row (which() skips NA results).
zero_keys <- key[which(test$match == 0)]

# create the response: 1 when the row has a non-zero match and its key is
# shared with a match == 0 row, 0 otherwise. Only match == 0 rows count as
# partners; a plain duplicated() check would also flag two match == 1 rows
# that merely duplicate each other.
resp <- as.numeric(test$match != 0 & key %in% zero_keys)

假设match中只有1和0个值,使用dplyr的一种方法是检查每个caseID、faculty和phase是否在match 1和0中有两个不同的值,并在match为0时将响应替换为0


假设match中只有1和0个值,使用dplyr的一种方法是检查每个caseID、faculty和phase是否在match 1和0中有两个不同的值,并在match为0时将响应替换为0

我们可以使用data.table方法。先用setDT(df1)将“data.frame”转换为“data.table”,再按“caseID”、“faculty”、“phase”分组,检查每组中“match”的唯一值个数是否等于2,据此创建二进制列“response”,并将“match”为0的行的“response”设为0

library(data.table)
# Convert df1 to a data.table by reference, then add `response` per
# (caseID, faculty, phase) group: 1 where the group holds exactly two distinct
# `match` values and the row's own `match` is non-zero, 0 otherwise.
# The trailing [] prints the updated table.
setDT(df1)
df1[, response := as.integer(uniqueN(match) == 2 & match != 0),
    by = .(caseID, faculty, phase)][]
#   resident faculty submittedBy match   caseID phase response
#1:   george   sally      george     1 george_1   pre        1
#2:   george   sally       sally     0 george_1   pre        0
#3:   george   sally      george     1 george_1 intra        0
#4:     jane    carl        jane     1   jane_1   pre        1
#5:     jane    carl        carl     0   jane_1   pre        0
#6:     jane    carl        carl     0   jane_1 intra        0
或者使用带ave的base R

数据 我们可以使用data.table方法。先用setDT(df1)将“data.frame”转换为“data.table”,再按“caseID”、“faculty”、“phase”分组,检查每组中“match”的唯一值个数是否等于2,据此创建二进制列“response”,并将“match”为0的行的“response”设为0

library(data.table)
# Convert df1 to a data.table by reference, then add `response` per
# (caseID, faculty, phase) group: 1 where the group holds exactly two distinct
# `match` values and the row's own `match` is non-zero, 0 otherwise.
# The trailing [] prints the updated table.
setDT(df1)
df1[, response := as.integer(uniqueN(match) == 2 & match != 0),
    by = .(caseID, faculty, phase)][]
#   resident faculty submittedBy match   caseID phase response
#1:   george   sally      george     1 george_1   pre        1
#2:   george   sally       sally     0 george_1   pre        0
#3:   george   sally      george     1 george_1 intra        0
#4:     jane    carl        jane     1   jane_1   pre        1
#5:     jane    carl        carl     0   jane_1   pre        0
#6:     jane    carl        carl     0   jane_1 intra        0
或者使用带ave的base R

数据 另一种数据表方法。连接键变量并检查值是否不在match==0集合中:

另一种数据表方法。连接键变量并检查值是否不在match==0集合中:


我不确定我是否遵守……你能把标准分成单独的要点,以便更好地区分它们,并且在措辞上更清楚一点吗?也许exlicity会解释为什么前几个回答是这样的。@你看到了吗?@RAB我已经根据第1/4行的匹配情况进行了调整?第1行:教员=sally,案例ID=george_1,阶段=pre。这些都不匹配吗?我听不懂你的逻辑。他们认为匹配的是什么?@RAB我正在寻找匹配=1的X行,然后查看匹配=0的任何行是否与第X行的相位、系数值和案例ID第1行和第2行的值相同,因为其中一行匹配=1,另一行匹配=0,但相位、系数值,caseID sameim不确定我是否遵循……你能把标准分成单独的要点,以便更好地区分它们,并且在措辞上更清楚一点吗?也许exlicity会解释为什么前几个回答是这样的。@你看到了吗?@RAB我已经根据第1/4行的匹配情况进行了调整?第1行:教员=sally,案例ID=george_1,阶段=pre。这些都不匹配吗?我听不懂你的逻辑。他们认为匹配的是什么?@RAB我正在寻找匹配=1的X行,然后查看匹配=0的任何行是否与第X行的相位、系数值和案例ID第1行和第2行的值相同,因为其中一行匹配=1,另一行匹配=0,但相位、系数值,和caseID是sameI不断得到一个错误:这个代码的错误我不断得到一个错误:这个代码的错误我爱ave解决方案,非常简洁。你能解释它的工作原理吗?我从没见过它用那个way@RABave可用于替换data.table中dplyr或[,:=,by=]的group by+突变。它的工作方式是我们将第一个参数作为要修改的列/向量传递,这里它是match,随后的参数都是分组变量,直到有趣为止。如果我们没有通过任何的乐趣,默认情况下采取的平均值。好的是它不会改变原来的顺序。在本例中,我们传递匿名函数调用以检查每个组中唯一匹配元素的长度。比赛0&是将值0替换为与0love匹配的ave解决方案,非常简洁。你能解释它的工作原理吗?我从没见过它用那个way@RABave可用于替换data.table中dplyr或[,:=,by=]的group by+突变。它的工作方式是我们将第一个参数作为要修改的列/向量传递,这里它是match,随后的参数都是分组变量,直到有趣为止。如果我们没有通过任何的乐趣,默认情况下采取的平均值。 好的是它不会改变原来的顺序。在本例中,我们传递匿名函数调用以检查每个组中唯一匹配元素的长度。比赛0将替换与0匹配的值0(&I)
# Sample data: six rows; every text column is a factor with explicitly listed
# levels, and `match` is an integer 0/1 flag.
df1 <- data.frame(
  resident = factor(
    c("george", "george", "george", "jane", "jane", "jane"),
    levels = c("george", "jane")
  ),
  faculty = factor(
    c("sally", "sally", "sally", "carl", "carl", "carl"),
    levels = c("carl", "sally")
  ),
  submittedBy = factor(
    c("george", "sally", "george", "jane", "carl", "carl"),
    levels = c("carl", "george", "jane", "sally")
  ),
  match = c(1L, 0L, 1L, 1L, 0L, 0L),
  caseID = factor(
    c("george_1", "george_1", "george_1", "jane_1", "jane_1", "jane_1"),
    levels = c("george_1", "jane_1")
  ),
  phase = factor(
    c("pre", "pre", "intra", "pre", "pre", "intra"),
    levels = c("intra", "pre")
  )
)
library(data.table)
# NOTE(review): `dat` is not created in this snippet; it is assumed to be the
# sample data frame already present in the session - confirm.
setDT(dat)

# Columns that together identify a case / faculty / phase combination.
key_cols <- c("caseID", "faculty", "phase")

# Start by flagging every row whose match equals 1.
dat[, response := match == 1]

# Rows with match == 0 serve as the lookup table for the not-join below.
zero_rows <- dat[match == 0]

# Not-join (the leading `!`): rows of dat whose key has no counterpart among
# the match == 0 rows get their flag reset to FALSE.
dat[!zero_rows, on = key_cols, response := FALSE]

dat
#   resident faculty submittedBy match   caseID phase response
#1:   george   sally      george     1 george_1   pre     TRUE
#2:   george   sally       sally     0 george_1   pre    FALSE
#3:   george   sally      george     1 george_1 intra    FALSE
#4:     jane    carl        jane     1   jane_1   pre     TRUE
#5:     jane    carl        carl     0   jane_1   pre    FALSE
#6:     jane    carl        carl     0   jane_1 intra    FALSE