R 循环遍历每一行并比较被迭代行的多列中的值_R

R 循环遍历每一行并比较被迭代行的多列中的值
R 循环遍历每一行并比较被迭代行的多列中的值,r,R,我每年都有以下数据框 ID Jan Feb March April May Jun Jul Aug Sept Oct Nov Dec ABC 0 0 0 1 0 0 0 0 1 0 0 0 DEF 0 0 0 1 1 0 0 0 1 0 0 0 GHI 0 0 0 1 0 1 0 0 0 1 0 0 MNO 0 0
我每年都有以下数据框

ID   Jan Feb March April May Jun Jul Aug Sept Oct Nov Dec
ABC   0  0    0     1    0   0    0   0  1     0   0  0
DEF   0  0    0     1    1   0    0   0  1     0   0  0
GHI   0  0    0     1    0   1    0   0  0     1   0  0
MNO   0  0    0     1    0   1    0   0  1     0   0  0
QAL   0  1    1     1    0   0    1   0  0    1   0  0

我希望遍历每一行，找出值为1且其后连续三列均为0的列。我想得到如下结果，即列出其后至少连续3个月为0的月份：
ID    col1    col2 
ABC   April   Sept  
DEF   May     Sept 
GHI   Jun      N/A
MNO   Sept    N/A
QAL   N/A     N/A

我已经知道了如何遍历向量并获得索引
vec2 <- names(yearly)
# vec is each row of yearly (vec itself is never assigned in this snippet)
# Scan one row: for every position holding a 1, test whether the next three
# values sum to 0 and, if so, print two entries of the column names.
  for(i in 1:length(vec)){
if(vec[i]==1){
  # NOTE(review): if vec is an atomic vector, indexing past its end yields NA,
  # so near the end of the row this condition is NA and `if` raises an error.
  if(vec[i+1]+vec[i+2]+vec[i+3]==0){  #I think R automatically takes care of the out of bounds index
    # NOTE(review): vec2[1] is the first column name, not the ID of the row.
    print(vec2[1])
    # i + 1 presumably offsets for the ID column that vec does not contain.
    print(vec2[i+1])
  }
}
 }
# NOTE(review): the two closing braces below have no matching opener in this
# snippet; presumably an enclosing loop over the rows was lost.
    }
  }

由于每行的答案数量可变，我会选择使用列表。这种方法使用 rle 查找连续的零（游程），
然后检查游程长度是否超过2，最后返回这些游程之前那个月份的名称
# Data: one row per ID, one 0/1 column per month
df <- read.table(text = "ID   Jan Feb March April May Jun Jul Aug Sept Oct Nov Dec
ABC   0  0    0     1    0   0    0   0  1     0   0  0
           DEF   0  0    0     1    1   0    0   0  1     0   0  0
           GHI   0  0    0     1    0   1    0   0  0     1   0  0
           MNO   0  0    0     1    0   1    0   0  1     0   0  0
           QAL   0  1    1     1    0   0    1   0  0    1   0  0",
           header = TRUE)

# Repackage as list (rows become elements of list), labelled by ID.
# df$ID is a plain vector, so rownames(df$ID) is NULL and would strip the
# names; the IDs themselves are the labels we want.
df_list <- setNames(split(df[, -1], seq_len(nrow(df))), as.character(df$ID))

#' Months that are followed by a long run of zeroes
#'
#' @param x A named 0/1 vector or a one-row data.frame (one entry per month).
#' @param min_zeros Minimum length of the zero run that must follow a month.
#' @return Character vector with the names of the months directly preceding
#'   each qualifying zero run (possibly of length zero).
morpheus_count <- function(x, min_zeros = 3L) {
  # Run length encoding; each run's value carries the name of its last month
  tmp <- rle(x)

  # Positions (among the runs) of the zero runs that are long enough
  zero_runs <- which(tmp$values == 0 & tmp$lengths >= min_zeros)

  # Step back one run to reach the month preceding the zeroes. A zero run at
  # the very start gives index 0, which `[` silently drops.
  names(tmp$values)[zero_runs - 1]
}

# Run on list
lapply(df_list, morpheus_count)

由于每行的答案数量可变，所以我选择使用列表。这种方法使用 rle 查找连续的零（游程），
然后检查游程长度是否超过2，最后返回这些游程之前那个月份的名称
# Data: one row per ID, one 0/1 column per month
df <- read.table(text = "ID   Jan Feb March April May Jun Jul Aug Sept Oct Nov Dec
ABC   0  0    0     1    0   0    0   0  1     0   0  0
           DEF   0  0    0     1    1   0    0   0  1     0   0  0
           GHI   0  0    0     1    0   1    0   0  0     1   0  0
           MNO   0  0    0     1    0   1    0   0  1     0   0  0
           QAL   0  1    1     1    0   0    1   0  0    1   0  0",
           header = TRUE)

# Repackage as list (rows become elements of list) and label each element
# with its ID. rownames() of the plain vector df$ID is NULL, which would
# leave the list unnamed, so the ID values are used directly.
row_values <- split(df[, -1], seq_len(nrow(df)))
df_list <- setNames(row_values, as.character(df[["ID"]]))

#' Months that are followed by a long run of zeroes
#'
#' @param x A named 0/1 vector or a one-row data.frame (one entry per month).
#' @param min_zeros Minimum length of the zero run that must follow a month.
#' @return Character vector with the names of the months directly preceding
#'   each qualifying zero run (possibly of length zero).
morpheus_count <- function(x, min_zeros = 3L) {
  # Run length encoding; each run's value carries the name of its last month
  runs <- rle(x)
  is_long_zero_run <- runs$values == 0 & runs$lengths >= min_zeros

  # The run before a qualifying zero run ends in the month we want. Index 0
  # (zero run at the start of the year) is silently dropped by `[`.
  names(runs$values)[which(is_long_zero_run) - 1]
}

# Run on list
lapply(df_list, morpheus_count)

有不同的方法来解决这个问题：
字符串匹配
此方法使用字符串匹配，因此依赖于字符长度为1的值：
library(data.table)
library(magrittr)

# Per ID: glue the monthly 0/1 flags into one string, locate every "1000"
# (a 1 followed by three 0s) and turn the match positions into a data.table.
yearly[
  ,
  as.data.table(
    stringr::str_locate_all(Reduce(paste0, .SD), "1000")
  ),
  .SDcols = -"ID", by = "ID"
][
  # start counts month columns only; + 1L skips the leading ID column
  , .(ID, month = names(yearly)[start + 1L])
]

可根据OP的要求将其重塑为宽格式：
# Locate every "1000" per ID, map the match start to a month name, then
# reshape so that each ID has one row with columns col1, col2, ...
yearly[, 
       {
         # glue each ID's monthly flags into one string and locate every "1000"
         Reduce(paste0, .SD) %>% 
           stringr::str_locate_all("1000") %>% 
           as.data.table()
       }, 
       .SDcols = -"ID", by = "ID"][
         # start + 1L skips the ID column when looking up the month name
         , .(ID, month = names(yearly)[start + 1L])][
           # long -> wide; rowid() numbers the hits within each ID
           , dcast(.SD, ID ~ rowid(ID, prefix = "col"))][
             # join against all IDs; presumably keeps IDs without any hit as NA rows
             yearly[, ID], on = "ID"]

    ID  col1 col2
1: ABC April Sept
2: DEF   May Sept
3: GHI   Jun <NA>
4: MNO  Sept <NA>
5: QAL  <NA> <NA>

数据
每年有不同的方法来解决这个问题：
字符串匹配
此方法使用字符串匹配，因此依赖于字符长度为1的值：
library(data.table)
library(magrittr)

# Per ID: glue the monthly 0/1 flags into one string, locate every "1000"
# (a 1 followed by three 0s) and turn the match positions into a data.table.
yearly[
  ,
  as.data.table(
    stringr::str_locate_all(Reduce(paste0, .SD), "1000")
  ),
  .SDcols = -"ID", by = "ID"
][
  # start counts month columns only; + 1L skips the leading ID column
  , .(ID, month = names(yearly)[start + 1L])
]

可根据OP的要求将其重塑为宽格式：
# Locate every "1000" per ID, map the match start to a month name, then
# reshape so that each ID has one row with columns col1, col2, ...
yearly[, 
       {
         # glue each ID's monthly flags into one string and locate every "1000"
         Reduce(paste0, .SD) %>% 
           stringr::str_locate_all("1000") %>% 
           as.data.table()
       }, 
       .SDcols = -"ID", by = "ID"][
         # start + 1L skips the ID column when looking up the month name
         , .(ID, month = names(yearly)[start + 1L])][
           # long -> wide; rowid() numbers the hits within each ID
           , dcast(.SD, ID ~ rowid(ID, prefix = "col"))][
             # join against all IDs; presumably keeps IDs without any hit as NA rows
             yearly[, ID], on = "ID"]

    ID  col1 col2
1: ABC April Sept
2: DEF   May Sept
3: GHI   Jun <NA>
4: MNO  Sept <NA>
5: QAL  <NA> <NA>

数据
年度数据：
请注意：

确保数据类型为 data.frame
确保仅对 0/1 数据应用 fun1，这就是调用 df[, -1] 的原因
您可以在 fun1 内部更改 n，以使用其他条件
数据：
请注意：

确保数据类型为 data.frame
确保仅对 0/1 数据应用 fun1，这就是调用 df[, -1] 的原因
您可以在 fun1 内部更改 n，以使用其他条件
为什么 GHI 的结果不是 Jun、Oct？——因为在 Oct 之后需要连续 3 个 0，但实际只有 2 个 0，所以 Oct 不符合条件。
library(data.table)
library(magrittr)
# pattern to find matches: a 1 followed by three 0s; `month` is filled in
# for each window position below
tmp <- data.table(1L, 0L, 0L, 0L, month = "")
# column 1 is the ID column, so a 4-column window can start at any column
# from 2 up to ncol(yearly) - 3 (i.e. 2:10 for twelve month columns)
lapply(seq_len(max(ncol(yearly) - 4L, 0L)) + 1L, function(x) {
  # rename col names for join of subsequent columns
  # (setnames modifies tmp by reference, so each pass re-targets the pattern)
  setnames(tmp, 1:4, names(yearly)[x:(x + 3L)])
  # append starting month of sequence
  tmp[, month := names(yearly)[x]]
  # inner join
  yearly[tmp, on = head(names(tmp), -1L), .(ID, month), nomatch = 0L]
}) %>%
  # convert list to data.table
  rbindlist() %>%
  # reshape to wide format and append missing ID rows
  dcast(ID ~ rowid(ID, prefix = "col")) %>%
  .[yearly[, ID], on = "ID"]

    ID  col1 col2
1: ABC April Sept
2: DEF   May Sept
3: GHI   Jun <NA>
4: MNO  Sept <NA>
5: QAL  <NA> <NA>

# Sample data: one row per ID and one 0/1 column per month (Jan..Dec),
# read from the inline text with fread.
yearly <- fread(
"ID   Jan Feb March April May Jun Jul Aug Sept Oct Nov Dec
ABC   0  0    0     1    0   0    0   0  1     0   0  0
DEF   0  0    0     1    1   0    0   0  1     0   0  0
GHI   0  0    0     1    0   1    0   0  0     1   0  0
MNO   0  0    0     1    0   1    0   0  1     0   0  0
QAL   0  1    1     1    0   0    1   0  0     1   0  0"
)

# Read the sample data and convert it to a plain data.frame.
# Nested, namespace-qualified calls are used so this statement does not rely
# on magrittr's pipe or on data.table being attached at this point.
df <- data.table::setDF(data.table::fread("
ID   Jan Feb March April May Jun Jul Aug Sept Oct Nov Dec
ABC   0  0    0     1    0   0    0   0  1     0   0  0
DEF   0  0    0     1    1   0    0   0  1     0   0  0
GHI   0  0    0     1    0   1    0   0  0     1   0  0
MNO   0  0    0     1    0   1    0   0  1     0   0  0
QAL   0  1    1     1    0   0    1   0  0     1   0  0"))

library(magrittr)
# IDs (first column) as a plain vector; TRUE is spelled out because T can be
# reassigned.
rowNames <- df[, 1, drop = TRUE]
# Month names: every column name except the leading ID column. Taken from
# names(df) directly so it also works if only one month column is present.
months   <- names(df)[-1]
#' Months holding a 1 that is followed by at least `n` zeros
#'
#' Relies on the global character vector `months` for the labels, in the same
#' order as the entries of `x`.
#'
#' @param x Numeric 0/1 vector, one entry per month.
#' @param n Minimum number of zeros that must follow the 1 (default 3).
#' @return Character vector of month names (possibly of length zero).
fun1 <- function(x, n = 3) {
  # cumsum() gives every 1 and the zeros trailing it the same group id
  groups <- cumsum(x)
  # first position of each group: position 1 plus every position holding a 1
  pos <- which(as.logical(diff(c(-1, groups))))
  # a group qualifies when it starts with a 1 and has more than n members,
  # i.e. the 1 itself plus at least n zeros
  counts <- as.numeric(table(groups)) > n & as.logical(x[pos])
  months[pos[counts]]
}

# Apply fun1 to the 0/1 values of every row. lapply() always returns a list,
# whereas apply() would simplify to a matrix or vector whenever all rows
# happen to yield the same number of months.
res <- lapply(seq_len(nrow(df)), function(i) fun1(unlist(df[i, -1])))
names(res) <- rowNames

$ABC
[1] "April" "Sept" 

$DEF
[1] "May"  "Sept"

$GHI
[1] "Jun"

$MNO
[1] "Sept"

$QAL
character(0)