R:对一行文本同时匹配多个 case_when 条件
数据 我有一个数据框,如下所示:R 一行中文本的多个case_,r,dplyr,R,Dplyr,数据 我有一个数据框,如下所示: structure(list(EndoscopyEventRaw = c("", "", "oesophagus:rfa;oesophagus:nac", "oesophagus:rfa;oesophagus:nac", "oesophagus:brushings", "oesophagus:rfa;oesophagus:emr;oesophagus:nac", "oesophagus:apc", "oesophagus:apc;oesophagus:nac
# Reproducible example: a ten-row data frame whose `EndoscopyEventRaw` column
# holds zero or more ";"-separated entries such as "oesophagus:rfa".
dataframe <- structure(list(EndoscopyEventRaw = c("", "", "oesophagus:rfa;oesophagus:nac",
"oesophagus:rfa;oesophagus:nac", "oesophagus:brushings", "oesophagus:rfa;oesophagus:emr;oesophagus:nac",
"oesophagus:apc", "oesophagus:apc;oesophagus:nac", "oesophagus:apc",
"")), row.names = c(NA, 10L), class = "data.frame")

# Attempt under discussion: case_when() checks its conditions in order and
# keeps only the first one that matches, so each row receives a single code
# even when several events are present in the string.
dataframe <- dataframe %>%
  mutate(
    OPCS4ZCode2 = case_when(
      grepl("nac", EndoscopyEventRaw) ~ "CodeForNAC",
      grepl("apc", EndoscopyEventRaw) ~ "CodeForAPC",
      grepl("rfa", EndoscopyEventRaw) ~ "CodeForRFA",
      grepl("grasp", EndoscopyEventRaw) ~ "CodeForGrasp",
      # The fallback has to be the last case_when() argument; placed after the
      # closing parenthesis it would be handed to mutate() as a stray formula.
      TRUE ~ ""
    )
  )
目标
我想把这些内容提取到一个新列中,可能使用 case_when,
并基于以下规则:
# Reproducible example: a ten-row data frame whose `EndoscopyEventRaw` column
# holds zero or more ";"-separated entries such as "oesophagus:rfa".
dataframe <- structure(list(EndoscopyEventRaw = c("", "", "oesophagus:rfa;oesophagus:nac",
"oesophagus:rfa;oesophagus:nac", "oesophagus:brushings", "oesophagus:rfa;oesophagus:emr;oesophagus:nac",
"oesophagus:apc", "oesophagus:apc;oesophagus:nac", "oesophagus:apc",
"")), row.names = c(NA, 10L), class = "data.frame")

# Rules to apply: map each keyword found in the raw event text to its code.
# case_when() returns the value of the first condition that is TRUE, so only
# one code per row is produced by this version.
dataframe <- dataframe %>%
  mutate(
    OPCS4ZCode2 = case_when(
      grepl("nac", EndoscopyEventRaw) ~ "CodeForNAC",
      grepl("apc", EndoscopyEventRaw) ~ "CodeForAPC",
      grepl("rfa", EndoscopyEventRaw) ~ "CodeForRFA",
      grepl("grasp", EndoscopyEventRaw) ~ "CodeForGrasp",
      # Keep the default inside case_when(); outside the call it becomes an
      # unnamed formula argument to mutate() instead of a fallback value.
      TRUE ~ ""
    )
  )
当我使用 case_when 时,它在找到第一个匹配项后就停止查找。
有没有办法(无论是否使用 case_when)在找到匹配项时匹配上述所有目标?
是的,case_when 在找到匹配项后会跳过其余条件。一种方法是先把数据拆分成多行,
然后使用 case_when 判断条件,最后再把数据按原始行汇总回来:
library(dplyr)

# Produce every matching code per record: put each ";"-separated event on its
# own row, code each piece with case_when(), then collapse the codes back into
# one comma-separated string per original row.
df %>%
  # Tag each input row so the split pieces can be regrouped afterwards.
  mutate(row = row_number()) %>%
  tidyr::separate_rows(EndoscopyEventRaw, sep = ";") %>%
  mutate(
    OPCS4ZCode2 = case_when(
      grepl("nac", EndoscopyEventRaw) ~ "CodeForNAC",
      grepl("apc", EndoscopyEventRaw) ~ "CodeForAPC",
      grepl("rfa", EndoscopyEventRaw) ~ "CodeForRFA",
      grepl("grasp", EndoscopyEventRaw) ~ "CodeForGrasp",
      TRUE ~ ""
    )
  ) %>%
  group_by(row) %>%
  # Drop pieces that received no code (e.g. "oesophagus:emr") before joining;
  # otherwise they appear as empty entries like "CodeForRFA, , CodeForNAC".
  # toString() of an empty vector is "", so rows without any code stay "".
  summarise(OPCS4ZCode2 = toString(OPCS4ZCode2[OPCS4ZCode2 != ""])) %>%
  select(-row)
# A tibble: 10 x 1
# OPCS4ZCode2
# <chr>
# 1 ""
# 2 ""
# 3 CodeForRFA, CodeForNAC
# 4 CodeForRFA, CodeForNAC
# 5 ""
# 6 CodeForRFA, CodeForNAC
# 7 CodeForAPC
# 8 CodeForAPC, CodeForNAC
# 9 CodeForAPC
#10 ""
library(dplyr)

# Restored from a machine-translated copy in which function names, column
# names and quotes had been translated and no longer parsed as R.
# Split each ";"-separated event onto its own row, assign a code per piece,
# then join the codes back into one string per original row.
df %>%
  # Tag each input row so the split pieces can be regrouped afterwards.
  mutate(row = row_number()) %>%
  tidyr::separate_rows(EndoscopyEventRaw, sep = ";") %>%
  mutate(
    OPCS4ZCode2 = case_when(
      grepl("nac", EndoscopyEventRaw) ~ "CodeForNAC",
      grepl("apc", EndoscopyEventRaw) ~ "CodeForAPC",
      grepl("rfa", EndoscopyEventRaw) ~ "CodeForRFA",
      grepl("grasp", EndoscopyEventRaw) ~ "CodeForGrasp",
      TRUE ~ ""
    )
  ) %>%
  group_by(row) %>%
  # Leave out pieces with no code so the joined string has no empty entries.
  summarise(OPCS4ZCode2 = toString(OPCS4ZCode2[OPCS4ZCode2 != ""])) %>%
  select(-row)
# A tibble: 10 x 1
# OPCS4ZCode2
# <chr>
# 1 ""
# 2 ""
# 3 CodeForRFA, CodeForNAC
# 4 CodeForRFA, CodeForNAC
# 5 ""
# 6 CodeForRFA, CodeForNAC
# 7 CodeForAPC
# 8 CodeForAPC, CodeForNAC
# 9 CodeForAPC
#10 ""
是的,case_when 在找到匹配项后会跳过其余条件。一种方法是先把数据拆分成多行,
然后使用 case_when 判断条件,最后再把数据按原始行汇总回来:
library(dplyr)

# Return all matching codes for each record rather than only the first:
# separate the ";"-delimited events into rows, code every piece with
# case_when(), and then rebuild a single string per original row.
df %>%
  # Row index used as the grouping key once the events have been split.
  mutate(row = row_number()) %>%
  tidyr::separate_rows(EndoscopyEventRaw, sep = ";") %>%
  mutate(
    OPCS4ZCode2 = case_when(
      grepl("nac", EndoscopyEventRaw) ~ "CodeForNAC",
      grepl("apc", EndoscopyEventRaw) ~ "CodeForAPC",
      grepl("rfa", EndoscopyEventRaw) ~ "CodeForRFA",
      grepl("grasp", EndoscopyEventRaw) ~ "CodeForGrasp",
      TRUE ~ ""
    )
  ) %>%
  group_by(row) %>%
  # Uncoded pieces (e.g. "oesophagus:emr") are "" here; filtering them out
  # avoids results such as "CodeForRFA, , CodeForNAC". A group with nothing
  # left collapses to "" because toString() of an empty vector is "".
  summarise(OPCS4ZCode2 = toString(OPCS4ZCode2[OPCS4ZCode2 != ""])) %>%
  select(-row)
# A tibble: 10 x 1
# OPCS4ZCode2
# <chr>
# 1 ""
# 2 ""
# 3 CodeForRFA, CodeForNAC
# 4 CodeForRFA, CodeForNAC
# 5 ""
# 6 CodeForRFA, CodeForNAC
# 7 CodeForAPC
# 8 CodeForAPC, CodeForNAC
# 9 CodeForAPC
#10 ""
library(dplyr)

# Restored from a machine-translated copy whose identifiers and quotes had
# been translated, leaving it unparseable as R.
# One row per ";"-separated event, one code per piece, then one joined string
# of codes per original row.
df %>%
  # Row index used as the grouping key once the events have been split.
  mutate(row = row_number()) %>%
  tidyr::separate_rows(EndoscopyEventRaw, sep = ";") %>%
  mutate(
    OPCS4ZCode2 = case_when(
      grepl("nac", EndoscopyEventRaw) ~ "CodeForNAC",
      grepl("apc", EndoscopyEventRaw) ~ "CodeForAPC",
      grepl("rfa", EndoscopyEventRaw) ~ "CodeForRFA",
      grepl("grasp", EndoscopyEventRaw) ~ "CodeForGrasp",
      TRUE ~ ""
    )
  ) %>%
  group_by(row) %>%
  # Skip pieces with no code so the joined string contains no empty entries.
  summarise(OPCS4ZCode2 = toString(OPCS4ZCode2[OPCS4ZCode2 != ""])) %>%
  select(-row)
# A tibble: 10 x 1
# OPCS4ZCode2
# <chr>
# 1 ""
# 2 ""
# 3 CodeForRFA, CodeForNAC
# 4 CodeForRFA, CodeForNAC
# 5 ""
# 6 CodeForRFA, CodeForNAC
# 7 CodeForAPC
# 8 CodeForAPC, CodeForNAC
# 9 CodeForAPC
#10 ""