如何在dplyr mutate中将向量分配到多个变量中
标签:r、regex、dplyr
我有以下数据框:
library(tidyverse)

# Example data: three rows whose names differ only in the trailing ".binN"
# suffix, all carrying the same motif score.
dat <- tibble(
  motif_name_binned = c(
    "Ddit3::Cebpa/MA0019.1/Jaspar.instid_chr1:183286845-183287245.bin1",
    "Ddit3::Cebpa/MA0019.1/Jaspar.instid_chr1:183286845-183287245.bin2",
    "Ddit3::Cebpa/MA0019.1/Jaspar.instid_chr1:183286845-183287245.bin3"
  ),
  motif_score = c(6.816695, 6.816695, 6.816695)
)

# Print the tibble.
dat
期望得到的结果(新增 motif、inst、binno 三列):
# A tibble: 3 x 5
motif_name_binned motif_score motif inst binno
<chr> <dbl> <chr> <chr> <int>
1 Ddit3::Cebpa/MA0019.1/Jaspar.instid_chr1:183286845-183287245.bin1 6.816695 Ddit3::Cebpa chr1:183286845-183287245 1
2 Ddit3::Cebpa/MA0019.1/Jaspar.instid_chr1:183286845-183287245.bin2 6.816695 Ddit3::Cebpa chr1:183286845-183287245 2
3 Ddit3::Cebpa/MA0019.1/Jaspar.instid_chr1:183286845-183287245.bin3 6.816695 Ddit3::Cebpa chr1:183286845-183287245 3
如何在 dplyr 的 mutate 中使用下面这个含多个捕获组的正则表达式,一次生成多个新列?
回答:可以使用 tidyr::extract 将正则表达式中的各个捕获组转换为新列:
library(tidyr)

# Split motif_name_binned into three new columns, one per regex capture group:
#   motif - text before the first "/"
#   inst  - text between ".instid_" and ".bin"
#   binno - digits following ".bin"
# remove = FALSE keeps the source column alongside the new ones.
# convert = TRUE runs type.convert() on the new columns, so binno comes out as
# <int> (matching the desired result) instead of the default <chr>.
# The call is namespace-qualified so it cannot be shadowed by another
# attached package that also exports extract() (e.g. magrittr).
dat %>%
  tidyr::extract(
    motif_name_binned,
    into = c("motif", "inst", "binno"),
    regex = "^(.*?)\\/.*?\\/.*?\\.instid_(.*?)\\.bin(\\d+)",
    remove = FALSE,
    convert = TRUE
  )
# A tibble: 3 x 5
# motif_name_binned motif inst binno motif_score
#* <chr> <chr> <chr> <int> <dbl>
#1 Ddit3::Cebpa/MA0019.1/Jaspar.instid_chr1:183286845-183287245.bin1 Ddit3::Cebpa chr1:183286845-183287245 1 6.816695
#2 Ddit3::Cebpa/MA0019.1/Jaspar.instid_chr1:183286845-183287245.bin2 Ddit3::Cebpa chr1:183286845-183287245 2 6.816695
#3 Ddit3::Cebpa/MA0019.1/Jaspar.instid_chr1:183286845-183287245.bin3 Ddit3::Cebpa chr1:183286845-183287245 3 6.816695
# A tibble: 3 x 5
motif_name_binned motif_score motif inst binno
<chr> <dbl> <chr> <chr> <int>
1 Ddit3::Cebpa/MA0019.1/Jaspar.instid_chr1:183286845-183287245.bin1 6.816695 Ddit3::Cebpa chr1:183286845-183287245 1
2 Ddit3::Cebpa/MA0019.1/Jaspar.instid_chr1:183286845-183287245.bin2 6.816695 Ddit3::Cebpa chr1:183286845-183287245 2
3 Ddit3::Cebpa/MA0019.1/Jaspar.instid_chr1:183286845-183287245.bin3 6.816695 Ddit3::Cebpa chr1:183286845-183287245 3
# Keep only the capture-group columns of the match matrix: column 1 holds the
# whole match, columns 2:4 hold the three parenthesised groups.
# NOTE(review): motif_name_binned is a column of `dat`, not a top-level
# variable, so this expression presumably is meant to be evaluated where that
# column is in scope (e.g. inside a dplyr verb) - confirm.
str_match(
  motif_name_binned,
  "^(.*?)\\/.*?\\/.*?\\.instid_(.*?)\\.bin(\\d+)"
)[, 2:4]
library(tidyr)

# Split motif_name_binned into three new columns, one per regex capture group:
#   motif - text before the first "/"
#   inst  - text between ".instid_" and ".bin"
#   binno - digits following ".bin"
# remove = FALSE keeps the source column alongside the new ones.
# convert = TRUE runs type.convert() on the new columns, so binno comes out as
# <int> (matching the desired result) instead of the default <chr>.
# The call is namespace-qualified so it cannot be shadowed by another
# attached package that also exports extract() (e.g. magrittr).
dat %>%
  tidyr::extract(
    motif_name_binned,
    into = c("motif", "inst", "binno"),
    regex = "^(.*?)\\/.*?\\/.*?\\.instid_(.*?)\\.bin(\\d+)",
    remove = FALSE,
    convert = TRUE
  )
# A tibble: 3 x 5
# motif_name_binned motif inst binno motif_score
#* <chr> <chr> <chr> <int> <dbl>
#1 Ddit3::Cebpa/MA0019.1/Jaspar.instid_chr1:183286845-183287245.bin1 Ddit3::Cebpa chr1:183286845-183287245 1 6.816695
#2 Ddit3::Cebpa/MA0019.1/Jaspar.instid_chr1:183286845-183287245.bin2 Ddit3::Cebpa chr1:183286845-183287245 2 6.816695
#3 Ddit3::Cebpa/MA0019.1/Jaspar.instid_chr1:183286845-183287245.bin3 Ddit3::Cebpa chr1:183286845-183287245 3 6.816695