在R数据帧中将行值拆分为多行
标签:r, dataframe, split。我有一个R数据帧,如下所示:
# Example input: one row per probe. A probe that maps to several genes keeps
# all gene symbols / gene IDs in a single cell, joined by "///".
df <- data.frame(
  FDR = c(0.009, 0.007, 0.007),
  Probe_ID = c(
    "1555272_at",
    "1557203_at",
    "1557384_at"
  ),
  Gene.Symbol = c(
    "RSPH10B2///RSPH10B",
    "PABPC1L2B///PABPC1L2A",
    "LOC100506639///ZNF131"
  ),
  Gene.ID = c(
    "728194///222967",
    "645974///340529",
    "100506639///7690"
  )
)
# Auto-print the data frame at the console.
df
FDR Probe_ID Gene.Symbol Gene.ID
1 0.009 1555272_at RSPH10B2///RSPH10B 728194///222967
2 0.007 1557203_at PABPC1L2B///PABPC1L2A 645974///340529
3 0.007 1557384_at LOC100506639///ZNF131 100506639///7690
我尝试了下面的代码,但它不起作用,生成的列中包含重复的元素:
# NOTE(review): this is the attempt reported as not working; code kept as posted.
# - `df$Gene.symbol` (lower-case "s") does not match the `Gene.Symbol` column
#   of the example data frame, so `$` yields NULL here.
# - gsub() deletes every "///" before strsplit() runs, and the split string
#   ", " does not occur in the sample values, so no value would be split.
s <- strsplit(gsub("///","",df$Gene.symbol),", ",fixed = TRUE)
# Repeats each original value once per piece found in `s` and pairs it with
# the flattened pieces.
res <- data.frame(Id = rep(df$Gene.symbol, lengths(s)), result = unlist(s))
# `annotated` is not defined in the visible code - presumably the asker's
# full data set; TODO confirm.
ans <- merge(annotated,res)
使用tidyr的separate_rows(配合dplyr管道)的解决方案:
library(dplyr)
# separate_rows() is exported by tidyr, not dplyr; without attaching tidyr the
# call below fails with "could not find function".
library(tidyr)

# Split both "///"-delimited columns in parallel so that the i-th symbol stays
# paired with the i-th ID; the remaining columns are repeated for each new row.
df %>%
  separate_rows(Gene.Symbol, Gene.ID, sep = "///")
# A tibble: 6 x 4
FDR Probe_ID Gene.Symbol Gene.ID
<dbl> <chr> <chr> <chr>
1 0.009 1555272_at RSPH10B2 728194
2 0.009 1555272_at RSPH10B 222967
3 0.007 1557203_at PABPC1L2B 645974
4 0.007 1557203_at PABPC1L2A 340529
5 0.007 1557384_at LOC100506639 100506639
6 0.007 1557384_at ZNF131 7690
library(dplyr)
library(tidyr)
df %>%
separate_rows(Gene.Symbol, Gene.ID, sep = "///")
# A tibble: 6 x 4
FDR Probe_ID Gene.Symbol Gene.ID
<dbl> <chr> <chr> <chr>
1 0.009 1555272_at RSPH10B2 728194
2 0.009 1555272_at RSPH10B 222967
3 0.007 1557203_at PABPC1L2B 645974
4 0.007 1557203_at PABPC1L2A 340529
5 0.007 1557384_at LOC100506639 100506639
6 0.007 1557384_at ZNF131 7690
使用基础R的strsplit和by的解决方案:
# Base-R approach: process the data frame one Probe_ID group at a time, split
# the "///"-delimited columns, and stack the per-group results.
# Assumes columns 1:2 are the key columns (FDR, Probe_ID) and that every other
# column holds the same number of "///"-separated pieces within a row.
res <- do.call(rbind, by(df, df$Probe_ID, function(x) {
  # lapply() always returns a list (sapply()'s return shape depends on the
  # input); fixed = TRUE treats "///" as a literal string, not a regex.
  pieces <- lapply(x[, -(1:2)], strsplit, split = "///", fixed = TRUE)
  # Number of pieces per original row, used to repeat the key columns
  # explicitly instead of relying on data.frame() recycling.
  n <- lengths(pieces[[1]])
  keys <- x[rep(seq_len(nrow(x)), n), 1:2]
  cbind(`rownames<-`(keys, NULL), lapply(pieces, unlist))
}))
res
# FDR Probe_ID Gene.Symbol Gene.ID
# 1555272_at.1 0.009 1555272_at RSPH10B2 728194
# 1555272_at.2 0.009 1555272_at RSPH10B 222967
# 1557203_at.1 0.007 1557203_at PABPC1L2B 645974
# 1557203_at.2 0.007 1557203_at PABPC1L2A 340529
# 1557384_at.1 0.007 1557384_at LOC100506639 100506639
# 1557384_at.2 0.007 1557384_at ZNF131 7690
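请尝试 `gsub("\\/\\/\\/", "", df$Gene.Symbol)`。原回答认为 `/` 是特殊字符,因此在gsub或正则表达式中的任何特殊字符之前都需要添加 `\\`;实际上 `/` 在R的正则表达式中并不是元字符,这里的转义没有必要,可以直接写成 `gsub("///", "", df$Gene.Symbol, fixed = TRUE)`。另外请注意,这样做只会删除分隔符,并不会把值拆分成多行。有关详细信息,请在R控制台中查看 `?regex`。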
# Base-R approach: process the data frame one Probe_ID group at a time, split
# the "///"-delimited columns, and stack the per-group results.
# Assumes columns 1:2 are the key columns (FDR, Probe_ID) and that every other
# column holds the same number of "///"-separated pieces within a row.
res <- do.call(rbind, by(df, df$Probe_ID, function(x) {
  # lapply() always returns a list (sapply()'s return shape depends on the
  # input); fixed = TRUE treats "///" as a literal string, not a regex.
  pieces <- lapply(x[, -(1:2)], strsplit, split = "///", fixed = TRUE)
  # Number of pieces per original row, used to repeat the key columns
  # explicitly instead of relying on data.frame() recycling.
  n <- lengths(pieces[[1]])
  keys <- x[rep(seq_len(nrow(x)), n), 1:2]
  cbind(`rownames<-`(keys, NULL), lapply(pieces, unlist))
}))
res
# FDR Probe_ID Gene.Symbol Gene.ID
# 1555272_at.1 0.009 1555272_at RSPH10B2 728194
# 1555272_at.2 0.009 1555272_at RSPH10B 222967
# 1557203_at.1 0.007 1557203_at PABPC1L2B 645974
# 1557203_at.2 0.007 1557203_at PABPC1L2A 340529
# 1557384_at.1 0.007 1557384_at LOC100506639 100506639
# 1557384_at.2 0.007 1557384_at ZNF131 7690
# Reproducible sample data: three probes whose Gene.Symbol / Gene.ID cells each
# hold two values joined by "///". Built with data.frame() rather than a
# dput()-style structure(); stringsAsFactors = FALSE keeps the text columns as
# character vectors on every R version.
dat <- data.frame(
  FDR = c(0.009, 0.007, 0.007),
  Probe_ID = c("1555272_at", "1557203_at", "1557384_at"),
  Gene.Symbol = c(
    "RSPH10B2///RSPH10B",
    "PABPC1L2B///PABPC1L2A",
    "LOC100506639///ZNF131"
  ),
  Gene.ID = c(
    "728194///222967",
    "645974///340529",
    "100506639///7690"
  ),
  stringsAsFactors = FALSE
)