如何在R中将两组数据合并成两列?
我在下面有这些数据。我想将所有如何在R中将两组数据合并成两列?,r,data.table,melt,R,Data.table,Melt,我在下面有这些数据。我想将所有No\u of.readscloumns都融化在一列中,将所有\u污染物列融化在另一列中。因此,最终的数据帧将具有稀释样本,无任何读取和污染列。我试着分两步来做,但这会让我反复观察。正确的方法是什么 代码: test.dput.melled使用tidyr::gather和dplyr: test.melted<-gather(test.dput,key="reads_source",value="reads",starts_with("No_of_reads")
`No_of.reads` 列都融合(melt)到一列中,并将所有 `_contamination_` 列融合到另一列中。因此,最终的数据框将包含 `diluted_sample`、`No_of.reads` 和 `_contamination_` 三列。我试着分两步来做,但这样会得到重复的观测值。正确的做法是什么?
代码:
test.dput.melled使用tidyr::gather
和dplyr
:
# Reshape test.dput in two passes: first stack the read-count columns, then,
# separately for each sample, stack that sample's contamination columns.
# NOTE: gather() is superseded by tidyr::pivot_longer(); it is kept here so
# that row order and result class stay exactly as before.

# Pass 1: every column whose name starts with "No_of_reads" is stacked into
# the key/value pair reads_source / reads.
test.melted <- test.dput %>%
  gather(
    key = "reads_source",
    value = "reads",
    starts_with("No_of_reads")
  )

# Pass 2 (NA12878): keep only the rows carrying NA12878 read counts, then
# stack the columns whose names contain "contamination_of_NA12878".
test.melted.NA12878 <- gather(
  test.melted[test.melted[["reads_source"]] == "No_of_reads_from_NA12878", ],
  key = "contamination_type",
  value = "contamination",
  contains("contamination_of_NA12878")
)

# Pass 2 (NA12877): same procedure for the NA12877 rows and columns.
test.melted.NA12877 <- gather(
  test.melted[test.melted[["reads_source"]] == "No_of_reads_from_NA12877", ],
  key = "contamination_type",
  value = "contamination",
  contains("contamination_of_NA12877")
)

# Drop columns 2 and 3 of each half and stack the halves, NA12877 first.
# NOTE(review): columns 2:3 are presumably the other sample's still-wide
# contamination columns; this depends on the column order of test.dput,
# which is not visible here -- confirm.
test.melted.full <- rbind(
  test.melted.NA12877[, -(2:3)],
  test.melted.NA12878[, -(2:3)]
)
test.melt因为您标记了data.table
和melt
library(magrittr)
library(data.table)
# Convert test.dput to a data.table in place (the result is not reassigned).
setDT(test.dput)

# Long table of read counts: select the columns whose names match
# 'diluted' or 'reads', melt using column 1 of that selection as the id
# column, then strip the 'No_of_reads_from_' prefix from the labels.
# `with = FALSE` is spelled out because `F` is an ordinary variable that can
# be reassigned.
n.reads <-
  test.dput[, grep('diluted|reads', names(test.dput)), with = FALSE] %>%
  melt(id.vars = 1, variable.name = 'Which_No_of_reads',
       value.name = 'No_of_reads') %>%
  .[, Which_No_of_reads := gsub('No_of_reads_from_', '', Which_No_of_reads)]

# Long table of contamination values, built the same way from the columns
# whose names match 'diluted' or 'contamination'.
contam <-
  test.dput[, grep('diluted|contamination', names(test.dput)), with = FALSE] %>%
  melt(id.vars = 1, variable.name = 'Which_contamination',
       value.name = '_contamination_') %>%
  .[, Which_contamination := gsub('contamination_of_', '', Which_contamination)]

# Bind the two long tables side by side and keep the first occurrence of
# each column name, which removes the duplicated id column.
# NOTE(review): the tables are paired purely by row position. In the printed
# output below, the 12 read-count rows appear twice to fill 24 contamination
# rows -- confirm that this recycling is the intended pairing.
cbind(n.reads, contam) %>%
  .[, unique(names(.)), with = FALSE]
# diluted_sample Which_No_of_reads No_of_reads Which_contamination _contamination_
# 1: 100% NA12878 15,000,000 tEst_NA12878 99.60%
# 2: 95% NA12878 14,250,000 tEst_NA12878 99.10%
# 3: 90% NA12878 13,500,000 tEst_NA12878 96.80%
# 4: 85% NA12878 12,750,000 tEst_NA12878 92.60%
# 5: 80% NA12878 12,000,000 tEst_NA12878 88%
# 6: 75% NA12878 11,250,000 tEst_NA12878 82.60%
# 7: 100% NA12877 0 pair_NA12878 100.00%
# 8: 95% NA12877 750,000 pair_NA12878 94.15%
# 9: 90% NA12877 1,500,000 pair_NA12878 88.72%
# 10: 85% NA12877 2,250,000 pair_NA12878 83.36%
# 11: 80% NA12877 3,000,000 pair_NA12878 78.20%
# 12: 75% NA12877 3,750,000 pair_NA12878 73.08%
# 13: 100% NA12878 15,000,000 tEst_NA12877 0.10%
# 14: 95% NA12878 14,250,000 tEst_NA12877 7%
# 15: 90% NA12878 13,500,000 tEst_NA12877 13.60%
# 16: 85% NA12878 12,750,000 tEst_NA12877 20.10%
# 17: 80% NA12878 12,000,000 tEst_NA12877 26.20%
# 18: 75% NA12878 11,250,000 tEst_NA12877 32.10%
# 19: 100% NA12877 0 pair_NA12877 0.10%
# 20: 95% NA12877 750,000 pair_NA12877 5.21%
# 21: 90% NA12877 1,500,000 pair_NA12877 10.50%
# 22: 85% NA12877 2,250,000 pair_NA12877 15.85%
# 23: 80% NA12877 3,000,000 pair_NA12877 20.92%
# 24: 75% NA12877 3,750,000 pair_NA12877 26.04%
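library(magrittr)
library(data.table)
setDT(test.dput)
n.reads <-
test.dput[, grep('diluted|reads', names(test.dput)), with = F] %>%
melt(1, variable.name = 'Which_No_of_reads',
value.name = 'No_of_reads') %>%
.[, Which_No_of_reads := gsub('No_of_reads_from_', '', Which_No_of_reads)]
contam <-
test.dput[, grep('diluted|contamination', names(test.dput)), with = F] %>%
melt(1, variable.name = 'Which_contamination',
value.name = '_contamination_') %>%
.[, Which_contamination := gsub('contamination_of_', '', Which_contamination)]
cbind(n.reads, contam) %>%
.[, unique(names(.)), with = F]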
# diluted_sample Which_No_of_reads No_of_reads Which_contamination _contamination_
# 1: 100% NA12878 15,000,000 tEst_NA12878 99.60%
# 2: 95% NA12878 14,250,000 tEst_NA12878 99.10%
# 3: 90% NA12878 13,500,000 tEst_NA12878 96.80%
# 4: 85% NA12878 12,750,000 tEst_NA12878 92.60%
# 5: 80% NA12878 12,000,000 tEst_NA12878 88%
# 6: 75% NA12878 11,250,000 tEst_NA12878 82.60%
# 7: 100% NA12877 0 pair_NA12878 100.00%
# 8: 95% NA12877 750,000 pair_NA12878 94.15%
# 9: 90% NA12877 1,500,000 pair_NA12878 88.72%
# 10: 85% NA12877 2,250,000 pair_NA12878 83.36%
# 11: 80% NA12877 3,000,000 pair_NA12878 78.20%
# 12: 75% NA12877 3,750,000 pair_NA12878 73.08%
# 13: 100% NA12878 15,000,000 tEst_NA12877 0.10%
# 14: 95% NA12878 14,250,000 tEst_NA12877 7%
# 15: 90% NA12878 13,500,000 tEst_NA12877 13.60%
# 16: 85% NA12878 12,750,000 tEst_NA12877 20.10%
# 17: 80% NA12878 12,000,000 tEst_NA12877 26.20%
# 18: 75% NA12878 11,250,000 tEst_NA12877 32.10%
# 19: 100% NA12877 0 pair_NA12877 0.10%
# 20: 95% NA12877 750,000 pair_NA12877 5.21%
# 21: 90% NA12877 1,500,000 pair_NA12877 10.50%
# 22: 85% NA12877 2,250,000 pair_NA12877 15.85%
# 23: 80% NA12877 3,000,000 pair_NA12877 20.92%
# 24: 75% NA12877 3,750,000 pair_NA12877 26.04%
你想要读取数和污染的所有可能组合吗?@iod 我不想要组合,只想要长格式的数据,其中包含读取数列和污染列。我现在不在电脑旁,但我的解决方案是:创建一个包含融合后读取数列的数据框,以及一个包含融合后污染列的数据框,然后做左连接(left join)。谢谢,但这仍然会得到被过度重复的值。你能指出是哪些吗?我不明白你的意思。我更新了我的解决方案。完美!现在终于可以了。我不知道为什么你的解决方案给出的结果与我上面的代码类似,但它不起作用。不过,iod 的解决方案对我有效。无论如何,谢谢你!
# Reshape test.dput in two passes: first stack the read-count columns, then,
# separately for each sample, stack that sample's contamination columns.
# NOTE: gather() is superseded by tidyr::pivot_longer(); it is kept here so
# that row order and result class stay exactly as before.

# Pass 1: every column whose name starts with "No_of_reads" is stacked into
# the key/value pair reads_source / reads.
test.melted <- test.dput %>%
  gather(
    key = "reads_source",
    value = "reads",
    starts_with("No_of_reads")
  )

# Pass 2 (NA12878): keep only the rows carrying NA12878 read counts, then
# stack the columns whose names contain "contamination_of_NA12878".
test.melted.NA12878 <- gather(
  test.melted[test.melted[["reads_source"]] == "No_of_reads_from_NA12878", ],
  key = "contamination_type",
  value = "contamination",
  contains("contamination_of_NA12878")
)

# Pass 2 (NA12877): same procedure for the NA12877 rows and columns.
test.melted.NA12877 <- gather(
  test.melted[test.melted[["reads_source"]] == "No_of_reads_from_NA12877", ],
  key = "contamination_type",
  value = "contamination",
  contains("contamination_of_NA12877")
)

# Drop columns 2 and 3 of each half and stack the halves, NA12877 first.
# NOTE(review): columns 2:3 are presumably the other sample's still-wide
# contamination columns; this depends on the column order of test.dput,
# which is not visible here -- confirm.
test.melted.full <- rbind(
  test.melted.NA12877[, -(2:3)],
  test.melted.NA12878[, -(2:3)]
)
library(magrittr)
library(data.table)
# Convert test.dput to a data.table in place (the result is not reassigned).
setDT(test.dput)

# Long table of read counts: select the columns whose names match
# 'diluted' or 'reads', melt using column 1 of that selection as the id
# column, then strip the 'No_of_reads_from_' prefix from the labels.
# `with = FALSE` is spelled out because `F` is an ordinary variable that can
# be reassigned.
n.reads <-
  test.dput[, grep('diluted|reads', names(test.dput)), with = FALSE] %>%
  melt(id.vars = 1, variable.name = 'Which_No_of_reads',
       value.name = 'No_of_reads') %>%
  .[, Which_No_of_reads := gsub('No_of_reads_from_', '', Which_No_of_reads)]

# Long table of contamination values, built the same way from the columns
# whose names match 'diluted' or 'contamination'.
contam <-
  test.dput[, grep('diluted|contamination', names(test.dput)), with = FALSE] %>%
  melt(id.vars = 1, variable.name = 'Which_contamination',
       value.name = '_contamination_') %>%
  .[, Which_contamination := gsub('contamination_of_', '', Which_contamination)]

# Bind the two long tables side by side and keep the first occurrence of
# each column name, which removes the duplicated id column.
# NOTE(review): the tables are paired purely by row position. In the printed
# output below, the 12 read-count rows appear twice to fill 24 contamination
# rows -- confirm that this recycling is the intended pairing.
cbind(n.reads, contam) %>%
  .[, unique(names(.)), with = FALSE]
# diluted_sample Which_No_of_reads No_of_reads Which_contamination _contamination_
# 1: 100% NA12878 15,000,000 tEst_NA12878 99.60%
# 2: 95% NA12878 14,250,000 tEst_NA12878 99.10%
# 3: 90% NA12878 13,500,000 tEst_NA12878 96.80%
# 4: 85% NA12878 12,750,000 tEst_NA12878 92.60%
# 5: 80% NA12878 12,000,000 tEst_NA12878 88%
# 6: 75% NA12878 11,250,000 tEst_NA12878 82.60%
# 7: 100% NA12877 0 pair_NA12878 100.00%
# 8: 95% NA12877 750,000 pair_NA12878 94.15%
# 9: 90% NA12877 1,500,000 pair_NA12878 88.72%
# 10: 85% NA12877 2,250,000 pair_NA12878 83.36%
# 11: 80% NA12877 3,000,000 pair_NA12878 78.20%
# 12: 75% NA12877 3,750,000 pair_NA12878 73.08%
# 13: 100% NA12878 15,000,000 tEst_NA12877 0.10%
# 14: 95% NA12878 14,250,000 tEst_NA12877 7%
# 15: 90% NA12878 13,500,000 tEst_NA12877 13.60%
# 16: 85% NA12878 12,750,000 tEst_NA12877 20.10%
# 17: 80% NA12878 12,000,000 tEst_NA12877 26.20%
# 18: 75% NA12878 11,250,000 tEst_NA12877 32.10%
# 19: 100% NA12877 0 pair_NA12877 0.10%
# 20: 95% NA12877 750,000 pair_NA12877 5.21%
# 21: 90% NA12877 1,500,000 pair_NA12877 10.50%
# 22: 85% NA12877 2,250,000 pair_NA12877 15.85%
# 23: 80% NA12877 3,000,000 pair_NA12877 20.92%
# 24: 75% NA12877 3,750,000 pair_NA12877 26.04%