Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/75.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
如何在R中将两组数据合并成两列?_R_Data.table_Melt - Fatal编程技术网

如何在R中将两组数据合并成两列?

如何在R中将两组数据合并成两列?,r,data.table,melt,R,Data.table,Melt,我在下面有这些数据。我想将所有No\u of.readscloumns都融化在一列中,将所有\u污染物列融化在另一列中。因此,最终的数据帧将具有稀释样本,无任何读取和污染列。我试着分两步来做,但这会让我反复观察。正确的方法是什么 代码: test.dput.melled使用tidyr::gather和dplyr: test.melted<-gather(test.dput,key="reads_source",value="reads",starts_with("No_of_reads")

我有下面这些数据。我想将所有
No_of_reads
列融合(melt)到一列中,将所有
_contamination_
列融合到另一列中。因此,最终的数据框将包含
diluted_sample、
No_of_reads
和 _contamination_
三列。我试着分两步来做,但这样会得到重复的观测值。正确的方法是什么?

代码:


test.dput.melled使用
tidyr::gather
dplyr

# Step 1: stack every column whose name starts with "No_of_reads" into a
# key/value pair (reads_source / reads).
test.melted <- gather(
  test.dput,
  key = "reads_source",
  value = "reads",
  starts_with("No_of_reads")
)

# Step 2: per sample, keep only the rows holding that sample's read counts,
# then stack the contamination columns that belong to the same sample.
test.melted.NA12878 <- gather(
  test.melted[test.melted$reads_source == "No_of_reads_from_NA12878", ],
  key = "contamination_type",
  value = "contamination",
  contains("contamination_of_NA12878")
)
test.melted.NA12877 <- gather(
  test.melted[test.melted$reads_source == "No_of_reads_from_NA12877", ],
  key = "contamination_type",
  value = "contamination",
  contains("contamination_of_NA12877")
)

# Step 3: drop columns 2 and 3 by position and stack the two per-sample tables.
# NOTE(review): columns 2-3 are presumably the other sample's un-gathered
# contamination columns; this depends on the column order of test.dput --
# confirm before reusing on differently ordered data.
test.melted.full <- rbind(
  test.melted.NA12877[, -(2:3)],
  test.melted.NA12878[, -(2:3)]
)

test.melt因为您标记了
data.table
melt

library(magrittr)
library(data.table)

# Convert test.dput to a data.table by reference so the `[` calls below use
# data.table semantics.
setDT(test.dput)

# Long table of read counts: select the columns whose names match
# "diluted" or "reads", melt with the first selected column as the id, and
# strip the "No_of_reads_from_" prefix from the variable labels.
# `with = FALSE` is spelled out because `F` is an ordinary, reassignable
# variable rather than a reserved word.
n.reads <-
  test.dput[, grep("diluted|reads", names(test.dput)), with = FALSE] %>%
  melt(
    id.vars = 1,
    variable.name = "Which_No_of_reads",
    value.name = "No_of_reads"
  ) %>%
  .[, Which_No_of_reads := gsub("No_of_reads_from_", "", Which_No_of_reads)]

# Long table of contamination values, built the same way from the columns
# whose names match "diluted" or "contamination".
contam <-
  test.dput[, grep("diluted|contamination", names(test.dput)), with = FALSE] %>%
  melt(
    id.vars = 1,
    variable.name = "Which_contamination",
    value.name = "_contamination_"
  ) %>%
  .[, Which_contamination := gsub("contamination_of_", "", Which_contamination)]

# Put the two long tables side by side and keep one column per distinct name
# (the id column occurs in both inputs).
# NOTE(review): the two tables are bound by row position, not joined on a key.
# In the sample output the read counts repeat, so cbind appears to be recycling
# the shorter table -- confirm that this row pairing is what is intended.
cbind(n.reads, contam) %>%
  .[, unique(names(.)), with = FALSE]

#    diluted_sample Which_No_of_reads No_of_reads Which_contamination _contamination_
#  1:           100%           NA12878  15,000,000        tEst_NA12878          99.60%
#  2:            95%           NA12878  14,250,000        tEst_NA12878          99.10%
#  3:            90%           NA12878  13,500,000        tEst_NA12878          96.80%
#  4:            85%           NA12878  12,750,000        tEst_NA12878          92.60%
#  5:            80%           NA12878  12,000,000        tEst_NA12878             88%
#  6:            75%           NA12878  11,250,000        tEst_NA12878          82.60%
#  7:           100%           NA12877           0        pair_NA12878         100.00%
#  8:            95%           NA12877     750,000        pair_NA12878          94.15%
#  9:            90%           NA12877   1,500,000        pair_NA12878          88.72%
# 10:            85%           NA12877   2,250,000        pair_NA12878          83.36%
# 11:            80%           NA12877   3,000,000        pair_NA12878          78.20%
# 12:            75%           NA12877   3,750,000        pair_NA12878          73.08%
# 13:           100%           NA12878  15,000,000        tEst_NA12877           0.10%
# 14:            95%           NA12878  14,250,000        tEst_NA12877              7%
# 15:            90%           NA12878  13,500,000        tEst_NA12877          13.60%
# 16:            85%           NA12878  12,750,000        tEst_NA12877          20.10%
# 17:            80%           NA12878  12,000,000        tEst_NA12877          26.20%
# 18:            75%           NA12878  11,250,000        tEst_NA12877          32.10%
# 19:           100%           NA12877           0        pair_NA12877           0.10%
# 20:            95%           NA12877     750,000        pair_NA12877           5.21%
# 21:            90%           NA12877   1,500,000        pair_NA12877          10.50%
# 22:            85%           NA12877   2,250,000        pair_NA12877          15.85%
# 23:            80%           NA12877   3,000,000        pair_NA12877          20.92%
# 24:            75%           NA12877   3,750,000        pair_NA12877          26.04%
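library(magrittr)
library(data.table)
setDT(test.dput)
n.reads <- 
  test.dput[, grep('diluted|reads', names(test.dput)), with = F] %>% 
    melt(1, variable.name = 'Which_No_of_reads',
            value.name    = 'No_of_reads') %>% 
    .[, Which_No_of_reads := gsub('No_of_reads_from_', '', Which_No_of_reads)]
contam <- 
  test.dput[, grep('diluted|contamination', names(test.dput)), with = F] %>% 
    melt(1, variable.name = 'Which_contamination',
            value.name    = '_contamination_') %>% 
    .[, Which_contamination := gsub('contamination_of_', '', Which_contamination)]
cbind(n.reads, contam) %>% 
  .[, unique(names(.)), with = F]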
#    diluted_sample Which_No_of_reads No_of_reads Which_contamination _contamination_
#  1:           100%           NA12878  15,000,000        tEst_NA12878          99.60%
#  2:            95%           NA12878  14,250,000        tEst_NA12878          99.10%
#  3:            90%           NA12878  13,500,000        tEst_NA12878          96.80%
#  4:            85%           NA12878  12,750,000        tEst_NA12878          92.60%
#  5:            80%           NA12878  12,000,000        tEst_NA12878             88%
#  6:            75%           NA12878  11,250,000        tEst_NA12878          82.60%
#  7:           100%           NA12877           0        pair_NA12878         100.00%
#  8:            95%           NA12877     750,000        pair_NA12878          94.15%
#  9:            90%           NA12877   1,500,000        pair_NA12878          88.72%
# 10:            85%           NA12877   2,250,000        pair_NA12878          83.36%
# 11:            80%           NA12877   3,000,000        pair_NA12878          78.20%
# 12:            75%           NA12877   3,750,000        pair_NA12878          73.08%
# 13:           100%           NA12878  15,000,000        tEst_NA12877           0.10%
# 14:            95%           NA12878  14,250,000        tEst_NA12877              7%
# 15:            90%           NA12878  13,500,000        tEst_NA12877          13.60%
# 16:            85%           NA12878  12,750,000        tEst_NA12877          20.10%
# 17:            80%           NA12878  12,000,000        tEst_NA12877          26.20%
# 18:            75%           NA12878  11,250,000        tEst_NA12877          32.10%
# 19:           100%           NA12877           0        pair_NA12877           0.10%
# 20:            95%           NA12877     750,000        pair_NA12877           5.21%
# 21:            90%           NA12877   1,500,000        pair_NA12877          10.50%
# 22:            85%           NA12877   2,250,000        pair_NA12877          15.85%
# 23:            80%           NA12877   3,000,000        pair_NA12877          20.92%
# 24:            75%           NA12877   3,750,000        pair_NA12877          26.04%

你想要读取数和污染率的所有可能组合吗?@iod 我不想要组合,只想要长格式的数据,其中包含读取数列和污染率列。我现在不在电脑旁,但我的思路是:创建一个包含融合后读取数列的数据框和一个包含融合后污染率列的数据框,然后做左连接(left join)。谢谢,但这样得到的结果中仍然有重复出现的值。你能指出是哪些值吗?我不明白你的意思。我更新了我的解决方案。完美!现在终于可以了。我不知道为什么你的解决方案给出的结果和我上面的代码类似,但它对我不起作用。不过,iod 的解决方案对我有效。无论如何,谢谢你!
# Step 1: stack every column whose name starts with "No_of_reads" into a
# key/value pair (reads_source / reads).
test.melted <- gather(
  test.dput,
  key = "reads_source",
  value = "reads",
  starts_with("No_of_reads")
)

# Step 2: per sample, keep only the rows holding that sample's read counts,
# then stack the contamination columns that belong to the same sample.
test.melted.NA12878 <- gather(
  test.melted[test.melted$reads_source == "No_of_reads_from_NA12878", ],
  key = "contamination_type",
  value = "contamination",
  contains("contamination_of_NA12878")
)
test.melted.NA12877 <- gather(
  test.melted[test.melted$reads_source == "No_of_reads_from_NA12877", ],
  key = "contamination_type",
  value = "contamination",
  contains("contamination_of_NA12877")
)

# Step 3: drop columns 2 and 3 by position and stack the two per-sample tables.
# NOTE(review): columns 2-3 are presumably the other sample's un-gathered
# contamination columns; this depends on the column order of test.dput --
# confirm before reusing on differently ordered data.
test.melted.full <- rbind(
  test.melted.NA12877[, -(2:3)],
  test.melted.NA12878[, -(2:3)]
)
library(magrittr)
library(data.table)

# Convert test.dput to a data.table by reference so the `[` calls below use
# data.table semantics.
setDT(test.dput)

# Long table of read counts: select the columns whose names match
# "diluted" or "reads", melt with the first selected column as the id, and
# strip the "No_of_reads_from_" prefix from the variable labels.
# `with = FALSE` is spelled out because `F` is an ordinary, reassignable
# variable rather than a reserved word.
n.reads <-
  test.dput[, grep("diluted|reads", names(test.dput)), with = FALSE] %>%
  melt(
    id.vars = 1,
    variable.name = "Which_No_of_reads",
    value.name = "No_of_reads"
  ) %>%
  .[, Which_No_of_reads := gsub("No_of_reads_from_", "", Which_No_of_reads)]

# Long table of contamination values, built the same way from the columns
# whose names match "diluted" or "contamination".
contam <-
  test.dput[, grep("diluted|contamination", names(test.dput)), with = FALSE] %>%
  melt(
    id.vars = 1,
    variable.name = "Which_contamination",
    value.name = "_contamination_"
  ) %>%
  .[, Which_contamination := gsub("contamination_of_", "", Which_contamination)]

# Put the two long tables side by side and keep one column per distinct name
# (the id column occurs in both inputs).
# NOTE(review): the two tables are bound by row position, not joined on a key.
# In the sample output the read counts repeat, so cbind appears to be recycling
# the shorter table -- confirm that this row pairing is what is intended.
cbind(n.reads, contam) %>%
  .[, unique(names(.)), with = FALSE]

#    diluted_sample Which_No_of_reads No_of_reads Which_contamination _contamination_
#  1:           100%           NA12878  15,000,000        tEst_NA12878          99.60%
#  2:            95%           NA12878  14,250,000        tEst_NA12878          99.10%
#  3:            90%           NA12878  13,500,000        tEst_NA12878          96.80%
#  4:            85%           NA12878  12,750,000        tEst_NA12878          92.60%
#  5:            80%           NA12878  12,000,000        tEst_NA12878             88%
#  6:            75%           NA12878  11,250,000        tEst_NA12878          82.60%
#  7:           100%           NA12877           0        pair_NA12878         100.00%
#  8:            95%           NA12877     750,000        pair_NA12878          94.15%
#  9:            90%           NA12877   1,500,000        pair_NA12878          88.72%
# 10:            85%           NA12877   2,250,000        pair_NA12878          83.36%
# 11:            80%           NA12877   3,000,000        pair_NA12878          78.20%
# 12:            75%           NA12877   3,750,000        pair_NA12878          73.08%
# 13:           100%           NA12878  15,000,000        tEst_NA12877           0.10%
# 14:            95%           NA12878  14,250,000        tEst_NA12877              7%
# 15:            90%           NA12878  13,500,000        tEst_NA12877          13.60%
# 16:            85%           NA12878  12,750,000        tEst_NA12877          20.10%
# 17:            80%           NA12878  12,000,000        tEst_NA12877          26.20%
# 18:            75%           NA12878  11,250,000        tEst_NA12877          32.10%
# 19:           100%           NA12877           0        pair_NA12877           0.10%
# 20:            95%           NA12877     750,000        pair_NA12877           5.21%
# 21:            90%           NA12877   1,500,000        pair_NA12877          10.50%
# 22:            85%           NA12877   2,250,000        pair_NA12877          15.85%
# 23:            80%           NA12877   3,000,000        pair_NA12877          20.92%
# 24:            75%           NA12877   3,750,000        pair_NA12877          26.04%