如何在R中找到这些范围的重叠值?
我有一个名为 ranges 的 data.frame(df1),如: 我还有一个名为 viable 的 data.frame(df2): 基本上,ranges 中的每一行都是一个从 chromStart 到 chromEnd 的区间;df2(viable)中也有一个区间列表,其中的区间要小得多。我想检查 ranges 中的各个区间,并确保保留下来的整个区间都落在 viable 的区间之内。我该怎么做? 我想要的输出是如下所示的 data.frame:如何在R中找到这些范围的重叠值?,r,dataframe,range,R,Dataframe,Range,我有一个名为 ranges 的 data.frame(df1),如: 我还有一个名为 viable 的 data.frame(df2): 基本上,ranges 中的每一行都是一个从 chromStart 到 chromEnd 的区间;df2(viable)中也有一个区间列表,其中的区间要小得多。我想检查 ranges 中的各个区间,并确保保留下来的整个区间都落在 viable 的区间之内。我该怎么做? 我想要的输出是如下所示的 data.frame: 1 bin chrom chromStart chromEnd name score 2 12 chr1 840000 856723 -5.7648 599 3
1 bin chrom chromStart chromEnd name score
2 12 chr1 840000 856723 -5.7648 599
3 116 chr1 1693001 1739032 -4.8403 473
6 133 chr1 1750780 1880930 -4.8096 469
您可以尝试使用 Bioconductor 的 GenomicRanges 软件包。
这里我们加载示例输入数据。我知道这种写法不太优雅……但我比较懒,而且用多行编辑来处理很方便。注意:我不清楚名为“1”的那一列是什么含义,但我仍把它保留在了数据中。
# Example query intervals, typed in row by row. Every field is entered as a
# string, so the coordinate columns still need converting before use.
ranges <- as.data.frame(
  rbind(
    c("2", "12", "chr1", "836780", "856723", "-5.7648", "599"),
    c("3", "116", "chr1", "1693001", "1739032", "-4.8403", "473"),
    c("4", "117", "chr1", "1750780", "1880930", "-5.3036", "536"),
    c("5", "121", "chr1", "2020123", "2108890", "-4.4165", "415")
  )
)
# The first column is literally named "1", mirroring the question's table.
names(ranges) <- c("1", "bin", "chrom", "chromStart", "chromEnd", "name", "score")
# Example "viable" intervals, typed in row by row. Every field is entered as
# a string, so the coordinate columns still need converting before use.
viable <- as.data.frame(
  rbind(
    c("chr1", "840000", "890000", "1566"),
    c("chr1", "1690000", "1740000", "1566"),
    c("chr1", "1700000", "1750000", "1566"),
    c("chr1", "1710000", "1760000", "1566"),
    c("chr1", "1720000", "1770000", "1566"),
    c("chr1", "1730000", "1780000", "1566"),
    c("chr1", "1740000", "1790000", "1566"),
    c("chr1", "1750000", "1800000", "1566"),
    c("chr1", "1760000", "1810000", "1566")
  )
)
names(viable) <- c("chrom", "chromStart", "chromEnd", "N")
## Need columns to be integers
# The coordinates were entered as strings. Going through as.character()
# first keeps the conversion correct even if the columns are factors
# (as.integer() on a factor would return level codes, not the values).
ranges <-
  ranges %>%
  as_tibble() %>% # replaces tbl_df(), which is deprecated in dplyr >= 1.0.0
  mutate(
    chromStart = as.integer(as.character(chromStart)),
    chromEnd = as.integer(as.character(chromEnd))
  )
# Convert the coordinate columns of `viable` to integers. Going through
# as.character() first keeps the conversion correct even if the columns are
# factors (as.integer() on a factor would return level codes).
viable <-
  viable %>%
  as_tibble() %>% # replaces tbl_df(), which is deprecated in dplyr >= 1.0.0
  mutate(
    chromStart = as.integer(as.character(chromStart)),
    chromEnd = as.integer(as.character(chromEnd))
  )
可以使用 data.table::foverlaps 或 IRanges 软件包。为什么要重复发帖?答案已经给出过了。数字不同并不意味着这是一个不同的问题。
1 bin chrom chromStart chromEnd name score
2 12 chr1 840000 856723 -5.7648 599
3 116 chr1 1693001 1739032 -4.8403 473
6 133 chr1 1750780 1880930 -4.8096 469
library(dplyr)
library(GenomicRanges)
# Example query intervals, typed in row by row. Every field is entered as a
# string, so the coordinate columns still need converting before use.
ranges <- as.data.frame(
  rbind(
    c("2", "12", "chr1", "836780", "856723", "-5.7648", "599"),
    c("3", "116", "chr1", "1693001", "1739032", "-4.8403", "473"),
    c("4", "117", "chr1", "1750780", "1880930", "-5.3036", "536"),
    c("5", "121", "chr1", "2020123", "2108890", "-4.4165", "415")
  )
)
# The first column is literally named "1", mirroring the question's table.
names(ranges) <- c("1", "bin", "chrom", "chromStart", "chromEnd", "name", "score")
# Example "viable" intervals, typed in row by row. Every field is entered as
# a string, so the coordinate columns still need converting before use.
viable <- as.data.frame(
  rbind(
    c("chr1", "840000", "890000", "1566"),
    c("chr1", "1690000", "1740000", "1566"),
    c("chr1", "1700000", "1750000", "1566"),
    c("chr1", "1710000", "1760000", "1566"),
    c("chr1", "1720000", "1770000", "1566"),
    c("chr1", "1730000", "1780000", "1566"),
    c("chr1", "1740000", "1790000", "1566"),
    c("chr1", "1750000", "1800000", "1566"),
    c("chr1", "1760000", "1810000", "1566")
  )
)
names(viable) <- c("chrom", "chromStart", "chromEnd", "N")
## Need columns to be integers
# The coordinates were entered as strings. Going through as.character()
# first keeps the conversion correct even if the columns are factors
# (as.integer() on a factor would return level codes, not the values).
ranges <-
  ranges %>%
  as_tibble() %>% # replaces tbl_df(), which is deprecated in dplyr >= 1.0.0
  mutate(
    chromStart = as.integer(as.character(chromStart)),
    chromEnd = as.integer(as.character(chromEnd))
  )
# Convert the coordinate columns of `viable` to integers. Going through
# as.character() first keeps the conversion correct even if the columns are
# factors (as.integer() on a factor would return level codes).
viable <-
  viable %>%
  as_tibble() %>% # replaces tbl_df(), which is deprecated in dplyr >= 1.0.0
  mutate(
    chromStart = as.integer(as.character(chromStart)),
    chromEnd = as.integer(as.character(chromEnd))
  )
# Build a GRanges object from `ranges`. The chrom/chromStart/chromEnd columns
# supply the coordinates; keep.extra.columns retains the remaining columns.
# NOTE(review): coordinates are taken as given; if they come from a 0-based
# table, `starts.in.df.are.0based = TRUE` may be needed. Confirm with the
# data source.
gr.ranges <- makeGRangesFromDataFrame(
  ranges,
  keep.extra.columns = TRUE, # TRUE, not T: `T` is a variable and can be reassigned
  seqnames.field = "chrom",
  start.field = "chromStart",
  end.field = "chromEnd"
)
# Build a GRanges object from `viable`. The chrom/chromStart/chromEnd columns
# supply the coordinates; keep.extra.columns retains the remaining column (N).
# NOTE(review): coordinates are taken as given; if they come from a 0-based
# table, `starts.in.df.are.0based = TRUE` may be needed. Confirm with the
# data source.
gr.viable <- makeGRangesFromDataFrame(
  viable,
  keep.extra.columns = TRUE, # TRUE, not T: `T` is a variable and can be reassigned
  seqnames.field = "chrom",
  start.field = "chromStart",
  end.field = "chromEnd"
)
# Step 1: the stretches of `gr.ranges` that are also covered by `gr.viable`.
gr.intersect <- GenomicRanges::intersect(gr.ranges, gr.viable)

# Step 2: pair every intersected piece (query) with the original range(s) it
# overlaps (subject), so the non-coordinate columns can be re-attached.
gr.hits <- GenomicRanges::findOverlaps(gr.intersect, gr.ranges)

# Step 3: one output row per hit, using the coordinates of the intersected
# piece and the metadata columns of the matching original range.
output <- gr.intersect[queryHits(gr.hits)]
mcols(output) <- mcols(gr.ranges[subjectHits(gr.hits)])

# Show the annotated GRanges result.
output
# Reformat to a data frame with the original column names and order.
# After as.data.frame() the "1" column is addressed as `X1`, so it is renamed
# back here; seqnames/start/end are renamed to chrom/chromStart/chromEnd.
# select() is namespaced (as intersect()/findOverlaps() already are) because
# other Bioconductor packages export their own select(), which would mask
# dplyr's if attached later.
output %>%
  as.data.frame() %>%
  dplyr::select(
    `1` = X1,
    bin,
    chrom = seqnames,
    chromStart = start,
    chromEnd = end,
    name,
    score
  )