Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/76.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
如何在R中找到这些范围的重叠值?_R_Dataframe_Range - Fatal编程技术网

如何在R中找到这些范围的重叠值?

如何在R中找到这些范围的重叠值?,r,dataframe,range,R,Dataframe,Range,我有一个称为范围的df1,如: 我还有一个data.frame,名为: 基本上我有从chromStart到chromEnd的范围值。我还有一个df2中的范围列表。可行的范围要小得多。我想测试范围,从范围,并确保整个范围内的范围是可行的。我该怎么做 我想要的输出是data.frame,如下所示: 1 bin chrom chromStart chromEnd name score 2 12 chr1 840000 856723 -5.7648 599 3

我有一个名为 ranges 的数据框(df1),如:

我还有一个名为 viable 的 data.frame(df2):

基本上,ranges 中的每一行都是一个从 chromStart 到 chromEnd 的范围。我还有第二个数据框 viable(df2),其中也是一组范围,而且 viable 中的范围要小得多。我想检查 ranges 中的每个范围,找出它与 viable 中范围重叠的部分,确保保留下来的范围都落在 viable 之内。我该怎么做?

我想要的输出是data.frame,如下所示:

1    bin chrom chromStart  chromEnd    name score
2     12  chr1   840000    856723    -5.7648   599
3    116  chr1   1693001   1739032   -4.8403   473
6    133  chr1   1750780   1880930   -4.8096   469
您可以尝试使用 GenomicRanges 软件包

这里我们加载示例输入数据。我知道这种写法不太优雅……但我比较懒,而且用多行编辑来写很方便。注意:我不知道名为 `1` 的那一列是什么含义,但我还是把它保留在了数据中。

# Build the two example inputs (`ranges` and `viable`) by hand and coerce
# their coordinate columns to integer.
#
# dplyr supplies `%>%`, `mutate()` and `as_tibble()`; load it here so the
# snippet runs on its own.
library(dplyr)

# `ranges`: the intervals to be tested, one row per interval. Every value
# starts out as a string because rbind() of character vectors yields a
# character matrix.
ranges <-
  rbind(
    c("2", "12", "chr1", "836780", "856723", "-5.7648", "599"),
    c("3", "116", "chr1", "1693001", "1739032", "-4.8403", "473"),
    c("4", "117", "chr1", "1750780", "1880930", "-5.3036", "536"),
    c("5", "121", "chr1", "2020123", "2108890", "-4.4165", "415")
  ) %>%
  as.data.frame()
# The first column is literally named "1" in the question's data; its meaning
# is unknown, it is simply carried along.
colnames(ranges) <-
  c("1", "bin", "chrom", "chromStart", "chromEnd", "name", "score")

# `viable`: the second set of intervals that `ranges` is compared against.
viable <-
  rbind(
    c("chr1", "840000", "890000", "1566"),
    c("chr1", "1690000", "1740000", "1566"),
    c("chr1", "1700000", "1750000", "1566"),
    c("chr1", "1710000", "1760000", "1566"),
    c("chr1", "1720000", "1770000", "1566"),
    c("chr1", "1730000", "1780000", "1566"),
    c("chr1", "1740000", "1790000", "1566"),
    c("chr1", "1750000", "1800000", "1566"),
    c("chr1", "1760000", "1810000", "1566")
  ) %>%
  as.data.frame()
colnames(viable) <-
  c("chrom", "chromStart", "chromEnd", "N")

# The coordinate columns need to be integers.
# as.character() runs first so that, if as.data.frame() produced factors
# (the default before R 4.0), values are converted by label rather than by
# internal level code.
# as_tibble() replaces the deprecated tbl_df().
ranges <-
  ranges %>%
  as_tibble() %>%
  mutate(
    chromStart = as.integer(as.character(chromStart)),
    chromEnd = as.integer(as.character(chromEnd))
  )
viable <-
  viable %>%
  as_tibble() %>%
  mutate(
    chromStart = as.integer(as.character(chromStart)),
    chromEnd = as.integer(as.character(chromEnd))
  )

可以使用 data.table::foverlaps 或 IRanges 包。为什么要重复发帖?答案已经给出。数字不同并不意味着这是一个不同的问题。
1    bin chrom chromStart  chromEnd    name score
2     12  chr1   840000    856723    -5.7648   599
3    116  chr1   1693001   1739032   -4.8403   473
6    133  chr1   1750780   1880930   -4.8096   469
# Find the parts of each interval in `ranges` that overlap the intervals in
# `viable`, using Bioconductor's GenomicRanges, and report them together with
# the original per-interval annotation columns.
library(dplyr)
library(GenomicRanges)

# ---- Example input ----------------------------------------------------------

# `ranges`: the intervals to be tested, one row per interval. Every value
# starts out as a string because rbind() of character vectors yields a
# character matrix.
ranges <-
  rbind(
    c("2", "12", "chr1", "836780", "856723", "-5.7648", "599"),
    c("3", "116", "chr1", "1693001", "1739032", "-4.8403", "473"),
    c("4", "117", "chr1", "1750780", "1880930", "-5.3036", "536"),
    c("5", "121", "chr1", "2020123", "2108890", "-4.4165", "415")
  ) %>%
  as.data.frame()
# The first column is literally named "1" in the question's data; its meaning
# is unknown, it is simply carried along.
colnames(ranges) <-
  c("1", "bin", "chrom", "chromStart", "chromEnd", "name", "score")

# `viable`: the second set of intervals that `ranges` is compared against.
viable <-
  rbind(
    c("chr1", "840000", "890000", "1566"),
    c("chr1", "1690000", "1740000", "1566"),
    c("chr1", "1700000", "1750000", "1566"),
    c("chr1", "1710000", "1760000", "1566"),
    c("chr1", "1720000", "1770000", "1566"),
    c("chr1", "1730000", "1780000", "1566"),
    c("chr1", "1740000", "1790000", "1566"),
    c("chr1", "1750000", "1800000", "1566"),
    c("chr1", "1760000", "1810000", "1566")
  ) %>%
  as.data.frame()
colnames(viable) <-
  c("chrom", "chromStart", "chromEnd", "N")

# ---- Type conversion --------------------------------------------------------

# The coordinate columns need to be integers.
# as.character() runs first so that, if as.data.frame() produced factors
# (the default before R 4.0), values are converted by label rather than by
# internal level code.
# as_tibble() replaces the deprecated tbl_df().
ranges <-
  ranges %>%
  as_tibble() %>%
  mutate(
    chromStart = as.integer(as.character(chromStart)),
    chromEnd = as.integer(as.character(chromEnd))
  )
viable <-
  viable %>%
  as_tibble() %>%
  mutate(
    chromStart = as.integer(as.character(chromStart)),
    chromEnd = as.integer(as.character(chromEnd))
  )

# ---- Convert to GRanges -----------------------------------------------------

# keep.extra.columns = TRUE carries the non-coordinate columns along as
# metadata columns. TRUE is spelled out because `T` is an ordinary variable
# that can be reassigned.
gr.ranges <-
  makeGRangesFromDataFrame(ranges,
                           keep.extra.columns = TRUE,
                           seqnames.field = "chrom",
                           start.field = "chromStart",
                           end.field = "chromEnd")
gr.viable <-
  makeGRangesFromDataFrame(viable,
                           keep.extra.columns = TRUE,
                           seqnames.field = "chrom",
                           start.field = "chromStart",
                           end.field = "chromEnd")

# ---- Overlap computation ----------------------------------------------------

# To find the intersects
gr.intersect <-
  GenomicRanges::intersect(gr.ranges, gr.viable)

# For linking up the non- chrom,start,end columns: the intersect result does
# not carry the metadata columns, so map each intersected piece back to the
# original range it overlaps.
gr.hits <-
  GenomicRanges::findOverlaps(gr.intersect, gr.ranges)

# One output row per (intersected piece, original range) hit; coordinates come
# from the intersection, metadata from the matching original range.
output <-
  gr.intersect[queryHits(gr.hits)]
mcols(output) <-
  mcols(gr.ranges[subjectHits(gr.hits)])
output

# ---- Reformat to dataframe --------------------------------------------------

# The original code selects `X1`, i.e. as.data.frame() is expected to have
# turned the non-syntactic column name "1" into "X1"; it is renamed back here.
# select() is namespace-qualified because Bioconductor packages attached after
# dplyr mask several dplyr verbs, and an unqualified call could pick up the
# wrong function.
output %>%
  as.data.frame() %>%
  dplyr::select(
    `1` = X1,
    bin,
    chrom = seqnames,
    chromStart = start,
    chromEnd = end,
    name,
    score
  )