
R中基于多列的有条件行删除,r,dataframe,R,Dataframe,我有一个4列、500多行的数据框。我希望根据多列的条件从该数据框中删除行。

df.original

chr start end type 
1 chrI 232613 232625 ins 
2 chrI 834151 834151 snp 
3 chrI 834161 834161 snp 
4 chrI 834171 834177 del 
5 chrI 1123752 1123805 del 
6 chrI 1377649 1377649 snp


   chr   start    end   type     
1 chrI  232613  232625  ins  
2 chrI  834151  834151  snp  
3 chrI  834161  834161  snp  
4 chrI  834171  834177  del 
5 chrI 1123752 1123805  del 
6 chrI 1377649 1377649  snp 

# A tibble: 4 x 4
  chr     start     end type 
  <chr>   <int>   <int> <chr>
1 chrI   232613  232625 ins  
2 chrI   834171  834177 del  
3 chrI  1123752 1123805 del  
4 chrI  1377649 1377649 snp  



# Turn your data.frame into an S4 IRanges object. Any extra named argument
# (here `type`) is stored as a metadata column alongside the ranges.
library(IRanges)

IR <- IRanges(
  start = c(232613, 834151, 834161, 834171, 1123752, 1377649),
  end = c(232625, 834151, 834161, 834177, 1123805, 1377649),
  type = c("ins", "snp", "snp", "del", "del", "snp")
)

>IRanges object with 6 ranges and 1 metadata column:
          start       end     width |        type
      <integer> <integer> <integer> | <character>
  [1]    232613    232625        13 |         ins
  [2]    834151    834151         1 |         snp
  [3]    834161    834161         1 |         snp
  [4]    834171    834177         7 |         del
  [5]   1123752   1123805        54 |         del
  [6]   1377649   1377649         1 |         snp
# Distinct values of the `type` metadata column, in order of first appearance
types <- unique(mcols(IR)$type)

# Build a list holding one IRanges object per type. lapply() stores each
# subset as a single list element (the `[[<-` semantics) and copes with an
# empty `types` vector, unlike a `for (i in 1:length(types))` loop.
list.IR <- lapply(types, function(tp) IR[mcols(IR)$type == tp])
# Create a function that removes ranges whose flanking window overlaps the
# window of any other range (i.e. more than one overlap, counting itself).
#
# IR:    an IRanges object (typically all ranges of a single type).
# width: size passed to flank(); with `both = TRUE` the window extends
#        `width` positions to either side of the flanked end point
#        (see ?flank). Defaults to 25.
#
# Returns the subset of `IR` whose window overlaps nothing but itself.
ovlp_rm <- function(IR, width = 25) {
  IR.flank <- flank(IR, width = width, both = TRUE)
  # With a single argument countOverlaps() counts hits within the object
  # itself, so every range scores at least 1 (its own self-overlap).
  n_ovlp <- countOverlaps(IR.flank)
  indx_no.ovlp <- n_ovlp == 1
  IR[indx_no.ovlp]
}
# Run the overlap filter on every per-type IRanges object in the list
lapply(X = list.IR, FUN = ovlp_rm)

> lapply(list.IR, ovlp_rm)
IRanges object with 1 range and 1 metadata column:
          start       end     width |        type
      <integer> <integer> <integer> | <character>
  [1]    232613    232625        13 |         ins

IRanges object with 1 range and 1 metadata column:
          start       end     width |        type
      <integer> <integer> <integer> | <character>
  [1]   1377649   1377649         1 |         snp

IRanges object with 2 ranges and 1 metadata column:
          start       end     width |        type
      <integer> <integer> <integer> | <character>
  [1]    834171    834177         7 |         del
  [2]   1123752   1123805        54 |         del