使用Perl确定不重叠的位置_Perl_Combinatorics

使用Perl确定不重叠的位置

perl

使用Perl确定不重叠的位置,perl,combinatorics,Perl,Combinatorics,我有一个位置集合——这里是数据结构的一个示例 my $locations = { loc_1 => { start => 1, end => 193, }, loc_2 => { start => 180, end => 407, }, loc_3 => { start => 329, end => 684, }, loc_4 =>

我有一个位置集合——这里是数据结构的一个示例

my $locations =
{
  loc_1 =>
  {
    start => 1,
    end   => 193,
  },
  loc_2 =>
  {
    start => 180,
    end   => 407,
  },
  loc_3 =>
  {
    start => 329,
    end   => 684,
  },
  loc_4 =>
  {
    start => 651,
    end   => 720,
  },
};

确定每个可能的非重叠位置组合的最佳方法是什么？本例的答案如下所示。请记住，可能有一个或多个位置，这些位置可能重叠，也可能不重叠

my $non_overlapping_locations =
[
  {
    loc_1 =>
    {
      start => 1,
      end   => 193,
    },
    loc_3 =>
    {
      start => 329,
      end   => 684,
    },
  },
  {
    loc_1 =>
    {
      start => 1,
      end   => 193,
    },
    loc_4 =>
    {
      start => 651,
      end   => 720,
    },
  },
  {
    loc_2 =>
    {
      start => 180,
      end   => 407,
    },
    loc_4 =>
    {
      start => 651,
      end   => 720,
    },
  }
];

更新：

ysth

的回答帮助我看到了自己措辞中的一个缺陷。我想我对//所有可能的//非重叠位置的组合不感兴趣，我只对不是其他解决方案子集的解决方案感兴趣。

我不是一个CS人，所以我对所有最好的算法都不感兴趣，但我想知道是否有比以下更好的方法：

my @location_keys = keys %{$locations};
while (my $key_for_checking = (shift @location_keys) {
    foreach my $key_to_compare (@location_keys) {
        if ( do_not_overlap($locations->{$key_for_checking}, 
                            $locations->{$key_to_compare} ) {
            add_to_output($key_for_checking, $key_to_compare);
        }
    }
}

使用

不重叠和将\u添加到输出中
定义合适的
如果您想检查重叠。。。这很简单。
在下列情况下，A和B不重叠：
( (A->start < B->start) && (A->end < B->start) ) ||
( (A->start > B->end)   && (A->end > B->end) )

（（A->startstart）和&（A->endstart））||
（（A->start>B->end）&&（A->end>B->end））

可能需要根据共享边界是否构成重叠进行调整。此外，如果您知道A和B是否以某种方式（按开始或结束）排序，则可以简化此过程。
首先，我将收集所有单独的点（每个位置的开始和结束），对它们进行排序并将它们保存在列表中。在您的情况下，这将是：
1,180,193,329,407,651,684,720. 

对于该列表中的每个间隔，找出重叠的段数。在您的情况下，这将是：
1, 180 -> 1
180, 193 -> 2
193, 329 -> 1
329, 407 -> 2
407, 651 -> 1
651, 684 -> 2
684, 720 -> 1

并在大于1的线段处循环（在本例中为3个）。因此，案例总数为2 x 2 x 2=8个解决方案（在一个解决方案中，只能选择一个分段加工多个区间）
我们找到了2，2，2（或2，3，4）。将它们放在一个数组中，从最后一个开始。减小它直到达到0。当达到1时，减小上一个数字，并将第一个数字设置为初始值减去1
假设我们已经对初始段进行了编号：（在本例中为1,2,3,4,5,6
）。重叠段将包含以下段[1,2]、[2,3]、[3,4]。我们有3个重叠的部分。现在，我们开始选择/消除的递归过程：
在每个步骤中，我们都会看到一个具有多个段的重叠段。我们重复选择，对于每个选择，我们做两件事：从每个后续重叠段中删除我们现在没有选择的段，并在每个后续重叠段中强制选择当前段，这些重叠段可能有此选择。每个成为非重叠段的段都将被视为新的选择。搜索下一个多项选择并递归。一旦我们找不到选择，我们就有了部分解决方案。我们需要添加不涉及任何重叠的部分。打印出来
在这种情况下，它将如下所示：第一步：
we are here [1,2], [2,3], [3,4]:
  chose 1 -> // eliminate 2 from rest and force 1 (3 is a single choice so we do the same)
      [1], [3], [3] -> [1, 3] solution 
  chose 2 -> // eliminate 1 from the rest and force 2 (2 single choice so we do the same). 
      [2], [2], [4] -> [2, 4] solution

这应该能正常工作
现在是实现这一点的代码（我认为这不是最漂亮的perl代码，但我真的不是perl高手）：
#/bin/perl
严格使用；
使用警告；
使用5.010；
使用数据：：转储程序；
我的$locs={
loc_1=>{
开始=>1，
结束=>193，
},
loc_2=>{
开始=>180，
结束=>407，
},
loc_3=>{
开始=>329，
结束=>684，
},
loc_4=>{
开始=>651，
结束=>720，
}
};
我的（%开始，%结束）；
地图{
my（$start，$end）=（$locs->{$}->{start}，$locs->{$}->{end}）；
推送{$starts{$start}，$\；
推送{$ends{$end}，$\；
}钥匙%$LOC；
我的@overlaps，我的%tmp；
地图{
map{$tmp{$}=1}{$starts{$}}；
映射{delete$tmp{$}}{$ends{$}}；
my@segs=键%tmp；
如果1<@segs，则按@overlaps，\@segs
}排序（键%开始，键%结束）；
子解析非重叠{
my（$array，$pos）=（$\u0]，$\u1]）；
my@node=@{$array->[$pos]}；
foreach my$值（@node）{
my@work=map{[@$\]}@$数组；
$work[$pos]=[$value]；
my（$removed，$forced）=（{}，{$value=>1}）；
映射{$removed->{$\}=1，如果$\$ne$value}@node；
我的（$i，$new_pos）=（0，-1）；
对于（$i=$pos+1；$i{$}）}@$；
如果（$#$==0）{$forced->{@$[0]}=1}
#施力
my@tmp=grep{defined$forced->{$\u}}@$；
如果（$#tmp==0）{
如果$tmp[0]ne$\}@$，则映射{$removed->{$\}=1；
@$u=@tmp；
}
如果（$#$0&&$新位置==-1）{
$new_pos=$i；
}
$work[$i]=$\；
}
如果（$new_pos！=-1）{
解析非重叠（\@work，$new\u pos）；
}否则{
打印转储程序\@工作
#@work的部分解minux完全不重叠段。
}
}
}    
解析非重叠（\@重叠，0）；
（现实生活中的闯入-道歉，我会写一个解释-然后去看那些空的数组引用，尽管这很琐碎-稍后！）
#/usr/bin/perl
严格使用；
使用警告；
使用5.010；
使用列表：：MoreUtils qw（任何）；
使用数据：：转储程序；
我的$locations={
loc_1=>{
开始=>1，
结束=>193，
},
loc_2=>{
开始=>180，
结束=>407，
},
loc_3=>{
开始=>329，
结束=>684，
},
loc_4=>{
开始=>651，
结束=>720，
},
};
我的@keys=keys%$位置；
我的%决赛；
对于我的$key（@keys）{
按@{$final{$key}}，映射{
如果（$locations->{$key}->{start}>=$locations->{$}->{start}
&&$locations->{$key}->{start}{$}->{end}
或$locations->{$key}->{end}>=$locations->{$}->{start}
&&$locations->{$key}->{end}{${}->{end}）
{
();
}
否则{
my$return=[sort$key，$\n]；
if（任意{$return~~$}{$final{$}}，@{$final{$key}）{
();
}
else{$return；}
}
}grep{$\une$key}keys%$位置；
}
说
#!/bin/perl

use strict;
use warnings;
use 5.010;
use Data::Dumper;

my $locs = {
  loc_1 => {
    start => 1,
    end   => 193,
  },
  loc_2 => {
    start => 180,
    end   => 407,
  },
  loc_3 => {
    start => 329,
    end   => 684,
  },
  loc_4 => {
            start => 651,
    end   => 720,
  }
};

my (%starts, %ends);
map {
        my ($start, $end) = ($locs->{$_}->{start}, $locs->{$_}->{end});

        push @{ $starts{$start} }, $_;
        push @{ $ends{$end} }, $_;
} keys %$locs;

my @overlaps, my %tmp;

map {
        map { $tmp{$_} = 1 } @{$starts{$_}};
        map { delete $tmp{$_} } @{$ends{$_}};

        my @segs = keys %tmp;
        push @overlaps, \@segs if 1 < @segs
} sort (keys %starts, keys %ends);

sub parse_non_overlapping {
  my ($array,$pos)=($_[0], $_[1]);

  my @node = @{$array->[$pos]};
  foreach my $value ( @node ) {

    my @work = map { [@$_] } @$array;
    $work[$pos] = [ $value ];

    my ($removed, $forced) = ( {}, {$value => 1});
    map { $removed->{$_} = 1 if $_ ne $value } @node;

    my ($i, $new_pos) = (0, -1);
    for ( $i = $pos + 1; $i <= $#work; $i++ ) {
        $_ = $work[$i];

        #apply map
        @$_ = grep { not defined($removed->{$_}) } @$_;
        if ( $#$_ == 0 ) { $forced->{@$_[0]} = 1 }

        #apply force
            my @tmp = grep { defined $forced->{$_} } @$_;
        if ( $#tmp == 0 ) {
             map { $removed->{$_} = 1 if $tmp[0] ne $_ } @$_;
             @$_ = @tmp;
        }

        if ( $#$_ > 0 && $new_pos == -1 ) {
                $new_pos = $i;
        }

        $work[$i] = $_;
    }

    if ( $new_pos != -1 ) {
      parse_non_overlapping(\@work, $new_pos);
    } else {
      print Dumper \@work
       # @work has the partial solution minux completely non overlapping segments.
    }
  }
}    

parse_non_overlapping(\@overlaps, 0);

#! /usr/bin/perl
use strict;
use warnings;
use 5.010;
use List::MoreUtils qw(any);
use Data::Dumper;

my $locations = {
    loc_1 => {
        start => 1,
        end   => 193,
    },
    loc_2 => {
        start => 180,
        end   => 407,
    },
    loc_3 => {
        start => 329,
        end   => 684,
    },
    loc_4 => {
        start => 651,
        end   => 720,
    },
};

my @keys = keys %$locations;

my %final;

for my $key (@keys) {
    push @{ $final{$key} }, map {
        if (   $locations->{$key}->{start} >= $locations->{$_}->{start}
            && $locations->{$key}->{start} <= $locations->{$_}->{end}
            or $locations->{$key}->{end} >= $locations->{$_}->{start}
            && $locations->{$key}->{end} <= $locations->{$_}->{end} )
        {
            ();
        }
        else {
            my $return = [ sort $key, $_ ];
            if ( any { $return ~~ $_ } @{ $final{$_} }, @{ $final{$key} } ) {
                ();
            }
            else { $return; }
        }
    } grep { $_ ne $key } keys %$locations;
}

say Dumper \%final;

use strict;
use warnings;

my $locations =
{
  loc_1 =>
  {
    start => 1,
    end   => 193,
  },
  loc_2 =>
  {
    start => 180,
    end   => 407,
  },
  loc_3 =>
  {
    start => 329,
    end   => 684,
  },
  loc_4 =>
  {
    start => 651,
    end   => 720,
  },
};
my $non_overlapping_locations = [];
my @locations = sort keys %$locations;

get_location_combinations( $locations, $non_overlapping_locations, [], @locations );

use Data::Dumper;
print Data::Dumper::Dumper($non_overlapping_locations);

sub get_location_combinations {
    my ($locations, $results, $current, @remaining) = @_;

    if ( ! @remaining ) {
        if ( not_a_subset_combination( $results, $current ) ) {
            push @$results, $current;
        }
    }
    else {
        my $next = shift @remaining;
        if (can_add_location( $locations, $current, $next )) {
            get_location_combinations( $locations, $results, [ @$current, $next ], @remaining );
        }
        get_location_combinations( $locations, $results, [ @$current ], @remaining );
    }
}

sub can_add_location {
    my ($locations, $current, $candidate) = @_;

    # not clear if == is an overlap; modify to use >=  and <= if so.
    0 == grep $locations->{$candidate}{end} > $locations->{$_}{start} && $locations->{$candidate}{start} < $locations->{$_}{end}, @$current;
}

sub not_a_subset_combination {
    my ($combinations, $candidate) = @_;

    for my $existing (@$combinations) {
        my %candidate;
        @candidate{@$candidate} = ();
        delete @candidate{@$existing};
        if ( 0 == keys %candidate ) {
            return 0;
        }
    }
    return 1;
}