Perl 合并列表的列表

Perl 合并列表的列表,perl,Perl,我有一个列表,类似这样: [ [ 1, 2, 3 ], [ 20, 30, 40, 50 ], [ 11, 15, 17 ], [ 20, 22, 25, 27 ], [ 1, 5, 10 ], [ 1, 100 ] ] 我想合并一个列表中的任何元素与另一个列表中的任何元素匹配的内部列表。这还需要能够处理多个重叠(因此在上面的示例中,列表中的3个将合并为一个)。因此,在这种情况下,结果如下: [ [ 1, 2, 3, 5, 10, 100

我有一个列表,类似这样:

[
    [ 1, 2, 3 ],
    [ 20, 30, 40, 50 ],
    [ 11, 15, 17 ],
    [ 20, 22, 25, 27 ],
    [ 1, 5, 10 ],
    [ 1, 100 ]
]
我想合并一个列表中的任何元素与另一个列表中的任何元素匹配的内部列表。这还需要能够处理多个重叠(因此在上面的示例中,列表中的3个将合并为一个)。因此,在这种情况下,结果如下:

[
    [ 1, 2, 3, 5, 10, 100 ],     # 3 lists have been merged into one
    [ 11, 15, 17 ],              # Untouched due to no overlap
    [ 20, 22, 25, 30, 40, 50 ],  # 2 lists merged
]

是否有明显的算法或Perl模块可供使用?

只有子数组的顺序不同

use strict;
use warnings;

sub merge {
  my ($arr) = @_;

  my $i = 0;
  while ($i < $#$arr) {
    my $current = $arr->[$i];
    my %h;
    @h{@$current} = ();
    my @ovlap = grep { 
      grep exists $h{$_}, @{$arr->[$_]} 

    } ($i+1) .. $#$arr;

    my %seen;
    @$current = 
      sort {$a <=> $b}
      grep !$seen{$_}++,
      (@$current, map @$_, @$arr[@ovlap]);

    @$arr[@ovlap] = ();
    @$arr = grep defined, @$arr;

    $i++;
  }
  return $arr;
}

my $arr = [
    [ 1, 2, 3 ],
    [ 20, 30, 40, 50 ],
    [ 11, 15, 17 ],
    [ 20, 22, 25, 27 ],
    [ 1, 5, 10 ],
    [ 1, 100 ],
];
merge(merge($arr));
use Data::Dumper; print Dumper $arr;

我使用散列来找到解决方案。如果列表中可能包含重复的成员,此解决方案会将其数目减少到1

%shared表示哪些列表共享哪些数字。然后,当存在共享编号时,合并列表(即,更改%shared中的信息)。一旦没有共享号码,就可以从散列中创建列表

#!/usr/bin/perl
use warnings;
use strict;

use Data::Dumper;

my @lists = (
             [ 1, 5, 10 ],
             [ 10, 15, 17 ],
             [ 20, 22, 25, 27 ],
             [ 20, 30, 40, 50 ],
             [ 1, 2, 3 ],
             [ 1, 100 ],
            );

my %shared;
for my $i (0 .. $#lists) {
    undef $shared{$_}{$i} for @{ $lists[$i] };
}

while (my ($num) = grep 1 < keys %{ $shared{$_} }, keys %shared) {
    my @to_merge = keys %{ $shared{$num} };
    my $list = shift @to_merge;
    for my $merge (@to_merge) {
        print "Merging list $merge to $list\n";
        for my $h (values %shared) {
            if (exists $h->{$merge}) {
                delete $h->{$merge};
                undef $h->{$list};
            }
        }
    }
}

my %left;
undef $left{ (keys %{ $shared{$_} })[0] }{$_} for keys %shared;
my @result = map [ keys %$_ ], values %left;

print Dumper \@result;
#/usr/bin/perl
使用警告;
严格使用;
使用数据::转储程序;
我的@lists=(
[ 1, 5, 10 ],
[ 10, 15, 17 ],
[ 20, 22, 25, 27 ],
[ 20, 30, 40, 50 ],
[ 1, 2, 3 ],
[ 1, 100 ],
);
我的%shared;
对于我的$i(0..$#列表){
为@{$lists[$i]}取消定义$shared{${$i};
}
while(my($num)=grep 1{$merge}){
删除$h->{$merge};
undef$h->{$list};
}
}
}
}
我的%左;
未定义$left{(键%{$shared{$}})[0]}{$}用于键%shared;
my@result=map[keys%$\ux],值%左;
打印转储程序\@结果;

我的两便士。使用简单的循环和缓存来存储结果列表中每个元素的位置,以避免每次搜索

#! /usr/bin/perl

use warnings;
use strict;
use Data::Dumper;

my $lol = [
    [ 1, 2, 3 ],
    [ 20, 30, 40, 50 ],
    [ 11, 15, 17 ],
    [ 20, 22, 25, 27 ],
    [ 1, 5, 10 ],
    [ 1, 100 ]
    ];

my @results = ();
my %resultCache;

sub elementInResults {
    my ($element) = $_[0];

    # return value for cache, or search if not in cache
    if (!defined $resultCache{$element} ) {
        # search for target in destination arrays
        for (my $destIndex = 0; $destIndex < @results;  $destIndex++) {
            if (grep (/$element/, @{$results[$destIndex]}) > 0 ) {
                $resultCache{$element} = $destIndex;
                last;
            }
        }
    }

    return $resultCache{$element};
}

my $srcCount=0;
# loop through  source arrays
for my $srcList (@$lol) {
    my $destIndex ;
    # loop through elements of array
    for (my $srcElementIndex=0; $srcElementIndex < @$srcList; $srcElementIndex++) {
        $destIndex = elementInResults($srcList->[$srcElementIndex]);
        if (defined $destIndex ) {
            # element exists in an existing result array so merge
            print "Merging source array $srcCount into result array $destIndex, match on:" . $srcList->[$srcElementIndex] . "\n";
            # remove the duplicate element from src list first
            splice(@$srcList,$srcElementIndex,1);
            # then merge into dest list
            push (@{$results[$destIndex]}, @$srcList);
            last;
        }
        $srcElementIndex++;
    }

    # if no elements in list found in existing results add list as new one to results
    push (@results, $srcList) if (!defined $destIndex ) ;

    $srcCount++;
}

map {@$_ = sort ({$a <=> $b} @$_)} @results;

print Dumper \@results;
#/usr/bin/perl
使用警告;
严格使用;
使用数据::转储程序;
我的$lol=[
[ 1, 2, 3 ],
[ 20, 30, 40, 50 ],
[ 11, 15, 17 ],
[ 20, 22, 25, 27 ],
[ 1, 5, 10 ],
[ 1, 100 ]
];
我的@results=();
我的%resultCache;
子元素结果{
我的($element)=$\u0;
#返回缓存的值,如果不在缓存中,则进行搜索
if(!defined$resultCache{$element}){
#在目标阵列中搜索目标
对于(我的$destIndex=0;$destIndex<@results;$destIndex++){
如果(grep(/$element/,@{$results[$destIndex]})>0){
$resultCache{$element}=$destIndex;
最后;
}
}
}
返回$resultCache{$element};
}
我的$srcCount=0;
#循环源阵列
对于我的$srcList(@$lol){
我的$destIndex;
#循环遍历数组的元素
对于(我的$srcElementIndex=0;$srcElementIndex<@$srcList;$srcElementIndex++){
$destIndex=elementInResults($srcList->[$srcElementIndex]);
如果(定义为$destIndex){
#元素存在于现有结果数组中,因此合并
打印“正在将源数组$srcCount合并到结果数组$destIndex中,匹配时间:“..$srcList->[$srcElementIndex]”“\n”;
#首先从src列表中删除重复的元素
拼接(@$srcList,$srcElementIndex,1);
#然后合并到dest列表中
推送(@{$results[$destIndex]},@$srcList);
最后;
}
$srcElementIndex++;
}
#若在现有结果中找不到列表中的元素,则将列表作为新元素添加到结果中
如果(!defined$destIndex),则推送(@results,$srcList);
$srcCount++;
}
映射{@$\=sort({$a$b}@$\}@results;
打印转储程序\@结果;

以下是一个单程解决方案,它使用数组引用实现了一点魔力

有关by和其他解决方案,请查看perlmonks:


我不明白你为什么决定不将
[1,2,3,5,10100]
[10,15,17]
合并。它们都包含10,这似乎正是它们应该合并的规则。27也不见了。因为我是个白痴-我会编辑以消除重叠-抱歉!你不能只通过一次;e、 g.未能在
[1]、[2,3]、[1,3]
@ysth上将所有内容合并为一个,那么
合并(合并($arr))如何?我认为您需要
执行{$count=@$arr;merge($arr)}直到$count==@$arr
@mpapec你可能会喜欢这个解决方案,我肯定他会喜欢,但现在我给他一个+1,因为我也喜欢!聪明-现在,如果你能推广它做密切匹配。。。(如5对4-6);)
#! /usr/bin/perl

use warnings;
use strict;
use Data::Dumper;

my $lol = [
    [ 1, 2, 3 ],
    [ 20, 30, 40, 50 ],
    [ 11, 15, 17 ],
    [ 20, 22, 25, 27 ],
    [ 1, 5, 10 ],
    [ 1, 100 ]
    ];

my @results = ();
my %resultCache;

sub elementInResults {
    my ($element) = $_[0];

    # return value for cache, or search if not in cache
    if (!defined $resultCache{$element} ) {
        # search for target in destination arrays
        for (my $destIndex = 0; $destIndex < @results;  $destIndex++) {
            if (grep (/$element/, @{$results[$destIndex]}) > 0 ) {
                $resultCache{$element} = $destIndex;
                last;
            }
        }
    }

    return $resultCache{$element};
}

my $srcCount=0;
# loop through  source arrays
for my $srcList (@$lol) {
    my $destIndex ;
    # loop through elements of array
    for (my $srcElementIndex=0; $srcElementIndex < @$srcList; $srcElementIndex++) {
        $destIndex = elementInResults($srcList->[$srcElementIndex]);
        if (defined $destIndex ) {
            # element exists in an existing result array so merge
            print "Merging source array $srcCount into result array $destIndex, match on:" . $srcList->[$srcElementIndex] . "\n";
            # remove the duplicate element from src list first
            splice(@$srcList,$srcElementIndex,1);
            # then merge into dest list
            push (@{$results[$destIndex]}, @$srcList);
            last;
        }
        $srcElementIndex++;
    }

    # if no elements in list found in existing results add list as new one to results
    push (@results, $srcList) if (!defined $destIndex ) ;

    $srcCount++;
}

map {@$_ = sort ({$a <=> $b} @$_)} @results;

print Dumper \@results;
use strict;
use warnings;

use List::MoreUtils qw(uniq);

my @data = map {[split]} <DATA>;

my %group = ();
for my $array (@data) {
    my @values = map {@$_} uniq map {$group{$_} || [$_]} @$array;
    @group{@values} = (\@values) x @values;
}
@data = uniq values %group;

# Resort to make things pretty
@$_ = sort {$a <=> $b} @$_ for @data;

use Data::Dump;
dd @data;

__DATA__
1 2 3
20 30 40 50
11 15 17
20 22 25 27
1 5 10
1 100
(
  [20, 22, 25, 27, 30, 40, 50],
  [11, 15, 17],
  [1, 2, 3, 5, 10, 100],
)