Warning: file_get_contents(/data/phpspider/zhask/data//catemap/8/perl/9.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Regex 如果文件中项目的顺序发生更改,则正则表达式不匹配_Regex_Perl - Fatal编程技术网

Regex 如果文件中项目的顺序发生更改,则正则表达式不匹配

Regex 如果文件中项目的顺序发生更改,则正则表达式不匹配,regex,perl,Regex,Perl,这是我第一次尝试Perl,所以我知道这段代码很难看。有些是因为不知道我在做什么,有些是因为解决各种问题。我试图做的是在一个文件(samplefile.txt)中搜索各种信息(9 parse_updates函数),除非顺序发生变化,否则效果很好。例如,如果一个示例文件在僵尸网络定义之前有证书包,那么它将无法找到证书包信息。 我希望每个函数都开始搜索samplefile“fresh”,但事实似乎并非如此,我也不知道为什么。 不包括一个示例文件,因为代码已经足够长了,我认为问题在于我的函数逻辑 us

这是我第一次尝试Perl,所以我知道这段代码很难看。有些是因为不知道我在做什么,有些是因为解决各种问题。我试图做的是在一个文件(samplefile.txt)中搜索各种信息(9 parse_updates函数),除非顺序发生变化,否则效果很好。例如,如果一个示例文件在僵尸网络定义之前有证书包,那么它将无法找到证书包信息。 我希望每个函数都开始搜索samplefile“fresh”,但事实似乎并非如此,我也不知道为什么。 不包括一个示例文件,因为代码已经足够长了,我认为问题在于我的函数逻辑


use strict;
use warnings;
use diagnostics;
use File::Slurp;
# File-level state shared with the parse_updatesN subs defined further down.
# NOTE(review): @autoupdate and $autoupdate are not referenced anywhere in the
# visible script.
my @autoupdate;
my $autoupdate;

# The $..._regex scalars declared in this section are never assigned at file
# level; the subs declare their own `my` pattern variables instead. The
# @..._updates arrays are what the subs actually fill.
my $av_regex;
my @av_updates;

# NOTE(review): spelled "avdev" here, while parse_updates2 declares
# $avdef_regex, so this scalar is unused in the visible code.
my $avdev_regex;
my @avdef_updates;

my $ipsatt_regex;
my @ipsatt_updates;

my $attdef_regex;
my @attdef_updates;


my $ipsmal_regex;
my @ipsmal_updates;

my $flowav_regex;
my @flowav_updates;

my $botnet_regex;
my @botnet_updates;

# NOTE(review): no visible sub reads or writes $appdef_regex / @appdef_updates.
my $appdef_regex;
my @appdef_updates;

my $ipgeo_regex;
my @ipgeo_updates;

my $certbun_regex;
my @certbun_updates;

# Slurp buffers: parse_updatesN reads samplefile.txt into $strN.
my $str1;
my $str2;
my $str3;
my $str4;
my $str5;
my $str6;
my $str7;
my $str8;
my $str9;


 
# Run each section parser once, in this fixed order.
parse_updates1(); #AV Engine
parse_updates2(); #Virus Defs
parse_updates3(); #IPS Attack Engine
parse_updates4(); #Attack Defs
parse_updates5(); #IPS Mal URL DB
parse_updates6(); #Flow virus Defs
parse_updates7(); #Botnet Defs
parse_updates8(); #IP Geo DB
parse_updates9(); #Cert Bundle


# parse_updates1 -- report the "AV Engine" block of samplefile.txt.
#
# Prints the section banner, slurps samplefile.txt into the file-level $str1
# and runs the AV Engine pattern against it. On a match the twelve capture
# groups are appended to the file-level @av_updates, which is then chomped.
# On a miss "Got Nothing!" is printed and @av_updates is replaced by twelve
# 'notfound' placeholders. Either way the array is printed one element per
# line. Takes no arguments.
sub parse_updates1 {
    print "\nTHIS IS AV Engine Section!!\n\n";
    read_file('samplefile.txt', buf_ref => \$str1);

    my $av_regex = qr/(AV Engine)(.*\n)*?(Version:)(.*\n)*?(Contract Expiry Date:)(.*\n)*?(Last Updated using )(.*\n)*?(Last Update Attempt: )(.*\n)*?(Result: )(.*\n).*/p;

    # Scalar-context match; $1..$12 stay readable in the branch below.
    my $matched = $str1 =~ /$av_regex/g;

    if (!$matched) {
        print "\n\nGot Nothing!\n\n";
        @av_updates = ('notfound') x 12;
    }
    else {
        # Odd groups are the literal labels; even groups hold the last line
        # consumed after each label.
        push @av_updates, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12;
        chomp @av_updates;
    }

    foreach my $field (@av_updates) {
        print "$field\n";
    }
}
# parse_updates2 -- report the section announced as "Virus Definitions".
#
# Slurps samplefile.txt into the file-level $str2, prints the banner and runs
# the pattern. On a match the twelve capture groups are appended to the
# file-level @avdef_updates, which is then chomped. On a miss "Got Nothing!"
# is printed and @avdef_updates is replaced by twelve 'notfound' placeholders.
# Either way the array is printed one element per line. Takes no arguments.
#
# NOTE(review): the banner and the array name say Virus Definitions, but the
# pattern starts at the literal "Application Definitions" -- confirm which
# section is intended.
sub parse_updates2 {
    read_file('samplefile.txt', buf_ref => \$str2);

    print "\nTHIS IS Virus Definitions Section!!\n\n";

    my $avdef_regex = qr/(Application Definitions)(.*\n)*?(Version:)(.*\n)*?(Contract Expiry Date:)(.*\n)*?(Last Updated using )(.*\n)*?(Last Update Attempt: )(.*\n)*?(Result: )(.*\n).*/p;

    # Scalar-context match; $1..$12 stay readable in the branch below.
    my $matched = $str2 =~ /$avdef_regex/g;

    if (!$matched) {
        print "\n\nGot Nothing!\n\n";
        @avdef_updates = ('notfound') x 12;
    }
    else {
        # Odd groups are the literal labels; even groups hold the last line
        # consumed after each label.
        push @avdef_updates, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12;
        chomp @avdef_updates;
    }

    foreach my $field (@avdef_updates) {
        print "$field\n";
    }
}
# parse_updates3 -- report the "IPS Attack Engine" block of samplefile.txt.
#
# Slurps samplefile.txt into the file-level $str3, prints the banner and runs
# the pattern. On a match the twelve capture groups are appended to the
# file-level @ipsatt_updates, which is then chomped. On a miss "Got Nothing!"
# is printed and @ipsatt_updates is replaced by twelve 'notfound'
# placeholders. Either way the array is printed one element per line.
# Takes no arguments.
sub parse_updates3 {
    read_file('samplefile.txt', buf_ref => \$str3);

    print "\nTHIS IS IPS Attack Engine Section!!\n\n";

    my $ipsatt_regex = qr/(IPS Attack Engine)(.*\n)*?(Version:)(.*\n)*?(Contract Expiry Date:)(.*\n)*?(Last Updated using )(.*\n)*?(Last Update Attempt: )(.*\n)*?(Result: )(.*\n).*/p;

    # Scalar-context match; $1..$12 stay readable in the branch below.
    my $matched = $str3 =~ /$ipsatt_regex/g;

    if (!$matched) {
        print "\n\nGot Nothing!\n\n";
        @ipsatt_updates = ('notfound') x 12;
    }
    else {
        # Odd groups are the literal labels; even groups hold the last line
        # consumed after each label.
        push @ipsatt_updates, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12;
        chomp @ipsatt_updates;
    }

    foreach my $field (@ipsatt_updates) {
        print "$field\n";
    }
}
# parse_updates4 -- report the "Attack Definitions" block of samplefile.txt.
#
# Slurps samplefile.txt into the file-level $str4, prints the banner and runs
# the pattern. On a match the twelve capture groups are appended to the
# file-level @attdef_updates, which is then chomped. On a miss "Got Nothing!"
# is printed and @attdef_updates is replaced by twelve 'notfound'
# placeholders. Either way the array is printed one element per line.
# Takes no arguments.
sub parse_updates4 {
    read_file('samplefile.txt', buf_ref => \$str4);

    print "\nTHIS IS Attack Definitions Section!!\n\n";

    my $attdef_regex = qr/(Attack Definitions)(.*\n)*?(Version:)(.*\n)*?(Contract Expiry Date:)(.*\n)*?(Last Updated using )(.*\n)*?(Last Update Attempt: )(.*\n)*?(Result: )(.*\n).*/p;

    # Scalar-context match; $1..$12 stay readable in the branch below.
    my $matched = $str4 =~ /$attdef_regex/g;

    if (!$matched) {
        print "\n\nGot Nothing!\n\n";
        @attdef_updates = ('notfound') x 12;
    }
    else {
        # Odd groups are the literal labels; even groups hold the last line
        # consumed after each label.
        push @attdef_updates, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12;
        chomp @attdef_updates;
    }

    foreach my $field (@attdef_updates) {
        print "$field\n";
    }
}
# parse_updates5 -- report the "IPS Malicious URL Database" block of
# samplefile.txt.
#
# Slurps samplefile.txt into the file-level $str5, prints the banner and runs
# the pattern. On a match the twelve capture groups are appended to the
# file-level @ipsmal_updates, which is then chomped. On a miss "Got Nothing!"
# is printed and @ipsmal_updates is replaced by twelve 'notfound'
# placeholders. Either way @ipsmal_updates is printed one element per line.
# Takes no arguments.
#
# The miss branch used to assign and print @ipsatt_updates, which overwrote
# the IPS Attack Engine results and left this section's array empty.
sub parse_updates5 {
    read_file('samplefile.txt', buf_ref => \$str5);

    print "\nTHIS IS IPS Malicious URL Database Section!!\n\n";

    my $ipsmal_regex = qr/(IPS Malicious URL Database)(.*\n)*?(Version:)(.*\n)*?(Contract Expiry Date:)(.*\n)*?(Last Updated using )(.*\n)*?(Last Update Attempt: )(.*\n)*?(Result: )(.*\n).*/p;

    if ( $str5 =~ /$ipsmal_regex/g ) {
        # Odd groups are the literal labels; even groups hold the last line
        # consumed after each label.
        push @ipsmal_updates, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12;
        chomp @ipsmal_updates;
    }
    else {
        print "\n\nGot Nothing!\n\n";
        # Fill this section's own array, not the IPS Attack Engine one.
        @ipsmal_updates = ('notfound') x 12;
    }

    print "$_\n" for @ipsmal_updates;
}
# parse_updates6 -- report the "Flow-based Virus Definitions" block of
# samplefile.txt.
#
# Slurps samplefile.txt into the file-level $str6, prints the banner and runs
# the pattern. On a match the twelve capture groups are appended to the
# file-level @flowav_updates, which is then chomped. On a miss "Got Nothing!"
# is printed and @flowav_updates is replaced by twelve 'notfound'
# placeholders. Either way the array is printed one element per line.
# Takes no arguments.
sub parse_updates6 {
    read_file('samplefile.txt', buf_ref => \$str6);

    print "\nTHIS IS Flow-Based Virus Definitions Section!!\n\n";

    my $flowav_regex = qr/(Flow-based Virus Definitions)(.*\n)*?(Version:)(.*\n)*?(Contract Expiry Date:)(.*\n)*?(Last Updated using )(.*\n)*?(Last Update Attempt: )(.*\n)*?(Result: )(.*\n).*/p;

    # Scalar-context match; $1..$12 stay readable in the branch below.
    my $matched = $str6 =~ /$flowav_regex/g;

    if (!$matched) {
        print "\n\nGot Nothing!\n\n";
        @flowav_updates = ('notfound') x 12;
    }
    else {
        # Odd groups are the literal labels; even groups hold the last line
        # consumed after each label.
        push @flowav_updates, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12;
        chomp @flowav_updates;
    }

    foreach my $field (@flowav_updates) {
        print "$field\n";
    }
}
# parse_updates7 -- report the "Botnet Definitions" block of samplefile.txt.
#
# Slurps samplefile.txt into the file-level $str7, prints the banner and runs
# the pattern. On a match the twelve capture groups are appended to the
# file-level @botnet_updates, which is then chomped. On a miss "Got Nothing!"
# is printed and @botnet_updates is replaced by twelve 'notfound'
# placeholders. Either way the array is printed one element per line.
# Takes no arguments.
sub parse_updates7 {
    read_file('samplefile.txt', buf_ref => \$str7);

    print "\nTHIS IS Botnet Definitions Section!!\n\n";

    my $botnet_regex = qr/(Botnet Definitions)(.*\n)*?(Version:)(.*\n)*?(Contract Expiry Date:)(.*\n)*?(Last Updated using )(.*\n)*?(Last Update Attempt: )(.*\n)*?(Result: )(.*\n).*/p;

    # Scalar-context match; $1..$12 stay readable in the branch below.
    my $matched = $str7 =~ /$botnet_regex/g;

    if (!$matched) {
        print "\n\nGot Nothing!\n\n";
        @botnet_updates = ('notfound') x 12;
    }
    else {
        # Odd groups are the literal labels; even groups hold the last line
        # consumed after each label.
        push @botnet_updates, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12;
        chomp @botnet_updates;
    }

    foreach my $field (@botnet_updates) {
        print "$field\n";
    }
}
# parse_updates8 -- report the "IP Geography DB" block of samplefile.txt.
#
# Slurps samplefile.txt into the file-level $str8, prints the banner and runs
# the pattern. On a match the twelve capture groups are appended to the
# file-level @ipgeo_updates, which is then chomped. On a miss "Got Nothing!"
# is printed and @ipgeo_updates is replaced by twelve 'notfound'
# placeholders. Either way the array is printed one element per line.
# Takes no arguments.
sub parse_updates8 {
    read_file('samplefile.txt', buf_ref => \$str8);

    print "\nTHIS IS IP geography DB Section!!\n\n";

    my $ipgeo_regex = qr/(IP Geography DB)(.*\n)*?(Version:)(.*\n)*?(Contract Expiry Date:)(.*\n)*?(Last Updated using )(.*\n)*?(Last Update Attempt: )(.*\n)*?(Result: )(.*\n).*/p;

    # Scalar-context match; $1..$12 stay readable in the branch below.
    my $matched = $str8 =~ /$ipgeo_regex/g;

    if (!$matched) {
        print "\n\nGot Nothing!\n\n";
        @ipgeo_updates = ('notfound') x 12;
    }
    else {
        # Odd groups are the literal labels; even groups hold the last line
        # consumed after each label.
        push @ipgeo_updates, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12;
        chomp @ipgeo_updates;
    }

    foreach my $field (@ipgeo_updates) {
        print "$field\n";
    }
}
# parse_updates9 -- report the "Certificate Bundle" block of samplefile.txt.
#
# Slurps samplefile.txt into the file-level $str9, prints the banner and runs
# the pattern. On a match the twelve capture groups are appended to the
# file-level @certbun_updates, which is then chomped. On a miss "Got Nothing!"
# is printed and @certbun_updates is replaced by twelve 'notfound'
# placeholders. Either way the array is printed one element per line.
# Takes no arguments.
sub parse_updates9 {
    read_file('samplefile.txt', buf_ref => \$str9);

    print "\nTHIS IS Certificate Bundle Section!!\n\n";

    my $certbun_regex = qr/(Certificate Bundle)(.*\n)*?(Version:)(.*\n)*?(Contract Expiry Date:)(.*\n)*?(Last Updated using )(.*\n)*?(Last Update Attempt: )(.*\n)*?(Result: )(.*\n).*/p;

    # Scalar-context match; $1..$12 stay readable in the branch below.
    my $matched = $str9 =~ /$certbun_regex/g;

    if (!$matched) {
        print "\n\nGot Nothing!\n\n";
        @certbun_updates = ('notfound') x 12;
    }
    else {
        # Odd groups are the literal labels; even groups hold the last line
        # consumed after each label.
        push @certbun_updates, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12;
        chomp @certbun_updates;
    }

    foreach my $field (@certbun_updates) {
        print "$field\n";
    }
}








在没有看到一些数据的情况下,这个问题无法得到明确的回答,但我还是想先重写这个程序。重写本身也可能解决问题

没有理由写这么多函数;它们做的都是同一件事。也不需要这一大片变量;哈希很适合用来保存一组有名字的东西。我至少保留了原代码的一些选择,例如总体流程,以及对
File::Slurp 的使用

use warnings;
use strict;
use feature 'say';    

use Data::Dump qw(dd);
use File::Slurp;

# The input file name is the first command-line argument.
my $fname = shift @ARGV;
die "Usage: $0 file\n" if not defined $fname;

# Section code => { name => label, re => pattern }. The loop below adds a
# 'captures' entry to every section.
my %update = (
    av    => { name => q(AV Engine Section),         re => qr/pattern-for-av/ },
    avdev => { name => q(Virus Definitions Section), re => qr/pattern-for-avdev/ },
    # ...
);

# Read the file once; every section is matched against this same string.
my $file_content = read_file($fname);

for my $code (sort keys %update) {
    my $section = $update{$code};
    say "This is $section->{name}";
    $section->{captures} = parse_update($file_content, $section->{re});
}
dd \%update;

# parse_update($file_content, $re [, $n_fields])
#
# Matches $re once against $file_content and returns an array reference with
# the captured groups, trailing newlines removed. If the pattern does not
# match, prints "Got nothing!" and returns $n_fields 'notfound' placeholders
# instead; $n_fields defaults to 12. Every returned element is also printed on
# its own line, with groups that did not take part in the match shown as
# empty lines. The sub uses only its arguments.
sub parse_update {
    my ($file_content, $re, $n_fields) = @_;
    $n_fields //= 12;

    # List-context match without /g: the groups of the first match, or an
    # empty list when nothing matches.
    my @captures = $file_content =~ /$re/;

    if (@captures) {
        # Optional groups come back undef; leave those untouched.
        for my $capture (@captures) {
            chomp $capture if defined $capture;
        }
    }
    else {
        say "Got nothing!";
        @captures = ('notfound') x $n_fields;
    }

    for my $capture (@captures) {
        say( defined($capture) ? $capture : '' );
    }

    return \@captures;
}
正则表达式模式和节名都在哈希
%update
中,然后添加结果(捕获)。这种数据组织的选择有点随意,因为我不知道上下文

文件只读取一次,其全部内容在每次调用时都会被复制给子程序(sub)。如果需要,请进行调整。例如,如果文件很大,还有其他方法可以让 sub 使用这些数据

问题中那个奇怪的
if(/…/g)
,毫无意义,很容易出错——也可能导致问题中所述的问题。†当在标量上下文中使用时,它服务于复杂的需求,而不是用于单独的
if
语句

成功匹配(从而捕获)的条件取自问题。sub中的代码可以以多种其他方式组织,从更紧凑到更精细

请注意,sub不直接从较高的范围使用任何内容;它所需要的一切都显式地传递给它,并返回其结果。这是非常重要的,因此要避免不同的代码组件(这里是sub及其调用者)的耦合;它们甚至可以跨不同的编译单元驻留

这次重写很可能弥补了错误并解决了问题;或者它可能没有。如果我们能够看到数据样本,那么就有可能进行更有针对性的故障排除

上面的代码已经用一个虚构的示例文件和相应的正则表达式模式测试过


†虽然我需要查看一些数据才能确定是什么导致了所报告的行为,但一个很可能的原因无疑是使用了
if(/…/g)
。在标量上下文中,/g 修饰符会记住上一次匹配在字符串中结束的位置;下一次对同一字符串进行 /g 匹配时,会从该位置而不是从字符串开头开始查找

一个简单的例子

use warnings; use strict; use feature 'say';

my $s = q(one simple string); 

# First scalar-context /g match: captures "one" and leaves pos($s) just after it.
if ($s =~ /(\w+)/g) { say $1 }; 
# Same string, /g again: the search resumes at pos($s), not at offset 0, so
# this captures "simple".
if ($s =~ /(\w+)/g) { say $1 }; 
# Offset just past "simple".
say pos($s);
它会输出

one simple 10 这会输出

one two 一 二 就这样做了;没有了。这是因为引擎在第二次匹配中通过了单词
two
,因此在
for
循环的下一次迭代中没有匹配的内容

有关上述示例及其上下文的更多信息,请参见和

第二个例子与问题中给出的例子特别相似


上面的一些行为可以通过锚和其他修饰符进行修改,
/g
当然非常有用——但是需要知道它的作用。

if (/…/g)
没有意义。去掉
g
?否则,太多太多的方式看。对于一个合理的问题,请把问题的范围缩小太多。不必要的是:你所有的函数都是一样的,完全一样(我可以通过快速浏览所有这些来判断),所以你需要一个带模式的数组,一个函数。还有很多其他的细节,但首先——甚至没有数据样本,任何正则表达式都可能是问题所在。我建议修改这个问题。我投票赞成“需要调试细节”。请不要误解为“需要更多代码”。相反,请提供一个答案。要清楚,无意义的
g
可能是您的问题的原因。但是我只看了你的代码5秒钟。正如我在回答中所暗示的,我也发现
if(/…/g)
可能是引起麻烦的原因(而不是对你的指责!)。现在,我在回答的末尾添加了一个部分,详细说明了可能发生的情况。但是,如果没有看到一些数据,就不能说任何确定的话。 one two