
Perl:Regex - 用字母表匹配值,regex,bash,perl,Regex,Bash,Perl,我编写了一个小的 Perl“hack”,用来把制表符分隔文件中一组列里的 1 替换为对应的字母。该文件如下所示: Chr Start End Name Score Strand Donor Acceptor Merged_Transcript Gencode Colon Heart Kidney Liver Lung Stomach chr10 100177483 100177931 . . - 1 1 1 1 1 0


Chr Start   End Name    Score   Strand  Donor   Acceptor    Merged_Transcript   Gencode Colon   Heart   Kidney  Liver   Lung    Stomach
chr10   100177483   100177931   .   .   -   1   1   1   1   1   0   1   1   0   0
chr10   100178014   100179801   .   .   -   1   1   1   1   1   1   1   1   1   0
chr10   100179915   100182125   .   .   -   1   1   1   1   1   1   1   0   1   0
chr10   100182270   100183359   .   .   -   1   1   1   1   0   0   1   0   1   0
chr10   100183644   100184069   .   .   -   1   1   1   1   0   0   1   0   1   0

# Tally which combinations of tissue columns (Colon .. Stomach) are flagged
# for the records in infile.txt that pass the intron/donor/acceptor filter.
# Each tissue column is represented by a letter (A = first tissue column,
# B = second, ...); the result is written to Outfile.txt as
# "letters<TAB>count", one line per combination.
perl -ne 'BEGIN {
              # Set up once, not on every input line.
              @alphabet          = ("A" .. "Z");
              $is_known_intron   = 0;
              $is_known_donor    = 1;
              $is_known_acceptor = 1;
          }
          next if /^\s*Chr\s/;              # skip the header row
          s/^\s+//;
          @d = split /\s+/, $_;
          next if @d < 16;                  # skip blank or truncated rows
          # Perl arrays are 0-indexed: the 7th column (Donor) is $d[6], etc.
          $known_donor    = $d[6];
          $known_acceptor = $d[7];
          $known_intron   = $d[9];
          @d_bool         = @d[10 .. 15];   # Colon .. Stomach flags
          if (    $known_intron   == $is_known_intron
              and $known_donor    == $is_known_donor
              and $known_acceptor == $is_known_acceptor) {
              # Start a fresh key for every record, then append the letter
              # of each tissue column whose flag is set.
              $k = "";
              for ($i = 0; $i < scalar @d_bool; $i++) {
                  $k .= $alphabet[$i] if $d_bool[$i];
              }
              $alphabet_ct{$k}++;
          }
          END {
              # Print the tally once, after the last input line.
              foreach $k (sort keys %alphabet_ct) {
                  print join("\t", $k, $alphabet_ct{$k}), "\n";
              }
          }' infile.txt \
   > Outfile.txt



use strict;
use warnings;

# Tally, for every record that passes the intron/donor/acceptor filter, which
# combination of tissue columns (Colon .. Stomach) is flagged.  Each tissue
# column is represented by a letter (A for the first, B for the second, ...),
# and one "letters<TAB>count" line is printed per combination seen.

my %alphabet_ct;
my @alphabet = ( "A" .. "Z" );

# Values the Gencode, Donor and Acceptor columns must have for a record
# to be counted.
my $is_known_intron   = 0;
my $is_known_donor    = 1;
my $is_known_acceptor = 1;

while ( my $line = <DATA> ) {
    # remove whitespace at the beginning of the line, if there is any
    $line =~ s/^\s+//;

    # only process data records: this skips the header row (which starts
    # with "Chr"), blank lines and anything else that is not a chr* record
    next unless $line =~ /^chr/;

    my @d = split /\s+/, $line;

    # a record needs all 16 columns; skip truncated rows instead of warning
    next if @d < 16;

    # the range operator in perl is .. (not "-")
    my @d_bool         = @d[ 10 .. 15 ];
    my $known_intron   = $d[9];
    my $known_acceptor = $d[7];
    my $known_donor    = $d[6];
    my $k              = "";

    # this expression is false for all the data in the sample below, as
    # every record has Gencode == 1 while $is_known_intron is set to 0
    if (    ( $known_intron == $is_known_intron )
        and ( $known_donor == $is_known_donor )
        and ( $known_acceptor == $is_known_acceptor ) )
    {
        # append the letter of each tissue column whose flag is set
        for my $i ( 0 .. $#d_bool ) {
            $k .= $alphabet[$i] if $d_bool[$i];
        }

        # count one more record with this combination of tissues
        $alphabet_ct{$k}++;
    }
}

foreach my $k ( sort keys %alphabet_ct ) {
    print join( "\t", $k, $alphabet_ct{$k} ) . "\n";
}

__DATA__

Chr Start   End Name    Score   Strand  Donor   Acceptor    Merged_Transcript   Gencode Colon   Heart   Kidney  Liver   Lung    Stomach
chr10   100177483   100177931   .   .   -   1   1   1   1   1   0   1   1   0   0
chr10   100178014   100179801   .   .   -   1   1   1   1   1   1   1   1   1   0
chr10   100179915   100182125   .   .   -   1   1   1   1   1   1   1   0   1   0
chr10   100182270   100183359   .   .   -   1   1   1   1   0   0   1   0   1   0
chr10   100183644   100184069   .   .   -   1   1   1   1   0   0   1   0   1   0

请在脚本开头加上 use strict; 和 use warnings;,它们会为您提供出错的线索。您知道 Perl 中的数组是从 0 开始索引的吗?所有对 @d 的下标引用都差了 1(例如第 10 列应写作 $d[9])。
use strict;
use warnings;

# Tally, for every record that passes the intron/donor/acceptor filter, which
# combination of tissue columns (Colon .. Stomach) is flagged.  Each tissue
# column is represented by a letter (A for the first, B for the second, ...),
# and one "letters<TAB>count" line is printed per combination seen.

my %alphabet_ct;
my @alphabet = ( "A" .. "Z" );

# Values the Gencode, Donor and Acceptor columns must have for a record
# to be counted.
my $is_known_intron   = 0;
my $is_known_donor    = 1;
my $is_known_acceptor = 1;

while ( my $line = <DATA> ) {
    # remove whitespace at the beginning of the line, if there is any
    $line =~ s/^\s+//;

    # only process data records: this skips the header row (which starts
    # with "Chr"), blank lines and anything else that is not a chr* record
    next unless $line =~ /^chr/;

    my @d = split /\s+/, $line;

    # a record needs all 16 columns; skip truncated rows instead of warning
    next if @d < 16;

    # the range operator in perl is .. (not "-")
    my @d_bool         = @d[ 10 .. 15 ];
    my $known_intron   = $d[9];
    my $known_acceptor = $d[7];
    my $known_donor    = $d[6];
    my $k              = "";

    # this expression is false for all the data in the sample below, as
    # every record has Gencode == 1 while $is_known_intron is set to 0
    if (    ( $known_intron == $is_known_intron )
        and ( $known_donor == $is_known_donor )
        and ( $known_acceptor == $is_known_acceptor ) )
    {
        # append the letter of each tissue column whose flag is set
        for my $i ( 0 .. $#d_bool ) {
            $k .= $alphabet[$i] if $d_bool[$i];
        }

        # count one more record with this combination of tissues
        $alphabet_ct{$k}++;
    }
}

foreach my $k ( sort keys %alphabet_ct ) {
    print join( "\t", $k, $alphabet_ct{$k} ) . "\n";
}

__DATA__

Chr Start   End Name    Score   Strand  Donor   Acceptor    Merged_Transcript   Gencode Colon   Heart   Kidney  Liver   Lung    Stomach
chr10   100177483   100177931   .   .   -   1   1   1   1   1   0   1   1   0   0
chr10   100178014   100179801   .   .   -   1   1   1   1   1   1   1   1   1   0
chr10   100179915   100182125   .   .   -   1   1   1   1   1   1   1   0   1   0
chr10   100182270   100183359   .   .   -   1   1   1   1   0   0   1   0   1   0
chr10   100183644   100184069   .   .   -   1   1   1   1   0   0   1   0   1   0
ABCE    1
CE  2