如何使用awk解析多个文件并将输出放在生成的动态文件中?

如何使用awk解析多个文件并将输出放在生成的动态文件中?,awk,Awk,这是我的awk脚本filtered.awk,它可以很好地用于单个输入文件 #Field Seperator BEGIN { FS="[,:\"]" } #Searching and Storing in an Array /searchKeyword/ {a[$5]=a[$5]OFS$6} #Looping on Array END { for (k in a) { print FILENAME, k, gsub(OFS,OFS,a[k]) > (

这是我的awk脚本
filtered.awk
,它可以很好地用于单个输入文件

# Field separator: split every record on a comma, a colon or a double quote.
BEGIN { FS="[,:\"]" }

# For each record containing "searchKeyword", append OFS and field 6 to the
# string kept under key $5; one OFS is added per matching record.
/searchKeyword/ {a[$5]=a[$5]OFS$6}

# END runs once, after all input has been read. For every key, gsub() returns
# how many OFS occurrences it replaced in the stored string, and that number is
# printed as the key's count into the file named "output_" followed by FILENAME.
END {
     for (k in a)
      {
        print FILENAME, k, gsub(OFS,OFS,a[k]) > ("output_" FILENAME)
      }
}
样本输入-

cat input1.txt
"YY/XX","searchKeyword-ZZZZ.abc:06","200OK",64594889937362
"YY/XX","searchKeyword-ZZZZ.abc:13","200OK",64594860937362
"YY/XX","searchKeyword-ZZZZ.abc:06","200OK",64594822937362
"YY/XX","searchKeyword-ZZZZ.abc:06","200OK",64594823937362
"YY/XX","searchKeyword-ZZZZ.pqr:13","200OK",64594890937362
"YY/XX","searchKeyword-ZZZZ.pqr:08","200OK",64594877937362
"YY/XX","searchKeyword-ZZZZ.pqr:13","200OK",64594860937362
"YY/XX","searchKeyword-ZZZZ.pqr:13","200OK",64594870937362
"YY/XX","searchKeyword-ZZZZ.cde:12","200OK",64594803937362
"YY/XX","searchKeyword-ZZZZ.cde:00","200OK",64594870937362
"YY/XX","searchKeyword-ZZZZ.cde:00","200OK",64594860937362
"YY/XX","searchKeyword-ZZZZ.cde:08","200OK",64594825193736
第二个输入文件-

cat input2.txt
"XXX/YYY","searchKeyword-YYYYY.pqr:99910","200OK",439865231,"4334373212"
"XXX/YYY","searchKeyword-YYYYY.cde:99904","200OK",439868231,"4334953212"
"XXX/YYY","searchKeyword-YYYYY.mno:99909","200OK",439827231,"4334178212"
"XXX/YYY","searchKeyword-YYYYY.pqr:99911","200OK",439874231,"4334353212"
"XXX/YYY","searchKeyword-YYYYY.cde:99900","200OK",439893231,"4334130212"
"XXX/YYY","searchKeyword-YYYYY.mno:99910","200OK",439886231,"4334868212"
"XXX/YYY","searchKeyword-YYYYY.pqr:99905","200OK",439850231,"4334495212"
"XXX/YYY","searchKeyword-YYYYY.cde:99905","200OK",439878231,"4334131212"
"XXX/YYY","searchKeyword-YYYYY.mno:99910","200OK",439871231,"4334895212"
"XXX/YYY","searchKeyword-YYYYY.pqr:99910","200OK",439874231,"4334353212"
"XXX/YYY","searchKeyword-YYYYY.cde:99908","200OK",439848231,"4334823212"
"XXX/YYY","searchKeyword-YYYYY.mno:99914","200OK",439820231,"4334177212"
"XXX/YYY","searchKeyword-YYYYY.pqr:99910","200OK",439882231,"4334579212"
"XXX/YYY","searchKeyword-YYYYY.cde:99903","200OK",439840231,"4334966212"
"XXX/YYY","searchKeyword-YYYYY.mno:99908","200OK",439894231,"4334365212"
第三输入文件

cat input3.txt
"XXX/YYY","searchKeyword-YYYYY.cde:99900","200OK",439893231,"4334130212"
"XXX/YYY","searchKeyword-YYYYY.mno:99910","200OK",439886231,"4334868212"
"XXX/YYY","searchKeyword-YYYYY.pqr:99905","200OK",439850231,"4334495212"
"XXX/YYY","searchKeyword-YYYYY.cde:99905","200OK",439878231,"4334131212"
"XXX/YYY","searchKeyword-YYYYY.mno:99910","200OK",439871231,"4334895212"
"XXX/YYY","searchKeyword-YYYYY.pqr:99910","200OK",439874231,"4334353212"
"PPP/QQQ","searchKeyword-ZZZZ.abc:06","200OK",64594822937362
"PPP/QQQ","searchKeyword-ZZZZ.abc:06","200OK",64594823937362
"PPP/QQQ","searchKeyword-ZZZZ.pqr:13","200OK",64594890937362
"PPP/QQQ","searchKeyword-ZZZZ.pqr:08","200OK",64594877937362
"PPP/QQQ","searchKeyword-ZZZZ.pqr:13","200OK",64594860937362
"PPP/QQQ","searchKeyword-ZZZZ.pqr:13","200OK",64594870937362
"PPP/QQQ","searchKeyword-ZZZZ.cde:12","200OK",64594803937362
"PPP/QQQ","searchKeyword-ZZZZ.cde:00","200OK",64594870937362
我像下面这样传递输入文件,并在
output_input3.txt
文件中获得输出

awk -f filtered.awk input*
cat output_input3.txt
input3.txt searchKeyword-ZZZZ.cde 6
input3.txt searchKeyword-YYYYY.cde 7
input3.txt searchKeyword-ZZZZ.pqr 8
input3.txt searchKeyword-YYYYY.pqr 7
input3.txt searchKeyword-ZZZZ.abc 6
input3.txt searchKeyword-YYYYY.mno 7
看起来它根本没有处理前两个文件

我希望在动态生成的文件中输出,如下所示-

==> output_input1.txt <==
input1.txt searchKeyword-ZZZZ.cde 4
input1.txt searchKeyword-ZZZZ.pqr 4
input1.txt searchKeyword-ZZZZ.abc 4

==> output_input2.txt <==
input2.txt searchKeyword-YYYYY.cde 5
input2.txt searchKeyword-YYYYY.pqr 5
input2.txt searchKeyword-YYYYY.mno 5

==> output_input3.txt <==
input3.txt searchKeyword-ZZZZ.cde 2
input3.txt searchKeyword-YYYYY.cde 2
input3.txt searchKeyword-ZZZZ.pqr 4
input3.txt searchKeyword-YYYYY.pqr 2
input3.txt searchKeyword-ZZZZ.abc 2
input3.txt searchKeyword-YYYYY.mno 2
注意:我正在mac上使用awk(awk版本20070501)并尝试使用ENDFILE,我认为mac上的awk中不存在ENDFILE。

END
只能看到
文件名的最后一个实例。如果您正在使用GNU awk,请尝试将
END
替换为
ENDFILE
,看看这是否是您想要的(您可能需要
delete a
,并且可能还需要添加
close()
)。使用GNU awk(由于
ENDFILE
):

结果:

$ cat output_input1 
input1 searchKeyword-ZZZZ.abc 4
input1 searchKeyword-ZZZZ.cde 4
input1 searchKeyword-ZZZZ.pqr 4
$ cat output_input2
input2 searchKeyword-YYYYY.mno 5
input2 searchKeyword-YYYYY.cde 5
input2 searchKeyword-YYYYY.pqr 5
$ cat output_input3
input3 searchKeyword-ZZZZ.abc 2
input3 searchKeyword-YYYYY.mno 2
input3 searchKeyword-ZZZZ.cde 2
input3 searchKeyword-ZZZZ.pqr 4
input3 searchKeyword-YYYYY.pqr 2
input3 searchKeyword-YYYYY.cde 2
如果没有GNU awk和
ENDFILE
可用,则需要在
FNR==1
和
END
两个块中处理
FILENAME
的切换。当然,您可以(也应该)创建
函数
并在上述两个块中调用它们,但为了突出重点,这里直接把逻辑展开:

# Count, per input file, how many records carry each searchKeyword key and
# write the counts to "output_<input file name>". Works without gawk's ENDFILE.

# Field separator: split every record on a comma, a colon or a double quote.
BEGIN { FS="[,:\"]" }

# First record of every input file: write out what was collected for the
# PREVIOUS file, then start collecting for the new one.
FNR==1 {
    if(filename!="") {                                  # no file before the first
        out="output_" filename                          # using previous filename
        for (k in a)
        {
            print filename, k, a[k] > out
        }
        delete a                                        # start the next file empty
        close(out)                                      # release the descriptor
    }
    filename=FILENAME                                   # remember filename
}

# Count matching records per key ($5). A plain counter does not depend on the
# value of OFS or on the contents of $6.
/searchKeyword/ {a[$5]++}

# After the last record: write out the counts of the last file that had data.
# The remembered `filename` is used instead of FILENAME because FNR==1 never
# fires for an empty file, so FILENAME may name a trailing empty file that the
# counts in a[] do not belong to.
END {
     out="output_" filename
     for (k in a)
     {
         print filename, k, a[k] > out
     }
     delete a                                        # good habit but more for
     close(out)                                      # symmetry
}
更新:根据评论中的要求进行更新。对不起,我第一次完全错过了这一部分

# Count, per input file, how many records carry each searchKeyword key and
# write every key to "output_<input file name>_<suffix>.txt", where <suffix>
# is the text after the last "." of the key (abc, cde, ...).

# Field separator: split every record on a comma, a colon or a double quote.
BEGIN { FS="[,:\"]" }

# Write the counts held in the global array a[] for input file `fname`, then
# empty a[]. Names after the extra spaces in the parameter list are locals.
# The global array started[] remembers which output files this run has
# already written, so each file is truncated on first use and appended to
# afterwards; rerunning the script therefore does not accumulate old lines.
function write_counts(fname,    k, n, parts, out) {
    for (k in a)
    {
        n=split(k,parts,".")                          # last piece is the suffix
        out="output_" fname "_" parts[n] ".txt"       # construct the filename
        if (out in started)
            print fname, k, a[k] >> out               # later key, same file
        else {
            print fname, k, a[k] > out                # first write: truncate
            started[out]=1
        }
        close(out)                                    # spare the fds
    }
    delete a                                          # empty env
}

# First record of every input file: flush the previous file's counts.
FNR==1 {
    if(filename!="")                                  # no file before the first
        write_counts(filename)
    filename=FILENAME                                 # remember filename
}

# Count matching records per key ($5).
/searchKeyword/ {a[$5]++}

# After the last record: flush the counts of the last file that had data.
END {
    write_counts(filename)
}
END
只能看到
FILENAME
的最后一个实例。如果您正在使用GNU awk,请尝试用
ENDFILE
替换
END
,看看这是否是您想要的(您可能需要
delete a
,并且可能还需要添加
close()
)。使用GNU awk(由于
ENDFILE
):

结果:

$ cat output_input1 
input1 searchKeyword-ZZZZ.abc 4
input1 searchKeyword-ZZZZ.cde 4
input1 searchKeyword-ZZZZ.pqr 4
$ cat output_input2
input2 searchKeyword-YYYYY.mno 5
input2 searchKeyword-YYYYY.cde 5
input2 searchKeyword-YYYYY.pqr 5
$ cat output_input3
input3 searchKeyword-ZZZZ.abc 2
input3 searchKeyword-YYYYY.mno 2
input3 searchKeyword-ZZZZ.cde 2
input3 searchKeyword-ZZZZ.pqr 4
input3 searchKeyword-YYYYY.pqr 2
input3 searchKeyword-YYYYY.cde 2
如果没有GNU awk和
ENDFILE
可用,则需要在
FNR==1
和
END
两个块中处理
FILENAME
的切换。当然,您可以(也应该)创建
函数
并在上述两个块中调用它们,但为了突出重点,这里直接把逻辑展开:

# Count, per input file, how many records carry each searchKeyword key and
# write the counts to "output_<input file name>". Works without gawk's ENDFILE.

# Field separator: split every record on a comma, a colon or a double quote.
BEGIN { FS="[,:\"]" }

# First record of every input file: write out what was collected for the
# PREVIOUS file, then start collecting for the new one.
FNR==1 {
    if(filename!="") {                                  # no file before the first
        out="output_" filename                          # using previous filename
        for (k in a)
        {
            print filename, k, a[k] > out
        }
        delete a                                        # start the next file empty
        close(out)                                      # release the descriptor
    }
    filename=FILENAME                                   # remember filename
}

# Count matching records per key ($5). A plain counter does not depend on the
# value of OFS or on the contents of $6.
/searchKeyword/ {a[$5]++}

# After the last record: write out the counts of the last file that had data.
# The remembered `filename` is used instead of FILENAME because FNR==1 never
# fires for an empty file, so FILENAME may name a trailing empty file that the
# counts in a[] do not belong to.
END {
     out="output_" filename
     for (k in a)
     {
         print filename, k, a[k] > out
     }
     delete a                                        # good habit but more for
     close(out)                                      # symmetry
}
更新:根据评论中的要求进行更新。对不起,我第一次完全错过了这一部分

# Count, per input file, how many records carry each searchKeyword key and
# write every key to "output_<input file name>_<suffix>.txt", where <suffix>
# is the text after the last "." of the key (abc, cde, ...).

# Field separator: split every record on a comma, a colon or a double quote.
BEGIN { FS="[,:\"]" }

# Write the counts held in the global array a[] for input file `fname`, then
# empty a[]. Names after the extra spaces in the parameter list are locals.
# The global array started[] remembers which output files this run has
# already written, so each file is truncated on first use and appended to
# afterwards; rerunning the script therefore does not accumulate old lines.
function write_counts(fname,    k, n, parts, out) {
    for (k in a)
    {
        n=split(k,parts,".")                          # last piece is the suffix
        out="output_" fname "_" parts[n] ".txt"       # construct the filename
        if (out in started)
            print fname, k, a[k] >> out               # later key, same file
        else {
            print fname, k, a[k] > out                # first write: truncate
            started[out]=1
        }
        close(out)                                    # spare the fds
    }
    delete a                                          # empty env
}

# First record of every input file: flush the previous file's counts.
FNR==1 {
    if(filename!="")                                  # no file before the first
        write_counts(filename)
    filename=FILENAME                                 # remember filename
}

# Count matching records per key ($5).
/searchKeyword/ {a[$5]++}

# After the last record: flush the counts of the last file that had data.
END {
    write_counts(filename)
}

我看不出你想用它实现什么,但是
END
只能看到
FILENAME
的最后一个实例。如果你正在使用GNU awk,试着用
ENDFILE
替换
END
,看看这是否是你想要的(你可能需要
delete a[k]
,可能还要添加
close()
,但正如我所说的,我不明白你想做什么)。具有预期输出的示例数据通常会有很大帮助。您请求帮助分析多个文件,但仅提供了一个文件,其中仅包含一行,作为您尝试处理的输入示例。请考虑这是否真正/充分代表了您要处理的实际输入。@VIPIN KUMAR,请检查我的解决方案是否对您有效?@RavinderSingh13-我尝试了您的解决方案,但只在一个文件中获得了输出,并且只在第三个输入文件中获得了输出。@RavinderSingh13-我将要求您检查问题“预期输出”的更新部分您的代码的行为与我的代码类似,在我的代码中,前两个文件没有得到处理或没有显示在输出文件中。我看不出您试图用它来实现什么,但
END
只能看到
FILENAME
的最后一个实例。如果您使用的是GNU awk,请尝试将
END
替换为
ENDFILE
,然后查看这是您想要的(您可能需要
delete a[k]
,可能还需要添加
close()
,但正如我所说,我不明白您想做什么)。具有预期输出的示例数据通常会有很大帮助。您请求帮助分析多个文件,但仅提供了一个文件,其中仅包含一行,作为您尝试处理的输入示例。请考虑这是否真正/充分代表了您要处理的实际输入。@VIPIN KUMAR,请检查我的解决方案是否对您有效?@RavinderSingh13-我尝试了您的解决方案,但只在一个文件中获得了输出,并且只在第三个输入文件中获得了输出。@RavinderSingh13-我将要求您检查问题“预期输出”的更新部分你们的代码和我的代码一样,前两个文件并没有得到处理,或者并没有显示在输出文件中。你们能建议如何处理问题的第二部分,根据行中的公共字符串创建子文件吗?@VIPINKUMAR添加了一个版本。我更改了主块,希望这就是你们想要的。你们能吗请建议如何处理问题的第二部分,根据行中的公共字符串创建子文件。@VIPINKUMAR添加了一个版本。我更改了主块,希望这是您想要的。