如何使用awk解析多个文件并将输出放在生成的动态文件中?
标签:awk
这是我的 awk 脚本 filtered.awk,它在处理单个输入文件时工作正常:
# Field separator: split each record on a comma, colon, or double quote
BEGIN { FS="[,:\"]" }
# For records matching searchKeyword, append OFS and field 6 to the string stored under key $5
/searchKeyword/ {a[$5]=a[$5]OFS$6}
# After all input has been read, loop over the collected keys
END {
for (k in a)
{
# gsub() returns how many OFS occurrences it replaced in a[k], i.e. the number
# of values appended for key k.
# NOTE: END runs only once, after the last input file, so FILENAME here is the
# name of the last file only; every line goes to that single output file.
print FILENAME, k, gsub(OFS,OFS,a[k]) > ("output_" FILENAME)
}
}
样本输入-
cat input1.txt
"YY/XX","searchKeyword-ZZZZ.abc:06","200OK",64594889937362
"YY/XX","searchKeyword-ZZZZ.abc:13","200OK",64594860937362
"YY/XX","searchKeyword-ZZZZ.abc:06","200OK",64594822937362
"YY/XX","searchKeyword-ZZZZ.abc:06","200OK",64594823937362
"YY/XX","searchKeyword-ZZZZ.pqr:13","200OK",64594890937362
"YY/XX","searchKeyword-ZZZZ.pqr:08","200OK",64594877937362
"YY/XX","searchKeyword-ZZZZ.pqr:13","200OK",64594860937362
"YY/XX","searchKeyword-ZZZZ.pqr:13","200OK",64594870937362
"YY/XX","searchKeyword-ZZZZ.cde:12","200OK",64594803937362
"YY/XX","searchKeyword-ZZZZ.cde:00","200OK",64594870937362
"YY/XX","searchKeyword-ZZZZ.cde:00","200OK",64594860937362
"YY/XX","searchKeyword-ZZZZ.cde:08","200OK",64594825193736
第二个输入文件-
cat input2.txt
"XXX/YYY","searchKeyword-YYYYY.pqr:99910","200OK",439865231,"4334373212"
"XXX/YYY","searchKeyword-YYYYY.cde:99904","200OK",439868231,"4334953212"
"XXX/YYY","searchKeyword-YYYYY.mno:99909","200OK",439827231,"4334178212"
"XXX/YYY","searchKeyword-YYYYY.pqr:99911","200OK",439874231,"4334353212"
"XXX/YYY","searchKeyword-YYYYY.cde:99900","200OK",439893231,"4334130212"
"XXX/YYY","searchKeyword-YYYYY.mno:99910","200OK",439886231,"4334868212"
"XXX/YYY","searchKeyword-YYYYY.pqr:99905","200OK",439850231,"4334495212"
"XXX/YYY","searchKeyword-YYYYY.cde:99905","200OK",439878231,"4334131212"
"XXX/YYY","searchKeyword-YYYYY.mno:99910","200OK",439871231,"4334895212"
"XXX/YYY","searchKeyword-YYYYY.pqr:99910","200OK",439874231,"4334353212"
"XXX/YYY","searchKeyword-YYYYY.cde:99908","200OK",439848231,"4334823212"
"XXX/YYY","searchKeyword-YYYYY.mno:99914","200OK",439820231,"4334177212"
"XXX/YYY","searchKeyword-YYYYY.pqr:99910","200OK",439882231,"4334579212"
"XXX/YYY","searchKeyword-YYYYY.cde:99903","200OK",439840231,"4334966212"
"XXX/YYY","searchKeyword-YYYYY.mno:99908","200OK",439894231,"4334365212"
第三输入文件
cat input3.txt
"XXX/YYY","searchKeyword-YYYYY.cde:99900","200OK",439893231,"4334130212"
"XXX/YYY","searchKeyword-YYYYY.mno:99910","200OK",439886231,"4334868212"
"XXX/YYY","searchKeyword-YYYYY.pqr:99905","200OK",439850231,"4334495212"
"XXX/YYY","searchKeyword-YYYYY.cde:99905","200OK",439878231,"4334131212"
"XXX/YYY","searchKeyword-YYYYY.mno:99910","200OK",439871231,"4334895212"
"XXX/YYY","searchKeyword-YYYYY.pqr:99910","200OK",439874231,"4334353212"
"PPP/QQQ","searchKeyword-ZZZZ.abc:06","200OK",64594822937362
"PPP/QQQ","searchKeyword-ZZZZ.abc:06","200OK",64594823937362
"PPP/QQQ","searchKeyword-ZZZZ.pqr:13","200OK",64594890937362
"PPP/QQQ","searchKeyword-ZZZZ.pqr:08","200OK",64594877937362
"PPP/QQQ","searchKeyword-ZZZZ.pqr:13","200OK",64594860937362
"PPP/QQQ","searchKeyword-ZZZZ.pqr:13","200OK",64594870937362
"PPP/QQQ","searchKeyword-ZZZZ.cde:12","200OK",64594803937362
"PPP/QQQ","searchKeyword-ZZZZ.cde:00","200OK",64594870937362
我像下面这样传递输入文件,但只在 output_input3.txt 文件中获得了输出:
awk -f filtered.awk input*
cat output_input3.txt
input3.txt searchKeyword-ZZZZ.cde 6
input3.txt searchKeyword-YYYYY.cde 7
input3.txt searchKeyword-ZZZZ.pqr 8
input3.txt searchKeyword-YYYYY.pqr 7
input3.txt searchKeyword-ZZZZ.abc 6
input3.txt searchKeyword-YYYYY.mno 7
看起来它根本没有处理前两个文件
我希望在动态生成的文件中输出,如下所示-
==> output_input1.txt <==
input1.txt searchKeyword-ZZZZ.cde 4
input1.txt searchKeyword-ZZZZ.pqr 4
input1.txt searchKeyword-ZZZZ.abc 4
==> output_input2.txt <==
input2.txt searchKeyword-YYYYY.cde 5
input2.txt searchKeyword-YYYYY.pqr 5
input2.txt searchKeyword-YYYYY.mno 5
==> output_input3.txt <==
input3.txt searchKeyword-ZZZZ.cde 2
input3.txt searchKeyword-YYYYY.cde 2
input3.txt searchKeyword-ZZZZ.pqr 4
input3.txt searchKeyword-YYYYY.pqr 2
input3.txt searchKeyword-ZZZZ.abc 2
input3.txt searchKeyword-YYYYY.mno 2
注意:我正在 mac 上使用 awk(awk 版本 20070501),也尝试过使用 ENDFILE,但我认为 mac 上的 awk 不支持 ENDFILE。
END 只能看到 FILENAME 的最后一个值。如果您正在使用 GNU awk,请尝试将 END 替换为 ENDFILE,看看这是否是您想要的(您可能还需要用 delete a 清空数组,并用 close 关闭输出文件)。使用 GNU awk(借助 ENDFILE):
结果:
$ cat output_input1
input1 searchKeyword-ZZZZ.abc 4
input1 searchKeyword-ZZZZ.cde 4
input1 searchKeyword-ZZZZ.pqr 4
$ cat output_input2
input2 searchKeyword-YYYYY.mno 5
input2 searchKeyword-YYYYY.cde 5
input2 searchKeyword-YYYYY.pqr 5
$ cat output_input3
input3 searchKeyword-ZZZZ.abc 2
input3 searchKeyword-YYYYY.mno 2
input3 searchKeyword-ZZZZ.cde 2
input3 searchKeyword-ZZZZ.pqr 4
input3 searchKeyword-YYYYY.pqr 2
input3 searchKeyword-YYYYY.cde 2
如果没有 GNU awk 和 ENDFILE 可用,则需要在 FNR==1 块和 END 块中处理 FILENAME 的变化。当然,您可以(也应该)把重复的逻辑提取成函数,再从上述两个块中调用。示例:
# Per-file keyword counts for awk implementations without ENDFILE
# (for example the awk shipped with macOS).
# For every input file F, writes lines of the form "F key count" to the file
# "output_F", where key is field 5 of each record matching /searchKeyword/.

# Field separator: split each record on a comma, colon, or double quote.
BEGIN { FS = "[,:\"]" }

# Write the counts collected for input file `name` to "output_<name>" and
# reset the counters. `out` and `k` are local variables.
function write_counts(name,    out, k) {
    out = "output_" name
    for (k in a)
        print name, k, a[k] > out
    close(out)      # release the descriptor before the next file is handled
    delete a        # the next file starts with empty counters
}

# First record of a file: the previous file (if any) is complete, so flush it.
FNR == 1 {
    if (filename != "")
        write_counts(filename)
    filename = FILENAME     # remember which file the counters belong to
}

# Count matching records per key directly. This replaces concatenating $6 and
# counting OFS with gsub(), which depended on OFS being safe to use as a regex.
/searchKeyword/ { a[$5]++ }

# The last file is not followed by another FNR==1 event, so flush it here.
# The remembered name is used rather than FILENAME so the counts are always
# labelled with the file they were collected from.
END {
    if (filename != "")
        write_counts(filename)
}
更新:根据评论中的要求进行更新。对不起,我第一次完全错过了这一部分
# Per-file, per-suffix keyword counts for awk implementations without ENDFILE.
# For every input file F and every key K (field 5 of records matching
# /searchKeyword/), writes "F K count" to "output_F_<suffix>.txt", where
# <suffix> is the part of K after its last "." (abc, cde, ...).

# Field separator: split each record on a comma, colon, or double quote.
BEGIN { FS = "[,:\"]" }

# Write the counts collected for input file `name` and reset the counters.
# Several keys can share one suffix and therefore one output file, so later
# writes must append. The first write to each file in this run truncates it,
# so re-running the script does not pile up lines from earlier runs.
# `k`, `n`, `parts` and `out` are local variables.
function write_split_counts(name,    k, n, parts, out) {
    for (k in a) {
        n = split(k, parts, ".")                    # last piece is the suffix
        out = "output_" name "_" parts[n] ".txt"    # construct the file name
        if (out in started) {
            print name, k, a[k] >> out              # already written this run
        } else {
            print name, k, a[k] > out               # first write: start fresh
            started[out] = 1
        }
        close(out)                                  # spare file descriptors
    }
    delete a                                        # empty the counters
}

# First record of a file: the previous file (if any) is complete, so flush it.
FNR == 1 {
    if (filename != "")
        write_split_counts(filename)
    filename = FILENAME     # remember which file the counters belong to
}

# Count matching records per key.
/searchKeyword/ { a[$5]++ }

# The last file is not followed by another FNR==1 event, so flush it here.
END {
    if (filename != "")
        write_split_counts(filename)
}
END 只能看到 FILENAME 的最后一个值。如果您正在使用 GNU awk,请尝试用 ENDFILE 替换 END,看看这是否是您想要的(您可能还需要用 delete a 清空数组,并用 close 关闭输出文件)。使用 GNU awk(借助 ENDFILE):
结果:
$ cat output_input1
input1 searchKeyword-ZZZZ.abc 4
input1 searchKeyword-ZZZZ.cde 4
input1 searchKeyword-ZZZZ.pqr 4
$ cat output_input2
input2 searchKeyword-YYYYY.mno 5
input2 searchKeyword-YYYYY.cde 5
input2 searchKeyword-YYYYY.pqr 5
$ cat output_input3
input3 searchKeyword-ZZZZ.abc 2
input3 searchKeyword-YYYYY.mno 2
input3 searchKeyword-ZZZZ.cde 2
input3 searchKeyword-ZZZZ.pqr 4
input3 searchKeyword-YYYYY.pqr 2
input3 searchKeyword-YYYYY.cde 2
如果没有 GNU awk 和 ENDFILE 可用,则需要在 FNR==1 块和 END 块中处理 FILENAME 的变化。当然,您可以(也应该)把重复的逻辑提取成函数,再从上述两个块中调用。示例:
# Per-file keyword counts for awk implementations without ENDFILE
# (for example the awk shipped with macOS).
# For every input file F, writes lines of the form "F key count" to the file
# "output_F", where key is field 5 of each record matching /searchKeyword/.

# Field separator: split each record on a comma, colon, or double quote.
BEGIN { FS = "[,:\"]" }

# Write the counts collected for input file `name` to "output_<name>" and
# reset the counters. `out` and `k` are local variables.
function write_counts(name,    out, k) {
    out = "output_" name
    for (k in a)
        print name, k, a[k] > out
    close(out)      # release the descriptor before the next file is handled
    delete a        # the next file starts with empty counters
}

# First record of a file: the previous file (if any) is complete, so flush it.
FNR == 1 {
    if (filename != "")
        write_counts(filename)
    filename = FILENAME     # remember which file the counters belong to
}

# Count matching records per key directly. This replaces concatenating $6 and
# counting OFS with gsub(), which depended on OFS being safe to use as a regex.
/searchKeyword/ { a[$5]++ }

# The last file is not followed by another FNR==1 event, so flush it here.
# The remembered name is used rather than FILENAME so the counts are always
# labelled with the file they were collected from.
END {
    if (filename != "")
        write_counts(filename)
}
更新:根据评论中的要求进行更新。对不起,我第一次完全错过了这一部分
# Per-file, per-suffix keyword counts for awk implementations without ENDFILE.
# For every input file F and every key K (field 5 of records matching
# /searchKeyword/), writes "F K count" to "output_F_<suffix>.txt", where
# <suffix> is the part of K after its last "." (abc, cde, ...).

# Field separator: split each record on a comma, colon, or double quote.
BEGIN { FS = "[,:\"]" }

# Write the counts collected for input file `name` and reset the counters.
# Several keys can share one suffix and therefore one output file, so later
# writes must append. The first write to each file in this run truncates it,
# so re-running the script does not pile up lines from earlier runs.
# `k`, `n`, `parts` and `out` are local variables.
function write_split_counts(name,    k, n, parts, out) {
    for (k in a) {
        n = split(k, parts, ".")                    # last piece is the suffix
        out = "output_" name "_" parts[n] ".txt"    # construct the file name
        if (out in started) {
            print name, k, a[k] >> out              # already written this run
        } else {
            print name, k, a[k] > out               # first write: start fresh
            started[out] = 1
        }
        close(out)                                  # spare file descriptors
    }
    delete a                                        # empty the counters
}

# First record of a file: the previous file (if any) is complete, so flush it.
FNR == 1 {
    if (filename != "")
        write_split_counts(filename)
    filename = FILENAME     # remember which file the counters belong to
}

# Count matching records per key.
/searchKeyword/ { a[$5]++ }

# The last file is not followed by another FNR==1 event, so flush it here.
END {
    if (filename != "")
        write_split_counts(filename)
}
我看不出你想用它实现什么,但是 END 只能看到 FILENAME 的最后一个值。如果你正在使用 GNU awk,试着用 ENDFILE 替换 END,看看这是否是你想要的(你可能需要调用 delete(a[k]),可能还要添加 close
,但正如我所说的,我不明白你想做什么)。具有预期输出的示例数据通常会有很大帮助。您请求帮助分析多个文件,但仅提供了一个文件,其中仅包含一行,作为您尝试处理的输入示例。请考虑这是否真正/充分代表了您要处理的实际输入。@VIPIN KUMAR,请检查我的解决方案是否对您有效?@RavinderSingh13-我尝试了您的解决方案,但只在一个文件中获得了输出,并且只在第三个输入文件中获得了输出。@RavinderSingh13-我将要求您检查问题“预期输出”的更新部分您的代码的行为与我的代码类似,在我的代码中,前两个文件没有得到处理或没有显示在输出文件中。我看不出您试图用它来实现什么,但END
只能看到 FILENAME 的最后一个值。如果您使用的是 GNU awk,请尝试将 END 替换为 ENDFILE,然后看看这是否是您想要的(您可能需要调用 delete(a[k]),可能还要添加 close
,但正如我所说,我不明白您想做什么)。具有预期输出的示例数据通常会有很大帮助。您请求帮助分析多个文件,但仅提供了一个文件,其中仅包含一行,作为您尝试处理的输入示例。请考虑这是否真正/充分代表了您要处理的实际输入。@VIPIN KUMAR,请检查我的解决方案是否对您有效?@RavinderSingh13-我尝试了您的解决方案,但只在一个文件中获得了输出,并且只在第三个输入文件中获得了输出。@RavinderSingh13-我将要求您检查问题“预期输出”的更新部分你们的代码和我的代码一样,前两个文件并没有得到处理,或者并没有显示在输出文件中。你们能建议如何处理问题的第二部分,根据行中的公共字符串创建子文件吗?@VIPINKUMAR添加了一个版本。我更改了主块,希望这就是你们想要的。你们能吗请建议如何处理问题的第二部分,根据行中的公共字符串创建子文件。@VIPINKUMAR添加了一个版本。我更改了主块,希望这是您想要的。