Awk 使用csv文件的摘要数据
我想在所需的输出文件中添加更多细节。请帮忙改进下面的代码,以得到所需的文件。原帖依次给出了:代码、输入文件、我目前得到的输出文件。所需的输出如下所示,AWK 可以做到吗?下面是我的例子(标题:Awk 使用csv文件的摘要数据;标签:awk):
CODE-1T COD-Area CODE-1 CODE-S CODE-T
------------------------------------------------------------
333000004 AS/RR 22 0 803
333000004 SS/RR 11 10 142
Total 33 10 945 988
---------------------------------------------------------------------
333000005 SS/RR 13 4 1638
333000005 ST/RR 11 3 1134
Total 24 7 2772 2803
---------------------------------------------------------------------
333000006 SS/RR 7 9 1740
333000006 ST/RR 5 3 258
Total 12 12 1998 2202
---------------------------------------------------------------------
333000007 SS/RR 4 59 912
333000007 ST/RR 3 5 1682
333000007 SX/RR 3 34 876
Total 10 98 3470 3578
---------------------------------------------------------------------
---------------------------------------------------------------------
Gran Total: 79 127 9185 9391
---------------------------------------------------------------------
提前感谢。(回答)这不是一个完整的解决方案——我现在没有足够的耐心处理格式——但它展示了对一个字段求小计的主要思路,您可以将其推广并照此处理其他字段和格式。
$ awk -F, '
  # Main rule: remember every distinct field 2 and field 7 value and
  # accumulate fields 4, 5 and 6 under the combined "field2 OFS field7" key.
  # Only the field 4 sums are reported below; f5 and f6 are collected but
  # never printed.
  {
    pair = $2 OFS $7
    f2[$2]
    f7[$7]
    f4[pair] += $4
    f5[pair] += $5
    f6[pair] += $6
  }
  # Report: for each field 2 value, list the field 4 sum of every pair that
  # actually occurred, then the subtotal of that group. The order of the
  # for-in loops is whatever the awk implementation chooses.
  END {
    for (code in f2) {
      subtotal = 0
      for (area in f7) {
        pair = code OFS area
        if (!(pair in f4))
          continue
        print pair, f4[pair]
        subtotal += f4[pair]
      }
      print "Total", "-", subtotal
    }
  }' file | column -t
333000004 SS/RR 11
333000004 AS/RR 22
Total - 33
333000005 SS/RR 13
333000005 ST/RR 11
Total - 24
333000006 SS/RR 7
333000006 ST/RR 5
Total - 12
333000007 SS/RR 4
333000007 SX/RR 3
333000007 ST/RR 3
Total - 10
这不是一个完整的解决方案——我现在没有足够的耐心处理格式——但它展示了对一个字段求小计的主要思路,您可以将其推广并照此处理其他字段和格式。
$ awk -F, '
  # Per record: note the field 2 and field 7 values that occur and add
  # fields 4, 5 and 6 to the running sums of the "field2 OFS field7" key.
  # f5 and f6 are filled in but not used by the report below.
  {
    key = $2 OFS $7
    f2[$2]; f7[$7]
    f4[key] += $4
    f5[key] += $5
    f6[key] += $6
  }
  # At end of input: walk every field 2 value, print the field 4 sum for
  # each pair that exists, and close the group with its subtotal.
  # Iteration order of for-in is implementation defined.
  END {
    for (grp in f2) {
      grp_sum = 0
      for (sub_id in f7) {
        key = grp OFS sub_id
        if (!(key in f4))
          continue
        print key, f4[key]
        grp_sum += f4[key]
      }
      print "Total", "-", grp_sum
    }
  }' file | column -t
333000004 SS/RR 11
333000004 AS/RR 22
Total - 33
333000005 SS/RR 13
333000005 ST/RR 11
Total - 24
333000006 SS/RR 7
333000006 ST/RR 5
Total - 12
333000007 SS/RR 4
333000007 SX/RR 3
333000007 ST/RR 3
Total - 10
编辑:OP 指出,当第4列和第5列的值为零时,之前的代码不起作用,现已修复:
awk '
# Two-pass summary of a comma-separated file. Input_file is read twice:
#   pass 1 accumulates fields 4, 5 and 6 per (field 2, last field) pair;
#   pass 2 prints one row per pair, a "Total" row each time field 2
#   changes, and a grand total at the end.
# Rows sharing the same field 2 must be contiguous in the input, because
# a subtotal is emitted as soon as field 2 changes.
BEGIN {
  FS  = ","                 # input is comma separated
  OFS = "\t\t"              # output columns are separated by two TABs
  s1  = "------------------------------------------------------------------------------------------------"
  print "CODE-1T COD-Area CODE-1 CODE-S CODE-T" ORS s1
}

# Pass 1: FNR==NR holds only while the first file argument is being read.
FNR == NR {
  code1[$2,$NF] += $4
  codes[$2,$NF] += $5
  codet[$2,$NF] += $6
  next
}

# Pass 2: field 2 changed, so close the previous group. prev is compared
# with "" instead of being tested for truth so that a key of 0 still works.
prev != "" && prev != $2 {
  sum_col   = val1 + val2 + val3
  SUM      += sum_col
  sum_val1 += val1
  sum_val2 += val2
  sum_val3 += val3
  print "Total\t\t\t\t\t" val1, val2, val3, sum_col ORS s1
  val1 = val2 = val3 = 0
}

# Print each (field 2, last field) pair once. The "in" test looks for the
# key itself, so pairs whose sums are 0 are still printed, and it does not
# recreate the entry after the delete below.
(($2,$NF) in code1) {
  print $2, $NF, code1[$2,$NF], codes[$2,$NF], codet[$2,$NF]
  val1 += code1[$2,$NF]
  val2 += codes[$2,$NF]
  val3 += codet[$2,$NF]
  delete code1[$2,$NF]
}

{
  prev = $2                 # remember the group key of the current row
}

END {
  # Close the last group. Testing prev (was: val1) keeps the final subtotal
  # even when its first column sums to 0, and adding it to SUM here avoids
  # reusing a stale sum_col in the grand total. No s1 is appended because
  # the grand-total print below starts with the separator lines.
  if (prev != "") {
    sum_col   = val1 + val2 + val3
    SUM      += sum_col
    sum_val1 += val1
    sum_val2 += val2
    sum_val3 += val3
    print "Total\t\t\t\t\t" val1, val2, val3, sum_col
  }
  # The +0 forces numeric output when no rows were read at all.
  print s1 ORS s1 ORS "Grand Total:\t\t", sum_val1 + 0, sum_val2 + 0, sum_val3 + 0, (SUM + 0) ORS s1
}' Input_file Input_file   # Input_file is listed twice, once per pass
你能试试下面的吗
awk -F, '
# Two-pass summary of a comma-separated file (Input_file is given twice).
# Pass 1 sums fields 4, 5 and 6 per (field 2, last field) pair; pass 2
# prints one row per pair, a subtotal row each time field 2 changes, and a
# grand total. Rows with the same field 2 are expected to be contiguous.
BEGIN {
  s1 = "------------------------------------------------------------------------------------------------"
  print "CODE-1T COD-Area CODE-1 CODE-S CODE-T" ORS s1
}

# Pass 1: true only while the first file argument is being read.
FNR == NR {
  code1[$2,$NF] += $4
  codes[$2,$NF] += $5
  codet[$2,$NF] += $6
  next
}

# Pass 2: group key changed, emit the subtotal of the previous group.
# Comparing prev with "" keeps a group key of 0 working.
prev != "" && prev != $2 {
  sum_col   = val1 + val2 + val3
  SUM      += sum_col
  sum_val1 += val1
  sum_val2 += val2
  sum_val3 += val3
  print "\t\t\t" val1, val2, val3, sum_col ORS s1
  val1 = val2 = val3 = 0
}

# Emit each pair once. Membership is tested with "in" (was: truth of the
# stored value), so a pair whose field 4 sum is 0 is no longer skipped.
(($2,$NF) in code1) {
  print $2, $NF, code1[$2,$NF], codes[$2,$NF], codet[$2,$NF]
  val1 += code1[$2,$NF]
  val2 += codes[$2,$NF]
  val3 += codet[$2,$NF]
  delete code1[$2,$NF]
}

{
  prev = $2
}

END {
  # Flush the last group whenever pass 2 saw any row (was: only when val1
  # was non-zero) and fold it into SUM so the grand total never reuses a
  # stale sum_col.
  if (prev != "") {
    sum_col   = val1 + val2 + val3
    SUM      += sum_col
    sum_val1 += val1
    sum_val2 += val2
    sum_val3 += val3
    print "\t\t\t" val1, val2, val3, sum_col
  }
  # The +0 prints 0 instead of an empty column when the input was empty.
  print s1 ORS s1 ORS "Grand Total:\t\t", sum_val1 + 0, sum_val2 + 0, sum_val3 + 0, (SUM + 0) ORS s1
}' OFS="\t\t" Input_file Input_file
说明:下面附上带逐行注释的代码。
awk '
# Annotated two-pass summary of a comma-separated file.
# Input_file is named twice on the command line: the first read builds the
# per-pair sums, the second read prints them in input order together with
# a subtotal per field 2 value and a grand total.
BEGIN {
  FS  = ","                 # fields are comma separated
  OFS = "\t\t"              # two TABs between output columns
  s1  = "------------------------------------------------------------------------------------------------"
  print "CODE-1T COD-Area CODE-1 CODE-S CODE-T" ORS s1   # header plus separator
}

# First read of Input_file (FNR equals NR only for the first file).
FNR == NR {
  code1[$2,$NF] += $4       # sum of field 4 per (field 2, last field)
  codes[$2,$NF] += $5       # sum of field 5 per (field 2, last field)
  codet[$2,$NF] += $6       # sum of field 6 per (field 2, last field)
  next                      # skip the pass 2 rules below
}

# Second read: field 2 differs from the previous row, so print the
# subtotal row of the group that just ended and reset the group sums.
# prev is compared with "" so that a key of 0 is not mistaken for "unset".
prev != "" && prev != $2 {
  sum_col   = val1 + val2 + val3   # row total of the finished group
  SUM      += sum_col              # running total of all row totals
  sum_val1 += val1
  sum_val2 += val2
  sum_val3 += val3
  print "\t\t\t" val1, val2, val3, sum_col ORS s1
  val1 = val2 = val3 = 0
}

# Index is $2,$NF. The pair is printed the first time it is met and then
# deleted so repeats are skipped. Using "in" instead of the truth of the
# stored value keeps pairs whose field 4 sum is 0.
(($2,$NF) in code1) {
  print $2, $NF, code1[$2,$NF], codes[$2,$NF], codet[$2,$NF]
  val1 += code1[$2,$NF]
  val2 += codes[$2,$NF]
  val3 += codet[$2,$NF]
  delete code1[$2,$NF]
}

{
  prev = $2                 # group key of the row just processed
}

END {
  # The last group has no following row to trigger its subtotal, so it is
  # flushed here. The guard is "some row was seen" (was: val1 non-zero),
  # and the group is added to SUM so no stale sum_col reaches the total.
  if (prev != "") {
    sum_col   = val1 + val2 + val3
    SUM      += sum_col
    sum_val1 += val1
    sum_val2 += val2
    sum_val3 += val3
    print "\t\t\t" val1, val2, val3, sum_col
  }
  # Two separator lines, the grand totals, and a closing separator.
  # The +0 yields 0 rather than an empty column for empty input.
  print s1 ORS s1 ORS "Grand Total:\t\t", sum_val1 + 0, sum_val2 + 0, sum_val3 + 0, (SUM + 0) ORS s1
}' Input_file Input_file   # same file twice: one argument per pass
编辑:OP 指出,当第4列和第5列的值为零时,之前的代码不起作用,现已修复:
awk '
# Two-pass summary of a comma-separated file. Input_file is read twice:
#   pass 1 accumulates fields 4, 5 and 6 per (field 2, last field) pair;
#   pass 2 prints one row per pair, a "Total" row each time field 2
#   changes, and a grand total at the end.
# Rows sharing the same field 2 must be contiguous in the input, because
# a subtotal is emitted as soon as field 2 changes.
BEGIN {
  FS  = ","                 # input is comma separated
  OFS = "\t\t"              # output columns are separated by two TABs
  s1  = "------------------------------------------------------------------------------------------------"
  print "CODE-1T COD-Area CODE-1 CODE-S CODE-T" ORS s1
}

# Pass 1: FNR==NR holds only while the first file argument is being read.
FNR == NR {
  code1[$2,$NF] += $4
  codes[$2,$NF] += $5
  codet[$2,$NF] += $6
  next
}

# Pass 2: field 2 changed, so close the previous group. prev is compared
# with "" instead of being tested for truth so that a key of 0 still works.
prev != "" && prev != $2 {
  sum_col   = val1 + val2 + val3
  SUM      += sum_col
  sum_val1 += val1
  sum_val2 += val2
  sum_val3 += val3
  print "Total\t\t\t\t\t" val1, val2, val3, sum_col ORS s1
  val1 = val2 = val3 = 0
}

# Print each (field 2, last field) pair once. The "in" test looks for the
# key itself, so pairs whose sums are 0 are still printed, and it does not
# recreate the entry after the delete below.
(($2,$NF) in code1) {
  print $2, $NF, code1[$2,$NF], codes[$2,$NF], codet[$2,$NF]
  val1 += code1[$2,$NF]
  val2 += codes[$2,$NF]
  val3 += codet[$2,$NF]
  delete code1[$2,$NF]
}

{
  prev = $2                 # remember the group key of the current row
}

END {
  # Close the last group. Testing prev (was: val1) keeps the final subtotal
  # even when its first column sums to 0, and adding it to SUM here avoids
  # reusing a stale sum_col in the grand total. No s1 is appended because
  # the grand-total print below starts with the separator lines.
  if (prev != "") {
    sum_col   = val1 + val2 + val3
    SUM      += sum_col
    sum_val1 += val1
    sum_val2 += val2
    sum_val3 += val3
    print "Total\t\t\t\t\t" val1, val2, val3, sum_col
  }
  # The +0 forces numeric output when no rows were read at all.
  print s1 ORS s1 ORS "Grand Total:\t\t", sum_val1 + 0, sum_val2 + 0, sum_val3 + 0, (SUM + 0) ORS s1
}' Input_file Input_file   # Input_file is listed twice, once per pass
你能试试下面的吗
awk -F, '
# Two-pass summary of a comma-separated file (Input_file is given twice).
# Pass 1 sums fields 4, 5 and 6 per (field 2, last field) pair; pass 2
# prints one row per pair, a subtotal row each time field 2 changes, and a
# grand total. Rows with the same field 2 are expected to be contiguous.
BEGIN {
  s1 = "------------------------------------------------------------------------------------------------"
  print "CODE-1T COD-Area CODE-1 CODE-S CODE-T" ORS s1
}

# Pass 1: true only while the first file argument is being read.
FNR == NR {
  code1[$2,$NF] += $4
  codes[$2,$NF] += $5
  codet[$2,$NF] += $6
  next
}

# Pass 2: group key changed, emit the subtotal of the previous group.
# Comparing prev with "" keeps a group key of 0 working.
prev != "" && prev != $2 {
  sum_col   = val1 + val2 + val3
  SUM      += sum_col
  sum_val1 += val1
  sum_val2 += val2
  sum_val3 += val3
  print "\t\t\t" val1, val2, val3, sum_col ORS s1
  val1 = val2 = val3 = 0
}

# Emit each pair once. Membership is tested with "in" (was: truth of the
# stored value), so a pair whose field 4 sum is 0 is no longer skipped.
(($2,$NF) in code1) {
  print $2, $NF, code1[$2,$NF], codes[$2,$NF], codet[$2,$NF]
  val1 += code1[$2,$NF]
  val2 += codes[$2,$NF]
  val3 += codet[$2,$NF]
  delete code1[$2,$NF]
}

{
  prev = $2
}

END {
  # Flush the last group whenever pass 2 saw any row (was: only when val1
  # was non-zero) and fold it into SUM so the grand total never reuses a
  # stale sum_col.
  if (prev != "") {
    sum_col   = val1 + val2 + val3
    SUM      += sum_col
    sum_val1 += val1
    sum_val2 += val2
    sum_val3 += val3
    print "\t\t\t" val1, val2, val3, sum_col
  }
  # The +0 prints 0 instead of an empty column when the input was empty.
  print s1 ORS s1 ORS "Grand Total:\t\t", sum_val1 + 0, sum_val2 + 0, sum_val3 + 0, (SUM + 0) ORS s1
}' OFS="\t\t" Input_file Input_file
说明:下面附上带逐行注释的代码。
awk '
# Annotated two-pass summary of a comma-separated file.
# Input_file is named twice on the command line: the first read builds the
# per-pair sums, the second read prints them in input order together with
# a subtotal per field 2 value and a grand total.
BEGIN {
  FS  = ","                 # fields are comma separated
  OFS = "\t\t"              # two TABs between output columns
  s1  = "------------------------------------------------------------------------------------------------"
  print "CODE-1T COD-Area CODE-1 CODE-S CODE-T" ORS s1   # header plus separator
}

# First read of Input_file (FNR equals NR only for the first file).
FNR == NR {
  code1[$2,$NF] += $4       # sum of field 4 per (field 2, last field)
  codes[$2,$NF] += $5       # sum of field 5 per (field 2, last field)
  codet[$2,$NF] += $6       # sum of field 6 per (field 2, last field)
  next                      # skip the pass 2 rules below
}

# Second read: field 2 differs from the previous row, so print the
# subtotal row of the group that just ended and reset the group sums.
# prev is compared with "" so that a key of 0 is not mistaken for "unset".
prev != "" && prev != $2 {
  sum_col   = val1 + val2 + val3   # row total of the finished group
  SUM      += sum_col              # running total of all row totals
  sum_val1 += val1
  sum_val2 += val2
  sum_val3 += val3
  print "\t\t\t" val1, val2, val3, sum_col ORS s1
  val1 = val2 = val3 = 0
}

# Index is $2,$NF. The pair is printed the first time it is met and then
# deleted so repeats are skipped. Using "in" instead of the truth of the
# stored value keeps pairs whose field 4 sum is 0.
(($2,$NF) in code1) {
  print $2, $NF, code1[$2,$NF], codes[$2,$NF], codet[$2,$NF]
  val1 += code1[$2,$NF]
  val2 += codes[$2,$NF]
  val3 += codet[$2,$NF]
  delete code1[$2,$NF]
}

{
  prev = $2                 # group key of the row just processed
}

END {
  # The last group has no following row to trigger its subtotal, so it is
  # flushed here. The guard is "some row was seen" (was: val1 non-zero),
  # and the group is added to SUM so no stale sum_col reaches the total.
  if (prev != "") {
    sum_col   = val1 + val2 + val3
    SUM      += sum_col
    sum_val1 += val1
    sum_val2 += val2
    sum_val3 += val3
    print "\t\t\t" val1, val2, val3, sum_col
  }
  # Two separator lines, the grand totals, and a closing separator.
  # The +0 yields 0 rather than an empty column for empty input.
  print s1 ORS s1 ORS "Grand Total:\t\t", sum_val1 + 0, sum_val2 + 0, sum_val3 + 0, (SUM + 0) ORS s1
}' Input_file Input_file   # same file twice: one argument per pass
你能再解释一下你的产出吗?例如,由于不清楚,您是如何添加columnsrule的,因为您的最后两个输出列似乎很混乱。还可以按什么顺序添加它们?假设第二列的数字是输入文件总和的关键,那个么若它们的计数是奇数,那个么我们怎么加呢?请详细说明您的问题?13,是的,求和的关键是第2列和第7列,然后求和第4、5、6列,每列2、7。目的是对第1列中每一组的小计求和。然后将总数写在最后一列。这是每个数组第4、5、6列中所有值的总和。我不懂奇数的那部分?。。TKS你能再解释一下你的输出吗?例如,由于不清楚,您是如何添加columnsrule的,因为您的最后两个输出列似乎很混乱。还可以按什么顺序添加它们?假设第二列的数字是输入文件总和的关键,那个么若它们的计数是奇数,那个么我们怎么加呢?请详细说明您的问题?13,是的,求和的关键是第2列和第7列,然后求和第4、5、6列,每列2、7。目的是对第1列中每一组的小计求和。然后将总数写在最后一列。这是每个数组第4、5、6列中所有值的总和。我不懂奇数的那部分?。。tksHi,karakfa,许多TK我将尝试调整所有字段的代码,并尝试进行格式化。。。你能把总数加起来吗。所有小计的总和..嗨,karakfa,很多TK我将尝试调整所有字段的代码并尝试进行格式化。。。你能把总数加起来吗。所有小计的总和..RavinderSingh13,代码运行良好。很好的解释。。但我注意到了一些事情。如果第4列和第5列中的值为0,则代码不起作用。请尝试将第4列和第5列中的值重新编译为0。许多的thanks@OXXO,请尝试我的编辑解决方案,然后让我知道?RavinderSingh13,代码现在适用于第4列和第5列的零值,但仍然需要解决一些小问题,在最后一个块333000007中,代码没有写入子总计。如果可以做到这一点,那么代码将完美工作。RavinderSingh13,我复制了第二部分中的行print Total\t\t\tval1,val2,val3,sum_col s1,它可以工作RavinderSingh13,请注意,您分享了您的知识,tks,代码是惊人的。RavinderSingh13,代码工作良好。很好的解释。。但我注意到了一些事情。如果第4列和第5列中的值为0,则代码不起作用。请尝试将第4列和第5列中的值重新编译为0。许多的thanks@OXXO,请尝试我的编辑解决方案,然后让我知道?RavinderSingh13,代码现在适用于第4列和第5列的零值,但仍然需要解决一些小问题,在最后一个块333000007中,代码没有写入子总计。如果可以做到这一点,那么代码将完美工作..RavinderSingh13,我复制了第二部分中的行print Total\t\t\tval1,val2,val3,sum_col s1,它工作了RavinderSingh13,请注意,您分享了您的知识,tks,代码是惊人的。