如何使用 Linux 和 awk 将单字段文件拆分为多字段文件?
我有以下文件如何使用linux和awks将一个字段文件拆分为多个字段文件?,awk,Awk,我有以下文件list.txt: AbateI. D AcatulloM. A AcerbiF. D AcquafrescaR. A AcquahA. C AdjapongC. D AdnanA. D AdrianoL. A AjetiA. D AlbiolR. D AldeganiG. P AleesamiH. D AlexSandro D AlissonR. P 我想用awk重新排
list.txt
:
AbateI. D
AcatulloM. A
AcerbiF. D
AcquafrescaR. A
AcquahA. C
AdjapongC. D
AdnanA. D
AdrianoL. A
AjetiA. D
AlbiolR. D
AldeganiG. P
AleesamiH. D
AlexSandro D
AlissonR. P
我想用 awk 重新排列该文件,按第二列对名字进行分组,如下所示:
P D C A
AldeganiG. AbateI. AcquahA. AcatulloM.
AlissonR. AcerbiF. AcquafrescaR.
AdjapongC. AdrianoL.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro
这就是我所尝试的:
#!/usr/bin/awk -f
# Attempt from the question: bucket the first field into one array per group
# letter found in the second field, then print the buckets as columns.
BEGIN {
# Row layout: a leading tab, three 20-character left-justified cells, then the last cell.
FORMAT="\t%-20s%-20s%-20s%s\n"
# Header row holding the four group letters.
printf FORMAT,"P","D","C","A"
}
# Each array is keyed by the name and stores the same name as its value.
($2=="P") {a[$1] = $1}
($2=="D") {b[$1] = $1}
($2=="C") {c[$1] = $1}
($2=="A") {d[$1] = $1}
# Only the P bucket (array a) is printed here; b, c and d are filled but never output.
END{for(i in a) printf FORMAT, a[i],"","",""}
但我不知道如何循环并打印其余的数组。
$ cat tst.awk
# Pivot "name group" records into one tab-separated column per group.
BEGIN { OFS = "\t" }

# Remember every name under (position inside its group, group) and keep
# track of how many rows the largest group needs.
{
    group = $2
    depth = ++size[group]
    cell[depth, group] = $1
    if (depth > tallest)
        tallest = depth
}

END {
    # Header row: the group names, in the traversal order of the size array.
    sep = ""
    for (group in size) {
        printf "%s%s", sep, group
        sep = OFS
    }
    print ""

    # Body rows: the depth-th name of every group; missing cells print as empty.
    for (depth = 1; depth <= tallest; depth++) {
        sep = ""
        for (group in size) {
            printf "%s%s", sep, cell[depth, group]
            sep = OFS
        }
        print ""
    }
}
$ awk -f tst.awk file | column -s$'\t' -t
A P C D
AcatulloM. AldeganiG. AcquahA. AbateI.
AcquafrescaR. AlissonR. AcerbiF.
AdrianoL. AdjapongC.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro
BEGIN { OFS="\t" }
{
rowNr = ++numColRows[$2]
val[rowNr,$2] = $1
numRows = (rowNr > numRows ? rowNr : numRows)
}
END {
for (colName in numColRows) {
printf "%s%s", (c++ ? OFS : ""), colName
}
print ""
for (rowNr=1; rowNr …
您可以使用 paste 和 column,配合进程替换来实现:
# One awk per group letter prints $1 of the lines read from the file "input"
# that end in that letter; paste joins the four streams side by side and
# column -t aligns the result, using the tab character as the separator.
$ paste \
<(awk '/P$/ {print $1}'<input) \
<(awk '/D$/ {print $1}'<input) \
<(awk '/C$/ {print $1}'<input) \
<(awk '/A$/ {print $1}'<input) | column -s $'\t' -t
AldeganiG. AbateI. AcquahA. AcatulloM.
AlissonR. AcerbiF. AcquafrescaR.
AdjapongC. AdrianoL.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro
$ paste \
这是一种非常规的做法:
# For every record the awk program appends $1 to a newline-joined string kept
# per $2 key (OFS is set to a newline), counts the names per key in c and
# tracks the largest count in max. In END it pipes each key followed by its
# names into one "pr -<number of keys>t" process, then pads that group with
# empty lines until it is as long as the largest group.
# NOTE(review): no input file is named, so as written awk reads standard input - confirm.
$ awk -v OFS='\n' '{a[$2]=a[$2] OFS $1;
c[$2]++;
if(c[$2]>max) max=c[$2]}
END{pr="pr -"length(c)"t";
for(k in a)
{print k a[k] | pr;
for(i=c[k];i<max;i++)
{print "" | pr}}}'
A P C D
AcatulloM. AldeganiG. AcquahA. AbateI.
AcquafrescaR. AlissonR. AcerbiF.
AdrianoL. AdjapongC.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro
$ awk -v OFS='\n' '{a[$2]=a[$2] OFS $1;
c[$2]++;
if(c[$2]>max) max=c[$2]}
END{pr="pr -"length(c)"t";
for(k in a)
{print k a[k] | pr;
for(i=c[k];i …
您也可以组合使用 grep、cut、paste 和 expand:
# Emit one output column: the group letter as a header, followed by the first
# space-separated field of every line in list.txt that ends in that letter.
group_column() {
  echo "$1"
  grep "$1"'$' list.txt | cut -d ' ' -f1
}

# Join the four columns side by side, then turn the tabs into 20-wide columns.
paste \
  <(group_column P) \
  <(group_column D) \
  <(group_column C) \
  <(group_column A) \
  | expand -t 20
您可以将 grep 和 cut 替换为 sed,如下所示:
# Each column is its group letter followed by the names of that group: sed
# strips the trailing blanks plus letter and prints only the lines where that
# substitution happened. paste joins the columns; expand pads them to 20.
paste \
  <(printf '%s\n' P; sed -n 's/[[:blank:]]*P$//p' file) \
  <(printf '%s\n' D; sed -n 's/[[:blank:]]*D$//p' file) \
  <(printf '%s\n' C; sed -n 's/[[:blank:]]*C$//p' file) \
  <(printf '%s\n' A; sed -n 's/[[:blank:]]*A$//p' file) \
  | expand -t 20
你也可以这样做
# Build one column per group with awk (BEGIN prints the group letter as the
# header, then $1 is printed for every line of "file" ending in that letter),
# join the columns with paste and pad them to 20 characters with expand.
# Every line except the last must end in a backslash: without it paste would
# receive only the first process substitution and each remaining line would be
# run as a separate command.
paste \
<(awk 'BEGIN{print "P"}/P$/{print $1}' file ) \
<(awk 'BEGIN{print "D"}/D$/{print $1}' file ) \
<(awk 'BEGIN{print "C"}/C$/{print $1}' file ) \
<(awk 'BEGIN{print "A"}/A$/{print $1}' file ) | expand -t 20
在GNU awk中:
$ cat > list.awk
# Print the first field of every record grouped by its second field, one
# group per 14-character column. Needs GNU awk (arrays of arrays).
{
    n = (n < ++b[$2] ? b[$2] : n)   # n is the max count of words in one group
    a[$2][b[$2]] = $1               # put words to two dimensional array
}
END {
    for (i = 1; i <= n; i++) {      # from 1 to n
        for (j in a)                # for all groups
            # The "-" flag goes before the width (%-14s), as documented for printf.
            printf "%-14s%s", a[j][i], OFS   # print a word
        printf "%s", ORS            # ORS in the end
    }
}
$ awk -f list.awk list.txt
AcatulloM. AldeganiG. AcquahA. AbateI.
AcquafrescaR. AlissonR. AcerbiF.
AdrianoL. AdjapongC.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro
$ cat > list.awk
{
n=(n …
使用 awk 4.0 二维数组的解决方案——允许以任意顺序输出任意数量的组:
# Usage: ./myscr.sh GROUP... <input
# The script arguments name the groups to print, in left-to-right column order.
group_order=$*
awk -v orderstr="$group_order" '
# Split the requested group order into order[1..ncols].
BEGIN { ncols = split(orderstr, order) }

# names[group][k] is the k-th (0-based) name read for that group,
# filled[group] counts the names stored so far and deepest is the largest count.
{
    names[$2][filled[$2]++] = $1
    if (deepest < filled[$2])
        deepest = filled[$2]
}

END {
    # Header row: the group names in the requested order; the first cell is
    # always printed, every later cell is preceded by a tab.
    col = 1
    do {
        if (col == 1)
            printf("%-20s", order[col])
        else
            printf("\t%-20s", order[col])
    } while (++col <= ncols)
    printf("\n")

    # One output row per position, walking the groups in the requested order.
    for (row = 0; row < deepest; ++row) {
        col = 1
        do {
            if (col == 1)
                printf("%-20s", names[order[col]][row])
            else
                printf("\t%-20s", names[order[col]][row])
        } while (++col <= ncols)
        printf("\n")
    }
}
'
# 组的输出顺序
order=$*
awk -vorderstr="$order" '
BEGIN { split(orderstr, order) }
{
# grpnames[组][索引]=名称
grpnames[$2][grpi[$2]++]=$1
# 跟踪最大组大小
if(grpi[$2] > maxgrpsz)
maxgrpsz=grpi[$2]
}
END {
# 按顺序打印组标题
printf("%-20s", order[1])
for(j=2; j …
创建 4 个数组,并将 $1 添加到与 $2 对应的数组中。然后在最后,每一行从每个数组中各打印一个元素,并持续循环,直到最长的数组中的条目用完为止。我们不会替您把它写出来,这里不是免费的编码服务。因此,请根据这个提示尝试自己实现;如果无法让它工作,再回来提问。
@Barmar 如果我觉得某个问题有趣,我就会把代码写出来,就像我在这个例子中所做的那样。
P D C A
AldeganiG. AbateI. AcquahA. AcatulloM.
AlissonR. AcerbiF. AcquafrescaR.
AdjapongC. AdrianoL.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro
$ cat > list.awk
# Print the first field of every record grouped by its second field, one
# group per 14-character column. Needs GNU awk (arrays of arrays).
{
    n = (n < ++b[$2] ? b[$2] : n)   # n is the max count of words in one group
    a[$2][b[$2]] = $1               # put words to two dimensional array
}
END {
    for (i = 1; i <= n; i++) {      # from 1 to n
        for (j in a)                # for all groups
            # The "-" flag goes before the width (%-14s), as documented for printf.
            printf "%-14s%s", a[j][i], OFS   # print a word
        printf "%s", ORS            # ORS in the end
    }
}
$ awk -f list.awk list.txt
AcatulloM. AldeganiG. AcquahA. AbateI.
AcquafrescaR. AlissonR. AcerbiF.
AdrianoL. AdjapongC.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro
# Usage: ./myscr.sh GROUP... <input
# The script arguments name the groups to print, in left-to-right column order.
group_order=$*
awk -v orderstr="$group_order" '
# Split the requested group order into order[1..ncols].
BEGIN { ncols = split(orderstr, order) }

# names[group][k] is the k-th (0-based) name read for that group,
# filled[group] counts the names stored so far and deepest is the largest count.
{
    names[$2][filled[$2]++] = $1
    if (deepest < filled[$2])
        deepest = filled[$2]
}

END {
    # Header row: the group names in the requested order; the first cell is
    # always printed, every later cell is preceded by a tab.
    col = 1
    do {
        if (col == 1)
            printf("%-20s", order[col])
        else
            printf("\t%-20s", order[col])
    } while (++col <= ncols)
    printf("\n")

    # One output row per position, walking the groups in the requested order.
    for (row = 0; row < deepest; ++row) {
        col = 1
        do {
            if (col == 1)
                printf("%-20s", names[order[col]][row])
            else
                printf("\t%-20s", names[order[col]][row])
        } while (++col <= ncols)
        printf("\n")
    }
}
'
./myscr.sh P D C A <in.txt
P D C A
AldeganiG. AbateI. AcquahA. AcatulloM.
AlissonR. AcerbiF. AcquafrescaR.
AdjapongC. AdrianoL.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro
./myscr.sh D A P C <in.txt
D A P C
AbateI. AcatulloM. AldeganiG. AcquahA.
AcerbiF. AcquafrescaR. AlissonR.
AdjapongC. AdrianoL.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro
./myscr.sh A P <in.txt
A P
AcatulloM. AldeganiG.
AcquafrescaR. AlissonR.
AdrianoL.