如何使用linux和awk将一个字段文件拆分为多个字段文件？_Awk

如何使用linux和awk将一个字段文件拆分为多个字段文件？

awk

如何使用linux和awk将一个字段文件拆分为多个字段文件？,awk,Awk,我有以下文件list.txt： AbateI. D AcatulloM. A AcerbiF. D AcquafrescaR. A AcquahA. C AdjapongC. D AdnanA. D AdrianoL. A AjetiA. D AlbiolR. D AldeganiG. P AleesamiH. D AlexSandro D AlissonR. P 我想用awk重新排

我有以下文件

list.txt

：

AbateI.       D
AcatulloM.    A
AcerbiF.      D
AcquafrescaR. A
AcquahA.      C
AdjapongC.    D
AdnanA.       D
AdrianoL.     A
AjetiA.       D
AlbiolR.      D
AldeganiG.    P
AleesamiH.    D
AlexSandro    D
AlissonR.     P

我想用

awk

重新排列文件，按第二列将它们分组，如下所示：

P                    D              C                 A
AldeganiG.         AbateI.         AcquahA.         AcatulloM. 
AlissonR.          AcerbiF.                         AcquafrescaR.
                   AdjapongC.                       AdrianoL. 
                   AdnanA. 
                   AjetiA. 
                   AlbiolR. 
                   AleesamiH.
                   AlexSandro

这就是我所尝试的：

#!/usr/bin/awk -f

# Header row: a leading tab, three left-aligned 20-character columns,
# and an unpadded last column.
BEGIN {
FORMAT="\t%-20s%-20s%-20s%s\n"
printf FORMAT,"P","D","C","A"
}

# File each name ($1) into the array that matches its group letter ($2).
($2=="P")  {a[$1] = $1}
($2=="D")  {b[$1] = $1}
($2=="C")  {c[$1] = $1}
($2=="A")  {d[$1] = $1}

# Only the P names are printed; arrays b, c and d are filled but never output.
END{for(i in a) printf FORMAT, a[i],"","",""}

但我不知道如何循环并打印其他数组。

$cat tst.awk
$ cat tst.awk
# Pivot "name group" records into one tab-separated column per group.
BEGIN { OFS = "\t" }

# Append $1 to the column named by $2 and remember the tallest column.
{
    group = $2
    depth = ++numColRows[group]
    val[depth, group] = $1
    if (depth > numRows) {
        numRows = depth
    }
}

END {
    # Header line: the group names, in the order the for-in loop visits them.
    sep = ""
    for (colName in numColRows) {
        printf "%s%s", sep, colName
        sep = OFS
    }
    print ""

    # Body: row r holds the r-th member of every group; short groups yield "".
    for (r = 1; r <= numRows; r++) {
        sep = ""
        for (colName in numColRows) {
            printf "%s%s", sep, val[r, colName]
            sep = OFS
        }
        print ""
    }
}

$ awk -f tst.awk file | column -s$'\t' -t
A              P           C         D
AcatulloM.     AldeganiG.  AcquahA.  AbateI.
AcquafrescaR.  AlissonR.             AcerbiF.
AdrianoL.                            AdjapongC.
                                     AdnanA.
                                     AjetiA.
                                     AlbiolR.
                                     AleesamiH.
                                     AlexSandro

开始{OFS=“\t”}
{
行数=++numColRows[$2]
val[rowNr，$2]=1美元
numRows=（行编号>行编号？行编号：numRows）
}
结束{
for（numColRows中的colName）{
printf“%s%s”，（c++？OFS:），colName
}
打印“”
您可以使用 paste 和 column，并配合进程替换（process substitution）：
# Each process substitution lists the names ($1) of the lines ending in one group letter;
# paste joins the four lists and column -t aligns the tab-separated result.
$ paste \
      <(awk '/P$/ {print $1}'<input) \
      <(awk '/D$/ {print $1}'<input) \
      <(awk '/C$/ {print $1}'<input) \
      <(awk '/A$/ {print $1}'<input) | column -s $'\t' -t
AldeganiG.  AbateI.     AcquahA.  AcatulloM.
AlissonR.   AcerbiF.              AcquafrescaR.
            AdjapongC.            AdrianoL.
            AdnanA.
            AjetiA.
            AlbiolR.
            AleesamiH.
            AlexSandro

$paste\
这是一种非传统的方法
$ awk -v OFS='\n' '{a[$2]=a[$2] OFS $1;                  # append the name to its group, newline-separated
                    c[$2]++;                              # count the members of each group
                    if(c[$2]>max) max=c[$2]}              # remember the size of the largest group
                END{pr="pr -"length(c)"t";                # pr command: one column per group, -t omits page headers
                    for(k in a) 
                       {print k a[k] | pr;                # group letter, then its members on separate lines
                        for(i=c[k];i<max;i++) 
                           {print ""  | pr}}}'            # pad short groups so every group emits the same number of lines

A                 P                 C                 D
AcatulloM.        AldeganiG.        AcquahA.          AbateI.
AcquafrescaR.     AlissonR.                           AcerbiF.
AdrianoL.                                             AdjapongC.
                                                      AdnanA.
                                                      AjetiA.
                                                      AlbiolR.
                                                      AleesamiH.
                                                      AlexSandro

$awk-vofs='\n'{a[$2]=a[$2]OFS$1；
c[$2]++；
如果（c[$2]>max）max=c[$2]}
结束{pr=“pr-”长度（c）“t”；
对于（a中的k）
{打印ka[k]| pr；
您也可以组合使用 grep、cut、paste 和 expand：
# One column per group: the header letter, then the first space-delimited field
# of every line ending in that letter; expand -t 20 sets tab stops every 20 columns.
paste \
   <(echo "P";grep 'P$' list.txt |cut -d ' ' -f1 ) \
   <(echo "D";grep 'D$' list.txt |cut -d ' ' -f1 ) \
   <(echo "C";grep 'C$' list.txt |cut -d ' ' -f1 ) \
   <(echo "A";grep 'A$' list.txt |cut -d ' ' -f1) | expand -t 20

您可以将 grep 和 cut 替换为 sed，如下所示：
# Same layout as the grep/cut version: for lines ending in the group letter,
# sed strips the trailing blanks plus the letter and prints what is left.
paste \
    <(echo "P";sed -n '/P$/{s/[[:blank:]]*P$//;p}' file ) \
    <(echo "D";sed -n '/D$/{s/[[:blank:]]*D$//;p}' file ) \
    <(echo "C";sed -n '/C$/{s/[[:blank:]]*C$//;p}' file ) \
    <(echo "A";sed -n '/A$/{s/[[:blank:]]*A$//;p}' file ) | expand -t 20

你也可以这样做
# Each awk prints its group letter as a header, then the names ($1) of the lines
# ending in that letter. Every line except the last must end in a backslash so
# that all four process substitutions are arguments of the same paste command.
paste \
     <(awk 'BEGIN{print "P"}/P$/{print $1}' file ) \
     <(awk 'BEGIN{print "D"}/D$/{print $1}' file ) \
     <(awk 'BEGIN{print "C"}/C$/{print $1}' file ) \
     <(awk 'BEGIN{print "A"}/A$/{print $1}' file ) | expand -t 20

在GNU awk中：
$ cat > list.awk
# Print the members of each group ($2) side by side, one group per column.
# Requires GNU awk: a[group][index] is an array of arrays.
{
    n=(n<++b[$2]?b[$2]:n)                # n is the max count of words in one group
    a[$2][b[$2]]=$1                      # put words to two dimensional array
}
END {
    for(i=1;i<=n;i++) {                  # from 1 to n
        for(j in a)                      # for all groups
            printf "%-14s%s",a[j][i],OFS # flag before width: left-justify in 14 columns
        printf "%s",ORS                  # ORS in the end
    }
}
$ awk -f list.awk list.txt
AcatulloM.     AldeganiG.     AcquahA.       AbateI.        
AcquafrescaR.  AlissonR.                     AcerbiF.       
AdrianoL.                                    AdjapongC.     
                                             AdnanA.        
                                             AjetiA.        
                                             AlbiolR.       
                                             AleesamiH.     
                                             AlexSandro     

$cat>list.awk
{
使用 GNU awk 4.0 二维数组的解决方案——允许以任意顺序输出任意数量的组：
# Column order is taken from the script arguments, e.g. "P D C A".
order="$*"
awk -v orderstr="$order" '
# Turn the space-separated group list into order[1..ngroups].
BEGIN { ngroups = split(orderstr, order) }

# grpnames[group][k] is the (k+1)-th name read for that group.
{
  grp = $2
  grpnames[grp][grpi[grp]++] = $1
  # keep the size of the biggest group
  if (maxgrpsz < grpi[grp])
    maxgrpsz = grpi[grp]
}

END {
  # header: group names in the requested order
  printf("%-20s", order[1])
  for (col = 2; col <= ngroups; col++)
    printf("\t%-20s", order[col])
  printf("\n")

  # body: one line per index, one cell per requested group
  for (row = 0; row < maxgrpsz; row++) {
    printf("%-20s", grpnames[order[1]][row])
    for (col = 2; col <= ngroups; col++)
      printf("\t%-20s", grpnames[order[col]][row])
    printf("\n")
  }
}
'

#组的输出顺序
命令=$*
awk-vorderstr=“$order””
开始{split（orderstr，order）}
{
#GRP名称[组][索引]=名称
GRP名称[$2][grpi[$2]+]=$1
#跟踪最大组大小
如果（grpi[$2]>maxgrpsz）
maxgrpsz=grpi[$2]
}
结束{
#按顺序打印组标题
printf（“%-20s”，订单[1]）
创建 4 个数组，并将 $1 添加到与 $2 对应的数组中。然后在最后，每行从每个数组中各打印一个元素，并持续循环，直到最长的数组中的条目用完为止。我们不会替你编写代码，这里不是免费的编码服务。所以请利用这个提示尝试自己实现，如果无法让它正常工作，再回来提问。
@Barmar 如果我觉得某个问题有趣，我就会把它写出来，就像我在这个例子中所做的那样。
P                   D                   C                   A
AldeganiG.          AbateI.             AcquahA.            AcatulloM.
AlissonR.           AcerbiF.                                AcquafrescaR.
                    AdjapongC.                              AdrianoL.
                    AdnanA.                                 
                    AjetiA.                                 
                    AlbiolR.                                
                    AleesamiH.                              
                    AlexSandro                              

$ cat > list.awk
# Print the members of each group ($2) side by side, one group per column.
# Requires GNU awk: a[group][index] is an array of arrays.
{
    n=(n<++b[$2]?b[$2]:n)                # n is the max count of words in one group
    a[$2][b[$2]]=$1                      # put words to two dimensional array
}
END {
    for(i=1;i<=n;i++) {                  # from 1 to n
        for(j in a)                      # for all groups
            printf "%-14s%s",a[j][i],OFS # flag before width: left-justify in 14 columns
        printf "%s",ORS                  # ORS in the end
    }
}
$ awk -f list.awk list.txt
AcatulloM.     AldeganiG.     AcquahA.       AbateI.        
AcquafrescaR.  AlissonR.                     AcerbiF.       
AdrianoL.                                    AdjapongC.     
                                             AdnanA.        
                                             AjetiA.        
                                             AlbiolR.       
                                             AleesamiH.     
                                             AlexSandro     

# Column order is taken from the script arguments, e.g. "P D C A".
order="$*"
awk -v orderstr="$order" '
# Turn the space-separated group list into order[1..ngroups].
BEGIN { ngroups = split(orderstr, order) }

# grpnames[group][k] is the (k+1)-th name read for that group.
{
  grp = $2
  grpnames[grp][grpi[grp]++] = $1
  # keep the size of the biggest group
  if (maxgrpsz < grpi[grp])
    maxgrpsz = grpi[grp]
}

END {
  # header: group names in the requested order
  printf("%-20s", order[1])
  for (col = 2; col <= ngroups; col++)
    printf("\t%-20s", order[col])
  printf("\n")

  # body: one line per index, one cell per requested group
  for (row = 0; row < maxgrpsz; row++) {
    printf("%-20s", grpnames[order[1]][row])
    for (col = 2; col <= ngroups; col++)
      printf("\t%-20s", grpnames[order[col]][row])
    printf("\n")
  }
}
'

./myscr.sh P D C A <in.txt
P                       D                       C                       A
AldeganiG.              AbateI.                 AcquahA.                AcatulloM.
AlissonR.               AcerbiF.                                        AcquafrescaR.
                        AdjapongC.                                      AdrianoL.
                        AdnanA.
                        AjetiA.
                        AlbiolR.
                        AleesamiH.
                        AlexSandro
./myscr.sh D A P C <in.txt
D                       A                       P                       C
AbateI.                 AcatulloM.              AldeganiG.              AcquahA.
AcerbiF.                AcquafrescaR.           AlissonR.
AdjapongC.              AdrianoL.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro

./myscr.sh A P <in.txt
A                       P
AcatulloM.              AldeganiG.
AcquafrescaR.           AlissonR.
AdrianoL.