如何在awk中将数组转换为字符串?

如何在awk中将数组转换为字符串?,awk,gawk,Awk,Gawk,这个问题的目的是为常见问题提供一个健壮、灵活的解决方案 处理文本时的一种常见情况是需要将输入拆分为字段,操作字段,然后重新组合以进行打印。例如,给定此输入: $ cat file A 7 C 3 如果我们想确保每个单个数字都是.2f格式的,并且我们想保留字段前/后/之间的间距,那么我们可能会写入(使用GNU awk作为第四个要拆分的参数()): 最后一个循环是将数组展平为字符串进行打印,这对于许多awk脚本来说是常见的。有时分隔符存储在如上所述的不同数组中,有时

这个问题的目的是为常见问题提供一个健壮、灵活的解决方案

处理文本时的一种常见情况是需要将输入拆分为字段,操作字段,然后重新组合以进行打印。例如,给定此输入:

$ cat file
    A      7  C       3
如果我们想确保每个数字都采用%.2f格式,并且想保留字段之前/之后/之间的间距,那么我们可能会这样写(需要使用GNU awk,因为用到了split()的第四个参数):

最后一个循环是将数组展平为字符串进行打印,这对于许多awk脚本来说是常见的。有时分隔符存储在如上所述的不同数组中,有时是特定字符,有时不需要分隔符。此外,我们希望flds[]打印的顺序可以基于如上所述的数字升序索引,也可以是降序索引(例如,模拟UNIX工具“rev”),也可以基于flds[]值而不是它们的索引


那么-是否有一个awk实用程序函数可以使用提供的分隔符按指定顺序将数组转换为字符串?

我们一直在与GNU awk开发人员讨论提供问题中所述的函数,但在该函数提供之前,下面的用户空间函数(需要GNU awk,因为用到了sorted_in)可以完成这项工作。它会注意不向flds、seps或PROCINFO数组中添加调用之前不存在的元素。它可以按如下方式使用:

$ cat tst.awk
# Runs for every input record: split the record into fields while keeping the
# exact separator text, reformat every field that contains a digit to two
# decimal places, then print several arr2str() flattenings of the result.
{
    # GNU awk: the 4th split() argument receives the separator strings.
    numFlds = split($0, flds, FS, seps)

    for (fldNr = 1; fldNr <= numFlds; fldNr++) {
        if (flds[fldNr] ~ /[0-9]/)
            flds[fldNr] = sprintf("%.2f", flds[fldNr])
    }

    print "arr2str() usage examples:"
    print "1)", arr2str(flds, OFS)                    # joined with OFS
    print "2)", arr2str(flds, seps)                   # original spacing kept
    print "3)", arr2str(flds, seps, "@ind_num_desc")  # fields in reverse order
    print "4)", arr2str(flds, seps, "@val_str_asc")   # fields ordered by value
    print "5)", arr2str(flds, ",")                    # joined with a comma
}

$ awk -f arr2str.awk -f tst.awk file
arr2str() usage examples:
1) A 7.00 C 3.00
2)     A      7.00  C       3.00
3) 3.00       C  7.00      A
4)     3.007.00  A      C
5) A,7.00,C,3.00

$cat arr2str.awk
#用法:
#    arr2str(flds[,seps,[sortOrder]])
#
#flds:
#此函数用于将必需的“flds”数组参数转换为字符串。
#
#seps:
#如果“seps”不存在,则“flds”值将简单地连接起来
#在返回的字符串中。
#
#如果“seps”存在并且是字符串,则将插入该“seps”值
#在返回字符串中的每个“flds”值之间。
#
#如果“seps”存在并且是一个数组,则每个与“flds”索引相同的“seps”值
#将被插入到返回的字符串中,位于具有相同索引的对应“flds”值之前或之后
#(取决于排序顺序)。
#-所有索引不在“flds”中的“seps”值将被插入到返回字符串中
#所有“flds”和其他“seps”值之前或之后。
#这确保了例如由先前的split(str,flds,re,seps)得到的、索引从零开始的
#“seps”数组,其第0个条目也会被包含在内。
#
#sortOrder:
#如果存在“sortOrder”,则按它指定的顺序访问“flds”的值;
#否则,如果已设置PROCINFO["sorted_in"],则使用该设置;
#否则使用升序数字索引。
#-如果排序顺序为降序(以“desc”结尾)且“seps”为数组,则
#在每个“flds”值之前插入“seps”值,否则在它们之后插入。
#
#例如:
#    $ cat tst.awk
#    BEGIN {
#        orig = ",a+b:c-d="
#        split(orig,flds,/[^[:alpha:]]/,seps)
#
#        printf "orig: <%s>\n", orig
#        printf "asc:  <%s>\n", arr2str(flds,seps)
#        printf "desc: <%s>\n", arr2str(flds,seps,"@ind_num_desc")
#    }
#    $ awk -f arr2str.awk -f tst.awk
#    orig: <,a+b:c-d=>
#    asc:  <,a+b:c-d=>
#    desc: <=d-c:b+a,>
# arr2str(flds[, seps[, sortOrder]]) - flatten the array "flds" into a string.
#
#   flds       array whose values are concatenated.
#   seps       scalar: inserted between consecutive "flds" values;
#              array:  seps[i] is attached to flds[i] (before it when the sort
#                      order ends in "desc", after it otherwise); entries whose
#                      index is not in "flds" go after all fields for a
#                      descending order and before them otherwise.
#   sortOrder  PROCINFO["sorted_in"] value used for the traversal; defaults to
#              the current PROCINFO["sorted_in"] if set, else "@ind_num_asc".
#
# Returns the resulting string. PROCINFO["sorted_in"] is left exactly as it was
# found (restored if it existed, deleted if it did not).
function arr2str(flds, seps, sortOrder,      sortedInPresent, sortedInValue, currIdx, idxCnt, outStr) {

    # Save the caller's traversal order so it can be restored before returning.
    if ( "sorted_in" in PROCINFO ) {
        sortedInPresent = 1
        sortedInValue = PROCINFO["sorted_in"]
    }

    if ( sortOrder == "" ) {
        sortOrder = (sortedInPresent ? sortedInValue : "@ind_num_asc")
    }
    PROCINFO["sorted_in"] = sortOrder

    if ( isarray(seps) ) {
        # An array of separators.
        if ( sortOrder ~ /desc$/ ) {
            # Descending: each separator precedes the field with the same index.
            for (currIdx in flds) {
                outStr = outStr (currIdx in seps ? seps[currIdx] : "") flds[currIdx]
            }
        }

        # Separators with no matching field (e.g. seps[0] from split()).
        for (currIdx in seps) {
            if ( !(currIdx in flds) ) {
                outStr = outStr seps[currIdx]
            }
        }

        if ( sortOrder !~ /desc$/ ) {
            # Ascending: each separator follows the field with the same index.
            for (currIdx in flds) {
                outStr = outStr flds[currIdx] (currIdx in seps ? seps[currIdx] : "")
            }
        }
    }
    else {
        # Fixed scalar separator.
        # We would use this if we could distinguish an unset variable arg from a missing arg:
        #    seps = (magic_argument_present_test == true ? seps : OFS)
        # but we cant so just use whatever value was passed in.
        for (currIdx in flds) {
            outStr = outStr (idxCnt++ ? seps : "") flds[currIdx]
        }
    }

    # Put PROCINFO["sorted_in"] back the way the caller had it.
    if ( sortedInPresent ) {
        PROCINFO["sorted_in"] = sortedInValue
    }
    else {
        delete PROCINFO["sorted_in"]
    }

    return outStr
}
$ cat tst.awk
# Runs for every input record: split the record into fields while keeping the
# exact separator text, reformat every field that contains a digit to two
# decimal places, then print several arr2str() flattenings of the result.
{
    # GNU awk: the 4th split() argument receives the separator strings.
    numFlds = split($0, flds, FS, seps)

    for (fldNr = 1; fldNr <= numFlds; fldNr++) {
        if (flds[fldNr] ~ /[0-9]/)
            flds[fldNr] = sprintf("%.2f", flds[fldNr])
    }

    print "arr2str() usage examples:"
    print "1)", arr2str(flds, OFS)                    # joined with OFS
    print "2)", arr2str(flds, seps)                   # original spacing kept
    print "3)", arr2str(flds, seps, "@ind_num_desc")  # fields in reverse order
    print "4)", arr2str(flds, seps, "@val_str_asc")   # fields ordered by value
    print "5)", arr2str(flds, ",")                    # joined with a comma
}

$ awk -f arr2str.awk -f tst.awk file
arr2str() usage examples:
1) A 7.00 C 3.00
2)     A      7.00  C       3.00
3) 3.00       C  7.00      A
4)     3.007.00  A      C
5) A,7.00,C,3.00
$ cat arr2str.awk
# Usage:
#    arr2str(flds[,seps,[sortOrder]])
#
# flds:
#    This function converts the mandatory "flds" array argument into a string.
#
# seps:
#    If "seps" is not present then the "flds" values will simply be concatenated
#    in the returned string.
#
#    If "seps" is present and is a string then that "seps" value will be inserted
#    between each "flds" value in the returned string.
#
#    If "seps" is present and is an array then each "seps" value with the same index
#    as a "flds" index will be inserted in the returned string before or after
#    (sort order dependent) the corresponding "flds" value with that same index.
#    - All "seps" values that do not have an index in "flds" will be inserted in
#      the returned string before or after all of the "flds" and other "seps" values.
#      This ensures that a "seps" array that, for example, starts at zero as a result
#      of a previous split(str,flds,re,seps) will have its zeroth entry included.
#
# sortOrder:
#    If "sortOrder" is present then it will be used as the order the "flds" values
#    are visited in, otherwise it uses PROCINFO["sorted_in"] if set, otherwise it
#    uses ascending numeric indices.
#    - If the sort order is descending (ends in "desc") and "seps" is an array then
#      the "seps" values are inserted before each "flds" value, otherwise after them.
#
# Example:
#    $ cat tst.awk
#    BEGIN {
#        orig = ",a+b:c-d="
#        split(orig,flds,/[^[:alpha:]]/,seps)
#
#        printf "orig: <%s>\n", orig
#        printf "asc:  <%s>\n", arr2str(flds,seps)
#        printf "desc: <%s>\n", arr2str(flds,seps,"@ind_num_desc")
#    }
#    $ awk -f arr2str.awk -f tst.awk
#    orig: <,a+b:c-d=>
#    asc:  <,a+b:c-d=>
#    desc: <=d-c:b+a,>

# Join the values of "flds" into a single string and return it.
#
#   flds       array to flatten.
#   seps       scalar separator placed between values, or an array of
#              per-index separators.
#   sortOrder  traversal order (a PROCINFO["sorted_in"] value); when empty the
#              caller's PROCINFO["sorted_in"] is used if set, otherwise
#              "@ind_num_asc".
#
# PROCINFO["sorted_in"] is overwritten for the duration of the call and then
# put back (or removed again if it did not exist on entry).
function arr2str(flds, seps, sortOrder,      hadSortedIn, savedSortedIn, idx, isDesc, numDone, result) {

    # Remember the traversal order in effect on entry.
    hadSortedIn = ("sorted_in" in PROCINFO)
    if ( hadSortedIn ) {
        savedSortedIn = PROCINFO["sorted_in"]
    }

    # Resolve the effective order: explicit argument > caller's setting > default.
    if ( sortOrder == "" ) {
        if ( hadSortedIn ) {
            sortOrder = savedSortedIn
        }
        else {
            sortOrder = "@ind_num_asc"
        }
    }
    PROCINFO["sorted_in"] = sortOrder
    isDesc = (sortOrder ~ /desc$/)

    if ( !isarray(seps) ) {
        # One fixed separator: emit it before every value except the first.
        # A missing "seps" argument cannot be told apart from an unset
        # variable, so whatever was passed in is used as-is (no OFS fallback).
        for (idx in flds) {
            if ( numDone++ ) {
                result = result seps
            }
            result = result flds[idx]
        }
    }
    else {
        # Per-index separators.
        if ( isDesc ) {
            # Descending: separator first, then the value sharing its index.
            for (idx in flds) {
                if ( idx in seps ) {
                    result = result seps[idx]
                }
                result = result flds[idx]
            }
        }

        # Separators that have no value with the same index; they land after
        # the values when descending and before them otherwise.
        for (idx in seps) {
            if ( idx in flds ) {
                continue
            }
            result = result seps[idx]
        }

        if ( !isDesc ) {
            # Not descending: value first, then the separator sharing its index.
            for (idx in flds) {
                result = result flds[idx]
                if ( idx in seps ) {
                    result = result seps[idx]
                }
            }
        }
    }

    # Leave PROCINFO exactly as it was found.
    if ( hadSortedIn ) {
        PROCINFO["sorted_in"] = savedSortedIn
    }
    else {
        delete PROCINFO["sorted_in"]
    }

    return result
}