Shell 需要了解AWK脚本逻辑方面的帮助吗

Shell 需要了解AWK脚本逻辑方面的帮助吗,shell,awk,Shell,Awk,我需要帮助理解下面的代码逻辑写在AWK的一个同事在过去。 任何具有良好AWK知识的人都可以帮助我理解此代码 谢谢, 桑德普 下面是gawk-o打印出来的代码,以便在没有gawk的情况下使其可读,这样您就有希望自己找到它: { if (FILENAME == "Parameter.txt") { a[$1] = NR aa[NR] = $1 d = NR if (NR == 1) { e = $1

我需要帮助来理解下面这段代码的逻辑,它是一位同事以前用 AWK 写的。希望熟悉 AWK 的朋友能帮我理解这段代码。

谢谢, 桑德普


下面是用 gawk -o(美化打印)输出的代码,这样即使手头没有 gawk 也能读懂,希望您能借此自行弄清它的逻辑:

# Single main rule: every input line is dispatched on FILENAME. Three lookup
# files (Parameter.txt, Traffic.csv, Filter.txt) are loaded into arrays; a
# line from any other file is treated as a record of key=value fields and
# may produce one comma-separated output line.
{
    if (FILENAME == "Parameter.txt") {
        # a: parameter name -> line number; aa: line number -> parameter name
        # (aa is never read in this rule). NR, not FNR, is used, so these are
        # line numbers only if Parameter.txt is the first file read.
        a[$1] = NR
        aa[NR] = $1
        # d ends up holding the number of the last parameter line.
        d = NR
        # e accumulates the comma-separated list of parameter names (header).
        if (NR == 1) {
            e = $1
        } else {
            e = e "," $1
        }
    } else if (FILENAME == "Traffic.csv") {
        # Per Traffic.csv line: h and x each map (line, id) to a value, and k
        # maps the full (line, $2, $3, $4, $5) tuple to $1. Below, the ids are
        # compared with parameter line numbers and $1 is what gets printed in
        # the TRAFFIC_CASE column.
        h[FNR "," $2] = $3
        x[FNR "," $4] = $5
        k[FNR "," $2 "," $3 "," $4 "," $5] = $1
        # y ends up holding the number of Traffic.csv lines.
        y = FNR
    } else if (FILENAME == "Filter.txt") {
        # Each Filter.txt line is read as consecutive triples
        # (id, operator, value). Operator "ne" stores the value in FNE
        # (not-equal test); any other operator stores it in FE (equal test).
        for (i = 1; i <= NF; i = i + 3) {
            if ($(i + 1) != "ne") {
                FE[FNR "," $i] = $(i + 2)
            } else {
                FNE[FNR "," $i] = $(i + 2)
            }
        }
        # FILTER: number of filter lines; FC[line]: number of triples on it.
        FILTER = FNR
        FC[FNR] = (NF / 3)
    } else {
        # Any other file: a data record. Print the header once, before the
        # first data record (g is the "header already printed" flag).
        if (g == "") {
            print e ",TRAFFIC_CASE"
            g = 1
        }
        # Reset per-record state: c holds the record's value for each
        # parameter index, l/z the last matched traffic pairs, m the matched
        # traffic case, FI the per-filter counters.
        # NOTE(review): f (the output line) is NOT reset here.
        for (i = 1; i <= d; i++) {
            c[i] = ""
        }
        l = ""
        m = ""
        z = ""
        FINAL = ""
        for (j = 1; j <= FILTER; j++) {
            FI[j] = ""
        }
        for (i = 1; i <= NF; i++) {
            # Each field is split on "=" into b[1] (key) and b[2] (value).
            split($i, b, "=")
            # Known parameter: remember its value under the parameter index.
            if (a[b[1]] != "") {
                c[a[b[1]]] = b[2]
            }
            # Scan every traffic line; this runs for unknown keys as well.
            for (j = 1; j <= y; j++) {
                # l / z become "index,value" when this field equals the
                # non-empty first / second pair stored for traffic line j.
                if (h[j "," a[b[1]]] == b[2] && h[j "," a[b[1]]] != "") {
                    l = a[b[1]] "," b[2]
                }
                if (x[j "," a[b[1]]] == b[2] && x[j "," a[b[1]]] != "") {
                    z = a[b[1]] "," b[2]
                }
                # When line j holds exactly the pairs (l, z), take its $1 as
                # the traffic case; a later match overwrites an earlier one.
                if (k[j "," l "," z] != "") {
                    m = k[j "," l "," z]
                }
            }
        }
        # NOTE(review): MSISDN is a bare identifier, not the string "MSISDN",
        # and nothing in this rule assigns it, so a[MSISDN] looks up the empty
        # key. Possibly "MSISDN" was intended - confirm with the author.
        if (substr(FILENAME, 20, 3) == "SVC") {
            # File name has "SVC" at characters 20-22: only mark the key in Q.
            Q[c[a[MSISDN]]] = 1
        } else if (Q[c[a[MSISDN]]] != 1) {
            for (i = 1; i <= d; i++) {
                # Build the output line f from the raw values (before the
                # "B" substitution below).
                if (i == 1) {
                    f = c[1]
                } else {
                    f = f "," c[i]
                }
                # For filter comparison only, an empty value is treated as "B".
                if (c[i] == "") {
                    c[i] = "B"
                }
                for (j = 1; j <= FILTER; j++) {
                    # Equality condition of filter j on parameter i: count up
                    # when it holds (and the counter is unset or <= FC[j]),
                    # otherwise count down.
                    if (FE[j "," i] != "") {
                        if (FE[j "," i] == c[i] && (FI[j] == "" || FI[j] <= FC[j])) {
                            FI[j] = FI[j] + 1
                        } else {
                            FI[j] = FI[j] - 1
                        }
                    }
                    # Same scoring for the not-equal condition.
                    if (FNE[j "," i] != "") {
                        if (FNE[j "," i] != c[i] && (FI[j] == "" || FI[j] <= FC[j])) {
                            FI[j] = FI[j] + 1
                        } else {
                            FI[j] = FI[j] - 1
                        }
                    }
                }
            }
        }
        # FINAL is set when any filter's counter equals its number of
        # conditions.
        for (j = 1; j <= FILTER; j++) {
            if (FI[j] == FC[j]) {
                FINAL = 1
            }
        }
        # Print unless FINAL was set.
        # NOTE(review): f is only rebuilt in the else-branch above; when that
        # branch is skipped this prints the f left over from an earlier
        # record - confirm whether that is intended.
        if (FINAL != 1) {
            print f "," m
        }
    }
}

现在代码的格式已经清晰了。要理解一个脚本,第二步是把所有变量重命名为能体现其真实含义的名字,例如 a[$1]=NR -> param2lineNr[$1]=NR,aa[NR]=$1 -> lineNr2param[NR]=$1,或类似的名字,具体取决于 Parameter.txt 文件中 $1 的含义。您还应该引入一些命名良好的中间变量,这样就不必费力去猜 Q[c[a[MSISDN]]] 这类结构到底是什么意思。顺便说一句,这里的 MSISDN 是一个未初始化的变量,所以这段代码看起来很像一个 bug:作者的本意可能是 Q[c[a["MSISDN"]]],但我并不确定。一旦对所有变量都完成了这样的重命名,整个脚本的含义就应该很清楚了。祝您玩得开心。

我们在文件中有参数、流量案例和过滤器……这个脚本已经足够大,最好把它单独放在一个文件里。

我的过程:

我重新格式化了代码并把结果放进 vim。我把第一层 if/else 改写成按模式匹配的独立规则,从而消除了一层嵌套。我注意到数组下标里拼接了 "," ……这可以用 SUBSEP 来实现;进一步重构之后也许可以去掉 SUBSEP 的定义……不过这无关紧要。凡是看到 if (foo[bar] != "") 这类检查的地方,我都换成了更直观的 if (bar in foo)。我开始用 s/\<x\>/something_x/g 这样的命令来重命名变量,因为可以用 \< 和 \> 匹配单词边界……我用 split("", arr) 代替了逐个清空数组的循环。我看到 b[1] 是键、b[2] 是值……最好创建中间变量,让流量匹配部分更易读。逐个检查变量后,开始暴露出未使用的变量,例如 aa。我把 foo = foo + 1 改成了 foo++。我在 if 语句中适当地反转了逻辑条件,并去掉了多余的 {}。我围绕过滤部分做了一些重构,以减少数组查找的次数。以下是我的猜测:

#! /usr/bin/awk -f
#
# Turns key=value records into CSV lines, tagged with a traffic case and
# screened by a set of filters.
#
# Input files are recognised by FILENAME:
#   Parameter.txt  one parameter name per line ($1). Must be the first file,
#                  because a parameter's index is taken from NR.
#   Traffic.csv    $1 = traffic case, ($2,$3) and ($4,$5) = two
#                  (parameter index, value) pairs identifying that case.
#   Filter.txt     one filter per line, made of triples
#                  (parameter index, operator, value); operator "ne" means
#                  "not equal", anything else means "equal".
#   anything else  data records whose fields have the form key=value.
# The three lookup files must be read before any data file.
# All files are split on "|" (set in BEGIN).

BEGIN {
    FS = "|"
    SUBSEP = ","          # makes arr[a, b] use the key a "," b
    header_printed = 0
}

# Score one condition of filter f: count up when the condition matched and
# the counter is still unset or not above the filter's condition count,
# otherwise count down. Works on the global arrays filtered[] and filters[].
function tally(f, matched) {
    if (matched && (!(f in filtered) || filtered[f] <= filters[f]))
        ++filtered[f]
    else
        --filtered[f]
}

FILENAME == "Parameter.txt" {
    param[$1] = NR        # parameter name -> index
    param_sz = NR
    param_list = (FNR == 1) ? $1 : (param_list "," $1)
    next
}

FILENAME == "Traffic.csv" {
    traffic_h[FNR, $2] = $3
    traffic_x[FNR, $4] = $5
    traffic_key[FNR, $2, $3, $4, $5] = $1
    traffic_sz = FNR
    next
}

FILENAME == "Filter.txt" {
    for (i = 1; i <= NF; i += 3) {
        if ($(i + 1) == "ne")
            filter_ne[FNR, $i] = $(i + 2)
        else
            filter_eq[FNR, $i] = $(i + 2)
    }
    filter_sz = FNR
    filters[FNR] = (NF / 3)   # number of conditions in this filter
    next
}

# First data record: emit the CSV header once.
!header_printed {
    print param_list ",TRAFFIC_CASE"
    header_printed = 1
}

# Every data record.
{
    # "case" is a reserved word in gawk, so the per-record value table is
    # called rec instead.
    split("", rec)
    split("", filtered)
    l = ""
    z = ""
    traffic_match = ""
    final_filter = 0

    for (i = 1; i <= NF; i++) {
        split($i, b, "=")
        key = b[1]
        val = b[2]
        this_param = (key in param) ? param[key] : ""
        if (this_param != "")
            rec[this_param] = val

        # The traffic table is scanned for every field, known parameter or
        # not, so that a pair found late in one scan can still complete a
        # match on an earlier traffic line during a later scan.
        for (t = 1; t <= traffic_sz; t++) {
            slot = t SUBSEP this_param
            if ((slot in traffic_h) && traffic_h[slot] != "" && traffic_h[slot] == val)
                l = this_param SUBSEP val
            if ((slot in traffic_x) && traffic_x[slot] != "" && traffic_x[slot] == val)
                z = this_param SUBSEP val
            # l and z already carry "index,value", so this key has the same
            # shape as the one built in the Traffic.csv rule.
            row = t SUBSEP l SUBSEP z
            if ((row in traffic_key) && traffic_key[row] != "")
                traffic_match = traffic_key[row]
        }
    }

    # Value of the MSISDN parameter in this record, looked up without
    # creating array elements as a side effect.
    # NOTE(review): the script this was derived from used the bare,
    # unassigned variable MSISDN here; the string "MSISDN" is assumed to be
    # what was meant - confirm.
    msisdn = ""
    if (("MSISDN" in param) && (param["MSISDN"] in rec))
        msisdn = rec[param["MSISDN"]]

    if (substr(FILENAME, 20, 3) == "SVC") {
        # Records from an "SVC" file only mark their MSISDN as seen.
        svc_seen[msisdn] = 1
    } else if (!(msisdn in svc_seen)) {
        for (p = 1; p <= param_sz; p++) {
            val = (p in rec) ? rec[p] : ""
            case_list = (p == 1) ? val : (case_list "," val)
            # For filtering only, a missing or empty value counts as "B".
            if (val == "")
                val = "B"
            for (f = 1; f <= filter_sz; f++) {
                cell = f SUBSEP p
                # The two checks are independent: a filter may carry both
                # an "equal" and a "not equal" condition on one parameter.
                if ((cell in filter_eq) && filter_eq[cell] != "")
                    tally(f, filter_eq[cell] == val)
                if ((cell in filter_ne) && filter_ne[cell] != "")
                    tally(f, filter_ne[cell] != val)
            }
        }
    }

    # A record is dropped when any filter had all of its conditions counted.
    for (f = 1; f <= filter_sz; f++)
        if ((f in filtered) && filtered[f] == filters[f])
            final_filter = 1

    # NOTE(review): case_list is rebuilt only in the branch above; for SVC
    # records and already-seen MSISDNs the value left from an earlier record
    # is printed, as in the script this was derived from - confirm whether
    # such records should be skipped instead.
    if (!final_filter)
        print case_list "," traffic_match
}
所以,我很有可能出了什么问题,因为我没有用于测试用例的I/O,但您可以重复我的步骤来了解实际情况。我会更仔细地检查过滤片。。。我会像Ed建议的那样检查MSISDN,看看你对它是否感兴趣


另一件看起来可疑的事情是,您表面上试图解析一个以|作为分隔符的.csv文件。。。这可能行不通。

在 awk 脚本上运行 gawk -o(参见手册页),把它美化打印出来。一旦格式整齐了,您就能更好地理解它。 谢谢……但是您是如何对这个文件执行 gawk -o 的?我尝试了同样的做法,得到了下面的输出:[root@meylvvmnmt01 /tmp/script] gawk -o Pre_CDR_1.sh 用法:gawk [POSIX 或 GNU 风格选项] -f progfile [--] file ... 用法:gawk [POSIX 或 GNU 风格选项] [--] '程序' file ... POSIX 选项: GNU 长选项:(标准) 查一下手册页和/或运行 gawk --help,并不难理解。另外,我猜 Pre_CDR_1.sh 是您的 shell 脚本,而不是其中包含的 awk 脚本。您应该对 awk 脚本运行 gawk -o,而不是对 shell 脚本运行。
#! /usr/bin/awk -f
#
# Turns key=value records into CSV lines, tagged with a traffic case and
# screened by a set of filters.
#
# Input files are recognised by FILENAME:
#   Parameter.txt  one parameter name per line ($1). Must be the first file,
#                  because a parameter's index is taken from NR.
#   Traffic.csv    $1 = traffic case, ($2,$3) and ($4,$5) = two
#                  (parameter index, value) pairs identifying that case.
#   Filter.txt     one filter per line, made of triples
#                  (parameter index, operator, value); operator "ne" means
#                  "not equal", anything else means "equal".
#   anything else  data records whose fields have the form key=value.
# The three lookup files must be read before any data file.
# All files are split on "|" (set in BEGIN).

BEGIN {
    FS = "|"
    SUBSEP = ","          # makes arr[a, b] use the key a "," b
    header_printed = 0
}

# Score one condition of filter f: count up when the condition matched and
# the counter is still unset or not above the filter's condition count,
# otherwise count down. Works on the global arrays filtered[] and filters[].
function tally(f, matched) {
    if (matched && (!(f in filtered) || filtered[f] <= filters[f]))
        ++filtered[f]
    else
        --filtered[f]
}

FILENAME == "Parameter.txt" {
    param[$1] = NR        # parameter name -> index
    param_sz = NR
    param_list = (FNR == 1) ? $1 : (param_list "," $1)
    next
}

FILENAME == "Traffic.csv" {
    traffic_h[FNR, $2] = $3
    traffic_x[FNR, $4] = $5
    traffic_key[FNR, $2, $3, $4, $5] = $1
    traffic_sz = FNR
    next
}

FILENAME == "Filter.txt" {
    for (i = 1; i <= NF; i += 3) {
        if ($(i + 1) == "ne")
            filter_ne[FNR, $i] = $(i + 2)
        else
            filter_eq[FNR, $i] = $(i + 2)
    }
    filter_sz = FNR
    filters[FNR] = (NF / 3)   # number of conditions in this filter
    next
}

# First data record: emit the CSV header once.
!header_printed {
    print param_list ",TRAFFIC_CASE"
    header_printed = 1
}

# Every data record.
{
    # "case" is a reserved word in gawk, so the per-record value table is
    # called rec instead.
    split("", rec)
    split("", filtered)
    l = ""
    z = ""
    traffic_match = ""
    final_filter = 0

    for (i = 1; i <= NF; i++) {
        split($i, b, "=")
        key = b[1]
        val = b[2]
        this_param = (key in param) ? param[key] : ""
        if (this_param != "")
            rec[this_param] = val

        # The traffic table is scanned for every field, known parameter or
        # not, so that a pair found late in one scan can still complete a
        # match on an earlier traffic line during a later scan.
        for (t = 1; t <= traffic_sz; t++) {
            slot = t SUBSEP this_param
            if ((slot in traffic_h) && traffic_h[slot] != "" && traffic_h[slot] == val)
                l = this_param SUBSEP val
            if ((slot in traffic_x) && traffic_x[slot] != "" && traffic_x[slot] == val)
                z = this_param SUBSEP val
            # l and z already carry "index,value", so this key has the same
            # shape as the one built in the Traffic.csv rule.
            row = t SUBSEP l SUBSEP z
            if ((row in traffic_key) && traffic_key[row] != "")
                traffic_match = traffic_key[row]
        }
    }

    # Value of the MSISDN parameter in this record, looked up without
    # creating array elements as a side effect.
    # NOTE(review): the script this was derived from used the bare,
    # unassigned variable MSISDN here; the string "MSISDN" is assumed to be
    # what was meant - confirm.
    msisdn = ""
    if (("MSISDN" in param) && (param["MSISDN"] in rec))
        msisdn = rec[param["MSISDN"]]

    if (substr(FILENAME, 20, 3) == "SVC") {
        # Records from an "SVC" file only mark their MSISDN as seen.
        svc_seen[msisdn] = 1
    } else if (!(msisdn in svc_seen)) {
        for (p = 1; p <= param_sz; p++) {
            val = (p in rec) ? rec[p] : ""
            case_list = (p == 1) ? val : (case_list "," val)
            # For filtering only, a missing or empty value counts as "B".
            if (val == "")
                val = "B"
            for (f = 1; f <= filter_sz; f++) {
                cell = f SUBSEP p
                # The two checks are independent: a filter may carry both
                # an "equal" and a "not equal" condition on one parameter.
                if ((cell in filter_eq) && filter_eq[cell] != "")
                    tally(f, filter_eq[cell] == val)
                if ((cell in filter_ne) && filter_ne[cell] != "")
                    tally(f, filter_ne[cell] != val)
            }
        }
    }

    # A record is dropped when any filter had all of its conditions counted.
    for (f = 1; f <= filter_sz; f++)
        if ((f in filtered) && filtered[f] == filters[f])
            final_filter = 1

    # NOTE(review): case_list is rebuilt only in the branch above; for SVC
    # records and already-seen MSISDNs the value left from an earlier record
    # is printed, as in the script this was derived from - confirm whether
    # such records should be skipped instead.
    if (!final_filter)
        print case_list "," traffic_match
}