Awk 1) } /派遣/{ 结束=提取日期($1) 如果(结束-开始3600){ 打印作业 打印子对象 打印 } }"档案"

Awk 1) } /派遣/{ 结束=提取日期($1) 如果(结束-开始3600){ 打印作业 打印子对象 打印 } }"档案",awk,Awk,返回: Job <831953>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <# Wed Jul 31 09:37:28: Submitted from host <login02>, to Queue <medium>, CWD </sc Wed Jul 31 11:47:05: Dispatched 40 Task(s) o

返回:

Job <831953>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 31 09:37:28: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 11:47:05: Dispatched 40 Task(s) on Host(s) <40*r05n42>, Allocated 40
Job <831955>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 30 09:37:29: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 09:47:07: Dispatched 40 Task(s) on Host(s) <40*r03n10>, Allocated 40
Job <823777>, Job Name <3rd>, User <mse-jiangf>, Project <default>, Mail <ritaq
Wed Jul 31 09:55:14: Submitted from host <login04>, to Queue <medium>, CWD <$HO
Thu Aug  1 12:57:12: Dispatched 20 Task(s) on Host(s) <20*r03n09>, Allocated 20
作业、作业名称、用户、项目、命令GNU awk:

awk '
# Convert the leading "Www Mmm DD HH:MM:SS" stamp of a log line to epoch
# seconds with gawk mktime(). The log carries no year, so yr supplies one;
# it defaults to 1996, a leap year, so that "Feb 29" stamps stay valid.
function extract_date(line, yr,    date){
    # "+" collapses separator runs: single-digit days are space padded
    # ("Aug  1"); splitting on single characters left the day field empty
    # and shifted hour, minute and second by one position.
    split(line, date, /[: ]+/)
    if(yr == ""){
        yr = 1996
    }
    return mktime(yr " " months[date[2]] " " date[3] " " date[4] " " date[5] " " date[6])
}
BEGIN{
    # months["Jan"] = "01" ... months["Dec"] = "12"
    n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", name, " ")
    for(i = 1; i <= n; i++){
        months[name[i]] = sprintf("%02d", i)
    }
}
# Remember the most recent job header and submission line.
/Job Name/{job=$0}
/Submitted/{
    subm=$0;
}
# On dispatch, print the whole record when the job waited more than an hour.
/Dispatched/{
    start = extract_date(subm)
    end = extract_date($0)
    if(end-start < 0){
        # Dispatched after New Year: re-evaluate the stamp in the following
        # year instead of adding a fixed 365 days (the base year has 366).
        end = extract_date($0, 1997)
    }
    if(end-start > 3600){
        print job
        print subm
        print
    }
}' file
awk'
函数提取日期(行){
拆分(行,日期,/[:]/)
日期=日期[3]+0
如果(第10天){
day=“0”天
}
返回mktime(“1996”月[日期[2]]“日”日期[4]“日期[5]”日期[6])
}
开始{
月份[“一月”]=“01”
月份[“二月”]=“02”
月份[“三月”]=“03”
月份[“四月”]=“04”
月份[“五月”]=“05”
月份[“六月”]=“06”
月份[“七月”]=“07”
月份[“八月”]=“08”
月份[“九月”]=“09”
月份[“十月”]=“10”
月份[“11月”]=“11”
月份[“十二月”]=“12”
}
/作业名称/{Job=$0}
/提交/{
subm=$0;
}
/派遣/{
开始=提取日期(subm)
结束=提取日期($0)
如果(结束-开始<0){
完+=31536000
}
如果(结束-开始>3600){
打印作业
打印子对象
打印
}
}"档案"
Awk:

awk -F": " '
# Convert a date string to epoch seconds by running the external date command
# (uses the -d option). Returns "" when the command prints nothing, e.g.
# because it could not parse the string.
# NOTE(review): str_date is interpolated into a shell command line; only run
# this on log files whose contents are trusted.
function extract_date(str_date,    cmd, result){
    cmd = "date -d \"" str_date "\" +%s"
    # result is local and starts empty, so a failed call no longer returns
    # the timestamp left over from the previous call.
    result = ""
    while ( ( cmd | getline result ) > 0 ) {
    }
    close(cmd);
    return result
}
# Remember the most recent job header.
/Job Name/{job=$0}
# With ": " as field separator, $1 is the "Www Mmm DD HH:MM:SS" stamp.
/Submitted/{
    subm=$0;
    start = extract_date($1)
}
# On dispatch, print the whole record when the job waited more than an hour.
/Dispatched/{
    end = extract_date($1)
    if(start == "" || end == ""){
        # No usable timestamp for this record: skip it instead of comparing
        # against an empty value.
        next
    }
    if(end-start < 0){
        # Both stamps are evaluated in the current year; a negative wait
        # means the dispatch happened after New Year, so add 365 days.
        end += 31536000
    }
    if(end-start > 3600){
        print job
        print subm
        print
    }
}' file
awk-F:“'
函数提取日期(str\u日期){
cmd=“date-d\”str\u date“\”+%s”
而((cmd | getline result)>0){
}
关闭(cmd);
返回结果
}
/作业名称/{Job=$0}
/提交/{
subm=$0;
开始=提取日期($1)
}
/派遣/{
结束=提取日期($1)
如果(结束-开始<0){
完+=31536000
}
如果(结束-开始>3600){
打印作业
打印子对象
打印
}
}"档案"
返回:

Job <831953>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 31 09:37:28: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 11:47:05: Dispatched 40 Task(s) on Host(s) <40*r05n42>, Allocated 40
Job <831955>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 30 09:37:29: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 09:47:07: Dispatched 40 Task(s) on Host(s) <40*r03n10>, Allocated 40
Job <823777>, Job Name <3rd>, User <mse-jiangf>, Project <default>, Mail <ritaq
Wed Jul 31 09:55:14: Submitted from host <login04>, to Queue <medium>, CWD <$HO
Thu Aug  1 12:57:12: Dispatched 20 Task(s) on Host(s) <20*r03n09>, Allocated 20

Job,Job Name,User,Project,Command

我希望在 @Corentin Limier 出色的 gawk 答案的基础上做一些改进。

避免冗余的字符串匹配和计算

# Convert a "Www Mmm DD HH:MM:SS" stamp to epoch seconds with gawk mktime().
# The log carries no year, so yr supplies one; it defaults to 1996, a leap
# year, so that "Feb 29" stamps stay valid.
function extract_date(line, yr,    date){
    # "+" collapses separator runs: single-digit days are space padded
    # ("Aug  1"); splitting on single characters left the day field empty
    # and shifted hour, minute and second by one position.
    split(line, date, /[: ]+/);
    if (yr == "") {
        yr = 1996;
    }
    return mktime(yr " " months[date[2]] " " date[3] " " date[4] " " date[5] " " date[6]);
}
BEGIN{
    # months["Jan"] = "01" ... months["Dec"] = "12"
    n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", name, " ");
    for (i = 1; i <= n; i++) {
        months[name[i]] = sprintf("%02d", i);
    }
}
NR % 3 == 1 { # first line of a record: the job header
    job = $0;
}
NR % 3 == 2 { # second line: submission; the stamp is the first 19 characters
    submitTime = extract_date(substr($0, 1, 19));
    submit = $0;
}
NR % 3 == 0 { # third line: dispatch
    dispatchTime = extract_date(substr($0, 1, 19));
    if (dispatchTime < submitTime) {
        # Dispatched after New Year: re-evaluate the stamp in the following
        # year instead of adding a fixed 365 days (the base year has 366).
        dispatchTime = extract_date(substr($0, 1, 19), 1997);
    }
    timeDiff = dispatchTime - submitTime;
    # Report jobs that waited MORE than one hour; the comparison was
    # inverted (<=) relative to the pattern-matching versions of this script.
    if (timeDiff > 3600) {
        print job;
        print submit;
        print;
    }
}
如果数据结构一致,则使用位置/位置计算更有效

# Convert a "Www Mmm DD HH:MM:SS" stamp to epoch seconds with gawk mktime().
# The log carries no year, so yr supplies one; it defaults to 1996, a leap
# year, so that "Feb 29" stamps stay valid.
function extract_date(line, yr,    date){
    # "+" collapses separator runs: single-digit days are space padded
    # ("Aug  1"); splitting on single characters left the day field empty
    # and shifted hour, minute and second by one position.
    split(line, date, /[: ]+/);
    if (yr == "") {
        yr = 1996;
    }
    return mktime(yr " " months[date[2]] " " date[3] " " date[4] " " date[5] " " date[6]);
}
BEGIN{
    # months["Jan"] = "01" ... months["Dec"] = "12"
    n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", name, " ");
    for (i = 1; i <= n; i++) {
        months[name[i]] = sprintf("%02d", i);
    }
}
NR % 3 == 1 { # first line of a record: the job header
    job = $0;
}
NR % 3 == 2 { # second line: submission; the stamp is the first 19 characters
    submitTime = extract_date(substr($0, 1, 19));
    submit = $0;
}
NR % 3 == 0 { # third line: dispatch
    dispatchTime = extract_date(substr($0, 1, 19));
    if (dispatchTime < submitTime) {
        # Dispatched after New Year: re-evaluate the stamp in the following
        # year instead of adding a fixed 365 days (the base year has 366).
        dispatchTime = extract_date(substr($0, 1, 19), 1997);
    }
    timeDiff = dispatchTime - submitTime;
    # Report jobs that waited MORE than one hour; the comparison was
    # inverted (<=) relative to the pattern-matching versions of this script.
    if (timeDiff > 3600) {
        print job;
        print submit;
        print;
    }
}
函数提取日期(行){
拆分(行,日期,/[:]/);
日期=日期[3]+0;
如果(第10天){
day=“0”天;
}
返回mktime(“1996”月[日期[2]]“日”日期[4]“日期[5]”日期[6]);
}
开始{
月份[“一月”]=“01”;
月份[“二月”]=“02”;
月份[“三月”]=“03”;
月份[“四月”]=“04”;
月份[“五月”]=“05”;
月份[“六月”]=“06”;
月份[“七月”]=“07”;
月份[“八月”]=“08”;
月份[“九月”]=“09”;
月份[“十月”]=“10”;
月份[“11月”]=“11”;
月份[“十二月”]=“12”;
}
NR%3==1{#亨德尔第一行
职位=$0;
}
NR%3==2{#亨德尔第二行
submitTime=提取日期(substr($0,1,19));
提交=0美元;
}
NR%3==0{#亨德尔第三行
dispatchTime=提取日期(substr($0,1,19));
timeDiff=调度时间-提交时间;
如果(timeDiff<0){timeDiff+=31536000};

如果(timeDiff我希望在@Corentin Limier卓越
gawk
回答上有所改进

避免冗余的字符串匹配和计算

# Convert a "Www Mmm DD HH:MM:SS" stamp to epoch seconds with gawk mktime().
# The log carries no year, so yr supplies one; it defaults to 1996, a leap
# year, so that "Feb 29" stamps stay valid.
function extract_date(line, yr,    date){
    # "+" collapses separator runs: single-digit days are space padded
    # ("Aug  1"); splitting on single characters left the day field empty
    # and shifted hour, minute and second by one position.
    split(line, date, /[: ]+/);
    if (yr == "") {
        yr = 1996;
    }
    return mktime(yr " " months[date[2]] " " date[3] " " date[4] " " date[5] " " date[6]);
}
BEGIN{
    # months["Jan"] = "01" ... months["Dec"] = "12"
    n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", name, " ");
    for (i = 1; i <= n; i++) {
        months[name[i]] = sprintf("%02d", i);
    }
}
NR % 3 == 1 { # first line of a record: the job header
    job = $0;
}
NR % 3 == 2 { # second line: submission; the stamp is the first 19 characters
    submitTime = extract_date(substr($0, 1, 19));
    submit = $0;
}
NR % 3 == 0 { # third line: dispatch
    dispatchTime = extract_date(substr($0, 1, 19));
    if (dispatchTime < submitTime) {
        # Dispatched after New Year: re-evaluate the stamp in the following
        # year instead of adding a fixed 365 days (the base year has 366).
        dispatchTime = extract_date(substr($0, 1, 19), 1997);
    }
    timeDiff = dispatchTime - submitTime;
    # Report jobs that waited MORE than one hour; the comparison was
    # inverted (<=) relative to the pattern-matching versions of this script.
    if (timeDiff > 3600) {
        print job;
        print submit;
        print;
    }
}
如果数据结构一致,则使用位置/位置计算更有效

# Convert a "Www Mmm DD HH:MM:SS" stamp to epoch seconds with gawk mktime().
# The log carries no year, so yr supplies one; it defaults to 1996, a leap
# year, so that "Feb 29" stamps stay valid.
function extract_date(line, yr,    date){
    # "+" collapses separator runs: single-digit days are space padded
    # ("Aug  1"); splitting on single characters left the day field empty
    # and shifted hour, minute and second by one position.
    split(line, date, /[: ]+/);
    if (yr == "") {
        yr = 1996;
    }
    return mktime(yr " " months[date[2]] " " date[3] " " date[4] " " date[5] " " date[6]);
}
BEGIN{
    # months["Jan"] = "01" ... months["Dec"] = "12"
    n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", name, " ");
    for (i = 1; i <= n; i++) {
        months[name[i]] = sprintf("%02d", i);
    }
}
NR % 3 == 1 { # first line of a record: the job header
    job = $0;
}
NR % 3 == 2 { # second line: submission; the stamp is the first 19 characters
    submitTime = extract_date(substr($0, 1, 19));
    submit = $0;
}
NR % 3 == 0 { # third line: dispatch
    dispatchTime = extract_date(substr($0, 1, 19));
    if (dispatchTime < submitTime) {
        # Dispatched after New Year: re-evaluate the stamp in the following
        # year instead of adding a fixed 365 days (the base year has 366).
        dispatchTime = extract_date(substr($0, 1, 19), 1997);
    }
    timeDiff = dispatchTime - submitTime;
    # Report jobs that waited MORE than one hour; the comparison was
    # inverted (<=) relative to the pattern-matching versions of this script.
    if (timeDiff > 3600) {
        print job;
        print submit;
        print;
    }
}
函数提取日期(行){
拆分(行,日期,/[:]/);
日期=日期[3]+0;
如果(第10天){
day=“0”天;
}
返回mktime(“1996”月[日期[2]]“日”日期[4]“日期[5]”日期[6]);
}
开始{
月份[“一月”]=“01”;
月份[“二月”]=“02”;
月份[“三月”]=“03”;
月份[“四月”]=“04”;
月份[“五月”]=“05”;
月份[“六月”]=“06”;
月份[“七月”]=“07”;
月份[“八月”]=“08”;
月份[“九月”]=“09”;
月份[“十月”]=“10”;
月份[“11月”]=“11”;
月份[“十二月”]=“12”;
}
NR%3==1{#亨德尔第一行
职位=$0;
}
NR%3==2{#亨德尔第二行
submitTime=提取日期(substr($0,1,19));
提交=0美元;
}
NR%3==0{#亨德尔第三行
dispatchTime=提取日期(substr($0,1,19));
timeDiff=调度时间-提交时间;
如果(timeDiff<0){timeDiff+=31536000};

如果(使用GNU awk for time functions和gensub()的timeDiff,并假设您的所有日期都发生在同一个当前年份内(如果不正确,我们可以根据您的实际情况对代码进行调整):

$cat tst.awk
开始{
年份=标准时间(“%Y”)
minSecs=60*60
}
idx=指数(“1月/日/月/月/月/月/月/月/月/月/月/月/月/月/月/月/月/月/日”,2美元){
前置秒=秒
secs=mktime(年份“”(idx+2)/3“$3”gensub(/:/,“,”g“,$4))
}
{rec=rec$0 ORS}
$5==“已发送”{
如果((秒-秒前)>=分钟秒){
打印文件“%s”,记录
}
rec=“”
}
$awk-f tst.awk文件

作业、作业名称、用户、项目、命令

使用 GNU awk(需要其时间函数和 gensub()),并假设所有日期都在当前同一年内(如果不是这样,可以根据您的实际情况调整代码):

$cat tst.awk
开始{
年份=标准时间(“%Y”)
minSecs=60*60
}
idx=指数(“1月/日/月/月/月/月/月/月/月/月/月/月/月/月/月/月/月/月/日”,2美元){
前置秒=秒
secs=mktime(年份“”(idx+2)/3“$3”gensub(/:/,“,”g“,$4))
}
{rec=rec$0 ORS}
$5==“已发送”{
如果((秒-秒前)>=分钟秒){
打印文件“%s”,记录
}
rec=“”
}
$awk-f tst.awk文件

作业、作业名称、用户、项目、命令

@Cyrus,请查看我的更新。

每当你在循环中调用像 awk 这样的实用程序时,脑海中就应该响起警钟……(更不用说还调用了 3 次 grep、sort、uniq、4 次 awk 以及 4 个管道)。在循环中每调用一个实用程序或管道,都会派生一个单独的子 shell,效率因此大幅下降。只需调用一次 awk 就能完成全部工作,整个脚本大概可以在 10 秒内跑完。

这类数据我会导入 Splunk(Splunk 可免费使用,上限为每天 500MB),之后就可以从数据中得到各种信息和图表。

请发布示例文件,并且不要让行自动折行(wrapped)。

@Cyrus,查看我的更新每当你在循环中调用像
awk
这样的实用程序时,你的脑海中就会响起警铃。。。(更不用说为每个实用程序调用3次
grep、sort、uniq
awk 4次
4-pipes
)和pi
awk -F, -v y=$(date '+%Y') '
    # y is the current year, passed in above (no year info in data file).

    # Map a three-letter English month abbreviation ("Jan" ... "Dec") to its
    # number (1 ... 12). Returns 0 for any other input.
    function month (m) {
        switch (m) {
            case "Jan": return 1
            case "Feb": return 2
            case "Mar": return 3
            case "Apr": return 4
            case "May": return 5
            case "Jun": return 6
            case "Jul": return 7
            case "Aug": return 8
            case "Sep": return 9
            case "Oct": return 10
            case "Nov": return 11
            case "Dec": return 12
            # was: case "?", which only matched the literal string "?" and
            # let every other unknown value fall off the end of the function
            default:    return 0
        }
    }
    # Convert the first comma-separated field of a submit/dispatch line
    # ("Www Mmm DD HH:MM:SS: text") to seconds since the epoch, using year y.
    function mktmstamp (str,    a) {
        sub(/:[^:]*$/,"",str)   # remove from final : to end
        gsub(/:/," ",str)       # replace : in H:M:S with space
        gsub(/[ ]0/," ",str)    # remove any leading zeros from H M S
        split (str, a, " ")     # split into weekday, month, day, H, M, S
        # return timestamp created from datespec "YYYY MM DD HH MM SS"
        return mktime(y " " month(a[2]) " " a[3] " " a[4] " " a[5] " " a[6])
    }
    # handle Job line, store line, zero variables
    /^Job/ {jobln=$0; tmsub=0; tmdis=0; next}
    # handle dispatched line, store, get dispatch timestamp, output all >= 1 hr
    # (this rule must precede the submit rule: on the submit line tmsub is
    # still 0 here, so the line is not also treated as a dispatch line)
    tmsub != 0 {
        disln=$0
        tmdis=mktmstamp($1)
        if (tmdis-tmsub >= 3600)
            printf "%s\n%s\n%s\n", jobln, subln, disln
    }
    # handle submit line, store line, get submit timestamp
    tmsub == 0 { subln=$0; tmsub=mktmstamp($1) }
' jobs
Job <831953>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 31 09:37:28: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 11:47:05: Dispatched 40 Task(s) on Host(s) <40*r05n42>, Allocated 40
Job <831955>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 30 09:37:29: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 09:47:07: Dispatched 40 Task(s) on Host(s) <40*r03n10>, Allocated 40
Job <823777>, Job Name <3rd>, User <mse-jiangf>, Project <default>, Mail <ritaq
Wed Jul 31 09:55:14: Submitted from host <login04>, to Queue <medium>, CWD <$HO
Thu Aug  1 12:57:12: Dispatched 20 Task(s) on Host(s) <20*r03n09>, Allocated 20
# Map a three-letter English month abbreviation ("Jan" ... "Dec") to its
# number (1 ... 12) by locating it in a packed lookup string.
# Returns 0 when m is not found or does not start on a month boundary.
function month (m,    pos) {
    # The lookup string previously read "...OctNocDec", so "Nov" was never
    # found and the function returned (0+2)/3 instead of 11.
    pos = index("JanFebMarAprMayJunJulAugSepOctNovDec", m)
    # Abbreviations start at positions 1, 4, 7, ...; anything else is a miss
    # or a match that straddles two names (e.g. "anF").
    return (pos % 3 == 1) ? (pos + 2) / 3 : 0
}
awk -F, -v y=$(date '+%Y') '
    # y is the current year, passed in above (no year info in data file).

    # Convert the first comma-separated field of a submit/dispatch line
    # ("Www Mmm DD HH:MM:SS: text") to seconds since the epoch, using year y.
    function mktmstamp (str,    a, mnth) {
        sub(/:[^:]*$/,"",str)   # remove from final : to end
        gsub(/:/," ",str)       # replace : in H:M:S with space
        gsub(/[ ]0/," ",str)    # remove any leading zeros from H M S
        split (str, a, " ")     # split into weekday, month, day, H, M, S
        # convert month abrv. to number via its position in the packed string
        # (the string previously read "...OctNocDec", so "Nov" was not found
        # and November stamps got a fractional month)
        mnth = (index("JanFebMarAprMayJunJulAugSepOctNovDec",a[2])+2)/3
        # return timestamp created from datespec "YYYY MM DD HH MM SS"
        return mktime(y " " mnth " " a[3] " " a[4] " " a[5] " " a[6])
    }
    # handle Job line, store line, zero variables
    /^Job/ {jobln=$0; tmsub=0; tmdis=0; next}
    # handle dispatched line, store, get dispatch timestamp, output all >= 1 hr
    # (this rule must precede the submit rule: on the submit line tmsub is
    # still 0 here, so the line is not also treated as a dispatch line)
    tmsub != 0 {
        disln=$0
        tmdis=mktmstamp($1)
        if (tmdis-tmsub >= 3600)
            printf "%s\n%s\n%s\n", jobln, subln, disln
    }
    # handle submit line, store line, get submit timestamp
    tmsub == 0 { subln=$0; tmsub=mktmstamp($1) }
' file
awk '
# Convert the leading "Www Mmm DD HH:MM:SS" stamp of a log line to epoch
# seconds with gawk mktime(). The log carries no year, so yr supplies one;
# it defaults to 1996, a leap year, so that "Feb 29" stamps stay valid.
function extract_date(line, yr,    date){
    # "+" collapses separator runs: single-digit days are space padded
    # ("Aug  1"); splitting on single characters left the day field empty
    # and shifted hour, minute and second by one position.
    split(line, date, /[: ]+/)
    if(yr == ""){
        yr = 1996
    }
    return mktime(yr " " months[date[2]] " " date[3] " " date[4] " " date[5] " " date[6])
}
BEGIN{
    # months["Jan"] = "01" ... months["Dec"] = "12"
    n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", name, " ")
    for(i = 1; i <= n; i++){
        months[name[i]] = sprintf("%02d", i)
    }
}
# Remember the most recent job header and submission line.
/Job Name/{job=$0}
/Submitted/{
    subm=$0;
}
# On dispatch, print the whole record when the job waited more than an hour.
/Dispatched/{
    start = extract_date(subm)
    end = extract_date($0)
    if(end-start < 0){
        # Dispatched after New Year: re-evaluate the stamp in the following
        # year instead of adding a fixed 365 days (the base year has 366).
        end = extract_date($0, 1997)
    }
    if(end-start > 3600){
        print job
        print subm
        print
    }
}' file
awk -F": " '
# Convert a date string to epoch seconds by running the external date command
# (uses the -d option). Returns "" when the command prints nothing, e.g.
# because it could not parse the string.
# NOTE(review): str_date is interpolated into a shell command line; only run
# this on log files whose contents are trusted.
function extract_date(str_date,    cmd, result){
    cmd = "date -d \"" str_date "\" +%s"
    # result is local and starts empty, so a failed call no longer returns
    # the timestamp left over from the previous call.
    result = ""
    while ( ( cmd | getline result ) > 0 ) {
    }
    close(cmd);
    return result
}
# Remember the most recent job header.
/Job Name/{job=$0}
# With ": " as field separator, $1 is the "Www Mmm DD HH:MM:SS" stamp.
/Submitted/{
    subm=$0;
    start = extract_date($1)
}
# On dispatch, print the whole record when the job waited more than an hour.
/Dispatched/{
    end = extract_date($1)
    if(start == "" || end == ""){
        # No usable timestamp for this record: skip it instead of comparing
        # against an empty value.
        next
    }
    if(end-start < 0){
        # Both stamps are evaluated in the current year; a negative wait
        # means the dispatch happened after New Year, so add 365 days.
        end += 31536000
    }
    if(end-start > 3600){
        print job
        print subm
        print
    }
}' file
Job <831953>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 31 09:37:28: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 11:47:05: Dispatched 40 Task(s) on Host(s) <40*r05n42>, Allocated 40
Job <831955>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 30 09:37:29: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 09:47:07: Dispatched 40 Task(s) on Host(s) <40*r03n10>, Allocated 40
Job <823777>, Job Name <3rd>, User <mse-jiangf>, Project <default>, Mail <ritaq
Wed Jul 31 09:55:14: Submitted from host <login04>, to Queue <medium>, CWD <$HO
Thu Aug  1 12:57:12: Dispatched 20 Task(s) on Host(s) <20*r03n09>, Allocated 20
# Convert a "Www Mmm DD HH:MM:SS" stamp to epoch seconds with gawk mktime().
# The log carries no year, so yr supplies one; it defaults to 1996, a leap
# year, so that "Feb 29" stamps stay valid.
function extract_date(line, yr,    date){
    # "+" collapses separator runs: single-digit days are space padded
    # ("Aug  1"); splitting on single characters left the day field empty
    # and shifted hour, minute and second by one position.
    split(line, date, /[: ]+/);
    if (yr == "") {
        yr = 1996;
    }
    return mktime(yr " " months[date[2]] " " date[3] " " date[4] " " date[5] " " date[6]);
}
BEGIN{
    # months["Jan"] = "01" ... months["Dec"] = "12"
    n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", name, " ");
    for (i = 1; i <= n; i++) {
        months[name[i]] = sprintf("%02d", i);
    }
}
NR % 3 == 1 { # first line of a record: the job header
    job = $0;
}
NR % 3 == 2 { # second line: submission; the stamp is the first 19 characters
    submitTime = extract_date(substr($0, 1, 19));
    submit = $0;
}
NR % 3 == 0 { # third line: dispatch
    dispatchTime = extract_date(substr($0, 1, 19));
    if (dispatchTime < submitTime) {
        # Dispatched after New Year: re-evaluate the stamp in the following
        # year instead of adding a fixed 365 days (the base year has 366).
        dispatchTime = extract_date(substr($0, 1, 19), 1997);
    }
    timeDiff = dispatchTime - submitTime;
    # Report jobs that waited MORE than one hour; the comparison was
    # inverted (<=) relative to the pattern-matching versions of this script.
    if (timeDiff > 3600) {
        print job;
        print submit;
        print;
    }
}
$ cat tst.awk
# Print every job record (all lines up to and including its "Dispatched"
# line) whose dispatch stamp is at least one hour after the preceding stamp.
# Requires GNU awk (strftime, mktime, gensub). All stamps are evaluated in
# the current year.
BEGIN {
    year = strftime("%Y")
    minSecs = 60 * 60
}
# Runs only on lines whose 2nd field is a month abbreviation: the assignment
# is the pattern, and index() yields 0 (false) for anything else.
# The lookup string previously read "...OctNocDec", so lines stamped "Nov"
# never matched and their timestamps were silently skipped.
idx = index("JanFebMarAprMayJunJulAugSepOctNovDec",$2) {
    prevSecs = secs
    # (idx+2)/3 turns the string position 1, 4, 7, ... into month 1, 2, 3, ...
    secs = mktime(year " " (idx+2)/3 " " $3 " " gensub(/:/," ","g",$4))
}
# Accumulate the current record, one input line at a time.
{ rec = rec $0 ORS }
# The Dispatched line closes a record: emit it if the wait was long enough.
$5 == "Dispatched" {
    if ( (secs - prevSecs) >= minSecs ) {
        printf "%s", rec
    }
    rec = ""
}

$ awk -f tst.awk file
Job <831953>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 31 09:37:28: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 11:47:05: Dispatched 40 Task(s) on Host(s) <40*r05n42>, Allocated 40
Job <831955>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 30 09:37:29: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 09:47:07: Dispatched 40 Task(s) on Host(s) <40*r03n10>, Allocated 40
Job <823777>, Job Name <3rd>, User <mse-jiangf>, Project <default>, Mail <ritaq
Wed Jul 31 09:55:14: Submitted from host <login04>, to Queue <medium>, CWD <$HO
Thu Aug  1 12:57:12: Dispatched 20 Task(s) on Host(s) <20*r03n09>, Allocated 20