Awk 1) } /派遣/{ 结束=提取日期($1) 如果(结束-开始3600){ 打印作业 打印子对象 打印 } }"档案"
返回:Awk 1) } /派遣/{ 结束=提取日期($1) 如果(结束-开始3600){ 打印作业 打印子对象 打印 } }"档案",awk,Awk,返回: Job <831953>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <# Wed Jul 31 09:37:28: Submitted from host <login02>, to Queue <medium>, CWD </sc Wed Jul 31 11:47:05: Dispatched 40 Task(s) o
Job <831953>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 31 09:37:28: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 11:47:05: Dispatched 40 Task(s) on Host(s) <40*r05n42>, Allocated 40
Job <831955>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 30 09:37:29: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 09:47:07: Dispatched 40 Task(s) on Host(s) <40*r03n10>, Allocated 40
Job <823777>, Job Name <3rd>, User <mse-jiangf>, Project <default>, Mail <ritaq
Wed Jul 31 09:55:14: Submitted from host <login04>, to Queue <medium>, CWD <$HO
Thu Aug 1 12:57:12: Dispatched 20 Task(s) on Host(s) <20*r03n09>, Allocated 20
作业、作业名称、用户、项目、命令
使用 GNU awk:
awk '
# Print the Job / Submitted / Dispatched lines of every job whose wait
# between submission and dispatch exceeds one hour.
# Requires GNU awk for mktime().

# Convert the leading "Www Mmm DD HH:MM:SS" stamp of a log line to epoch
# seconds; mktime() yields -1 when the stamp cannot be parsed.
# The log carries no year, so every stamp is placed in the fixed year 1996.
function extract_date(line,    parts) {
    # "+" collapses separator runs, so a space-padded day ("Aug  1") still
    # lands in parts[3] instead of shifting every later field by one.
    split(line, parts, /[: ]+/)
    return mktime("1996 " months[parts[2]] " " (parts[3] + 0) " " parts[4] " " parts[5] " " parts[6])
}
BEGIN {
    # Month abbreviation -> month number.
    split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ")
    for (m = 1; m <= 12; m++) {
        months[names[m]] = m
    }
    # 1996 is a leap year, so wrapping past New Year spans 366 days, not 365.
    year_secs = 366 * 24 * 60 * 60
}
/Job Name/  { job = $0 }
/Submitted/ { subm = $0 }
/Dispatched/ {
    start = extract_date(subm)
    end = extract_date($0)
    # Dispatch earlier in the calendar than submit: the wait crossed New Year.
    if (end - start < 0) {
        end += year_secs
    }
    if (end - start > 3600) {
        print job
        print subm
        print
    }
}' file
awk'
函数提取日期(行){
拆分(行,日期,/[:]/)
日期=日期[3]+0
如果(第10天){
day=“0”天
}
返回mktime(“1996”月[日期[2]]“日”日期[4]“日期[5]”日期[6])
}
开始{
月份[“一月”]=“01”
月份[“二月”]=“02”
月份[“三月”]=“03”
月份[“四月”]=“04”
月份[“五月”]=“05”
月份[“六月”]=“06”
月份[“七月”]=“07”
月份[“八月”]=“08”
月份[“九月”]=“09”
月份[“十月”]=“10”
月份[“11月”]=“11”
月份[“十二月”]=“12”
}
/作业名称/{Job=$0}
/提交/{
subm=$0;
}
/派遣/{
开始=提取日期(subm)
结束=提取日期($0)
如果(结束-开始<0){
完+=31536000
}
如果(结束-开始>3600){
打印作业
打印子对象
打印
}
}"档案"
Awk:
awk -F": " '
# Print the Job / Submitted / Dispatched lines of every job whose wait
# between submission and dispatch exceeds one hour.
# With -F": " the first field of a log line is its "Www Mmm DD HH:MM:SS" stamp.

# Convert a date string to epoch seconds by running date(1) with -d.
# Returns -1 when the string is rejected or date(1) prints nothing.
function extract_date(str_date,    cmd, result) {
    # The string is pasted into a shell command, so refuse anything that is
    # not made of letters, digits, spaces and colons.
    if (str_date !~ /^[A-Za-z0-9: ]+$/) {
        return -1
    }
    cmd = "date -d \"" str_date "\" +%s"
    # Start from a sentinel so a failed call cannot hand back a stale value.
    result = -1
    cmd | getline result
    close(cmd)
    return result
}
/Job Name/ { job = $0 }
/Submitted/ {
    subm = $0
    start = extract_date($1)
}
/Dispatched/ {
    end = extract_date($1)
    # Skip the comparison when either stamp could not be converted.
    if (start < 0 || end < 0) {
        next
    }
    if (end - start < 0) {
        end += 31536000
    }
    if (end - start > 3600) {
        print job
        print subm
        print
    }
}' file
awk-F:“'
函数提取日期(str\u日期){
cmd=“date-d\”str\u date“\”+%s”
而((cmd | getline result)>0){
}
关闭(cmd);
返回结果
}
/作业名称/{Job=$0}
/提交/{
subm=$0;
开始=提取日期($1)
}
/派遣/{
结束=提取日期($1)
如果(结束-开始<0){
完+=31536000
}
如果(结束-开始>3600){
打印作业
打印子对象
打印
}
}"档案"
返回:
Job <831953>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 31 09:37:28: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 11:47:05: Dispatched 40 Task(s) on Host(s) <40*r05n42>, Allocated 40
Job <831955>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 30 09:37:29: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 09:47:07: Dispatched 40 Task(s) on Host(s) <40*r03n10>, Allocated 40
Job <823777>, Job Name <3rd>, User <mse-jiangf>, Project <default>, Mail <ritaq
Wed Jul 31 09:55:14: Submitted from host <login04>, to Queue <medium>, CWD <$HO
Thu Aug 1 12:57:12: Dispatched 20 Task(s) on Host(s) <20*r03n09>, Allocated 20
Job,Job Name,User,Project,Command
我希望在 @Corentin Limier 出色的 gawk 答案基础上做一些改进:
避免冗余的字符串匹配和计算。
# Print every job that waited more than one hour between submission and
# dispatch. Input is processed as fixed 3-line records:
# job line, submit line, dispatch line. Requires GNU awk for mktime().

# Convert a leading "Www Mmm DD HH:MM:SS" stamp to epoch seconds; mktime()
# yields -1 when the stamp cannot be parsed.
# The log carries no year, so every stamp is placed in the fixed year 1996.
function extract_date(line,    parts) {
    # "+" collapses separator runs, so a space-padded day ("Aug  1") still
    # lands in parts[3] instead of shifting every later field by one.
    split(line, parts, /[: ]+/);
    return mktime("1996 " months[parts[2]] " " (parts[3] + 0) " " parts[4] " " parts[5] " " parts[6]);
}
BEGIN {
    # Month abbreviation -> month number.
    split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ");
    for (m = 1; m <= 12; m++) {
        months[names[m]] = m;
    }
}
NR % 3 == 1 { # handle first line: job header
    job = $0;
}
NR % 3 == 2 { # handle second line: submission
    submitTime = extract_date(substr($0, 1, 19));
    submit = $0;
}
NR % 3 == 0 { # handle third line: dispatch
    dispatchTime = extract_date(substr($0, 1, 19));
    timeDiff = dispatchTime - submitTime;
    # A negative wait means the job crossed New Year; 1996 is a leap year,
    # so the wrap is 366 days (31622400 s), not 365.
    if (timeDiff < 0) { timeDiff += 31622400 };
    # Report only jobs that waited longer than one hour.
    if (timeDiff > 3600) {
        print job;
        print submit;
        print;
    }
}
如果数据结构一致(每条记录固定为三行),则按行位置(NR % 3)进行处理会更高效。
# Print every job that waited more than one hour between submission and
# dispatch. Input is processed as fixed 3-line records:
# job line, submit line, dispatch line. Requires GNU awk for mktime().

# Convert a leading "Www Mmm DD HH:MM:SS" stamp to epoch seconds; mktime()
# yields -1 when the stamp cannot be parsed.
# The log carries no year, so every stamp is placed in the fixed year 1996.
function extract_date(line,    parts) {
    # "+" collapses separator runs, so a space-padded day ("Aug  1") still
    # lands in parts[3] instead of shifting every later field by one.
    split(line, parts, /[: ]+/);
    return mktime("1996 " months[parts[2]] " " (parts[3] + 0) " " parts[4] " " parts[5] " " parts[6]);
}
BEGIN {
    # Month abbreviation -> month number.
    split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ");
    for (m = 1; m <= 12; m++) {
        months[names[m]] = m;
    }
}
NR % 3 == 1 { # handle first line: job header
    job = $0;
}
NR % 3 == 2 { # handle second line: submission
    submitTime = extract_date(substr($0, 1, 19));
    submit = $0;
}
NR % 3 == 0 { # handle third line: dispatch
    dispatchTime = extract_date(substr($0, 1, 19));
    timeDiff = dispatchTime - submitTime;
    # A negative wait means the job crossed New Year; 1996 is a leap year,
    # so the wrap is 366 days (31622400 s), not 365.
    if (timeDiff < 0) { timeDiff += 31622400 };
    # Report only jobs that waited longer than one hour.
    if (timeDiff > 3600) {
        print job;
        print submit;
        print;
    }
}
函数提取日期(行){
拆分(行,日期,/[:]/);
日期=日期[3]+0;
如果(第10天){
day=“0”天;
}
返回mktime(“1996”月[日期[2]]“日”日期[4]“日期[5]”日期[6]);
}
开始{
月份[“一月”]=“01”;
月份[“二月”]=“02”;
月份[“三月”]=“03”;
月份[“四月”]=“04”;
月份[“五月”]=“05”;
月份[“六月”]=“06”;
月份[“七月”]=“07”;
月份[“八月”]=“08”;
月份[“九月”]=“09”;
月份[“十月”]=“10”;
月份[“11月”]=“11”;
月份[“十二月”]=“12”;
}
NR%3==1{#亨德尔第一行
职位=$0;
}
NR%3==2{#亨德尔第二行
submitTime=提取日期(substr($0,1,19));
提交=0美元;
}
NR%3==0{#亨德尔第三行
dispatchTime=提取日期(substr($0,1,19));
timeDiff=调度时间-提交时间;
如果(timeDiff<0){timeDiff+=31536000};
如果(timeDiff我希望在@Corentin Limier卓越gawk
回答上有所改进
避免冗余的字符串匹配和计算
# Print every job that waited more than one hour between submission and
# dispatch. Input is processed as fixed 3-line records:
# job line, submit line, dispatch line. Requires GNU awk for mktime().

# Convert a leading "Www Mmm DD HH:MM:SS" stamp to epoch seconds; mktime()
# yields -1 when the stamp cannot be parsed.
# The log carries no year, so every stamp is placed in the fixed year 1996.
function extract_date(line,    parts) {
    # "+" collapses separator runs, so a space-padded day ("Aug  1") still
    # lands in parts[3] instead of shifting every later field by one.
    split(line, parts, /[: ]+/);
    return mktime("1996 " months[parts[2]] " " (parts[3] + 0) " " parts[4] " " parts[5] " " parts[6]);
}
BEGIN {
    # Month abbreviation -> month number.
    split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ");
    for (m = 1; m <= 12; m++) {
        months[names[m]] = m;
    }
}
NR % 3 == 1 { # handle first line: job header
    job = $0;
}
NR % 3 == 2 { # handle second line: submission
    submitTime = extract_date(substr($0, 1, 19));
    submit = $0;
}
NR % 3 == 0 { # handle third line: dispatch
    dispatchTime = extract_date(substr($0, 1, 19));
    timeDiff = dispatchTime - submitTime;
    # A negative wait means the job crossed New Year; 1996 is a leap year,
    # so the wrap is 366 days (31622400 s), not 365.
    if (timeDiff < 0) { timeDiff += 31622400 };
    # Report only jobs that waited longer than one hour.
    if (timeDiff > 3600) {
        print job;
        print submit;
        print;
    }
}
如果数据结构一致,则使用位置/位置计算更有效
# Print every job that waited more than one hour between submission and
# dispatch. Input is processed as fixed 3-line records:
# job line, submit line, dispatch line. Requires GNU awk for mktime().

# Convert a leading "Www Mmm DD HH:MM:SS" stamp to epoch seconds; mktime()
# yields -1 when the stamp cannot be parsed.
# The log carries no year, so every stamp is placed in the fixed year 1996.
function extract_date(line,    parts) {
    # "+" collapses separator runs, so a space-padded day ("Aug  1") still
    # lands in parts[3] instead of shifting every later field by one.
    split(line, parts, /[: ]+/);
    return mktime("1996 " months[parts[2]] " " (parts[3] + 0) " " parts[4] " " parts[5] " " parts[6]);
}
BEGIN {
    # Month abbreviation -> month number.
    split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ");
    for (m = 1; m <= 12; m++) {
        months[names[m]] = m;
    }
}
NR % 3 == 1 { # handle first line: job header
    job = $0;
}
NR % 3 == 2 { # handle second line: submission
    submitTime = extract_date(substr($0, 1, 19));
    submit = $0;
}
NR % 3 == 0 { # handle third line: dispatch
    dispatchTime = extract_date(substr($0, 1, 19));
    timeDiff = dispatchTime - submitTime;
    # A negative wait means the job crossed New Year; 1996 is a leap year,
    # so the wrap is 366 days (31622400 s), not 365.
    if (timeDiff < 0) { timeDiff += 31622400 };
    # Report only jobs that waited longer than one hour.
    if (timeDiff > 3600) {
        print job;
        print submit;
        print;
    }
}
函数提取日期(行){
拆分(行,日期,/[:]/);
日期=日期[3]+0;
如果(第10天){
day=“0”天;
}
返回mktime(“1996”月[日期[2]]“日”日期[4]“日期[5]”日期[6]);
}
开始{
月份[“一月”]=“01”;
月份[“二月”]=“02”;
月份[“三月”]=“03”;
月份[“四月”]=“04”;
月份[“五月”]=“05”;
月份[“六月”]=“06”;
月份[“七月”]=“07”;
月份[“八月”]=“08”;
月份[“九月”]=“09”;
月份[“十月”]=“10”;
月份[“11月”]=“11”;
月份[“十二月”]=“12”;
}
NR%3==1{#亨德尔第一行
职位=$0;
}
NR%3==2{#亨德尔第二行
submitTime=提取日期(substr($0,1,19));
提交=0美元;
}
NR%3==0{#亨德尔第三行
dispatchTime=提取日期(substr($0,1,19));
timeDiff=调度时间-提交时间;
如果(timeDiff<0){timeDiff+=31536000};
如果(使用GNU awk for time functions和gensub()的timeDiff,并假设您的所有日期都发生在同一个当前年份内(如果不正确,我们可以根据您的实际情况对代码进行调整):
$cat tst.awk
开始{
年份=标准时间(“%Y”)
minSecs=60*60
}
idx=指数(“1月/日/月/月/月/月/月/月/月/月/月/月/月/月/月/月/月/月/日”,2美元){
前置秒=秒
secs=mktime(年份“”(idx+2)/3“$3”gensub(/:/,“,”g“,$4))
}
{rec=rec$0 ORS}
$5==“已发送”{
如果((秒-秒前)>=分钟秒){
打印文件“%s”,记录
}
rec=“”
}
$awk-f tst.awk文件
作业、作业名称、用户、项目、命令
使用 GNU awk 的时间函数和 gensub(),并假设所有日期都发生在当前同一年内(如果不是这样,我们可以根据你的实际情况对代码进行调整):
$cat tst.awk
开始{
年份=标准时间(“%Y”)
minSecs=60*60
}
idx=指数(“1月/日/月/月/月/月/月/月/月/月/月/月/月/月/月/月/月/月/日”,2美元){
前置秒=秒
secs=mktime(年份“”(idx+2)/3“$3”gensub(/:/,“,”g“,$4))
}
{rec=rec$0 ORS}
$5==“已发送”{
如果((秒-秒前)>=分钟秒){
打印文件“%s”,记录
}
rec=“”
}
$awk-f tst.awk文件
作业、作业名称、用户、项目、命令
@Cyrus,请查看我的更新。
每当你在循环中调用像 awk 这样的实用程序时,脑海中就应该响起警钟……(更不用说调用 3 次 grep、sort、uniq,以及 4 次 awk 和 4 个管道了。)在循环中调用的每个实用程序和每个管道都会生成一个单独的子 shell,从而降低效率。只需调用一次 awk 就可以做到这一点,整个脚本可能会在 10 秒内完成。
这类数据我会导入 Splunk(Splunk 可以免费使用,最高可达 500MB/天)。然后,你可以从数据中获取各种信息和图表。
请发布示例文件,不要让其中的行被折行(wrapped)。
@Cyrus,查看我的更新每当你在循环中调用像awk
这样的实用程序时,你的脑海中就会响起警铃。。。(更不用说为每个实用程序调用3次grep、sort、uniq
和awk 4次和4-pipes
)和pi
awk -F, -v y=$(date '+%Y') '
# y carries the current year, taken from the shell (the data file has none).
# Fields are comma separated, so $1 of a submit/dispatch line is
# "Www Mmm DD HH:MM:SS: text up to the first comma".

# Translate a month abbreviation into its number: "Jan" -> 1 ... "Dec" -> 12.
# The literal string "?" yields 0; any other input yields no value.
function month (m) {
    switch (m) {
        case "Jan": return 1
        case "Feb": return 2
        case "Mar": return 3
        case "Apr": return 4
        case "May": return 5
        case "Jun": return 6
        case "Jul": return 7
        case "Aug": return 8
        case "Sep": return 9
        case "Oct": return 10
        case "Nov": return 11
        case "Dec": return 12
        case "?":   return 0
    }
}

# Build an epoch timestamp from the date stamp that starts str.
function mktmstamp (str) {
    sub(/:[^:]*$/, "", str)      # cut everything from the last colon onward
    gsub(/:/, " ", str)          # H:M:S -> H M S
    gsub(/[ ]0/, " ", str)       # drop a zero that directly follows a space
    split(str, a, " ")           # a[2]=month a[3]=day a[4..6]=H M S
    return mktime(sprintf("%s %s %s %s %s %s", y, month(a[2]), a[3], a[4], a[5], a[6]))
}

# A Job line opens a new record: remember it and clear both timestamps.
/^Job/ {
    jobln = $0
    tmsub = 0
    tmdis = 0
    next
}

# Any other line: the first one after a Job line is the submit line,
# every later one is handled as a dispatch line.
{
    if (tmsub == 0) {
        subln = $0
        tmsub = mktmstamp($1)
    } else {
        disln = $0
        tmdis = mktmstamp($1)
        # report the record when the wait was at least one hour
        if (tmdis - tmsub >= 3600)
            printf "%s\n%s\n%s\n", jobln, subln, disln
    }
}
' jobs
Job <831953>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 31 09:37:28: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 11:47:05: Dispatched 40 Task(s) on Host(s) <40*r05n42>, Allocated 40
Job <831955>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 30 09:37:29: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 09:47:07: Dispatched 40 Task(s) on Host(s) <40*r03n10>, Allocated 40
Job <823777>, Job Name <3rd>, User <mse-jiangf>, Project <default>, Mail <ritaq
Wed Jul 31 09:55:14: Submitted from host <login04>, to Queue <medium>, CWD <$HO
Thu Aug 1 12:57:12: Dispatched 20 Task(s) on Host(s) <20*r03n09>, Allocated 20
# Map a three-letter English month abbreviation to its number (Jan -> 1 ... Dec -> 12).
# Each name occupies three characters of the lookup string, so the 1-based
# match position p converts to the month number as (p + 2) / 3.
# An abbreviation that is not found makes index() return 0, giving 2/3.
function month (m) {
return (index("JanFebMarAprMayJunJulAugSepOctNovDec",m)+2)/3
}
awk -F, -v y=$(date '+%Y') '
# get current year above (no year info in data file)
# Fields are comma separated, so $1 of a submit/dispatch line is
# "Www Mmm DD HH:MM:SS: text up to the first comma".

# function converts datespec (from submit/dispatch time) to timestamp
# (e.g. "YYYY MM DD HH MM SS" to seconds since epoch)
function mktmstamp (str,    a, mnth) {
    sub(/:[^:]*$/,"",str)       # remove from final : to end
    gsub(/:/," ",str)           # replace : in H:M:S with space
    gsub(/[ ]0/," ",str)        # remove any leading zeros from H M S
    split (str, a, " ")         # split weekday month day H M S into array
    # convert month abbreviation to number: each name occupies three
    # characters of the lookup string, so position p maps to (p + 2) / 3
    mnth = (index("JanFebMarAprMayJunJulAugSepOctNovDec",a[2])+2)/3
    # return timestamp created from datespec
    return mktime(y " " mnth " " a[3] " " a[4] " " a[5] " " a[6])
}
# handle Job line, store line, zero variables
/^Job/ {jobln=$0; tmsub=0; tmdis=0; next}
# handle dispatched line, store, get dispatch timestamp, output all >= 1 hr
# (must stay ahead of the submit rule so a submit line is not also
# treated as a dispatch line in the same pass)
tmsub != 0 {
    disln=$0
    tmdis=mktmstamp($1)
    if (tmdis-tmsub >= 3600)
        printf "%s\n%s\n%s\n", jobln, subln, disln
}
# handle submit line, store line, get submit timestamp
tmsub == 0 { subln=$0; tmsub=mktmstamp($1) }
' file
awk '
# Print the Job / Submitted / Dispatched lines of every job whose wait
# between submission and dispatch exceeds one hour.
# Requires GNU awk for mktime().

# Convert the leading "Www Mmm DD HH:MM:SS" stamp of a log line to epoch
# seconds; mktime() yields -1 when the stamp cannot be parsed.
# The log carries no year, so every stamp is placed in the fixed year 1996.
function extract_date(line,    parts) {
    # "+" collapses separator runs, so a space-padded day ("Aug  1") still
    # lands in parts[3] instead of shifting every later field by one.
    split(line, parts, /[: ]+/)
    return mktime("1996 " months[parts[2]] " " (parts[3] + 0) " " parts[4] " " parts[5] " " parts[6])
}
BEGIN {
    # Month abbreviation -> month number.
    split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ")
    for (m = 1; m <= 12; m++) {
        months[names[m]] = m
    }
    # 1996 is a leap year, so wrapping past New Year spans 366 days, not 365.
    year_secs = 366 * 24 * 60 * 60
}
/Job Name/  { job = $0 }
/Submitted/ { subm = $0 }
/Dispatched/ {
    start = extract_date(subm)
    end = extract_date($0)
    # Dispatch earlier in the calendar than submit: the wait crossed New Year.
    if (end - start < 0) {
        end += year_secs
    }
    if (end - start > 3600) {
        print job
        print subm
        print
    }
}' file
awk -F": " '
# Print the Job / Submitted / Dispatched lines of every job whose wait
# between submission and dispatch exceeds one hour.
# With -F": " the first field of a log line is its "Www Mmm DD HH:MM:SS" stamp.

# Convert a date string to epoch seconds by running date(1) with -d.
# Returns -1 when the string is rejected or date(1) prints nothing.
function extract_date(str_date,    cmd, result) {
    # The string is pasted into a shell command, so refuse anything that is
    # not made of letters, digits, spaces and colons.
    if (str_date !~ /^[A-Za-z0-9: ]+$/) {
        return -1
    }
    cmd = "date -d \"" str_date "\" +%s"
    # Start from a sentinel so a failed call cannot hand back a stale value.
    result = -1
    cmd | getline result
    close(cmd)
    return result
}
/Job Name/ { job = $0 }
/Submitted/ {
    subm = $0
    start = extract_date($1)
}
/Dispatched/ {
    end = extract_date($1)
    # Skip the comparison when either stamp could not be converted.
    if (start < 0 || end < 0) {
        next
    }
    if (end - start < 0) {
        end += 31536000
    }
    if (end - start > 3600) {
        print job
        print subm
        print
    }
}' file
Job <831953>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 31 09:37:28: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 11:47:05: Dispatched 40 Task(s) on Host(s) <40*r05n42>, Allocated 40
Job <831955>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 30 09:37:29: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 09:47:07: Dispatched 40 Task(s) on Host(s) <40*r03n10>, Allocated 40
Job <823777>, Job Name <3rd>, User <mse-jiangf>, Project <default>, Mail <ritaq
Wed Jul 31 09:55:14: Submitted from host <login04>, to Queue <medium>, CWD <$HO
Thu Aug 1 12:57:12: Dispatched 20 Task(s) on Host(s) <20*r03n09>, Allocated 20
# Print every job that waited more than one hour between submission and
# dispatch. Input is processed as fixed 3-line records:
# job line, submit line, dispatch line. Requires GNU awk for mktime().

# Convert a leading "Www Mmm DD HH:MM:SS" stamp to epoch seconds; mktime()
# yields -1 when the stamp cannot be parsed.
# The log carries no year, so every stamp is placed in the fixed year 1996.
function extract_date(line,    parts) {
    # "+" collapses separator runs, so a space-padded day ("Aug  1") still
    # lands in parts[3] instead of shifting every later field by one.
    split(line, parts, /[: ]+/);
    return mktime("1996 " months[parts[2]] " " (parts[3] + 0) " " parts[4] " " parts[5] " " parts[6]);
}
BEGIN {
    # Month abbreviation -> month number.
    split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ");
    for (m = 1; m <= 12; m++) {
        months[names[m]] = m;
    }
}
NR % 3 == 1 { # handle first line: job header
    job = $0;
}
NR % 3 == 2 { # handle second line: submission
    submitTime = extract_date(substr($0, 1, 19));
    submit = $0;
}
NR % 3 == 0 { # handle third line: dispatch
    dispatchTime = extract_date(substr($0, 1, 19));
    timeDiff = dispatchTime - submitTime;
    # A negative wait means the job crossed New Year; 1996 is a leap year,
    # so the wrap is 366 days (31622400 s), not 365.
    if (timeDiff < 0) { timeDiff += 31622400 };
    # Report only jobs that waited longer than one hour.
    if (timeDiff > 3600) {
        print job;
        print submit;
        print;
    }
}
$ cat tst.awk
# Print each record (all lines up to and including its "Dispatched" line)
# whose two most recent timestamps lie at least minSecs apart.
# Requires GNU awk (strftime, mktime, gensub).
BEGIN {
    year = strftime("%Y")       # the log has no year; use the current one
    minSecs = 60 * 60           # minimum wait to report: one hour
}
# Runs only for lines whose 2nd field is a month abbreviation: index() gives
# the 1-based position of $2 in the lookup string (0, i.e. false, when it is
# absent), and each name occupies three characters, so (idx+2)/3 is the
# month number.
idx = index("JanFebMarAprMayJunJulAugSepOctNovDec",$2) {
    prevSecs = secs
    # $4 is "HH:MM:SS:"; gensub turns its colons into the spaces mktime expects
    secs = mktime(year " " (idx+2)/3 " " $3 " " gensub(/:/," ","g",$4))
}
# Accumulate every line of the current record.
{ rec = rec $0 ORS }
# The dispatch line closes the record: print it if the wait was long enough.
$5 == "Dispatched" {
    if ( (secs - prevSecs) >= minSecs ) {
        printf "%s", rec
    }
    rec = ""
}
$ awk -f tst.awk file
Job <831953>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 31 09:37:28: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 11:47:05: Dispatched 40 Task(s) on Host(s) <40*r05n42>, Allocated 40
Job <831955>, Job Name <phys>, User <phy-zhangz>, Project <default>, Command <#
Wed Jul 30 09:37:29: Submitted from host <login02>, to Queue <medium>, CWD </sc
Wed Jul 31 09:47:07: Dispatched 40 Task(s) on Host(s) <40*r03n10>, Allocated 40
Job <823777>, Job Name <3rd>, User <mse-jiangf>, Project <default>, Mail <ritaq
Wed Jul 31 09:55:14: Submitted from host <login04>, to Queue <medium>, CWD <$HO
Thu Aug 1 12:57:12: Dispatched 20 Task(s) on Host(s) <20*r03n09>, Allocated 20