Bash 从日志文件获取指定时间间隔内的请求计数
我需要通过awk统计日志文件在每1、2、3、…、N分钟的时间间隔内的请求数。例如,用户设置一个与一分钟间隔相对应的变量($interval=1),输出应如下:
09:01 - 09:02
count of requests
09:02 - 09:03
count of requests
...
09:16 - 09:17
count of requests
间隔2分钟:
09:01 - 09:03
count of requests
09:03 - 09:05
count of requests
...
09:15 - 09:17
count of requests
我曾尝试通过bash脚本解决分配的任务,但它仅在一分钟的时间间隔内有效:
#!/bin/bash
for ((i=1; i…(原脚本的其余部分在页面提取时丢失)
Ok,它有点笨拙:它会对文件的每一行运行一次date,并且每行要调用两次read(ugh),但请尝试以下操作:
#!/usr/bin/env bash
#
# Count log requests, grouped into windows of at most N minutes.
#
# Usage: script [interval_minutes] [logfile]
#   interval_minutes  window length in minutes (default: 1)
#   logfile           log file to read; if the variable yourLogFile is set
#                     (as earlier versions of this script required) it takes
#                     precedence over this argument
#
# Every input line is expected to carry a timestamp in square brackets, e.g.
#   [10/Oct/2000:13:55:36 -0700]
# Reading stops at the first line that has no bracketed field.
# A window opens at the first hit and is closed (and reported) by the first
# hit that arrives more than interval_minutes after the window opened.
# Output per window:  "HH:MM - HH:MM" (first hit - last hit), then the count.
#
# Note: one `date -d` process is started per input line, so this is slow on
# large logs.

main() {
  local -i interval=${1:-1} stamp_secs=0 window_start=0 hits=0
  local logfile=${yourLogFile:-${2:-}}
  local stamp day mon year hh mm ss
  local first_h first_m last_h last_m   # first/last hit of the open window

  if [[ -z "$logfile" || ! -r "$logfile" ]]; then
    printf 'usage: %s [interval_minutes] [logfile]\n' "${0##*/}" >&2
    return 2
  fi

  printf 'Counting hits per %dm interval\n' "$interval"
  interval=$(( interval * 60 ))   # compare in seconds from here on

  # Splitting on ']' and '[' leaves the timestamp in the second field.
  while IFS='][' read -r _ stamp _ && [[ -n "$stamp" ]]; do
    # dd/Mon/yyyy:HH:MM:SS zone  ->  individual components
    IFS=$' \t/:' read -r day mon year hh mm ss _ <<< "$stamp"
    stamp_secs=$(date -d "$mon $day $year $hh:$mm:$ss" +'%s')

    if (( window_start == 0 )); then
      # very first record: open the first window
      window_start=$stamp_secs
      hits=1
      first_h=$hh; first_m=$mm
      last_h=$hh; last_m=$mm
    elif (( stamp_secs - window_start > interval )); then
      # this hit falls outside the open window: report it, open a new one
      printf '%s:%s - %s:%s\n%d\n' "$first_h" "$first_m" "$last_h" "$last_m" "$hits"
      window_start=$stamp_secs
      hits=1
      first_h=$hh; first_m=$mm
      last_h=$hh; last_m=$mm
    else
      hits+=1
      last_h=$hh; last_m=$mm
    fi
  done < "$logfile"

  # Report the window that was still open at end of input (if any hit was seen).
  if (( hits > 0 )); then
    printf '%s:%s - %s:%s\n%d\n' "$first_h" "$first_m" "$last_h" "$last_m" "$hits"
  fi
}

main "$@"
这应该可以作为你的起点。
下面是一个粗略的纯awk版本:
#!/usr/bin/env bash
#
# Count log requests, grouped into windows of at most N minutes, in one awk
# pass.
#
# Usage: script [interval_minutes] [logfile]
#   interval_minutes  window length in minutes (default: 1)
#   logfile           log file to read (default: defaultFileName)
#
# Lines containing a ':' are processed; the timestamp is taken from the first
# bracketed field, e.g. [10/Oct/2000:13:55:36 -0700]. Seconds are ignored
# (every hit is placed at second 00 of its minute).
# Output per window:  "HH:MM - HH:MM" (first hit - last hit), then the count.
#
# Requires an awk that provides mktime().
#
# Both expansions are quoted: unquoted, an interval value with leading
# whitespace is split into two words and awk then takes the second word as
# the program text.
awk -v interval="${1:-1}" '
BEGIN {
  printf "Counting hits per %sm interval\n", interval
  interval *= 60                       # minutes -> seconds
  last = 0; cnt = 0
  # month abbreviation -> zero-padded month number, as mktime() wants digits
  n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ")
  for (i = 1; i <= n; i++) month[names[i]] = sprintf("%02d", i)
}
/:/ {
  split($0, outer, "[")                # outer[2] starts right after the first "["
  split(outer[2], inner, "]")          # inner[1] is the bracketed timestamp
  split(inner[1], clock, ":")          # clock: date, H, M, "S zone"
  split(clock[1], dmy, "/")            # dmy: dd, Mon, yyyy
  H = clock[2]; M = clock[3]

  epoch = mktime(sprintf("%s %s %s %s %s %s", dmy[3], month[dmy[2]], dmy[1], H, M, "00"))

  if (!last) {
    # first record: open the first window
    last = epoch; cnt = 1
    h = H; m = M; lh = H; lm = M
  } else if (epoch - last > interval) {
    # hit is outside the open window: report it and open a new one
    printf "%s:%s - %s:%s\n%s\n", h, m, lh, lm, cnt
    last = epoch; cnt = 1
    h = H; m = M; lh = H; lm = M
  } else {
    cnt++
    lh = H; lm = M
  }
  next
}
END {
  # report the window still open at end of input, if anything was counted
  if (cnt) printf "%s:%s - %s:%s\n%s\n", h, m, lh, lm, cnt
}
' "${2:-defaultFileName}"
对于大文件输入,这应该更有效率。
您的需求在几个方面还不清楚(请参阅我在您问题下的评论),但以下是使用GNU awk的时间函数的一般方法:
$ cat tst.awk
# tst.awk -- count log requests per fixed window of `interval` minutes.
#
# Usage: awk -v interval=N -f tst.awk logfile
#
# Input:  lines whose first bracketed field is a timestamp such as
#         [10/Oct/2000:13:55:36 -0700]; the first and last timestamped
#         records define the reported time range.
# Output: for every window in that range (including empty ones) a line
#         "HH:MM HH:MM" (window start, window end) followed by a line with
#         the number of requests in the window.
# Needs mktime() and strftime().

BEGIN {
    FS = "[][]"                    # either bracket separates fields, so $2 is the timestamp
    interval = int(interval)
    if (interval < 1) {
        interval = 1               # an unset or zero step would never advance the END loop
    }
}

{
    split($2, t, /[\/: ]/)         # t: day, month name, year, hour, minute, second, zone
    # position of the 3-letter name in the string -> month number (Jan=1 ... Dec=12)
    t[2] = (index("JanFebMarAprMayJunJulAugSepOctNovDec", t[2]) + 2) / 3
    secs = mktime(t[3] " " t[2] " " t[1] " " t[4] " " t[5] " 0")
    if (secs == -1) {
        next                       # no usable timestamp (e.g. blank line): ignore the record
    }
    epochMins = int(secs / 60)
    if (!seen++) {
        begMins = epochMins        # first timestamped record starts the range
    }
    endMins = epochMins
    cnt[epochMins]++
}

END {
    if (seen) {
        for (epochMins = begMins; epochMins <= endMins; epochMins += interval) {
            begTime = strftime("%H:%M", epochMins * 60)
            endTime = strftime("%H:%M", (epochMins + interval) * 60)
            tot = 0
            # add up the per-minute counters that fall inside this window
            for (i = epochMins; i < epochMins + interval; i++) {
                tot += cnt[i]
            }
            print begTime, endTime ORS tot
        }
    }
}
IMHO,如果您还没有这样做,请尝试在示例中使用虚构的IP地址。如果间隔大于1分钟,就无法用正则表达式以合理的方式解决此问题!您需要在这里进行时间/日期运算,这使得awk成为这项任务的糟糕选择。
@user1934428 谢谢您的反馈。您知道解决该问题的其他方法吗?
使用awk并不是必须的,任何具有良好日期运算能力的语言都可以。我会使用Ruby,但Perl或Python也可以。毕竟,您需要计算两个时间点之间的时间段才能解决您的问题。另外请去掉bash:您使用bash只是为了循环读取文件,并为每一行启动一个单独的子进程,这没什么意义。
您的问题中有两件事不清楚:1)在1分钟的时间间隔下,09:04发生的事件应该只计入09:03-09:04区间,还是计入09:04-09:05区间,或者两者都计入?2)输出应该列出从最早到最晚时间之间的所有“时间间隔”增量,还是仅列出存在值的增量?请更新您的示例输入/输出以涵盖这些情况。
如果您用awk重写整个内容,这可能会更有效率。我认为您应该能够使用您列出的工具来完成。如果我有时间,我会尝试做一个POC。
作为回应:第4行 (index("JanFebMarAprMayJunJulAugSepOctNovDec",t[2])+2)/3 是将月份名称(如Mar)转换为数字(3)的惯用方法。只要想想它在做什么;如果不明显,就为中间步骤添加一些打印语句。第5行只是调用mktime()将时间戳转换为自纪元(epoch)起的秒数,然后再换算为分钟。请参见gawk手册页中的mktime。
#!/usr/bin/env bash
#
# Count log requests, grouped into windows of at most N minutes, in one awk
# pass.
#
# Usage: script [interval_minutes] [logfile]
#   interval_minutes  window length in minutes (default: 1)
#   logfile           log file to read (default: defaultFileName)
#
# Lines containing a ':' are processed; the timestamp is taken from the first
# bracketed field, e.g. [10/Oct/2000:13:55:36 -0700]. Seconds are ignored
# (every hit is placed at second 00 of its minute).
# Output per window:  "HH:MM - HH:MM" (first hit - last hit), then the count.
#
# Requires an awk that provides mktime().
#
# Both expansions are quoted: unquoted, an interval value with leading
# whitespace is split into two words and awk then takes the second word as
# the program text.
awk -v interval="${1:-1}" '
BEGIN {
  printf "Counting hits per %sm interval\n", interval
  interval *= 60                       # minutes -> seconds
  last = 0; cnt = 0
  # month abbreviation -> zero-padded month number, as mktime() wants digits
  n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ")
  for (i = 1; i <= n; i++) month[names[i]] = sprintf("%02d", i)
}
/:/ {
  split($0, outer, "[")                # outer[2] starts right after the first "["
  split(outer[2], inner, "]")          # inner[1] is the bracketed timestamp
  split(inner[1], clock, ":")          # clock: date, H, M, "S zone"
  split(clock[1], dmy, "/")            # dmy: dd, Mon, yyyy
  H = clock[2]; M = clock[3]

  epoch = mktime(sprintf("%s %s %s %s %s %s", dmy[3], month[dmy[2]], dmy[1], H, M, "00"))

  if (!last) {
    # first record: open the first window
    last = epoch; cnt = 1
    h = H; m = M; lh = H; lm = M
  } else if (epoch - last > interval) {
    # hit is outside the open window: report it and open a new one
    printf "%s:%s - %s:%s\n%s\n", h, m, lh, lm, cnt
    last = epoch; cnt = 1
    h = H; m = M; lh = H; lm = M
  } else {
    cnt++
    lh = H; lm = M
  }
  next
}
END {
  # report the window still open at end of input, if anything was counted
  if (cnt) printf "%s:%s - %s:%s\n%s\n", h, m, lh, lm, cnt
}
' "${2:-defaultFileName}"
$ cat tst.awk
# tst.awk -- count log requests per fixed window of `interval` minutes.
#
# Usage: awk -v interval=N -f tst.awk logfile
#
# Input:  lines whose first bracketed field is a timestamp such as
#         [10/Oct/2000:13:55:36 -0700]; the first and last timestamped
#         records define the reported time range.
# Output: for every window in that range (including empty ones) a line
#         "HH:MM HH:MM" (window start, window end) followed by a line with
#         the number of requests in the window.
# Needs mktime() and strftime().

BEGIN {
    FS = "[][]"                    # either bracket separates fields, so $2 is the timestamp
    interval = int(interval)
    if (interval < 1) {
        interval = 1               # an unset or zero step would never advance the END loop
    }
}

{
    split($2, t, /[\/: ]/)         # t: day, month name, year, hour, minute, second, zone
    # position of the 3-letter name in the string -> month number (Jan=1 ... Dec=12)
    t[2] = (index("JanFebMarAprMayJunJulAugSepOctNovDec", t[2]) + 2) / 3
    secs = mktime(t[3] " " t[2] " " t[1] " " t[4] " " t[5] " 0")
    if (secs == -1) {
        next                       # no usable timestamp (e.g. blank line): ignore the record
    }
    epochMins = int(secs / 60)
    if (!seen++) {
        begMins = epochMins        # first timestamped record starts the range
    }
    endMins = epochMins
    cnt[epochMins]++
}

END {
    if (seen) {
        for (epochMins = begMins; epochMins <= endMins; epochMins += interval) {
            begTime = strftime("%H:%M", epochMins * 60)
            endTime = strftime("%H:%M", (epochMins + interval) * 60)
            tot = 0
            # add up the per-minute counters that fall inside this window
            for (i = epochMins; i < epochMins + interval; i++) {
                tot += cnt[i]
            }
            print begTime, endTime ORS tot
        }
    }
}
$ awk -v interval=1 -f tst.awk file
09:01 09:02
6
09:02 09:03
0
09:03 09:04
0
09:04 09:05
0
09:05 09:06
0
09:06 09:07
8
09:07 09:08
0
09:08 09:09
0
09:09 09:10
0
09:10 09:11
0
09:11 09:12
14
09:12 09:13
0
09:13 09:14
0
09:14 09:15
4
$ awk -v interval=2 -f tst.awk file
09:01 09:03
6
09:03 09:05
0
09:05 09:07
8
09:07 09:09
0
09:09 09:11
0
09:11 09:13
14
09:13 09:15
4
$ awk -v interval=5 -f tst.awk file
09:01 09:06
6
09:06 09:11
8
09:11 09:16
18