Bash 从日志文件获取指定时间间隔内的请求计数

Bash 从日志文件获取指定时间间隔内的请求计数,bash,awk,Bash,Awk,我需要在1、2、3、…、N分钟内通过awk获取日志文件的请求计数 例如,用户设置一个与一分钟间隔相对应的变量($interval=1)–输出应如下 09:01 - 09:02 count of requests 09:02 - 09:03 count of requests ... 09:16 - 09:17 count of requests 间隔2分钟: 09:01 - 09:03 count of requests 09:03 - 09:05 count of reques

我需要在1、2、3、…、N分钟内通过awk获取日志文件的请求计数

例如,用户设置一个与一分钟间隔相对应的变量(
$interval=1
)–输出应如下

09:01 - 09:02  
count of requests 
09:02 - 09:03
count of requests
 ... 
09:16 - 09:17 
count of requests
间隔2分钟:

09:01 - 09:03 
count of requests
09:03 - 09:05
count of requests
...
09:15 - 09:17
count of requests
我曾尝试通过bash脚本解决分配的任务,但它仅在一分钟的时间间隔内有效:

#/bin/bash

for ((i=1; i…
(提问者脚本的其余部分在页面提取时丢失。)

回答:Ok,这个方案有点笨拙——它会对文件的每一行运行一次 date,并且要 read 两次(ugh)——但请尝试以下操作:

#! /usr/bin/env bash
#
# Count log hits per N-minute interval with a plain bash loop.
#
# Usage: script [interval_minutes [logfile]]
#   interval_minutes  positive integer, default 1
#   logfile           log to read; falls back to $yourLogFile when omitted
#
# Each counted line must carry a bracketed timestamp such as
# [10/Oct/2019:09:01:05 +0000]. A bucket is opened by a hit and closed by the
# first later hit that arrives more than the interval after the bucket start.
# For every bucket the script prints "HH:MM - HH:MM" (first and last hit in
# the bucket) followed by the hit count on the next line.
#
# Needs GNU date (-d). date is forked once per line, so this is slow on
# large files; a single awk pass is the better tool for bulk input.

count_hits() {
  local raw_interval=${1:-1}
  local logfile=${2:-${yourLogFile:-}}
  local -i interval=0 timestamp=0 last=0 cnt=0
  local H M lh lm                       # bucket start HH/MM and latest hit HH/MM
  local d day mon year h m s junk

  # Validate before any arithmetic so arbitrary text is never evaluated.
  if [[ ! $raw_interval =~ ^[0-9]+$ ]] || (( 10#$raw_interval < 1 )); then
    printf 'interval must be a positive integer (minutes), got: %s\n' \
      "$raw_interval" >&2
    return 2
  fi
  if [[ -z $logfile || ! -r $logfile ]]; then
    printf 'cannot read log file: %s\n' "${logfile:-<unset>}" >&2
    return 1
  fi

  interval=$(( 10#$raw_interval ))
  printf 'Counting hits per %dm interval\n' "$interval"
  interval=$(( interval * 60 ))         # convert to seconds

  # -r keeps backslashes literal; the "|| [[ -n $d ]]" part also accepts a
  # final line that has no trailing newline.
  while IFS='][' read -r junk d junk || [[ -n $d ]]; do
    # Skip lines without a bracketed timestamp instead of ending the loop.
    [[ -n $d ]] || continue

    # Split "dd/Mon/yyyy:HH:MM:SS tz" into its components.
    IFS=$' \t\n/:' read -r day mon year h m s junk <<< "$d"

    # Reformat to epoch seconds; ignore lines date cannot parse.
    timestamp=$(date -d "$mon $day $year $h:$m:$s" +%s 2>/dev/null) || continue

    if (( last == 0 )); then            # first hit opens the first bucket
      last=$timestamp
      H=$h M=$m lh=$h lm=$m
      cnt=1
    elif (( timestamp - last > interval )); then
      # Data goes in as arguments, never as the printf format string.
      printf '%s:%s - %s:%s\n%d\n' "$H" "$M" "$lh" "$lm" "$cnt"
      last=$timestamp
      H=$h M=$m lh=$h lm=$m
      cnt=1
    else
      cnt=$(( cnt + 1 ))
      lh=$h lm=$m
    fi
  done < "$logfile"

  # Flush the bucket still open at end of input (nothing if no hit was seen).
  if (( cnt > 0 )); then
    printf '%s:%s - %s:%s\n%d\n' "$H" "$M" "$lh" "$lm" "$cnt"
  fi
}

count_hits "$@"
那应该是你的开始


下面是一个粗略的、完全用 awk 实现的版本:

#! /usr/bin/env bash
#
# Count log hits per N-minute interval in a single awk pass.
#
# Usage: script [interval_minutes [logfile]]
#   interval_minutes  positive integer, default 1
#   logfile           log to read, default "defaultFileName"
#
# Each counted line must carry a bracketed timestamp such as
# [10/Oct/2019:09:01:05 +0000]; seconds are ignored (truncated to the minute).
# A bucket is opened by a hit and closed by the first later hit that arrives
# more than the interval after the bucket start. For every bucket the script
# prints "HH:MM - HH:MM" (first and last hit in the bucket) followed by the
# hit count on the next line.
#
# Needs an awk that provides mktime() (e.g. GNU awk).

count_hits() {
  local raw_interval=${1:-1}
  local logfile=${2:-defaultFileName}
  local interval

  if [[ ! $raw_interval =~ ^[0-9]+$ ]] || (( 10#$raw_interval < 1 )); then
    printf 'interval must be a positive integer (minutes), got: %s\n' \
      "$raw_interval" >&2
    return 2
  fi
  interval=$(( 10#$raw_interval ))

  # Both expansions are quoted: an unquoted default such as ${1:- 1} is
  # word-split, which hands awk "1" as its program text.
  awk -v interval="$interval" '
    BEGIN {
      printf "Counting hits per %sm interval\n", interval
      interval = interval * 60          # minutes -> seconds

      # Month name -> zero-padded month number (portable, no switch needed).
      n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ")
      for (i = 1; i <= n; i++) {
        monNum[names[i]] = sprintf("%02d", i)
      }
    }

    /:/ {
      split($0,     tmp,  "[")
      split(tmp[2], t2,   "]")
      split(t2[1],  tmp,  ":")          # tmp  now date, H, M, S tz
      H = tmp[2]; M = tmp[3]
      split(tmp[1], dtmp, "/")          # dtmp now dd, Mon, yyyy

      # Ignore lines that merely contain a colon but no usable timestamp.
      if (!(dtmp[2] in monNum)) {
        next
      }
      epoch = mktime(sprintf("%s %s %s %s %s 00", dtmp[3], monNum[dtmp[2]], dtmp[1], H, M))
      if (epoch == -1) {
        next
      }

      if (!started) {                   # first hit opens the first bucket
        started = 1
        last = epoch; cnt = 1
        h = H; m = M; lh = H; lm = M
      } else if (epoch - last > interval) {
        printf "%s:%s - %s:%s\n%s\n", h, m, lh, lm, cnt
        last = epoch; cnt = 1
        h = H; m = M; lh = H; lm = M
      } else {
        cnt++
        lh = H; lm = M
      }
      next
    }

    # Flush the bucket still open at end of input (nothing if no hit was seen).
    END {
      if (started) {
        printf "%s:%s - %s:%s\n%s\n", h, m, lh, lm, cnt
      }
    }
  ' "$logfile"
}

count_hits "$@"

对于大文件输入,这应该更有效。

您的需求在几个方面还不清楚(请参阅我在您问题下的评论),但以下是使用GNU awk进行时间函数的一般方法:

$ cat tst.awk
# Count log requests per fixed-size window of minutes.
# Needs an awk with mktime()/strftime(), e.g. GNU awk.
#
# Usage: awk -v interval=N -f tst.awk logfile
#   interval  window length in whole minutes; falls back to 1 when unset,
#             non-numeric or < 1 (a zero step would make the END loop spin
#             forever).
# Input:  lines carrying a bracketed timestamp like [dd/Mon/yyyy:HH:MM:SS tz];
#         lines without a parsable timestamp are ignored.
# Output: for every window from the earliest to the latest minute seen,
#         "HH:MM HH:MM" (window start, window end) followed by the hit count
#         on the next line. Empty windows are printed with a count of 0.
BEGIN {
    FS = "[][]"                      # split on either bracket: $2 is the timestamp
    interval = int(interval)
    if (interval < 1) {
        interval = 1
    }
}
NF >= 2 {
    split($2, t, /[\/: ]/)           # t: dd, Mon, yyyy, HH, MM, SS, tz
    # Month name -> number: names start at offsets 1, 4, 7, ... of the string.
    monPos = index("JanFebMarAprMayJunJulAugSepOctNovDec", t[2])
    if (length(t[2]) != 3 || monPos % 3 != 1) {
        next
    }
    mon = (monPos + 2) / 3
    secs = mktime(t[3] " " mon " " t[1] " " t[4] " " t[5] " 0")
    if (secs == -1) {
        next
    }
    epochMins = int(secs / 60)
    # Track the true min/max so out-of-order lines cannot shrink the range.
    if (!seen || epochMins < begMins) {
        begMins = epochMins
    }
    if (!seen || epochMins > endMins) {
        endMins = epochMins
    }
    seen = 1
    cnt[epochMins]++
}
END {
    if (!seen) {
        exit                         # no timestamps at all: print nothing
    }
    for (winBeg = begMins; winBeg <= endMins; winBeg += interval) {
        tot = 0
        for (i = winBeg; i < winBeg + interval; i++) {
            if (i in cnt) {          # membership test avoids creating empty entries
                tot += cnt[i]
            }
        }
        print strftime("%H:%M", winBeg * 60), strftime("%H:%M", (winBeg + interval) * 60) ORS tot
    }
}


IMHO,如果您没有这样做,请尝试在示例中添加虚拟IP地址。如果间隔大于1分钟,则无法使用正则表达式以合理的方式解决此问题!您需要在此处执行时间/日期算术,这使得awk对于任务来说是一个糟糕的选择。@user1934428谢谢您的反馈。您知道解决该问题的其他方法吗?使用awk是没有必要的。任何具有良好日期算法的语言都可以。我会使用Ruby,但Perl或Python也可以。毕竟,您需要计算两个时间点之间的时间段来解决您的问题。另外请去掉bash——您使用bash只是为了循环读取文件,却为每一行启动一个单独的子进程,这没什么意义。在你的问题中有两件事是不清楚的:1)在1分钟的时间间隔内,09:04发生的事情应该精确地计算在09:03-09:04区间内,还是09:04到09:05区间内,或者两者兼而有之?2)输出应该列出从最早到最晚时间之间的所有“时间间隔”增量,还是仅列出存在值的增量?请更新您的示例输入/输出以涵盖这些情况。如果您在
awk
中重写整个内容,这可能会更有效率。我认为您应该能够使用为此列出的工具。如果我有时间,我会尝试做一个POC。作为回应:第4行的
(index("JanFebMarAprMayJunJulAugSepOctNovDec",t[2])+2)/3
是将月份名称(如Mar)转换为数字(3)的惯用方法。只要想想它在做什么,如果不明显,就为中间步骤添加一些
print 语句。第5行只是调用mktime()将时间戳转换为自纪元(epoch)起的秒数,然后将其转换为分钟。请参见gawk手册页中的mktime。
#! /usr/bin/env bash
#
# Count log hits per N-minute interval in a single awk pass.
#
# Usage: script [interval_minutes [logfile]]
#   interval_minutes  positive integer, default 1
#   logfile           log to read, default "defaultFileName"
#
# Each counted line must carry a bracketed timestamp such as
# [10/Oct/2019:09:01:05 +0000]; seconds are ignored (truncated to the minute).
# A bucket is opened by a hit and closed by the first later hit that arrives
# more than the interval after the bucket start. For every bucket the script
# prints "HH:MM - HH:MM" (first and last hit in the bucket) followed by the
# hit count on the next line.
#
# Needs an awk that provides mktime() (e.g. GNU awk).

count_hits() {
  local raw_interval=${1:-1}
  local logfile=${2:-defaultFileName}
  local interval

  if [[ ! $raw_interval =~ ^[0-9]+$ ]] || (( 10#$raw_interval < 1 )); then
    printf 'interval must be a positive integer (minutes), got: %s\n' \
      "$raw_interval" >&2
    return 2
  fi
  interval=$(( 10#$raw_interval ))

  # Both expansions are quoted: an unquoted default such as ${1:- 1} is
  # word-split, which hands awk "1" as its program text.
  awk -v interval="$interval" '
    BEGIN {
      printf "Counting hits per %sm interval\n", interval
      interval = interval * 60          # minutes -> seconds

      # Month name -> zero-padded month number (portable, no switch needed).
      n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ")
      for (i = 1; i <= n; i++) {
        monNum[names[i]] = sprintf("%02d", i)
      }
    }

    /:/ {
      split($0,     tmp,  "[")
      split(tmp[2], t2,   "]")
      split(t2[1],  tmp,  ":")          # tmp  now date, H, M, S tz
      H = tmp[2]; M = tmp[3]
      split(tmp[1], dtmp, "/")          # dtmp now dd, Mon, yyyy

      # Ignore lines that merely contain a colon but no usable timestamp.
      if (!(dtmp[2] in monNum)) {
        next
      }
      epoch = mktime(sprintf("%s %s %s %s %s 00", dtmp[3], monNum[dtmp[2]], dtmp[1], H, M))
      if (epoch == -1) {
        next
      }

      if (!started) {                   # first hit opens the first bucket
        started = 1
        last = epoch; cnt = 1
        h = H; m = M; lh = H; lm = M
      } else if (epoch - last > interval) {
        printf "%s:%s - %s:%s\n%s\n", h, m, lh, lm, cnt
        last = epoch; cnt = 1
        h = H; m = M; lh = H; lm = M
      } else {
        cnt++
        lh = H; lm = M
      }
      next
    }

    # Flush the bucket still open at end of input (nothing if no hit was seen).
    END {
      if (started) {
        printf "%s:%s - %s:%s\n%s\n", h, m, lh, lm, cnt
      }
    }
  ' "$logfile"
}

count_hits "$@"
$ cat tst.awk
# Count log requests per fixed-size window of minutes.
# Needs an awk with mktime()/strftime(), e.g. GNU awk.
#
# Usage: awk -v interval=N -f tst.awk logfile
#   interval  window length in whole minutes; falls back to 1 when unset,
#             non-numeric or < 1 (a zero step would make the END loop spin
#             forever).
# Input:  lines carrying a bracketed timestamp like [dd/Mon/yyyy:HH:MM:SS tz];
#         lines without a parsable timestamp are ignored.
# Output: for every window from the earliest to the latest minute seen,
#         "HH:MM HH:MM" (window start, window end) followed by the hit count
#         on the next line. Empty windows are printed with a count of 0.
BEGIN {
    FS = "[][]"                      # split on either bracket: $2 is the timestamp
    interval = int(interval)
    if (interval < 1) {
        interval = 1
    }
}
NF >= 2 {
    split($2, t, /[\/: ]/)           # t: dd, Mon, yyyy, HH, MM, SS, tz
    # Month name -> number: names start at offsets 1, 4, 7, ... of the string.
    monPos = index("JanFebMarAprMayJunJulAugSepOctNovDec", t[2])
    if (length(t[2]) != 3 || monPos % 3 != 1) {
        next
    }
    mon = (monPos + 2) / 3
    secs = mktime(t[3] " " mon " " t[1] " " t[4] " " t[5] " 0")
    if (secs == -1) {
        next
    }
    epochMins = int(secs / 60)
    # Track the true min/max so out-of-order lines cannot shrink the range.
    if (!seen || epochMins < begMins) {
        begMins = epochMins
    }
    if (!seen || epochMins > endMins) {
        endMins = epochMins
    }
    seen = 1
    cnt[epochMins]++
}
END {
    if (!seen) {
        exit                         # no timestamps at all: print nothing
    }
    for (winBeg = begMins; winBeg <= endMins; winBeg += interval) {
        tot = 0
        for (i = winBeg; i < winBeg + interval; i++) {
            if (i in cnt) {          # membership test avoids creating empty entries
                tot += cnt[i]
            }
        }
        print strftime("%H:%M", winBeg * 60), strftime("%H:%M", (winBeg + interval) * 60) ORS tot
    }
}
$ awk -v interval=1 -f tst.awk file
09:01 09:02
6
09:02 09:03
0
09:03 09:04
0
09:04 09:05
0
09:05 09:06
0
09:06 09:07
8
09:07 09:08
0
09:08 09:09
0
09:09 09:10
0
09:10 09:11
0
09:11 09:12
14
09:12 09:13
0
09:13 09:14
0
09:14 09:15
4
$ awk -v interval=2 -f tst.awk file
09:01 09:03
6
09:03 09:05
0
09:05 09:07
8
09:07 09:09
0
09:09 09:11
0
09:11 09:13
14
09:13 09:15
4
$ awk -v interval=5 -f tst.awk file
09:01 09:06
6
09:06 09:11
8
09:11 09:16
18