Bash:使用 shell 脚本通过日志文件查找 CC 攻击 IP

Bash 通过日志文件查找cc攻击IP使用shell脚本,bash,shell,awk,sed,gawk,Bash,Shell,Awk,Sed,Gawk,我有这样的历史web日志文件: 157.15.14.19 - - 06 Sep 2016 09:13:10 +0300 "GET /index.php?id=1 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)" 157.15.14.19 - - 06 Sep 2016 09:13:11 +0300 "GET /index.php?id=2 HT

我有这样的历史web日志文件:

157.15.14.19 - -  06 Sep 2016 09:13:10 +0300  "GET /index.php?id=1 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
157.15.14.19 - -  06 Sep 2016 09:13:11 +0300  "GET /index.php?id=2 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
157.15.14.19 - -  06 Sep 2016 09:13:12 +0300  "GET /index.php?id=3 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
157.15.14.19 - -  06 Sep 2016 09:14:13 +0300  "GET /index.php?id=4 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
157.15.14.19 - -  06 Sep 2016 09:14:14 +0300  "GET /index.php?id=5 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
157.15.14.19 - -  06 Sep 2016 09:15:15 +0300  "GET /index.php?id=6 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
157.15.14.19 - -  06 Sep 2016 09:15:16 +0300  "GET /index.php?id=7 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
157.15.14.19 - -  06 Sep 2016 09:15:17 +0300  "GET /index.php?id=8 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
157.15.14.19 - -  06 Sep 2016 09:16:10 +0300  "GET /index.php?id=9 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
157.15.14.19 - -  06 Sep 2016 09:16:10 +0300  "GET /index.php?id=10 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
8.8.8.8 - -  06 Sep 2016 09:17:10 +0300  "GET /index.php?id=11 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
9.9.9.9 - -  06 Sep 2016 09:17:10 +0300  "GET /index.php?id=12 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
157.15.14.19 - -  06 Sep 2016 09:18:10 +0300  "GET /index.php?id=13 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
157.15.14.19 - -  06 Sep 2016 09:19:10 +0300  "GET /index.php?id=14 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
157.15.14.19 - -  06 Sep 2016 09:19:10 +0300  "GET /index.php?id=15 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
157.15.14.19 - -  06 Sep 2016 09:20:10 +0300  "GET /index.php?id=15 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
123.123.123.123 - -  06 Sep 2016 09:21:10 +0300  "GET /index.php?id=15 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
157.15.14.19 - -  06 Sep 2016 09:22:10 +0300  "GET /index.php?id=15 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
我只想通过昨天的web日志文件找出cc攻击IP

在本例中,我对 cc 攻击的定义是:

在每 5 分钟的时间段内,如果同一个远程 IP 的请求次数超过 5 次,就认为该 IP 正在发起 cc 攻击,并把它打印出来

日志文件包含一整天的记录,并且只能使用 bash 脚本以及 awk、cat、gawk、sed 等常用工具

请给我一些建议,谢谢


更新:

我尝试使用测试脚本(每2分钟相同的请求计数>5)

但是,代码太糟糕了,需要优化

# Report client IPs that send more than Trig requests inside one fixed
# Interval-minute window of a web access log.
#
# Variables (awk -v):
#   Interval  window length in minutes; windows are aligned on multiples of
#             Interval counted from midnight (00:00, 00:05, ... for 5)
#   Trig      an IP is reported when its hit count in one window is > Trig
# Input:   YourFile, expected to be in chronological order
# Output:  one line per offending IP and window:
#            <day> <HH:MM:00 of window start> <IP> : <hits>
#
# FS splits on runs of blanks and on ":" so that hour and minute become
# separate fields. "+" is used instead of "*" so the separator can never match
# the empty string, and "[ \t]" avoids POSIX classes that some awk builds lack.
awk -v Interval=5 -v Trig=5 -F '[ \t]+|:' '
  # Print every IP of the window just closed whose hit count exceeds Trig,
  # then empty the counters so the next window starts from zero.
  function Flush(    IP) {
    for (IP in IPCounts)
      if (IPCounts[IP] > Trig)
        print LastTime " " IP " : " IPCounts[IP]
    split("", IPCounts)
  }

  # Skip blank or truncated lines: they cannot carry an IP and a timestamp.
  NF < 8 { next }

  {
    # using format log
    #  157.15.14.19 - -  06 Sep 2016 09:13:10 +0300  "GET /index.php?id=1 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (...)"
    # $1            2 3  4  5   6    7  8  9  10      11 ...
    #
    # Field mapping: this is the only line to edit for another log layout.
    Ip = $1; Day = $4 " " $5 " " $6; Hour = $7; Min = $8

    # minutes elapsed since midnight
    ThisTime = Hour * 60 + Min

    # A window covers [LastTic, LastTic + Interval), hence ">=" and not ">".
    # Comparing the day opens the very first window (LastDay is still empty,
    # even for a log that starts at 00:00) and keeps two days from merging.
    # A line that is slightly older than LastTic stays in the current window.
    if (Day != LastDay || ThisTime >= LastTic + Interval) {
      # check and print last cycle hit, then reset the reference
      Flush()
      LastTic = ThisTime - (ThisTime % Interval)
      # %02d keeps the label zero padded: 09:05:00, not "09: 5:00"
      LastTime = sprintf("%s %02d:%02d:00", Day, int(LastTic / 60), LastTic % 60)
      LastDay = Day
    }

    # add this line to the current cycle
    IPCounts[Ip]++
  }

  # print last cycle
  END { Flush() }
' YourFile


# for format of log
#  op.g.cc 124.145.36.121 - - [21/Nov/2016:03:38:02 +0800] ==> 172.11.0.238:80 "POST ...
# $1       2              3 4 5            6  7  8  9      10   11 ...

# change only the field-mapping line to:
#  Ip = $2; Day = $5; Hour = $6; Min = $7
# optionally, to mimic the bracketed timestamp of that format:
#  LastTime = sprintf("%s:%02d:%02d:00 +0800]", Day, int(LastTic / 60), LastTic % 60)
注:

  • 这里对时间的处理比较简单粗略(您提到每天只有一个日志文件)。如果需要更高的精度,请使用 gawk 的 mktime() 把时间转换为真实的纪元(epoch)秒数作为参考
  • Trig是计数触发级别(5次),Interval是循环时间(5分钟)

op.g.cc 124.145.36.121---[21/Nov/2016:03:38:02+0800]=>172.11.0.238:80“POST/zabbix/jsrpc.php?output=json-rpc HTTP/1.1“200 77”0.316“Mozilla/5.0(X11;Linux x86_64)AppleWebKit/537.36(KHTML,像Gecko)”-“-”上游_响应时间“0.316”请求时间“0.316”dd.g.cc 60.223.223.54/0800:02]==>172.11.0.53:8012“GET/?ts=1479670682&uid=&mid=&cs=HTTP/1.1“200 479”0.039”“-“Dalvik/2.1.0(Linux;U;Android 5.0.2;Redmi Note 2 MIUI/V8.0.2.0.LHMCNDG)”-“5.0.1.0002”上游响应时间“0.039”请求时间“0.039”我的日志格式更改如下,如何修改脚本。感谢这样的修改:
op.g.cc 124.145.36.121---[21/Nov/2016:03:38:02+0800]==>172.11.0.238:80“POST/zabbix/jsrpc.php?output=json-rpc HTTP/1.1“200 77”0.316”“op.g.cc/?ddreset=1&sid=”;“Mozilla/5.0(X11;Linux x86_64)AppleWebKit/537.36(KHTML,像Gecko)”-“上游响应时间”0.316“请求时间”0.316“
# Report client IPs that send more than Trig requests inside one fixed
# Interval-minute window of a web access log.
#
# Variables (awk -v):
#   Interval  window length in minutes; windows are aligned on multiples of
#             Interval counted from midnight (00:00, 00:05, ... for 5)
#   Trig      an IP is reported when its hit count in one window is > Trig
# Input:   YourFile, expected to be in chronological order
# Output:  one line per offending IP and window:
#            <day> <HH:MM:00 of window start> <IP> : <hits>
#
# FS splits on runs of blanks and on ":" so that hour and minute become
# separate fields. "+" is used instead of "*" so the separator can never match
# the empty string, and "[ \t]" avoids POSIX classes that some awk builds lack.
awk -v Interval=5 -v Trig=5 -F '[ \t]+|:' '
  # Print every IP of the window just closed whose hit count exceeds Trig,
  # then empty the counters so the next window starts from zero.
  function Flush(    IP) {
    for (IP in IPCounts)
      if (IPCounts[IP] > Trig)
        print LastTime " " IP " : " IPCounts[IP]
    split("", IPCounts)
  }

  # Skip blank or truncated lines: they cannot carry an IP and a timestamp.
  NF < 8 { next }

  {
    # using format log
    #  157.15.14.19 - -  06 Sep 2016 09:13:10 +0300  "GET /index.php?id=1 HTTP/1.1" 200 16977 "-" "Mozilla/5.0 (...)"
    # $1            2 3  4  5   6    7  8  9  10      11 ...
    #
    # Field mapping: this is the only line to edit for another log layout.
    Ip = $1; Day = $4 " " $5 " " $6; Hour = $7; Min = $8

    # minutes elapsed since midnight
    ThisTime = Hour * 60 + Min

    # A window covers [LastTic, LastTic + Interval), hence ">=" and not ">".
    # Comparing the day opens the very first window (LastDay is still empty,
    # even for a log that starts at 00:00) and keeps two days from merging.
    # A line that is slightly older than LastTic stays in the current window.
    if (Day != LastDay || ThisTime >= LastTic + Interval) {
      # check and print last cycle hit, then reset the reference
      Flush()
      LastTic = ThisTime - (ThisTime % Interval)
      # %02d keeps the label zero padded: 09:05:00, not "09: 5:00"
      LastTime = sprintf("%s %02d:%02d:00", Day, int(LastTic / 60), LastTic % 60)
      LastDay = Day
    }

    # add this line to the current cycle
    IPCounts[Ip]++
  }

  # print last cycle
  END { Flush() }
' YourFile


# for format of log
#  op.g.cc 124.145.36.121 - - [21/Nov/2016:03:38:02 +0800] ==> 172.11.0.238:80 "POST ...
# $1       2              3 4 5            6  7  8  9      10   11 ...

# change only the field-mapping line to:
#  Ip = $2; Day = $5; Hour = $6; Min = $7
# optionally, to mimic the bracketed timestamp of that format:
#  LastTime = sprintf("%s:%02d:%02d:00 +0800]", Day, int(LastTic / 60), LastTic % 60)