
从awk中的三个以上连续行计算值,awk,Awk,我正在处理一个文件,如下所示: site Date time value1 value2 0023 2014-01-01 00:00 32.0 23.7 0023 2014-01-01 01:00 38.0 29.9 0023 2014-01-01 02:00 85.0 26.6 0023 2014-01-01 03:00 34.0 25.3 0023 2014-01-01 04:00 37.0 23.8 0023 2014-01-01 05:00 80.0 20.3 0023 2014-01-


site Date time value1 value2
0023 2014-01-01 00:00 32.0 23.7
0023 2014-01-01 01:00 38.0 29.9
0023 2014-01-01 02:00 85.0 26.6
0023 2014-01-01 03:00 34.0 25.3
0023 2014-01-01 04:00 37.0 23.8
0023 2014-01-01 05:00 80.0 20.3
0023 2014-01-01 06:00 90.0 20.0
0023 2014-01-01 07:00 180.0 20.0
0023 2014-01-01 08:00 30.0 20.0


0023 2014-01-01 05:00 80.0 20.3 1
0023 2014-01-01 06:00 90.0 20.0 1
0023 2014-01-01 07:00 180.0 20.0 1
0023 2014-06-30 23:00 200.0 30.3 2
0023 2014-07-01 00:00 303.0 30.3 2
0023 2014-07-01 01:00 134.0 30.3 2
0025 2014-07-01 01:00 136.0 25.3 1           
0025 2014-07-01 02:00 116.0 25.3 1
0025 2014-07-01 03:00 106.0 25.3 1



#cat in2

还有我的awk程序:#cat msr

(($#!=1))&& { echo "Usage $0 inp_file"; exit 1; }
$1~"[^0-9]" || NF!=5 || $4 $5 ~ "[^0-9.]" {next;} # skip no data lines
$1 != stn  {chck_prnt(); stn=$1; stc=1;}          # new site, set counter to 1
# func of prnt (if need) & clr filo, mxvl


0023 2014-01-01 21:00 90.0 20
 0023 2014-01-01 22:00 80.0 20
 0023 2014-01-01 23:00 130.0 20
 0023 2014-01-02 16:00 130.0 20
 0023 2014-01-02 17:00 200.0 30.3
 0023 2014-01-02 18:00 303.0 30.3

0023 2014-01-01 21:00 90.0 20 1
0023 2014-01-01 22:00 80.0 20 1
0023 2014-01-01 23:00 130.0 20 1
0023 2014-01-02 16:00 130.0 20 1
0023 2014-01-02 17:00 200.0 30.3 1
0023 2014-01-02 18:00 303.0 30.3 1

0023 2014-01-01 21:00 90.0 20   1  
0023 2014-01-01 22:00 80.0 20    1
0023 2014-01-01 23:00 130.0 20   1
0023 2014-01-02 16:00 130.0 20   2
0023 2014-01-02 17:00 200.0 30.3 2 
0023 2014-01-02 18:00 303.0 30.3 2


@Kelly,你的评论不适合回答我的问题(在我看来)。 但我试着再次猜测真正的规格是什么

我想关键的思路是:当连续两行之间的时间差大于 1 小时时,如果缓存(filo)中的候选行满足“条件”,我们同样需要把它们打印出来。

我需要额外创建一个函数来计算时间差。当 filo 不为空时,主体部分会多出一行规则来调用它。我还加了一行过滤规则,用来检查输入中的日期和时间格式。

请注意:我的 chk1h() 函数已经够用,但计算两个时间戳之间的时间差还有其他方法:


2/ bash shell 的 date 命令,例如:date "+%s" -d "2014-03-28 11:48:30"


如果还存在一些问题,您需要给出一个较长且完整的代表性输入序列。 不要给出两个列表(输入和输出)。 输入列表就足够了,在要显示的行的末尾写上相应的站点计数器。 不打印的行没有第6列

cat msr2

# msr2 - print runs of consecutive hourly records, tagged with a per-site run counter.
#
# Usage: msr2 inp_file
#
# Input lines have five fields: site date(YYYY-MM-DD) time(HH:00) value1 value2.
# A row joins the current run when value1 is not below 3*value2, it belongs to
# the same site as the run, and it is exactly one hour after the previous row
# of the run. When a run ends it is printed only if it holds at least three
# rows and its largest value1 is above 100; each printed row gets the run
# counter of its site appended as a sixth column.
(($#!=1))&& { echo "Usage $0 inp_file" >&2; exit 1; }
awk '
 # Globals: filo[0..ix-1] buffered rows of the current run, ix number of rows,
 #          mxvl largest value1 in the run, ld/lt date and time of the last
 #          buffered row, stn current site, stc run counter within the site.
 BEGIN {ix=0; mxvl=0; stn=-1;}                     # ix: index of filo, stn: non-exist
 $1~"[^0-9]" || NF!=5 || $4 $5 ~ "[^0-9.]" {next;} # skip no data lines (header, blanks)
 $2 " " $3 !~ "^[1-2][0-9][0-9][0-9]-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:00$" {  # dt&tm format filtering
     printf("Unexpected dt,tm format:\nInput ln:%d\nContent: %s\n",NR,$0); exit(1);}

 $1 != stn  {chck_prnt(); stn=$1; stc=1;}          # new site: flush run, set counter to 1
 # NOTE(review): equality (col4 == 3*col5) keeps the row in the run; confirm
 # whether the intended criterion is strictly col4 > 3*col5.
 $4 < 3*$5  {chck_prnt(); next;}                   # criterion broken: flush run, drop row
 ix         {if(!chk1h(ld, lt, $2, $3))chck_prnt();} # filo not empty: flush on a gap != 1h
            {filo[ix++]=$0; ld=$2; lt=$3;          # put into filo & set last dt,tm
             if($4+0>mxvl)mxvl=$4+0;}              # track largest value1 of the run
 END        {chck_prnt();}                         # no more data line: flush last run

 # Print the buffered run if it qualifies, then clear the buffer.
 # i is a local variable.
 function chck_prnt(  i){
    if(ix>=3 && mxvl>100){                         # prnt condition
        for(i=0; i<ix; i++)printf("%s %d\n", filo[i],stc); # prnt all filo
        stc++;                                     # increase counter at site
    }
    ix=0; mxvl=0;                                  # clr filo & maxvl
 }

 # Return 1 if (d2,t2) is exactly one hour after (d1,t1), otherwise 0.
 # d1,d2 are YYYY-MM-DD dates, t1,t2 are HH:00 times; the rest are locals.
 function chk1h(d1,t1,d2,t2,  h1,h2,dy,dm,dd,v1,v2,leap){
   h1=substr(t1,1,2); h2=substr(t2,1,2);
   if(h2-h1==1 && d1==d2)return(1);                # most of case in same day 1h
   if(h1!=23||h2!="00")return(0);                  # only 23h-->00h can cross a day
   split(d1,v1,"-"); split(d2,v2,"-");             # v1[1-3]=ymd last in filo, v2[1-3] current
   dy=v2[1]-v1[1];                                 # diff of year
   dm=v2[2]-v1[2];                                 # diff of month
   dd=v2[3]-v1[3];                                 # diff of day
   if(dd==1 && !dy && !dm)return(1);               # 23h-->00h 1h in same month
   if(v2[3]!="01")return(0);                       # month changed but not to day 01
   if(v1[3]==31){                                  # prev month has 31 days (or Dec-->Jan)
       if(!dy && dm==1 || dy==1 && dm==-11)return(1); # 1h
       return(0);                                  # not 1h
   }
   if(v1[3]==30){                                  # prev month must be a 30 day month
       if("04 06 09 11" ~ v1[2] && !dy && dm==1)return(1); # 1h
       return(0);                                  # not 1h
   }
   # February: the last day is 29 in a leap year, 28 otherwise.
   leap=((v1[1]%4==0 && v1[1]%100!=0) || v1[1]%400==0);
   if(v1[2]=="02" && !dy && dm==1 && v1[3]==(leap?29:28))return(1); # 1h
   return(0);                                      # not 1h
 }
' "$1"

# Run msr2 on in3 and reformat its output: keep only 6-field lines whose first
# field is all digits, printing columns 4 and 5 as %6.1f and column 6 as %3u.
./msr2 in3|awk 'NF==6&&$1!~"[^0-9]"{printf("%s %s %s %6.1f %6.1f %3u\n",$1,$2,$3,$4,$5,$6);}'

0023 2014-01-01 21:00 90.0 20   1  
0023 2014-01-01 22:00 80.0 20    1
0023 2014-01-01 23:00 130.0 20   1
0023 2014-01-02 16:00 130.0 20   2
0023 2014-01-02 17:00 200.0 30.3 2 
0023 2014-01-02 18:00 303.0 30.3 2
# msr2 - print runs of consecutive hourly records, tagged with a per-site run counter.
#
# Usage: msr2 inp_file
#
# Input lines have five fields: site date(YYYY-MM-DD) time(HH:00) value1 value2.
# A row joins the current run when value1 is not below 3*value2, it belongs to
# the same site as the run, and it is exactly one hour after the previous row
# of the run. When a run ends it is printed only if it holds at least three
# rows and its largest value1 is above 100; each printed row gets the run
# counter of its site appended as a sixth column.
(($#!=1))&& { echo "Usage $0 inp_file" >&2; exit 1; }
awk '
 # Globals: filo[0..ix-1] buffered rows of the current run, ix number of rows,
 #          mxvl largest value1 in the run, ld/lt date and time of the last
 #          buffered row, stn current site, stc run counter within the site.
 BEGIN {ix=0; mxvl=0; stn=-1;}                     # ix: index of filo, stn: non-exist
 $1~"[^0-9]" || NF!=5 || $4 $5 ~ "[^0-9.]" {next;} # skip no data lines (header, blanks)
 $2 " " $3 !~ "^[1-2][0-9][0-9][0-9]-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:00$" {  # dt&tm format filtering
     printf("Unexpected dt,tm format:\nInput ln:%d\nContent: %s\n",NR,$0); exit(1);}

 $1 != stn  {chck_prnt(); stn=$1; stc=1;}          # new site: flush run, set counter to 1
 # NOTE(review): equality (col4 == 3*col5) keeps the row in the run; confirm
 # whether the intended criterion is strictly col4 > 3*col5.
 $4 < 3*$5  {chck_prnt(); next;}                   # criterion broken: flush run, drop row
 ix         {if(!chk1h(ld, lt, $2, $3))chck_prnt();} # filo not empty: flush on a gap != 1h
            {filo[ix++]=$0; ld=$2; lt=$3;          # put into filo & set last dt,tm
             if($4+0>mxvl)mxvl=$4+0;}              # track largest value1 of the run
 END        {chck_prnt();}                         # no more data line: flush last run

 # Print the buffered run if it qualifies, then clear the buffer.
 # i is a local variable.
 function chck_prnt(  i){
    if(ix>=3 && mxvl>100){                         # prnt condition
        for(i=0; i<ix; i++)printf("%s %d\n", filo[i],stc); # prnt all filo
        stc++;                                     # increase counter at site
    }
    ix=0; mxvl=0;                                  # clr filo & maxvl
 }

 # Return 1 if (d2,t2) is exactly one hour after (d1,t1), otherwise 0.
 # d1,d2 are YYYY-MM-DD dates, t1,t2 are HH:00 times; the rest are locals.
 function chk1h(d1,t1,d2,t2,  h1,h2,dy,dm,dd,v1,v2,leap){
   h1=substr(t1,1,2); h2=substr(t2,1,2);
   if(h2-h1==1 && d1==d2)return(1);                # most of case in same day 1h
   if(h1!=23||h2!="00")return(0);                  # only 23h-->00h can cross a day
   split(d1,v1,"-"); split(d2,v2,"-");             # v1[1-3]=ymd last in filo, v2[1-3] current
   dy=v2[1]-v1[1];                                 # diff of year
   dm=v2[2]-v1[2];                                 # diff of month
   dd=v2[3]-v1[3];                                 # diff of day
   if(dd==1 && !dy && !dm)return(1);               # 23h-->00h 1h in same month
   if(v2[3]!="01")return(0);                       # month changed but not to day 01
   if(v1[3]==31){                                  # prev month has 31 days (or Dec-->Jan)
       if(!dy && dm==1 || dy==1 && dm==-11)return(1); # 1h
       return(0);                                  # not 1h
   }
   if(v1[3]==30){                                  # prev month must be a 30 day month
       if("04 06 09 11" ~ v1[2] && !dy && dm==1)return(1); # 1h
       return(0);                                  # not 1h
   }
   # February: the last day is 29 in a leap year, 28 otherwise.
   leap=((v1[1]%4==0 && v1[1]%100!=0) || v1[1]%400==0);
   if(v1[2]=="02" && !dy && dm==1 && v1[3]==(leap?29:28))return(1); # 1h
   return(0);                                      # not 1h
 }
' "$1"
0023 2014-01-01 21:00 90.0 20 1
0023 2014-01-01 22:00 80.0 20 1
0023 2014-01-01 23:00 130.0 20 1
0023 2014-01-02 16:00 130.0 20 2
0023 2014-01-02 17:00 200.0 30.3 2
0023 2014-01-02 18:00 303.0 30.3 2
# Run msr2 on in3 and reformat its output: keep only 6-field lines whose first
# field is all digits, printing columns 4 and 5 as %6.1f and column 6 as %3u.
./msr2 in3|awk 'NF==6&&$1!~"[^0-9]"{printf("%s %s %s %6.1f %6.1f %3u\n",$1,$2,$3,$4,$5,$6);}'
0023 2014-01-01 21:00   90.0   20.0   1
0023 2014-01-01 22:00   80.0   20.0   1
0023 2014-01-01 23:00  130.0   20.0   1
0023 2014-01-02 16:00  130.0   20.0   2
0023 2014-01-02 17:00  200.0   30.3   2
0023 2014-01-02 18:00  303.0   30.3   2