C 使用无嵌套while循环的滑动窗口删除注释_C_Sliding Window

C 使用无嵌套while循环的滑动窗口删除注释

C 使用无嵌套while循环的滑动窗口删除注释,c,sliding-window,C,Sliding Window,我试图用c代码从c文件中删除注释和字符串。我将继续对示例进行评论。我有一个滑动窗口，所以在任何给定时刻，我只有字符n和n-1。如果可能的话，我正在尝试找出一种不使用嵌套whiles的算法，但是我需要一种算法来通过输入getchar。我的第一个想法是通过while-through找到whenn=*和（n-1）=/，然后通过while-through-直到n=/和（n-1）=*，但考虑到这已经嵌套了，我觉得效率很低。如果有必要，我可以这样做，但我想知道是否有人有更好的解决方案使用一个while循环

我试图用 C 代码从 C 文件中删除注释和字符串。在示例中我只讨论注释。我有一个滑动窗口，所以在任何给定时刻，我只有字符

n

和

n-1

。如果可能的话，我正在尝试找出一种不使用嵌套

whiles

的算法，但是我需要一种算法来通过输入

getchar

。我的第一个想法是先用一个 while 循环查找满足

n=*和（n-1）=/

的位置，然后再用另一个 while 循环一直读取，直到

n=/和（n-1）=*

，但考虑到这已经嵌套了，我觉得效率很低。如果有必要，我可以这样做，但我想知道是否有人有更好的解决方案

使用一个

while

循环编写的算法可能如下所示：

// Skeleton of the single-loop approach: read one character per iteration and
// echo it only when it lies outside a comment.
while ((c = getchar()) != EOF)
{
    ... // looking at the byte that was just read

    if (...) // the symbol is not inside a comment
    {
        putchar(c);
    }
}

要确定输入的字符是否属于注释，可以使用状态机。在下面的示例中，它有4个状态；还存在遍历到下一个状态的规则

int state = 0;
int next_state;
while ((c = getchar()) != EOF)
{
    switch (state)
    {
        case 0: next_state = (c == '/' ? 1 : 0); break;
        case 1: next_state = (c == '*' ? 2 : c == '/' ? 1 : 0); break;
        case 2: next_state = (c == '*' ? 3 : 2); break;
        case 3: next_state = (c == '/' ? 0 : c == '*' ? 3 : 2); break;
        default: next_state = state; // will never happen
    }

    if (state == 1 && next_state == 0)
    {
        putchar('/'); // for correct output when a slash is not followed by a star
    }
    if (state == 0 && next_state == 0)
    {
        putchar(c);
    }
    state = next_state;
}

上面的例子非常简单：当 /* 出现在非注释上下文（例如 C 字符串）中时，它不能正常工作；它也不支持 // 注释等。

正确地完成这件事比一开始想象的要复杂得多，这里的其他评论已经明确指出了这一点。我强烈建议编写一个表驱动的有限状态机（FSM），并借助状态转换图来保证转换正确。在我看来（IMO），试图完全用 case 语句来实现是极易出错的

这里有一个 DOT（Graphviz）格式的图，您可以直接根据它编写状态表。注意，我完全没有测试过它，所以效果因人而异（YMMV）

该图的语义是，当您看到标签

""

时，它表示默认转换：如果该状态中的其他输入都不匹配，就走这条边。文件结尾在除

S0

以外的任何状态下都是错误，未明确列出的任何字符或

也是错误。除了在注释中（

S4

和

S5

）以及检测到开始注释（

S1

）外，扫描的每个字符都会打印出来。在检测开始注释时，您必须缓冲字符，如果是错误的开始，则打印字符，否则在确定它确实是注释时将其丢弃

在 DOT 图中，sq 表示单引号 '，dq 表示双引号 "

digraph state_machine {
    rankdir=LR;
    size="8,5";
    node [shape=doublecircle]; S0 /* init */;
    node [shape=circle];
    S0 /* init */ -> S1 /* begin_cmt */ [label="'/'"];
    S0 /* init */ -> S2 /* in_str */ [label=dq];
    S0 /* init */ -> S3 /* in_ch */ [label=sq];
    S0 /* init */ -> S0 /* init */ [label=""];
    S1 /* begin_cmt */ -> S4 /* in_slc */ [label="'/'"];
    S1 /* begin_cmt */ -> S5 /* in_mlc */ [label="'*'"];
    S1 /* begin_cmt */ -> S0 /* init */ [label=""];
    S1 /* begin_cmt */ -> S1 /* begin_cmt */ [label="'\\n'"]; // handle "/\n/" and "/\n*"
    S2 /* in_str */ -> S0 /* init */ [label="'\"'"];
    S2 /* in_str */ -> S6 /* str_esc */ [label="'\\\\'"];
    S2 /* in_str */ -> S2 /* in_str */ [label=""];
    S3 /* in_ch */ -> S0 /* init */ [label=sq];
    S4 /* in_slc */ -> S4 /* in_slc */ [label=""];
    S4 /* in_slc */ -> S0 /* init */ [label="'\\n'"];
    S5 /* in_mlc */ -> S7 /* end_mlc */ [label="'*'"];
    S5 /* in_mlc */ -> S5 /* in_mlc */ [label=""];
    S7 /* end_mlc */ -> S7 /* end_mlc */ [label="'*'|'\\n'"];
    S7 /* end_mlc */ -> S0 /* init */ [label="'/'"];
    S7 /* end_mlc */ -> S5 /* in_mlc */ [label=""];
    S6 /* str_esc */ -> S8 /* oct */ [label="[0-3]"];
    S6 /* str_esc */ -> S9 /* hex */ [label="'x'"];
    S6 /* str_esc */ -> S2 /* in_str */ [label=""];
    S8 /* oct */ -> S10 /* o1 */ [label="[0-7]"];
    S10 /* o1 */ -> S2 /* in_str */ [label="[0-7]"];
    S9 /* hex */ -> S11 /* h1 */ [label=hex];
    S11 /* h1 */ -> S2 /* in_str */ [label=hex];
    S3 /* in_ch */ -> S12 /* ch_esc */ [label="'\\\\'"];
    S3 /* in_ch */ -> S13 /* out_ch */ [label=""];
    S13 /* out_ch */ -> S0 /* init */ [label=sq];
    S12 /* ch_esc */ -> S3 /* in_ch */ [label=sq];
    S12 /* ch_esc */ -> S12 /* ch_esc */ [label=""];
}
由于您只希望在缓冲区中使用两个字符，而在while循环中仅使用一个字符，因此我建议使用第三个字符来跟踪您的状态（无论是否跳过文本）。我为您准备了一个测试程序，其中包含解释逻辑的内联注释：
// Program to strip comments and strings from a C file
//
//  Build:
//     gcc -o strip-comments strip-comments.c
//
//  Test:
//     ./strip-comments strip-comments.c

#include <stdio.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>

/* Test fixtures: the declarations and comments below exist only so that
 * running the program on its own source exercises chained comments,
 * comments inside expressions, and escape sequences inside strings.
 */
/* test if three comments *//* chained together */// will be removed.
static int value = 128 /* test comment within valid code *// 2;
const char * test1 = "This is a test of \" processing"; /* testing inline comment */
const char * test2 = "this is a test of \n within strings."; // testing inline comment
// this is the last test


/**
 * Copies C source text from `in` to `out` with comments removed and every
 * string literal replaced by the token NULL.
 *
 * The input is consumed one character at a time by a single loop driven by
 * a small state machine. The only look-behind that is needed is a '/' that
 * may or may not open a comment; it is held back until the next character
 * decides. Character literals are copied through unchanged, so quote
 * characters inside them (such as '"') do not start a string.
 *
 * The newline that ends a "//" comment is kept in the output. A "//" comment
 * that runs to the end of the input is accepted as terminated.
 *
 * @param in   stream the C source is read from
 * @param out  stream that receives the filtered text
 * @return 0 on success; -1, after printing a message to stderr, when the
 *         input ends inside a block comment or a string literal
 */
int strip_c_code(FILE * in, FILE * out)
{
   enum
   {
      ST_CODE,           // ordinary source text
      ST_SLASH,          // a '/' is held back; it may open a comment
      ST_LINE_COMMENT,   // inside a "//" comment
      ST_BLOCK_COMMENT,  // inside a "/*" comment
      ST_BLOCK_STAR,     // inside a "/*" comment, previous character was '*'
      ST_STRING,         // inside a string literal
      ST_STRING_ESC,     // inside a string literal, right after a backslash
      ST_CHAR,           // inside a character literal
      ST_CHAR_ESC        // inside a character literal, right after a backslash
   } state = ST_CODE;
   int c;   // int, not char: EOF must stay distinct from every byte value

   // loop through the file
   while ((c = fgetc(in)) != EOF)
   {
      switch (state)
      {
         case ST_SLASH:
         if (c == '/')
         {
            state = ST_LINE_COMMENT;
            break;
         }
         if (c == '*')
         {
            state = ST_BLOCK_COMMENT;
            break;
         }
         // the held-back slash was ordinary code (e.g. a division)
         fputc('/', out);
         state = ST_CODE;
         /* fallthrough */

         case ST_CODE:
         if (c == '/')
         {
            state = ST_SLASH;
         }
         else if (c == '"')
         {
            state = ST_STRING;
         }
         else
         {
            if (c == '\'')
               state = ST_CHAR;
            fputc(c, out);
         }
         break;

         case ST_LINE_COMMENT:
         if (c == '\n')
         {
            fputc(c, out);   // keep the line break that ends the comment
            state = ST_CODE;
         }
         break;

         case ST_BLOCK_COMMENT:
         if (c == '*')
            state = ST_BLOCK_STAR;
         break;

         case ST_BLOCK_STAR:
         if (c == '/')
            state = ST_CODE;
         else if (c != '*')
            state = ST_BLOCK_COMMENT;
         break;

         case ST_STRING:
         if (c == '\\')
         {
            state = ST_STRING_ESC;
         }
         else if (c == '"')
         {
            fprintf(out, "NULL"); // replace string with NULL
            state = ST_CODE;
         }
         break;

         case ST_STRING_ESC:
         // whatever follows a backslash is part of the string, even '"' or '\\'
         state = ST_STRING;
         break;

         case ST_CHAR:
         fputc(c, out);
         if (c == '\\')
            state = ST_CHAR_ESC;
         else if ((c == '\'') || (c == '\n'))
            state = ST_CODE;   // a newline also ends it, limiting damage from a stray apostrophe
         break;

         case ST_CHAR_ESC:
         fputc(c, out);
         state = ST_CHAR;
         break;

         default:
         break;
      }
   }

   // verify that the comment or string was terminated properly
   if ((state == ST_BLOCK_COMMENT) || (state == ST_BLOCK_STAR) ||
       (state == ST_STRING)        || (state == ST_STRING_ESC))
   {
      fprintf(stderr, "Unterminated comment or string\n");
      return(-1);
   }

   // a slash still held back at end of input is ordinary output
   if (state == ST_SLASH)
      fputc('/', out);

   return(0);
}


/**
 * Entry point: filters the file named by the single command-line argument
 * through strip_c_code() and writes the result to standard output.
 *
 * @return 0 on success; 1 on a usage error, when the file cannot be opened,
 *         or when strip_c_code() reports a failure
 */
int main(int argc, char * argv[])
{
   FILE * fs;
   int    rc;

   if (argc != 2)
   {
      fprintf(stderr, "Usage: %s <filename>\n", argv[0]);
      return(1);
   }

   if ((fs = fopen(argv[1], "r")) == NULL)
   {
      perror("fopen()");
      return(1);
   }

   // keep the result so a failed run is visible in the exit status
   rc = strip_c_code(fs, stdout);

   fclose(fs);

   return((rc == 0) ? 0 : 1);
}

/* end of source file */

//从C文件中删除注释和字符串的程序
//
//建造：
//gcc-o条注释条注释.c
//
//测试：
///strip comments strip comments.c
#包括
#包括
#包括
#包括
#包括
#包括
/*下面是一个字符串块，以及用于测试的注释
*密码。
*/
/*测试链接在一起的三条注释*//*是否将被删除。
静态int值=128/*有效代码内的测试注释*//2；
const char*test1=“这是对\“处理”/*测试内联注释*/
const char*test2=“这是对字符串中\n的测试。”；//测试内联注释
//这是最后一次测试
int strip_c_代码（文件*输入，文件*输出）
{
字符buff[2]；
跳过字符；
正在跳过='\0'；
buff[0]='\0'；
buff[1]='\0'；
//循环浏览文件
while（（buff[0]=fgetc（in））！=EOF）
{
//检查注释或字符串块的开头
如果（！（跳过））
{
//开始跳过“/”注释
如果（（buff[1]=='/'）&&（buff[0]=='/'））
跳过='/'；
//开始跳过“/*”注释
如果（（buff[1]='/'）&&（buff[0]='*'））
跳过='*'；
// Program to strip comments and strings from a C file
//
//  Build:
//     gcc -o strip-comments strip-comments.c
//
//  Test:
//     ./strip-comments strip-comments.c

#include <stdio.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>

/* Test fixtures: the declarations and comments below exist only so that
 * running the program on its own source exercises chained comments,
 * comments inside expressions, and escape sequences inside strings.
 */
/* test if three comments *//* chained together */// will be removed.
static int value = 128 /* test comment within valid code *// 2;
const char * test1 = "This is a test of \" processing"; /* testing inline comment */
const char * test2 = "this is a test of \n within strings."; // testing inline comment
// this is the last test


/**
 * Copies C source text from `in` to `out` with comments removed and every
 * string literal replaced by the token NULL.
 *
 * The input is consumed one character at a time by a single loop driven by
 * a small state machine. The only look-behind that is needed is a '/' that
 * may or may not open a comment; it is held back until the next character
 * decides. Character literals are copied through unchanged, so quote
 * characters inside them (such as '"') do not start a string.
 *
 * The newline that ends a "//" comment is kept in the output. A "//" comment
 * that runs to the end of the input is accepted as terminated.
 *
 * @param in   stream the C source is read from
 * @param out  stream that receives the filtered text
 * @return 0 on success; -1, after printing a message to stderr, when the
 *         input ends inside a block comment or a string literal
 */
int strip_c_code(FILE * in, FILE * out)
{
   enum
   {
      ST_CODE,           // ordinary source text
      ST_SLASH,          // a '/' is held back; it may open a comment
      ST_LINE_COMMENT,   // inside a "//" comment
      ST_BLOCK_COMMENT,  // inside a "/*" comment
      ST_BLOCK_STAR,     // inside a "/*" comment, previous character was '*'
      ST_STRING,         // inside a string literal
      ST_STRING_ESC,     // inside a string literal, right after a backslash
      ST_CHAR,           // inside a character literal
      ST_CHAR_ESC        // inside a character literal, right after a backslash
   } state = ST_CODE;
   int c;   // int, not char: EOF must stay distinct from every byte value

   // loop through the file
   while ((c = fgetc(in)) != EOF)
   {
      switch (state)
      {
         case ST_SLASH:
         if (c == '/')
         {
            state = ST_LINE_COMMENT;
            break;
         }
         if (c == '*')
         {
            state = ST_BLOCK_COMMENT;
            break;
         }
         // the held-back slash was ordinary code (e.g. a division)
         fputc('/', out);
         state = ST_CODE;
         /* fallthrough */

         case ST_CODE:
         if (c == '/')
         {
            state = ST_SLASH;
         }
         else if (c == '"')
         {
            state = ST_STRING;
         }
         else
         {
            if (c == '\'')
               state = ST_CHAR;
            fputc(c, out);
         }
         break;

         case ST_LINE_COMMENT:
         if (c == '\n')
         {
            fputc(c, out);   // keep the line break that ends the comment
            state = ST_CODE;
         }
         break;

         case ST_BLOCK_COMMENT:
         if (c == '*')
            state = ST_BLOCK_STAR;
         break;

         case ST_BLOCK_STAR:
         if (c == '/')
            state = ST_CODE;
         else if (c != '*')
            state = ST_BLOCK_COMMENT;
         break;

         case ST_STRING:
         if (c == '\\')
         {
            state = ST_STRING_ESC;
         }
         else if (c == '"')
         {
            fprintf(out, "NULL"); // replace string with NULL
            state = ST_CODE;
         }
         break;

         case ST_STRING_ESC:
         // whatever follows a backslash is part of the string, even '"' or '\\'
         state = ST_STRING;
         break;

         case ST_CHAR:
         fputc(c, out);
         if (c == '\\')
            state = ST_CHAR_ESC;
         else if ((c == '\'') || (c == '\n'))
            state = ST_CODE;   // a newline also ends it, limiting damage from a stray apostrophe
         break;

         case ST_CHAR_ESC:
         fputc(c, out);
         state = ST_CHAR;
         break;

         default:
         break;
      }
   }

   // verify that the comment or string was terminated properly
   if ((state == ST_BLOCK_COMMENT) || (state == ST_BLOCK_STAR) ||
       (state == ST_STRING)        || (state == ST_STRING_ESC))
   {
      fprintf(stderr, "Unterminated comment or string\n");
      return(-1);
   }

   // a slash still held back at end of input is ordinary output
   if (state == ST_SLASH)
      fputc('/', out);

   return(0);
}


/**
 * Entry point: filters the file named by the single command-line argument
 * through strip_c_code() and writes the result to standard output.
 *
 * @return 0 on success; 1 on a usage error, when the file cannot be opened,
 *         or when strip_c_code() reports a failure
 */
int main(int argc, char * argv[])
{
   FILE * fs;
   int    rc;

   if (argc != 2)
   {
      fprintf(stderr, "Usage: %s <filename>\n", argv[0]);
      return(1);
   }

   if ((fs = fopen(argv[1], "r")) == NULL)
   {
      perror("fopen()");
      return(1);
   }

   // keep the result so a failed run is visible in the exit status
   rc = strip_c_code(fs, stdout);

   fclose(fs);

   return((rc == 0) ? 0 : 1);
}

/* end of source file */