Regex 如何区分我想要的东西和我不想要的东西';perl中的t? 我必须在代码C++代码上应用转换,而不是注释或预处理器语句。我遇到的问题是预处理器语句。基本上,我想要这样的东西: #!/usr/bin/perl my $file = $ARGV[0]; my $doubleQuotedString = q{"(?>[^"\\\\]++|\\\\{2}|\\\\(?s).)*+"}; my $singleQuotedString = q{'(?>[^'\\\\]++|\\\\{2}|\\\\(?s).)*+'}; my ($rest, $code, $stuffToIgnore) = (""); open(my $inputFH, "<:raw:crlf", $file) or die "can't open $file for reading. $!"; open(my $outputFH, ">:raw:crlf", "$file.out") or die "can't open $file.out for writing. $!"; my $counter = 0; while (<$inputFH>) { $_ = "$rest$_"; do { ($code, $stuffToIgnore, $rest) = m( ((?: $doubleQuotedString # found a string |$singleQuotedString # found a string |(?:[^/]++|/[^*/]) # found something not a string, comment or preprocessor statement )*+ ) ((?: ^\s*+#.*$ | # preprocessor statement \s*+//.*$ | # line comment \s*+/\*(?:[^*]++|\*(?!/))*+\*/ # block comment )*+ ) ((?s).*) # rest )xm; ++$counter; goto BLOCK_READ if $stuffToIgnore ne "" or eof($inputFH); } while ($_ .= <$inputFH>); BLOCK_READ: defined $code or die "Unterminated block."; # do transformation on $code print "CODE: >>$code<<\nIGNORE: >>$stuffToIgnore<<\n"; print $outputFH "$code$stuffToIgnore"; }

Regex 如何区分我想要的东西和我不想要的东西';perl中的t? 我必须在代码C++代码上应用转换,而不是注释或预处理器语句。我遇到的问题是预处理器语句。基本上,我想要这样的东西: #!/usr/bin/perl my $file = $ARGV[0]; my $doubleQuotedString = q{"(?>[^"\\\\]++|\\\\{2}|\\\\(?s).)*+"}; my $singleQuotedString = q{'(?>[^'\\\\]++|\\\\{2}|\\\\(?s).)*+'}; my ($rest, $code, $stuffToIgnore) = (""); open(my $inputFH, "<:raw:crlf", $file) or die "can't open $file for reading. $!"; open(my $outputFH, ">:raw:crlf", "$file.out") or die "can't open $file.out for writing. $!"; my $counter = 0; while (<$inputFH>) { $_ = "$rest$_"; do { ($code, $stuffToIgnore, $rest) = m( ((?: $doubleQuotedString # found a string |$singleQuotedString # found a string |(?:[^/]++|/[^*/]) # found something not a string, comment or preprocessor statement )*+ ) ((?: ^\s*+#.*$ | # preprocessor statement \s*+//.*$ | # line comment \s*+/\*(?:[^*]++|\*(?!/))*+\*/ # block comment )*+ ) ((?s).*) # rest )xm; ++$counter; goto BLOCK_READ if $stuffToIgnore ne "" or eof($inputFH); } while ($_ .= <$inputFH>); BLOCK_READ: defined $code or die "Unterminated block."; # do transformation on $code print "CODE: >>$code<<\nIGNORE: >>$stuffToIgnore<<\n"; print $outputFH "$code$stuffToIgnore"; },regex,perl,Regex,Perl,要查看正在运行的演示,请参阅。我认为,首先将正则表达式的复杂性分解为几个独立的部分,您将获得成功。只要将不同的可能匹配项拉入单独的if块,可读性就会有很大的不同 您还应该能够利用存在少量预处理器指令这一事实。您可以试试这个。 一些问题。您是否一次累积和处理超过一行数据 如果没有,我看不到任何关于扩展到多行的块注释的规定。 另外,块注释使用行注释,我看不到行注释继续的任何规定 编辑-5 @阿德里安-我打出了一个适合我的版本。下面是Perl代码和输出。 让我知道这是否奏效。 请注意,为#预处理器以及

要查看正在运行的演示,请参阅。

我认为,首先将正则表达式的复杂性分解为几个独立的部分,您将获得成功。只要将不同的可能匹配项拉入单独的
if
块,可读性就会有很大的不同

您还应该能够利用存在少量预处理器指令这一事实。

您可以试试这个。
有几个问题。您是否一次累积并处理多行数据?

如果不是,我看不到对跨越多行的块注释的任何处理。
另外,块注释会吞掉其中的行注释,而且我也看不到对行注释续行(行尾的反斜杠)的任何处理。

编辑-5 @阿德里安-我打出了一个适合我的版本。下面是Perl代码和输出。
让我知道这是否奏效。
请注意,我为
#
预处理器语句以及
//
行注释添加了续行(行尾反斜杠)处理逻辑。
测试结果非常好。

 use strict;
 use warnings;

 # Demo: split C/C++ text read from the DATA handle into alternating "code"
 # chunks (text a transformation may touch) and "ignore" chunks (comments and
 # preprocessor statements), printing every pair as soon as it is complete.

 # A string / character literal.  Its body is any run of characters other than
 # the quote, a backslash or a raw newline, or a backslash escaping one
 # arbitrary character (so an escaped quote or an escaped newline stays inside
 # the literal).
 my $doubleQuotedString = qr{"(?>[^"\\\n]++|\\.)*+"}s;
 my $singleQuotedString = qr{'(?>[^'\\\n]++|\\.)*+'}s;

 # One chunk: (1) code, (2) the ignorable text that follows it, (3) the rest.
 # All three groups may be empty, so the pattern itself always matches.
 #
 # Maintenance note: inside a pattern, a dollar sign followed by a space is an
 # end-of-line anchor rather than a variable, so the two literal patterns must
 # be interpolated with no space after the sigil.
 my $chunkPattern = qr~

       (                              # (1) Code
            (?:
                 (?! ^ [^\S\n]* \# )            # stop in front of a preprocessor line
                 (?:
                      $doubleQuotedString            # a complete string literal
                   |  $singleQuotedString            # a complete character literal
                   |  [^/\n"']++                     # plain text up to a slash, quote or newline
                   |  / (?! [*/] )                   # a slash that does not open a comment
                   |  ["'\n]                         # a newline, or a quote with no closing partner
                 )
            )*+
       )

       (                              # (2) Ignore
            (?:
                 \s*? ^ [^\S\n]* \#             # preprocessor statement, possibly indented
                 (?:
                      [^\\]                          # any character that is not a backslash
                   |  \\ \n?                         # a backslash, with the newline it continues over
                 )*?
                 (?<! \\ )                      # the closing newline must not be a continuation
                 \n
              |
                 \s*+ /\*                       # block comment
                 (?:
                      [^*]++
                   |  \*
                      (?! / )
                 )*+
                 \*/
              |
                 \s*+ //                        # line comment, same continuation logic
                 (?:
                      [^\\]
                   |  \\ \n?
                 )*?
                 (?<! \\ )
                 \n
            )*+
       )

       (                              # (3) Rest
            (?s)
            .*
       )
 ~xm;

 # Report one chunk.  A real tool would transform the code part here before
 # writing both parts out.
 sub emit_chunk {
     my ( $code, $stuffToIgnore ) = @_;

     # do transformation on $code

     print "CODE:   >>$code<<\nIGNORE: >>$stuffToIgnore<<\n---------------------------------------\n";
     return;
 }

 my $rest    = "";
 my $counter = 0;    # number of match attempts, handy when debugging

 while ( defined( my $line = <DATA> ) ) {
     my $buffer = $rest . $line;
     my ( $code, $stuffToIgnore );

     # Keep appending lines until the buffer holds a complete ignorable span
     # (for example a multi-line comment) or the input is exhausted.
     while (1) {
         ( $code, $stuffToIgnore, $rest ) = $buffer =~ $chunkPattern;
         defined $code or die "Unterminated block.";
         ++$counter;
         last if $stuffToIgnore ne "" or eof(DATA);
         $buffer .= <DATA>;
     }

     emit_chunk( $code, $stuffToIgnore );
 }

 # Text carried over from the final chunk has not been examined yet once the
 # input is exhausted; keep splitting it so that nothing is silently dropped.
 while ( $rest ne "" ) {
     my ( $code, $stuffToIgnore, $tail ) = $rest =~ $chunkPattern;
     defined $code or die "Unterminated block.";
     ++$counter;

     if ( $code eq "" and $stuffToIgnore eq "" ) {
         # Neither group can advance (e.g. a comment that is never closed or
         # a final line comment without a newline): pass the text through
         # untouched instead of transforming something unclassified.
         emit_chunk( "", $rest );
         last;
     }

     emit_chunk( $code, $stuffToIgnore );
     $rest = $tail;
 }


 __DATA__

 # hello \
 there
 # how
 # are
 #you

 #pragma once

 #include "EditState.h"
 #include "MyDoc.h"

 // InputEdit

 class CInputEdit : public CEdit
 {
    DECLARE_DYNAMIC(CInputEdit)

 public:
    CInputEdit();
    virtual ~CInputEdit();

 // Attributes
 protected:

    DECLARE_MESSAGE_MAP()

    BOOL m_bTrackingMenu;

 };
my$doubleQuotedString=q{”(?>[^“\\\\\]+\\\\\{2}}\\\\\\\\\\\\\\\(?s)。*+};
我的$singleQuotedString=q{'(?>[^'\\\\\]++\\\\\\{2}\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\(?s)。*+};
我的($rest,$code,$stuffToIgnore)=(“”);
我的$counter=0;
而()
{
$\=“$rest$\”;
做
{
($code、$stuffToIgnore、$rest)=m~
(#(1),代码
(?:
(?!^[^\S\n]*\\\)\不是预处理器语句
(?:
$doubleQuotedString#找到一个字符串
|$singleQuotedString#找到一个字符串
|(?:[^/\n]++|/[^*/\n])#找到的内容不是字符串、注释或换行符
|\n#新行
)
)*+
)
(#)(2),忽略
(?:
\带连续逻辑的s*+^[^\s\n]*\\\\\\\\\\'\\'预处理器语句
(?:
[^\\]#任何非连续字符^\
|\\\n?\或任何后跟可选换行符的连续字符
)*?
(?);
块读取:
已定义$code或die“未端接块”;
#对$code进行转换

“打印”代码:>>$CODE$StufftoIgnorey是的,我一次要累积多行代码进行转换。这就是做/while循环的目的。至于多行块注释,它们是通过#block comment部分提供的。这也会消耗块注释中嵌入的任何行注释。关于行注释继续。也适用于预处理器继续。Edit2:这将不起作用。
#发现一些不…
将捕获预处理器语句,这是主要问题。Edit:感谢您的帮助,但这并不重要,我已经控制住了。仅供参考,
`[^\\]` 也会匹配 `\r` 和 `\n`。不过我认为否定断言(negative assertion)可能有些希望。@阿德里安——见 Edit3。如果预处理器语句出现在文本流的中间,但它是所在行的第一条语句,那么它会被排除在第 1 组(代码)的捕获之外,并由第 2 组捕获。另外,谢谢你的提醒,但在这个上下文中,
[^\\]
\n
没有关系,除非它的
\\\n
我看不出这会使它更容易阅读,或者我会如何以那种方式轻松阅读。
 use strict;
 use warnings;

 # Demo: split C/C++ text read from the DATA handle into alternating "code"
 # chunks (text a transformation may touch) and "ignore" chunks (comments and
 # preprocessor statements), printing every pair as soon as it is complete.

 # A string / character literal.  Its body is any run of characters other than
 # the quote, a backslash or a raw newline, or a backslash escaping one
 # arbitrary character (so an escaped quote or an escaped newline stays inside
 # the literal).
 my $doubleQuotedString = qr{"(?>[^"\\\n]++|\\.)*+"}s;
 my $singleQuotedString = qr{'(?>[^'\\\n]++|\\.)*+'}s;

 # One chunk: (1) code, (2) the ignorable text that follows it, (3) the rest.
 # All three groups may be empty, so the pattern itself always matches.
 #
 # Maintenance note: inside a pattern, a dollar sign followed by a space is an
 # end-of-line anchor rather than a variable, so the two literal patterns must
 # be interpolated with no space after the sigil.
 my $chunkPattern = qr~

       (                              # (1) Code
            (?:
                 (?! ^ [^\S\n]* \# )            # stop in front of a preprocessor line
                 (?:
                      $doubleQuotedString            # a complete string literal
                   |  $singleQuotedString            # a complete character literal
                   |  [^/\n"']++                     # plain text up to a slash, quote or newline
                   |  / (?! [*/] )                   # a slash that does not open a comment
                   |  ["'\n]                         # a newline, or a quote with no closing partner
                 )
            )*+
       )

       (                              # (2) Ignore
            (?:
                 \s*? ^ [^\S\n]* \#             # preprocessor statement, possibly indented
                 (?:
                      [^\\]                          # any character that is not a backslash
                   |  \\ \n?                         # a backslash, with the newline it continues over
                 )*?
                 (?<! \\ )                      # the closing newline must not be a continuation
                 \n
              |
                 \s*+ /\*                       # block comment
                 (?:
                      [^*]++
                   |  \*
                      (?! / )
                 )*+
                 \*/
              |
                 \s*+ //                        # line comment, same continuation logic
                 (?:
                      [^\\]
                   |  \\ \n?
                 )*?
                 (?<! \\ )
                 \n
            )*+
       )

       (                              # (3) Rest
            (?s)
            .*
       )
 ~xm;

 # Report one chunk.  A real tool would transform the code part here before
 # writing both parts out.
 sub emit_chunk {
     my ( $code, $stuffToIgnore ) = @_;

     # do transformation on $code

     print "CODE:   >>$code<<\nIGNORE: >>$stuffToIgnore<<\n---------------------------------------\n";
     return;
 }

 my $rest    = "";
 my $counter = 0;    # number of match attempts, handy when debugging

 while ( defined( my $line = <DATA> ) ) {
     my $buffer = $rest . $line;
     my ( $code, $stuffToIgnore );

     # Keep appending lines until the buffer holds a complete ignorable span
     # (for example a multi-line comment) or the input is exhausted.
     while (1) {
         ( $code, $stuffToIgnore, $rest ) = $buffer =~ $chunkPattern;
         defined $code or die "Unterminated block.";
         ++$counter;
         last if $stuffToIgnore ne "" or eof(DATA);
         $buffer .= <DATA>;
     }

     emit_chunk( $code, $stuffToIgnore );
 }

 # Text carried over from the final chunk has not been examined yet once the
 # input is exhausted; keep splitting it so that nothing is silently dropped.
 while ( $rest ne "" ) {
     my ( $code, $stuffToIgnore, $tail ) = $rest =~ $chunkPattern;
     defined $code or die "Unterminated block.";
     ++$counter;

     if ( $code eq "" and $stuffToIgnore eq "" ) {
         # Neither group can advance (e.g. a comment that is never closed or
         # a final line comment without a newline): pass the text through
         # untouched instead of transforming something unclassified.
         emit_chunk( "", $rest );
         last;
     }

     emit_chunk( $code, $stuffToIgnore );
     $rest = $tail;
 }


 __DATA__

 # hello \
 there
 # how
 # are
 #you

 #pragma once

 #include "EditState.h"
 #include "MyDoc.h"

 // InputEdit

 class CInputEdit : public CEdit
 {
    DECLARE_DYNAMIC(CInputEdit)

 public:
    CInputEdit();
    virtual ~CInputEdit();

 // Attributes
 protected:

    DECLARE_MESSAGE_MAP()

    BOOL m_bTrackingMenu;

 };
 CODE:   >>
 <<
 IGNORE: >># hello \
 there
 <<
 ---------------------------------------
 CODE:   >><<
 IGNORE: >># how
 <<
 ---------------------------------------
 CODE:   >><<
 IGNORE: >># are
 <<
 ---------------------------------------
 CODE:   >><<
 IGNORE: >>#you
 <<
 ---------------------------------------
 CODE:   >>
 <<
 IGNORE: >>#pragma once
 <<
 ---------------------------------------
 CODE:   >>
 <<
 IGNORE: >>#include "EditState.h"
 <<
 ---------------------------------------
 CODE:   >><<
 IGNORE: >>#include "MyDoc.h"
 <<
 ---------------------------------------
 CODE:   >>
 <<
 IGNORE: >>// InputEdit
 <<
 ---------------------------------------
 CODE:   >>
 class CInputEdit : public CEdit
 {
         DECLARE_DYNAMIC(CInputEdit)

 public:
         CInputEdit();
         virtual ~CInputEdit();

 <<
 IGNORE: >>// Attributes
 <<
 ---------------------------------------
 CODE:   >>protected:

         DECLARE_MESSAGE_MAP()

         BOOL m_bTrackingMenu;

 };

 <<
 IGNORE: >><<
 ---------------------------------------