Regex 如何区分我想要的东西和我不想要的东西';perl中的t? 我必须在代码C++代码上应用转换,而不是注释或预处理器语句。我遇到的问题是预处理器语句。基本上,我想要这样的东西: #!/usr/bin/perl my $file = $ARGV[0]; my $doubleQuotedString = q{"(?>[^"\\\\]++|\\\\{2}|\\\\(?s).)*+"}; my $singleQuotedString = q{'(?>[^'\\\\]++|\\\\{2}|\\\\(?s).)*+'}; my ($rest, $code, $stuffToIgnore) = (""); open(my $inputFH, "<:raw:crlf", $file) or die "can't open $file for reading. $!"; open(my $outputFH, ">:raw:crlf", "$file.out") or die "can't open $file.out for writing. $!"; my $counter = 0; while (<$inputFH>) { $_ = "$rest$_"; do { ($code, $stuffToIgnore, $rest) = m( ((?: $doubleQuotedString # found a string |$singleQuotedString # found a string |(?:[^/]++|/[^*/]) # found something not a string, comment or preprocessor statement )*+ ) ((?: ^\s*+#.*$ | # preprocessor statement \s*+//.*$ | # line comment \s*+/\*(?:[^*]++|\*(?!/))*+\*/ # block comment )*+ ) ((?s).*) # rest )xm; ++$counter; goto BLOCK_READ if $stuffToIgnore ne "" or eof($inputFH); } while ($_ .= <$inputFH>); BLOCK_READ: defined $code or die "Unterminated block."; # do transformation on $code print "CODE: >>$code<<\nIGNORE: >>$stuffToIgnore<<\n"; print $outputFH "$code$stuffToIgnore"; }
要查看正在运行的演示,请参阅。我认为,首先将正则表达式的复杂性分解为几个独立的部分,您将获得成功。只要将不同的可能匹配项拉入单独的Regex 如何区分我想要的东西和我不想要的东西';perl中的t? 我必须在代码C++代码上应用转换,而不是注释或预处理器语句。我遇到的问题是预处理器语句。基本上,我想要这样的东西: #!/usr/bin/perl my $file = $ARGV[0]; my $doubleQuotedString = q{"(?>[^"\\\\]++|\\\\{2}|\\\\(?s).)*+"}; my $singleQuotedString = q{'(?>[^'\\\\]++|\\\\{2}|\\\\(?s).)*+'}; my ($rest, $code, $stuffToIgnore) = (""); open(my $inputFH, "<:raw:crlf", $file) or die "can't open $file for reading. $!"; open(my $outputFH, ">:raw:crlf", "$file.out") or die "can't open $file.out for writing. $!"; my $counter = 0; while (<$inputFH>) { $_ = "$rest$_"; do { ($code, $stuffToIgnore, $rest) = m( ((?: $doubleQuotedString # found a string |$singleQuotedString # found a string |(?:[^/]++|/[^*/]) # found something not a string, comment or preprocessor statement )*+ ) ((?: ^\s*+#.*$ | # preprocessor statement \s*+//.*$ | # line comment \s*+/\*(?:[^*]++|\*(?!/))*+\*/ # block comment )*+ ) ((?s).*) # rest )xm; ++$counter; goto BLOCK_READ if $stuffToIgnore ne "" or eof($inputFH); } while ($_ .= <$inputFH>); BLOCK_READ: defined $code or die "Unterminated block."; # do transformation on $code print "CODE: >>$code<<\nIGNORE: >>$stuffToIgnore<<\n"; print $outputFH "$code$stuffToIgnore"; },regex,perl,Regex,Perl,要查看正在运行的演示,请参阅。我认为,首先将正则表达式的复杂性分解为几个独立的部分,您将获得成功。只要将不同的可能匹配项拉入单独的if块,可读性就会有很大的不同 您还应该能够利用存在少量预处理器指令这一事实。您可以试试这个。 一些问题。您是否一次累积和处理超过一行数据 如果没有,我看不到任何关于扩展到多行的块注释的规定。 另外,块注释使用行注释,我看不到行注释继续的任何规定 编辑-5 @阿德里安-我打出了一个适合我的版本。下面是Perl代码和输出。 让我知道这是否奏效。 请注意,为#预处理器以及
if
块,可读性就会有很大的不同
您还应该能够利用“预处理器指令的种类很少”这一事实。您可以试试下面的做法。有几个问题:您是否一次累积并处理多行数据?如果没有,我看不到任何针对跨越多行的块注释的处理。
另外,块注释会吞掉其中的行注释,而且我看不到任何针对行注释续行的处理。编辑-5 @阿德里安:我写出了一个在我这里可以正常工作的版本。下面是 Perl 代码和输出。
让我知道这是否奏效。
请注意,已为 `#` 预处理器语句以及 `//` 行注释添加了续行(行尾反斜杠)处理代码。测试结果非常好。
use strict;
use warnings;

# Splits C++ source text read from the DATA handle into alternating chunks:
#   $code          - real code (string/char literals included), the part a
#                    transformation may safely touch;
#   $stuffToIgnore - preprocessor statements, line comments and block
#                    comments that must be passed through untouched.
# Each chunk pair is printed in the "CODE: >>...<< / IGNORE: >>...<<" format.
#
# Sub-patterns for C/C++ string and character literals.  They are kept as
# plain strings and interpolated into the main pattern below.
# NOTE(review): both literals may span newlines, so an unbalanced quote in
# code can pair with a later quote in the same buffer - confirm this is
# acceptable for the inputs you process.
my $doubleQuotedString = q{"(?>[^"\\\\]++|\\\\{2}|\\\\(?s).)*+"};
my $singleQuotedString = q{'(?>[^'\\\\]++|\\\\{2}|\\\\(?s).)*+'};
my ($rest, $code, $stuffToIgnore) = ("");
my $counter = 0;    # number of match attempts performed so far

# Main pattern: (1) code, (2) stuff to ignore, (3) unparsed remainder.
# The sigil and the variable name must be adjacent: inside a pattern a
# dollar sign followed by whitespace is the end-of-line anchor, not the
# start of an interpolated variable.
my $splitter = qr~
    (                                   # (1), Code
        (?:
            (?! ^ [^\S\n]* \# )         # Not at the start of a preprocessor statement
            (?:
                $doubleQuotedString     # a complete string literal
              | $singleQuotedString     # a complete character literal
              | [^/\n"']++              # anything that cannot start a comment or a literal
              | / (?! [*/] )            # a slash that does not open a comment
              | ["']                    # a quote with no closing partner in the buffer yet
              | \n                      # newline
            )
        )*+
    )
    (                                   # (2), Ignore
        (?:
            \s*+ ^ [^\S\n]* \#          # '#' Preprocessor statement with continuation logic
            (?:
                [^\\]                   # any non-continuation character
              | \\ \n?                  # or a backslash, optionally followed by a newline
            )*?
            (?<! \\ )
            \n
          |
            \s*+ /\*                    # Block comment
            (?:
                [^*]++
              | \*
                (?! / )
            )*+
            \*/
          |
            \s*+ //                     # '//' Line comment, with continuation logic
            (?:
                [^\\]
              | \\ \n?
            )*?
            (?<! \\ )
            \n
        )*+
    )
    (                                   # (3), Rest
        (?s)
        .*
    )
~xm;

while (1)
{
    my $line = <DATA>;

    # Stop only when both the input and the carried-over remainder are
    # exhausted; a non-empty $rest at end of input still has to be emitted.
    last if !defined($line) && $rest eq "";

    my $buffer = defined($line) ? $rest . $line : $rest;

    # Keep appending lines until an ignorable section has been completed
    # (multi-line comments and continued directives need more than one line)
    # or the input runs out.  The whole buffer is re-matched each time.
    while (1)
    {
        ($code, $stuffToIgnore, $rest) = $buffer =~ $splitter;
        defined $code or die "Unterminated block.";
        ++$counter;
        last if $stuffToIgnore ne "" or eof(DATA);
        $buffer .= <DATA>;
    }

    # No progress is only possible at end of input, when the remainder
    # starts with a comment or directive that is never terminated (for
    # example a last line without a trailing newline).  It is not code, so
    # pass it through verbatim as ignorable text instead of dropping it.
    if ($code eq "" && $stuffToIgnore eq "" && $rest ne "")
    {
        ($stuffToIgnore, $rest) = ($rest, "");
    }

    # do transformation on $code
    print "CODE: >>$code<<\nIGNORE: >>$stuffToIgnore<<\n---------------------------------------\n";
}
__DATA__
# hello \
there
# how
# are
#you
#pragma once
#include "EditState.h"
#include "MyDoc.h"
// InputEdit
class CInputEdit : public CEdit
{
DECLARE_DYNAMIC(CInputEdit)
public:
CInputEdit();
virtual ~CInputEdit();
// Attributes
protected:
DECLARE_MESSAGE_MAP()
BOOL m_bTrackingMenu;
};
my$doubleQuotedString=q{”(?>[^“\\\\\]+\\\\\{2}}\\\\\\\\\\\\\\\(?s)。*+};
我的$singleQuotedString=q{'(?>[^'\\\\\]++\\\\\\{2}\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\(?s)。*+};
我的($rest,$code,$stuffToIgnore)=(“”);
我的$counter=0;
而()
{
$\=“$rest$\”;
做
{
($code、$stuffToIgnore、$rest)=m~
(#(1),代码
(?:
(?!^[^\S\n]*\\\)\不是预处理器语句
(?:
$doubleQuotedString#找到一个字符串
|$singleQuotedString#找到一个字符串
|(?:[^/\n]++|/[^*/\n])#找到的内容不是字符串、注释或换行符
|\n#新行
)
)*+
)
(#)(2),忽略
(?:
\带连续逻辑的s*+^[^\s\n]*\\\\\\\\\\'\\'预处理器语句
(?:
[^\\]#任何非连续字符^\
|\\\n?\或任何后跟可选换行符的连续字符
)*?
(?);
块读取:
已定义$code或die“未端接块”;
#对$code进行转换
“打印”代码:>>$CODE$StufftoIgnorey是的,我一次要累积多行代码进行转换。这就是做/while
循环的目的。至于多行块注释,它们是通过#block comment
部分提供的。这也会消耗块注释中嵌入的任何行注释。关于行注释继续。也适用于预处理器继续。Edit2:这将不起作用。#发现一些不…
将捕获预处理器语句,这是主要问题。Edit:感谢您的帮助,但这并不重要,我已经解决了。仅供参考,`[^\\]` 会匹配 `\r` 和 `\n`。不过我认为否定断言可能有些希望。@阿德里安:见 Edit 3。如果您预期预处理器语句出现在文本流的中间、但位于一行的开头,这会把它排除在第 1 组(代码)的捕获之外,并让它在第 2 组中被捕获。另外,谢谢您的提醒,但在这个上下文中,`[^\\]` 与 `\n` 没有关系,除非是 `\\\n`。
我看不出这会使它更容易阅读,或者我会如何以那种方式轻松阅读。
use strict;
use warnings;

# Splits C++ source text read from the DATA handle into alternating chunks:
#   $code          - real code (string/char literals included), the part a
#                    transformation may safely touch;
#   $stuffToIgnore - preprocessor statements, line comments and block
#                    comments that must be passed through untouched.
# Each chunk pair is printed in the "CODE: >>...<< / IGNORE: >>...<<" format.
#
# Sub-patterns for C/C++ string and character literals.  They are kept as
# plain strings and interpolated into the main pattern below.
# NOTE(review): both literals may span newlines, so an unbalanced quote in
# code can pair with a later quote in the same buffer - confirm this is
# acceptable for the inputs you process.
my $doubleQuotedString = q{"(?>[^"\\\\]++|\\\\{2}|\\\\(?s).)*+"};
my $singleQuotedString = q{'(?>[^'\\\\]++|\\\\{2}|\\\\(?s).)*+'};
my ($rest, $code, $stuffToIgnore) = ("");
my $counter = 0;    # number of match attempts performed so far

# Main pattern: (1) code, (2) stuff to ignore, (3) unparsed remainder.
# The sigil and the variable name must be adjacent: inside a pattern a
# dollar sign followed by whitespace is the end-of-line anchor, not the
# start of an interpolated variable.
my $splitter = qr~
    (                                   # (1), Code
        (?:
            (?! ^ [^\S\n]* \# )         # Not at the start of a preprocessor statement
            (?:
                $doubleQuotedString     # a complete string literal
              | $singleQuotedString     # a complete character literal
              | [^/\n"']++              # anything that cannot start a comment or a literal
              | / (?! [*/] )            # a slash that does not open a comment
              | ["']                    # a quote with no closing partner in the buffer yet
              | \n                      # newline
            )
        )*+
    )
    (                                   # (2), Ignore
        (?:
            \s*+ ^ [^\S\n]* \#          # '#' Preprocessor statement with continuation logic
            (?:
                [^\\]                   # any non-continuation character
              | \\ \n?                  # or a backslash, optionally followed by a newline
            )*?
            (?<! \\ )
            \n
          |
            \s*+ /\*                    # Block comment
            (?:
                [^*]++
              | \*
                (?! / )
            )*+
            \*/
          |
            \s*+ //                     # '//' Line comment, with continuation logic
            (?:
                [^\\]
              | \\ \n?
            )*?
            (?<! \\ )
            \n
        )*+
    )
    (                                   # (3), Rest
        (?s)
        .*
    )
~xm;

while (1)
{
    my $line = <DATA>;

    # Stop only when both the input and the carried-over remainder are
    # exhausted; a non-empty $rest at end of input still has to be emitted.
    last if !defined($line) && $rest eq "";

    my $buffer = defined($line) ? $rest . $line : $rest;

    # Keep appending lines until an ignorable section has been completed
    # (multi-line comments and continued directives need more than one line)
    # or the input runs out.  The whole buffer is re-matched each time.
    while (1)
    {
        ($code, $stuffToIgnore, $rest) = $buffer =~ $splitter;
        defined $code or die "Unterminated block.";
        ++$counter;
        last if $stuffToIgnore ne "" or eof(DATA);
        $buffer .= <DATA>;
    }

    # No progress is only possible at end of input, when the remainder
    # starts with a comment or directive that is never terminated (for
    # example a last line without a trailing newline).  It is not code, so
    # pass it through verbatim as ignorable text instead of dropping it.
    if ($code eq "" && $stuffToIgnore eq "" && $rest ne "")
    {
        ($stuffToIgnore, $rest) = ($rest, "");
    }

    # do transformation on $code
    print "CODE: >>$code<<\nIGNORE: >>$stuffToIgnore<<\n---------------------------------------\n";
}
__DATA__
# hello \
there
# how
# are
#you
#pragma once
#include "EditState.h"
#include "MyDoc.h"
// InputEdit
class CInputEdit : public CEdit
{
DECLARE_DYNAMIC(CInputEdit)
public:
CInputEdit();
virtual ~CInputEdit();
// Attributes
protected:
DECLARE_MESSAGE_MAP()
BOOL m_bTrackingMenu;
};
CODE: >>
<<
IGNORE: >># hello \
there
<<
---------------------------------------
CODE: >><<
IGNORE: >># how
<<
---------------------------------------
CODE: >><<
IGNORE: >># are
<<
---------------------------------------
CODE: >><<
IGNORE: >>#you
<<
---------------------------------------
CODE: >>
<<
IGNORE: >>#pragma once
<<
---------------------------------------
CODE: >>
<<
IGNORE: >>#include "EditState.h"
<<
---------------------------------------
CODE: >><<
IGNORE: >>#include "MyDoc.h"
<<
---------------------------------------
CODE: >>
<<
IGNORE: >>// InputEdit
<<
---------------------------------------
CODE: >>
class CInputEdit : public CEdit
{
DECLARE_DYNAMIC(CInputEdit)
public:
CInputEdit();
virtual ~CInputEdit();
<<
IGNORE: >>// Attributes
<<
---------------------------------------
CODE: >>protected:
DECLARE_MESSAGE_MAP()
BOOL m_bTrackingMenu;
};
<<
IGNORE: >><<
---------------------------------------