Javascript 正则表达式正在捕获整个字符串_Javascript_Regex

Javascript 正则表达式正在捕获整个字符串

javascript regex

Javascript 正则表达式正在捕获整个字符串,javascript,regex,Javascript,Regex,我正在使用以下正则表达式： (public|private +)?function +([a-zA-Z_$][0-9a-zA-Z_$]*) *\\(([0-9a-zA-Z_$, ]*)\\) *{(.*)} 要匹配以下字符串： public function messenger(text){ sendMsg(text); } private function sendMsg(text){ alert(text); } （字符串中没有换行符，它们在正则表达式运行之前转换为空格）我希望它捕获这

我正在使用以下正则表达式：

(public|private +)?function +([a-zA-Z_$][0-9a-zA-Z_$]*) *\\(([0-9a-zA-Z_$, ]*)\\) *{(.*)}

要匹配以下字符串：

public function messenger(text){
sendMsg(text);
}
private function sendMsg(text){
alert(text);
}

（字符串中没有换行符，它们在正则表达式运行之前转换为空格）

我希望它捕获这两个函数，但它实际捕获到的是： $1: "" $2: "messenger" $3: "text" $4: "sendMsg(text); } private function sendMsg(text){ alert(text); "

顺便说一下，我正在使用Javascript。

尝试更改

(.*)

到

(.*?)

默认情况下，* 运算符是贪婪的，会匹配尽可能多的字符。请尝试使用非贪婪的等效运算符

*?

/((?:(?:public|private)\s+)?)function\s+([a-zA-Z_$][\w$]*)\s*\(([\w$, ]*)\)\s*{(.*?)}/

\w

匹配单词字符，相当于

[a-zA-Z0-9_]

，但可以在字符类中使用。请注意，这不会匹配其中包含块的函数，例如：

function foo() {
    for (p in this) {
      ...
    }
}

除非正则表达式引擎支持递归（JavaScript 的正则不支持），否则这种情况很难甚至无法用正则表达式处理，这就是为什么您需要一个合适的解析器。

更改正则表达式的最后一部分：

{(.*)}

为此：

{(.*?)}

这使得它“非贪婪”，因此它不会一直捕获到输入中的最后一个 }。

请注意，如果任何函数代码中包含 } 字符，这种做法就会失效；不过那样您处理的就是嵌套结构了，而这从来都不是正则表达式能够很好地完成的事情。

因为您在另一个线程中接受了我的（错误）答案，我觉得自己有义务发布一个正确的解决方案。这不会很快很短，但希望能有所帮助

下面演示在确有必要时，我会如何为类 C 语言编写一个基于正则表达式的解析器：

<script>
/* 
Let's start with this simple utility function. It's a
kind of stubborn version of String.replace() - it
checks the string over and over again, until nothing
more can be replaced
*/

/**
 * Repeatedly applies String.prototype.replace() until the text stops
 * changing, so that matches uncovered by an earlier pass (for example
 * nested constructs) are replaced as well.
 *
 * The loop ends as soon as a pass leaves the text unchanged, so a
 * replacement that reproduces its own match no longer spins forever.
 * Replacements that keep alternating between different texts that all
 * match (a -> b -> a) still do not terminate.
 *
 * @param {*} str - Input; converted with its own toString() method.
 * @param {RegExp|string} regexp - Pattern handed to String.prototype.replace().
 * @param {string|Function} repl - Replacement text or replacement callback.
 * @returns {string} The text after the last pass that changed it.
 */
function replaceAll(str, regexp, repl) {
    var current = str.toString();
    var previous;
    do {
        previous = current;
        // When nothing matches, replace() returns the text unchanged,
        // which is what ends the loop.
        current = current.replace(regexp, repl);
    } while (current !== previous);
    return current;
}

/*
Next, we need a function that removes specific
constructs from the text and replaces them with
special "markers", which are "invisible" for further
processing. The matches are collected in a buffer so
that they can be restored later.
*/

/**
 * Swaps every match of `regexp` in `str` for a "<<typeN>>" marker, where N
 * is the index at which the matched text was stored in `buf`. The work is
 * delegated to replaceAll(), so matches that only appear after an earlier
 * pass are handled too.
 *
 * @param {string} type - Label embedded in each marker.
 * @param {*} str - Text to process.
 * @param {RegExp} regexp - Pattern describing what to cut out.
 * @param {Array} buf - Receives the removed fragments, in the order found.
 * @returns {string} The text with all matches replaced by markers.
 */
function isolate(type, str, regexp, buf) {
    function stash(found) {
        // The fragment lands at the current end of the buffer; that
        // position becomes the number inside the marker.
        var slot = buf.length;
        buf.push(found);
        return "<<" + type + slot + ">>";
    }

    return replaceAll(str, regexp, stash);
}

/*
The following restores "isolated" strings from the
buffer:
*/

/**
 * Expands "<<nameN>>" markers back into the fragments kept in `buf`.
 * Expansion goes through replaceAll(), so markers contained in a restored
 * fragment are expanded on a later pass.
 *
 * @param {*} str - Text that may contain markers.
 * @param {Array} buf - Fragments indexed by the number inside each marker.
 * @returns {string} The text with markers replaced by buffer entries.
 */
function restore(str, buf) {
    var marker = /<<[a-z]+(\d+)>>/g;

    function lookup(whole, index) {
        // `index` is the digit run captured from the marker.
        return buf[parseInt(index, 10)];
    }

    return replaceAll(str, marker, lookup);
}

/*
Write down the grammar. Javascript regexps are
notoriously hard to read (there is no "comment"
option like in perl), therefore let's use more
readable format with spacing and substitution
variables. Note that "$string" and "$block" rules are
actually "isolate()" markers.
*/

/*
Grammar rules, written as regexp source with "$name" references to other
rules. Whitespace inside a rule is only there for readability.

Changes from the first draft:
 - "function" must be followed by at least one whitespace character;
   with "$space*" a declaration such as "functionTest(a) {...}" was
   reported as a function named "Test".
 - identifiers may contain "$", as in the regexp from the question.
   A "$" followed by "]" is not a rule reference, so it is safe inside
   the character classes.
*/
var grammar = {
    $nothing: "",
    $space:  "\\s",
    $access: "public $space+ | private $space+ | $nothing",
    $ident:  "[a-z_$][\\w$]*",
    $args:   "[^()]*",
    $string: "<<string [0-9]+>>",
    $block:  "<<block [0-9]+>>",
    $fun:    "($access) function $space+ ($ident) $space* \\( ($args) \\) $space* ($block)"
};

/*
This compiles the grammar to pure regexps - one for
each grammar rule:
*/

/**
 * Turns every grammar rule into a RegExp. "$name" references are expanded
 * (repeatedly, via replaceAll(), so rules may refer to rules that refer to
 * other rules), all whitespace is then stripped, and the result is compiled
 * with the "g" and "i" flags.
 *
 * A reference to a rule that the grammar does not define now raises an
 * Error; previously the literal text "undefined" was compiled into the
 * pattern without any warning.
 *
 * @param {Object} grammar - Maps "$name" keys to rule source text.
 * @returns {Object} One RegExp per enumerable key of `grammar`.
 * @throws {Error} When a rule references a name missing from `grammar`.
 */
function compile(grammar) {
    var re = {};

    function lookup(ref) {
        var rule = grammar[ref];
        if (rule === undefined || rule === null)
            throw new Error("Unknown grammar rule: " + ref);
        return rule;
    }

    for (var p in grammar) {
        var expanded = replaceAll(grammar[p], /\$\w+/g, lookup);
        // Spaces in the rule text are formatting only, never part of the pattern.
        re[p] = new RegExp(expanded.replace(/\s+/g, ""), "gi");
    }
    return re;
}

/*
Let's put everything together
*/

/**
 * Finds function declarations in `code` and calls `callback` once per
 * declaration.
 *
 * Double-quoted strings and {...} blocks are first replaced by markers
 * (see isolate()), the `$fun` rule of the compiled grammar is then run
 * over the remaining text, and each captured piece is passed through
 * restore() before being handed to the callback.
 *
 * @param {string} code - Source text to scan.
 * @param {Function} callback - Called with the restored capture groups of
 *     the `$fun` rule, followed by the other arguments that
 *     String.prototype.replace() supplies (match offset, searched text),
 *     each also passed through restore().
 */
function findFunctions(code, callback) {
    var buf = [];

    // Isolate strings. The character class excludes the backslash so an
    // escape sequence can only be consumed by the `\\.` branch; with the
    // previous `[^"]` both branches could consume a backslash, which made
    // the pattern backtrack exponentially on an unterminated string that
    // contains many escapes.
    code = isolate("string", code, /"(\\.|[^"\\])*"/g, buf);

    // Isolate blocks in curly brackets {...}; isolate() repeats until no
    // brace pair is left, so nested blocks collapse from the inside out.
    code = isolate("block",  code, /{[^{}]*}/g, buf);

    // Compile our grammar.
    var re = compile(grammar);

    // replace() is used purely as a "for each match" loop; the string it
    // builds is discarded.
    code.replace(re.$fun, function() {
        var p = [];
        for(var i = 1; i < arguments.length; i++)
            p.push(restore(arguments[i], buf));
        return callback.apply(this, p);
    });
}
</script>

如果函数体中存在另一个 }，则这将不起作用。例如，函数体中类似于

if (foo) { doSomething; }

的内容将破坏您的解决方案。您不能使用正则表达式来解析非正则文本。如果函数体中存在另一个 }，则这将不起作用。例如，函数体中类似于

if (foo) { doSomething; }

的内容将破坏您的解决方案。您不能使用正则表达式来解析非正则文本。@Smotchkiss，这实际上是一种循环参数，因为正则语言定义为正则表达式可以解析的语言。基本上，“你不能解析你不能解析的东西”，你不知道吗。谢谢你避免了隧道视觉，outis:）对于告诉你使用

（.*）

的解决方案要小心@奥蒂斯对此有更多的细节。

<script>
/* 
Let's start with this simple utility function. It's a
kind of stubborn version of String.replace() - it
checks the string over and over again, until nothing
more can be replaced
*/

/**
 * Repeatedly applies String.prototype.replace() until the text stops
 * changing, so that matches uncovered by an earlier pass (for example
 * nested constructs) are replaced as well.
 *
 * The loop ends as soon as a pass leaves the text unchanged, so a
 * replacement that reproduces its own match no longer spins forever.
 * Replacements that keep alternating between different texts that all
 * match (a -> b -> a) still do not terminate.
 *
 * @param {*} str - Input; converted with its own toString() method.
 * @param {RegExp|string} regexp - Pattern handed to String.prototype.replace().
 * @param {string|Function} repl - Replacement text or replacement callback.
 * @returns {string} The text after the last pass that changed it.
 */
function replaceAll(str, regexp, repl) {
    var current = str.toString();
    var previous;
    do {
        previous = current;
        // When nothing matches, replace() returns the text unchanged,
        // which is what ends the loop.
        current = current.replace(regexp, repl);
    } while (current !== previous);
    return current;
}

/*
Next, we need a function that removes specific
constructs from the text and replaces them with
special "markers", which are "invisible" for further
processing. The matches are collected in a buffer so
that they can be restored later.
*/

/**
 * Swaps every match of `regexp` in `str` for a "<<typeN>>" marker, where N
 * is the index at which the matched text was stored in `buf`. The work is
 * delegated to replaceAll(), so matches that only appear after an earlier
 * pass are handled too.
 *
 * @param {string} type - Label embedded in each marker.
 * @param {*} str - Text to process.
 * @param {RegExp} regexp - Pattern describing what to cut out.
 * @param {Array} buf - Receives the removed fragments, in the order found.
 * @returns {string} The text with all matches replaced by markers.
 */
function isolate(type, str, regexp, buf) {
    function stash(found) {
        // The fragment lands at the current end of the buffer; that
        // position becomes the number inside the marker.
        var slot = buf.length;
        buf.push(found);
        return "<<" + type + slot + ">>";
    }

    return replaceAll(str, regexp, stash);
}

/*
The following restores "isolated" strings from the
buffer:
*/

/**
 * Expands "<<nameN>>" markers back into the fragments kept in `buf`.
 * Expansion goes through replaceAll(), so markers contained in a restored
 * fragment are expanded on a later pass.
 *
 * @param {*} str - Text that may contain markers.
 * @param {Array} buf - Fragments indexed by the number inside each marker.
 * @returns {string} The text with markers replaced by buffer entries.
 */
function restore(str, buf) {
    var marker = /<<[a-z]+(\d+)>>/g;

    function lookup(whole, index) {
        // `index` is the digit run captured from the marker.
        return buf[parseInt(index, 10)];
    }

    return replaceAll(str, marker, lookup);
}

/*
Write down the grammar. Javascript regexps are
notoriously hard to read (there is no "comment"
option like in perl), therefore let's use more
readable format with spacing and substitution
variables. Note that "$string" and "$block" rules are
actually "isolate()" markers.
*/

/*
Grammar rules, written as regexp source with "$name" references to other
rules. Whitespace inside a rule is only there for readability.

Changes from the first draft:
 - "function" must be followed by at least one whitespace character;
   with "$space*" a declaration such as "functionTest(a) {...}" was
   reported as a function named "Test".
 - identifiers may contain "$", as in the regexp from the question.
   A "$" followed by "]" is not a rule reference, so it is safe inside
   the character classes.
*/
var grammar = {
    $nothing: "",
    $space:  "\\s",
    $access: "public $space+ | private $space+ | $nothing",
    $ident:  "[a-z_$][\\w$]*",
    $args:   "[^()]*",
    $string: "<<string [0-9]+>>",
    $block:  "<<block [0-9]+>>",
    $fun:    "($access) function $space+ ($ident) $space* \\( ($args) \\) $space* ($block)"
};

/*
This compiles the grammar to pure regexps - one for
each grammar rule:
*/

/**
 * Turns every grammar rule into a RegExp. "$name" references are expanded
 * (repeatedly, via replaceAll(), so rules may refer to rules that refer to
 * other rules), all whitespace is then stripped, and the result is compiled
 * with the "g" and "i" flags.
 *
 * A reference to a rule that the grammar does not define now raises an
 * Error; previously the literal text "undefined" was compiled into the
 * pattern without any warning.
 *
 * @param {Object} grammar - Maps "$name" keys to rule source text.
 * @returns {Object} One RegExp per enumerable key of `grammar`.
 * @throws {Error} When a rule references a name missing from `grammar`.
 */
function compile(grammar) {
    var re = {};

    function lookup(ref) {
        var rule = grammar[ref];
        if (rule === undefined || rule === null)
            throw new Error("Unknown grammar rule: " + ref);
        return rule;
    }

    for (var p in grammar) {
        var expanded = replaceAll(grammar[p], /\$\w+/g, lookup);
        // Spaces in the rule text are formatting only, never part of the pattern.
        re[p] = new RegExp(expanded.replace(/\s+/g, ""), "gi");
    }
    return re;
}

/*
Let's put everything together
*/

/**
 * Finds function declarations in `code` and calls `callback` once per
 * declaration.
 *
 * Double-quoted strings and {...} blocks are first replaced by markers
 * (see isolate()), the `$fun` rule of the compiled grammar is then run
 * over the remaining text, and each captured piece is passed through
 * restore() before being handed to the callback.
 *
 * @param {string} code - Source text to scan.
 * @param {Function} callback - Called with the restored capture groups of
 *     the `$fun` rule, followed by the other arguments that
 *     String.prototype.replace() supplies (match offset, searched text),
 *     each also passed through restore().
 */
function findFunctions(code, callback) {
    var buf = [];

    // Isolate strings. The character class excludes the backslash so an
    // escape sequence can only be consumed by the `\\.` branch; with the
    // previous `[^"]` both branches could consume a backslash, which made
    // the pattern backtrack exponentially on an unterminated string that
    // contains many escapes.
    code = isolate("string", code, /"(\\.|[^"\\])*"/g, buf);

    // Isolate blocks in curly brackets {...}; isolate() repeats until no
    // brace pair is left, so nested blocks collapse from the inside out.
    code = isolate("block",  code, /{[^{}]*}/g, buf);

    // Compile our grammar.
    var re = compile(grammar);

    // replace() is used purely as a "for each match" loop; the string it
    // builds is discarded.
    code.replace(re.$fun, function() {
        var p = [];
        for(var i = 1; i < arguments.length; i++)
            p.push(restore(arguments[i], buf));
        return callback.apply(this, p);
    });
}
</script>

<code>
public function blah(arg1, arg2) {
    if("some string" == "public function") {
        callAnother("{hello}")
        while(something) {
            alert("escaped \" string");
        }
    }
}

function yetAnother() { alert("blah") }
</code>

<script>
// Demo driver: once the page has loaded, read the sample program from the
// first <code> element and print the pieces of every function found in it.
// NOTE(review): document.write() is called after the load event here, which
// is documented to reopen (and therefore clear) the document - confirm that
// replacing the page content is the intended output style.
window.onload = function() {
    var sample = document.getElementsByTagName("code")[0];
    var source = sample.innerHTML;

    function report(access, name, args, body) {
        var pieces = [
            "<br>",
            "<br> access= " + access,
            "<br> name= " + name,
            "<br> args= " + args,
            "<br> body= " + body
        ];
        document.write(pieces.join(""));
    }

    findFunctions(source, report);
};
</script>