从JavaScript函数中提取嵌套函数名_Javascript_Regex_Parsing_Function

从JavaScript函数中提取嵌套函数名

javascript regex parsing function

从JavaScript函数中提取嵌套函数名,javascript,regex,parsing,function,Javascript,Regex,Parsing,Function,给定一个函数，我试图找出其中嵌套函数的名称（只有一层深）一个针对toString（）的简单正则表达式一直有效，直到我开始使用带有注释的函数。事实证明，一些浏览器存储原始源代码的一部分，而另一些浏览器则根据编译后的内容重建源代码；在某些浏览器中，toString（）的输出可能包含原始代码注释。另外，以下是我的发现： <pre> <script type="text/javascript"> function someFn() { /** * Some c

给定一个函数，我试图找出其中嵌套函数的名称（只有一层深）

一个针对

toString（）

的简单正则表达式一直有效，直到我开始使用带有注释的函数。事实证明，一些浏览器存储原始源代码的一部分，而另一些浏览器则根据编译后的内容重建源代码；在某些浏览器中，

toString（）

的输出可能包含原始代码注释。另外，以下是我的发现：

<pre>
<script type="text/javascript">
// Sample input for the extraction script below. The oddly placed comments, the
// string literals containing "/*...*/" and the word "function", and the
// anonymous function expression are deliberate: they are the cases the
// name-extraction code has to cope with. Do not reformat the body; its exact
// source text (via toString()) is what gets parsed.
function someFn() {
    /**
     * Some comment
     */
     function fn1() {
         alert("/*This is not a comment, it's a string literal*/");
     }

     function // keyword
     fn2 // name
     (x, y) // arguments
     {
         /*
         body
         */
     }

     function fn3() {
        alert("this is the word function in a string literal");
     }

     var f = function () { // anonymous, ignore
     };
}

// Regex-only attempt at listing the names of the functions declared inside
// someFn. It works on the text returned by toString() and strips comments and
// string literals before searching for the `function` keyword.
var s = someFn.toString();
// remove inline comments
// (the pattern is not string-aware: a "//" inside a string literal is cut too)
s = s.replace(/\/\/.*/g, "");
// compact all whitespace to a single space
// (only runs of two or more whitespace characters are replaced)
s = s.replace(/\s{2,}/g, " ");
// remove all block comments, including those in string literals
// ("." does not match line terminators, so this relies on the compaction above
// having already folded multi-line comments onto one line)
s = s.replace(/\/\*.*?\*\//g, "");
document.writeln(s);
// remove string literals to avoid false matches with the keyword 'function'
s = s.replace(/'.*?'/g, "");
s = s.replace(/".*?"/g, "");
document.writeln(s);
// find all the function definitions
// NOTE(review): String.prototype.match returns null when nothing matches, in
// which case the loop below would throw on matches.length.
var matches = s.match(/function(.*?)\(/g);
// start at 1: matches[0] is the header of the outer function (someFn) itself
for (var ii = 1; ii < matches.length; ++ii) {
    // extract the function name
    var funcName = matches[ii].replace(/function(.+)\(/, "$1");
    // remove any remaining leading or trailing whitespace
    funcName = funcName.replace(/\s+$|^\s+/g, "");
    if (funcName === '') {
        // anonymous function, discard
        continue;
    }
    // output the results
    document.writeln('[' + funcName + ']');
}
</script>
</pre>

受试者结果解决方案不必是纯正则表达式

更新：您可以假设我们总是处理有效的、正确嵌套的代码，所有字符串文本、注释和块都正确终止。这是因为我正在解析一个已经编译为有效函数的函数

更新2:如果您想知道这背后的动机：我正在开发一个新的JavaScript单元测试框架，名为。您可以使用几种不同的格式编写测试和测试套件。其中一个是函数：

function someFn() {
    /**
     * Some comment
     */
     function fn1() {
         alert("/*This is not a comment, it's a string literal*/");
     }

     function // keyword
     fn2 // name
     (x, y) // arguments
     {
         /*
         body
         */
     }

     var f = function () { // anonymous, ignore
     };
}

// Example of the "suite as a function" format: each test is an inner function
// declaration. The inner functions are only reachable from inside myTests,
// which is why their names have to be recovered from the source text.
function myTests() {
    function setUp() {
    }

    function tearDown() {
    }

    function testSomething() {
    }

    function testSomethingElse() {
    }
}

由于函数隐藏在闭包中，因此我无法从函数外部调用它们。因此，我将外部函数转换为字符串，提取函数名，在底部附加一条“now run the given internal function”语句，并使用 new Function() 将其重新编译为函数。如果测试函数中有注释，那么提取函数名和避免误报就变得很困难。因此我请求SO社区的帮助

更新3:我想出了一个不需要大量修改代码语义的方法。我使用原始源代码本身来探测第一级函数。


函数someFn（）{
/**
*一些评论
*/
函数fn1（）{
警报（“/*这不是注释，是字符串文字*/”；
}
函数//关键字
fn2//名称
（x，y）//参数
{
/*
身体
*/
}
函数fn3（）{
警报（“这是字符串文字中的单词函数”）；
}
var f=函数（）{//匿名，忽略
};
}
var s=someFn.toString（）；
//删除内联注释
s=s。替换（/\/\/.*/g，”）；
//将所有空白压缩到单个空间
s=s.replace（/\s{2，}/g，“”）；
//删除所有块注释，包括字符串文字中的注释
s=s.replace（//\/\*.*？\*\//g，“”）；
书面文件；
//删除字符串文字以避免与关键字“function”的错误匹配
s=s.replace（/'.'？'/g，“”）；
s=s。替换（/“*？”/g，”）；
书面文件；
//查找所有函数定义
var matches=s.match（/function（.*）\（/g）；
对于（变量ii=1；ii


我确信我遗漏了一些东西，但从您在原始问题中的要求来看，我认为我已经达到了目标，包括消除了在字符串文本中查找函数
关键字的可能性
最后一点，我认为在函数块中损坏字符串文字没有任何问题。您的要求是查找函数名，因此我不必费心保存函数内容。
外观更改和错误修复
正则表达式必须写成 \bfunction\b，以避免误报
如果 nested 的计算结果不为 true，则在块中定义的函数（例如在循环体中）将被忽略
var tests = {
    test1: function (){
        console.log( "test 1 ran" );
    },

    test2: function (){
        console.log( "test 2 ran" );
    },

    test3: function (){
        console.log( "test 3 ran" );
    }
};

函数标记化（代码）{
var code=code.split（/\\./）.join（“”），
正则表达式=/\b函数\b | \（| \）{124; \}\/\*\*\*\/\\/\/\\/\“|”\\ n | \ s+/mg，
令牌=[]，
pos=0；
for（变量匹配；匹配=regex.exec（代码）；pos=regex.lastIndex）{
var match=匹配[0]，
matchStart=regex.lastIndex-match.length；
如果（位置<匹配开始）
tokens.push（code.substring（pos，matchStart））；
代币。推送（匹配）；
}
if（位置<代码长度）
令牌推送（代码子串（pos））；
归还代币；
}
变量分隔符={
'/*' : '*/',
“//”：“\n”，
'"' : '"',
'\'' : '\''
};
函数extractInnerFunctionNames（函数，嵌套）{
变量名称=[]，
tokens=tokenize（func.toString（）），
级别=0；
对于（变量i=0；i学术上正确的处理方法是为Javascript（函数定义）的子集generate创建一个lexer和解析器
<pre>
<script type="text/javascript">
// Sample input for the extraction script below. The oddly placed comments, the
// string literals containing "/*...*/" and the word "function", and the
// anonymous function expression are deliberate: they are the cases the
// name-extraction code has to cope with. Do not reformat the body; its exact
// source text (via toString()) is what gets parsed.
function someFn() {
    /**
     * Some comment
     */
     function fn1() {
         alert("/*This is not a comment, it's a string literal*/");
     }

     function // keyword
     fn2 // name
     (x, y) // arguments
     {
         /*
         body
         */
     }

     function fn3() {
        alert("this is the word function in a string literal");
     }

     var f = function () { // anonymous, ignore
     };
}

// Regex-only attempt at listing the names of the functions declared inside
// someFn. It works on the text returned by toString() and strips comments and
// string literals before searching for the `function` keyword.
var s = someFn.toString();
// remove inline comments
// (the pattern is not string-aware: a "//" inside a string literal is cut too)
s = s.replace(/\/\/.*/g, "");
// compact all whitespace to a single space
// (only runs of two or more whitespace characters are replaced)
s = s.replace(/\s{2,}/g, " ");
// remove all block comments, including those in string literals
// ("." does not match line terminators, so this relies on the compaction above
// having already folded multi-line comments onto one line)
s = s.replace(/\/\*.*?\*\//g, "");
document.writeln(s);
// remove string literals to avoid false matches with the keyword 'function'
s = s.replace(/'.*?'/g, "");
s = s.replace(/".*?"/g, "");
document.writeln(s);
// find all the function definitions
// NOTE(review): String.prototype.match returns null when nothing matches, in
// which case the loop below would throw on matches.length.
var matches = s.match(/function(.*?)\(/g);
// start at 1: matches[0] is the header of the outer function (someFn) itself
for (var ii = 1; ii < matches.length; ++ii) {
    // extract the function name
    var funcName = matches[ii].replace(/function(.+)\(/, "$1");
    // remove any remaining leading or trailing whitespace
    funcName = funcName.replace(/\s+$|^\s+/g, "");
    if (funcName === '') {
        // anonymous function, discard
        continue;
    }
    // output the results
    document.writeln('[' + funcName + ']');
}
</script>
</pre>

/**
 * Splits JavaScript source text into a flat token list for a light-weight scan.
 *
 * Delimiter tokens are: the keyword `function`, `(`, `)`, `{`, `}`, the comment
 * markers `/*`, `*` + `/` and `//`, both quote characters, a single line feed,
 * and runs of other whitespace. Any text between two delimiters is emitted as
 * one token.
 *
 * Backslash escapes (a backslash plus the following non-newline character) are
 * removed first, so an escaped quote can never be mistaken for the end of a
 * string literal. The token list therefore does not reproduce the input
 * exactly when the input contains escapes.
 *
 * @param {string} code - Source text to split.
 * @returns {string[]} Tokens in source order.
 */
function tokenize(code) {
    var source = code.split(/\\./).join(''),
        // Whitespace is matched with [^\S\n]+ instead of \s+ so that a line
        // feed is ALWAYS its own '\n' token. With \s+, trailing spaces or a
        // '\r' before the line feed swallowed it into one whitespace token and
        // scanners looking for '\n' (end of a // comment) ran past the line.
        regex = /\bfunction\b|\(|\)|\{|\}|\/\*|\*\/|\/\/|"|'|\n|[^\S\n]+/g,
        tokens = [],
        pos = 0;

    for(var matches; matches = regex.exec(source); pos = regex.lastIndex) {
        var match = matches[0],
            matchStart = matches.index;

        // Text between the previous delimiter and this one is a token too.
        if(pos < matchStart)
            tokens.push(source.substring(pos, matchStart));

        tokens.push(match);
    }

    // Trailing text after the last delimiter.
    if(pos < source.length)
        tokens.push(source.substring(pos));

    return tokens;
}

// Maps each opening token of a comment or string literal to the token that
// closes it. extractInnerFunctionNames uses it to skip over such regions.
var separators = {
    '/*' : '*/',
    '//' : '\n',
    '"' : '"',
    '\'' : '\''
};

/**
 * Lists the names of the functions declared inside `func`.
 *
 * The source of `func` is tokenized and scanned while tracking brace depth;
 * comments and string literals are skipped using the `separators` table.
 *
 * @param {Function} func - Function whose source text is scanned.
 * @param {*} [nested] - When truthy, names at any brace depth of one or more
 *     are collected; otherwise only those at depth exactly one.
 * @returns {string[]} Names in source order; anonymous functions are omitted.
 */
function extractInnerFunctionNames(func, nested) {
    var parts = tokenize(func.toString()),
        found = [],
        depth = 0;

    // Index of the first token after `from` that equals `closer`, or
    // parts.length when there is none.
    function seek(from, closer) {
        var at = from + 1;
        while(at < parts.length && parts[at] !== closer)
            ++at;
        return at;
    }

    for(var pos = 0; pos < parts.length; ++pos) {
        var part = parts[pos];

        if(part === '{') {
            ++depth;
        } else if(part === '}') {
            --depth;
        } else if(part === '/*' || part === '//' || part === '"' || part === '\'') {
            // Jump to the closing token of the comment or string.
            pos = seek(pos, separators[part]);
        } else if(part === 'function' && (depth === 1 || (nested && depth))) {
            // Look ahead for the name: ignore whitespace and comments, stop at
            // '(' (anonymous function) or at the first other token (the name).
            for(++pos; pos < parts.length; ++pos) {
                part = parts[pos];

                if(part === '(')
                    break;

                if(/^\s+$/.test(part))
                    continue;

                if(part === '/*' || part === '//') {
                    pos = seek(pos, separators[part]);
                    continue;
                }

                found.push(part);
                break;
            }
        }
    }

    return found;
}

// Alternative suite format: each test is a function-valued property of a plain
// object, so the tests can be enumerated and invoked directly, with no need to
// parse source text.
var tests = {
    test1: function (){
        console.log( "test 1 ran" );
    },

    test2: function (){
        console.log( "test 2 ran" );
    },

    test3: function (){
        console.log( "test 3 ran" );
    }
};

// Run every test. Note that for...in also visits enumerable inherited
// properties, if any exist.
for( var test in tests ){ 
    tests[test]();
}

/**
 * Splits the source text of a classic `function` declaration or expression
 * into its name and its body.
 *
 * The parameter list may contain any characters except `)`. The previous
 * pattern rejected whitespace there, so `function f(a, b) {}` was reported as
 * invalid. The body is everything between the `{` after the parameter list
 * and the last `}` of the text.
 *
 * @param {Function|string} fn - A function, or its source text; it is
 *     converted to a string by RegExp.prototype.exec.
 * @returns {{name: string, body: string}} `name` is '' for an anonymous
 *     function.
 * @throws {string} The string "Invalid function." when the text does not look
 *     like a `function` definition. A string is thrown (not an Error) to stay
 *     compatible with existing callers.
 */
function splitFunction(fn) {
    // [\s\S]* is greedy, so it backs off to the LAST '}' that is followed only
    // by whitespace. This captures the same text as the former
    // ((?:[^}]*\}?)+) without its nested quantifiers, which could backtrack
    // exponentially on input that does not match.
    var tokens =
        /^\s*function\s*([^\(\s]*?)\s*\([^\)]*\)\s*\{([\s\S]*)\}\s*$/
        .exec(fn);

    if (!tokens) {
        throw "Invalid function.";
    }

    return {
        name: tokens[1],
        body: tokens[2]
    };
}

// Reports whether the identifier whose name is passed as the first argument
// resolves to a function in the scope where this code is evaluated.
//
// The template "typeof $fn$ === \"function\"" has "$fn$" replaced by the given
// name and the result is passed to eval. The name is read from arguments[0]
// rather than from a named parameter because extractFunctions copies this
// function's body text into `new Function(...)`, which declares no parameters.
// For the same reason the body must stay a single self-contained statement.
//
// NOTE: the argument is evaluated as code; only pass trusted identifier names.
var probeOutside = function () {
    return eval(
        "typeof $fn$ === \"function\""
        .split("$fn$")
        .join(arguments[0]));
};

/**
 * Lists the names of the functions declared at the top level of `fn`.
 *
 * Instead of parsing, every word-like token of the body is probed: a copy of
 * the body is compiled with the probe statement from `probeOutside` placed in
 * front of it, so the probe runs in a scope where the body's function
 * declarations are hoisted and visible. A token is reported when it names a
 * function inside that scope but not in the surrounding one.
 *
 * Each name is reported once, in order of first appearance. Previously a name
 * that occurred several times in the body (for example a declared function
 * that is also called) was returned once per occurrence.
 *
 * NOTE: this compiles and evaluates the source of `fn` via `new Function` and
 * `eval`; only use it on trusted functions.
 *
 * @param {Function} fn - Function whose inner function names are wanted.
 * @returns {string[]} Distinct names of top-level inner functions.
 * @throws {string} "Invalid function." (from splitFunction) when `fn` cannot
 *     be split into name and body.
 */
function extractFunctions(fn) {
    var fnParts = splitFunction(fn);

    // The probe's `return eval(...)` comes first, so calling probeInside never
    // executes the statements of the original body.
    var probeInside = new Function(
        splitFunction(probeOutside).body + fnParts.body);

    var tokens;
    var fns = [];
    var tokenRe = /(\w+)/g;

    while ((tokens = tokenRe.exec(fnParts.body))) {
        var token = tokens[1];

        // Already reported: skip, so repeated identifiers give no duplicates.
        if (fns.indexOf(token) !== -1) {
            continue;
        }

        try {
            if (probeInside(token) && !probeOutside(token)) {
                fns.push(token);
            }
        } catch (e) {
            // Deliberately ignored: tokens such as reserved words make the
            // probe expression a syntax error, so they cannot be function names.
        }
    }

    return fns;
}

// Declared in the outer scope on purpose: the commented-out line of the same
// name inside testSuite must not be reported as an inner function.
function testGlobalFn() {}

// Sample suite for extractFunctions. The comments, odd line breaks and the
// string literal inside are deliberate edge cases; keep the body text as is.
function testSuite() {
    function testA() {
        function testNested() {
        }
    }

    // function testComment() {}
    // function testGlobalFn() {}

    function // comments
    testB /* don't matter */
    () // neither does whitespace
    {
        var s = "function testString() {}";
    }
}

document.write(extractFunctions(testSuite));
// writes "testA,testB"

typeof $fn$ !== "undefined" && $fn$ instanceof Function

typeof $fn$ === "function"

function foo() {}

function TestSuite() {
    function foo() {}
}

return eval("[" + fnList + "]");

<pre><script>
/**
 * extractFunctions(func) returns references to the named functions declared
 * directly inside `func` (brace depth one), in source order.
 *
 * How it works: the source of `func` is tokenized, the leading
 * `function name(...)` header is removed, the remaining block is scanned for
 * top-level function names, and the block is recompiled with an appended
 * statement that stores those functions in `this._refs`.
 *
 * NOTE: the recompiled body of `func` is EXECUTED (via `new Function`) in
 * order to obtain the references, so any top-level statements in `func` run.
 * Only use this on trusted functions.
 *
 * Changes from the previous version:
 *  - Backslash escapes are no longer stripped from the source. They used to be
 *    removed before the tokens were re-joined and recompiled, which corrupted
 *    string literals in the returned functions ("a\tb" became "ab").
 *  - A line feed is always its own token, so a `//` comment that ends in
 *    trailing whitespace or '\r' is terminated correctly.
 *
 * @param {Function} func - Function whose top-level inner functions are wanted.
 * @returns {Function[]} The inner functions, compiled from the source of `func`.
 */
var extractFunctions = (function() {
    // Scan state shared by the parse callbacks; reset on every call of
    // getTopLevelFunctionRefs.
    var level, names;

    // Splits source text into delimiter tokens and the text between them.
    // Joining the tokens reproduces the input exactly.
    function tokenize(code) {
        // [^\S\n]+ (whitespace except line feed) keeps '\n' a separate token.
        // A lone backslash is a token so that escapes can be skipped later.
        var regex = /\bfunction\b|\(|\)|\{|\}|\/\*|\*\/|\/\/|"|'|\n|[^\S\n]+|\\/g,
            tokens = [],
            pos = 0;

        for(var matches; matches = regex.exec(code); pos = regex.lastIndex) {
            var match = matches[0],
                matchStart = matches.index;

            if(pos < matchStart)
                tokens.push(code.substring(pos, matchStart));

            tokens.push(match);
        }

        if(pos < code.length)
            tokens.push(code.substring(pos));

        return tokens;
    }

    // Calls callback(token, tokens, index) for each token. A numeric return
    // value moves the cursor to that index; `false` stops the walk.
    function parse(tokens, callback) {
        for(var i = 0; i < tokens.length; ++i) {
            var j = callback(tokens[i], tokens, i);
            if(j === false) break;
            else if(typeof j === 'number') i = j;
        }
    }

    // Returns the index of the next `limiter` token after idx (or
    // tokens.length). With `escapes`, the token following a backslash is
    // stepped over, so an escaped quote does not end a string.
    function skip(tokens, idx, limiter, escapes) {
        while(++idx < tokens.length && tokens[idx] !== limiter)
            if(escapes && tokens[idx] === '\\') ++idx;

        return idx;
    }

    // parse callback: drops everything up to and including the first ')'
    // outside a comment, i.e. the `function name(params)` header.
    function removeDeclaration(token, tokens, idx) {
        switch(token) {
            case '/*':
            return skip(tokens, idx, '*/');

            case '//':
            return skip(tokens, idx, '\n');

            case ')':
            tokens.splice(0, idx + 1);
            return false;
        }
    }

    // parse callback: tracks brace depth and records the name that follows
    // each `function` keyword found at depth one.
    function extractTopLevelFunctionNames(token, tokens, idx) {
        switch(token) {
            case '{':
            ++level;
            return;

            case '}':
            --level;
            return;

            case '/*':
            return skip(tokens, idx, '*/');

            case '//':
            return skip(tokens, idx, '\n');

            case '"':
            case '\'':
            return skip(tokens, idx, token, true);

            case '\\':
            // Backslash outside a string or comment: step over the token that
            // starts with the escaped character so an escaped delimiter is
            // not counted.
            return idx + 1;

            case 'function':
            if(level === 1) {
                while(++idx < tokens.length) {
                    token = tokens[idx];

                    // Anonymous function: nothing to record.
                    if(token === '(')
                        return idx;

                    if(/^\s+$/.test(token))
                        continue;

                    if(token === '/*') {
                        idx = skip(tokens, idx, '*/');
                        continue;
                    }

                    if(token === '//') {
                        idx = skip(tokens, idx, '\n');
                        continue;
                    }

                    names.push(token);
                    return idx;
                }
            }
            return;
        }
    }

    function getTopLevelFunctionRefs(func) {
        var tokens = tokenize(func.toString());
        parse(tokens, removeDeclaration);

        names = [];
        level = 0;
        parse(tokens, extractTopLevelFunctionNames);

        // What is left of the source is the `{ ... }` block of func. Run it,
        // then collect the declared functions by name.
        var code = tokens.join('') + '\nthis._refs = [' +
            names.join(',') + '];';

        return (new (new Function(code)))._refs;
    }

    return getTopLevelFunctionRefs;
})();

// Sample suite for extractFunctions. The comments, odd line breaks and the
// string literal inside are deliberate edge cases; keep the body text as is.
function testSuite() {
    function testA() {
        function testNested() {
        }
    }

    // function testComment() {}
    // function testGlobalFn() {}

    function // comments
    testB /* don't matter */
    () // neither does whitespace
    {
        var s = "function testString() {}";
    }
}

// Each returned function is converted to its source text by join(), so this
// prints the sources of the extracted functions separated by "---" lines.
document.writeln(extractFunctions(testSuite).join('\n---\n'));
</script></pre>