Javascript 在检查字符串时是否有方法包括单词变体（过去时、动名词）？_Javascript

Javascript 在检查字符串时是否有方法包括单词变体（过去时、动名词）？

javascript

Javascript 在检查字符串时是否有方法包括单词变体（过去时、动名词）？,javascript,Javascript,假设我有一系列搜索词，比如- var searchTerms = ["blow", "search", "fly", "type"] 和一个字符串数组，如 var arrayToBeSearched = ["blowing", "searched", "flew", "typed", "blah", "blah","blah"] 当我检查数组时，有没有一种简单的方法可以包含过去时态或其他单词变体？或者我应该在searchTerms中包含变体吗在Javascipt中没有本机的方法来执行此操作

假设我有一系列搜索词，比如-

var searchTerms = ["blow", "search", "fly", "type"]

和一个字符串数组，如

var arrayToBeSearched = ["blowing", "searched", "flew", "typed", "blah", "blah","blah"]

当我检查数组时，有没有一种简单的方法可以同时匹配过去时态或其他单词变体？或者我应该把这些变体直接加入 searchTerms 中吗？

在 JavaScript 中没有原生的方法来执行此操作。但是，您可以使用正则表达式来处理数据，以查找几乎任何您想要的模式，例如以“ed”、“ing”结尾的词形等。

但是在你的帖子中没有提到的是你搜索的范围和环境。这种情况是否发生在您的站点/应用程序的特定有限部分？如果它更大，但仍然在客户端，您可能希望（正如其他人所提到的）使用库或编写执行所需操作的插件。如果这是一种更为单一的方式，您可能希望在服务器端甚至在数据库本身执行此操作。

这里是node.js解决方案。注意，它是根据LGPL许可的

虽然我自己没有使用此框架的经验，但您可以查看

该库在浏览器中提供全文搜索，其功能之一是词干提取（stemming），即用词根“search”匹配“searched”、“searching”等变体。

存在词干提取（词形还原）算法，例如波特词干分析器（Porter stemmer）。它会把单词映射到其词干，然后就可以直接比较词干是否相等。该算法有公开的描述，其 JavaScript 实现全文转载如下：

//Javascript中的波特词干分析器。注释很少，但很容易对照原始论文中的规则来阅读，
//论文出处：
//
//波特，1980，后缀剥离算法，程序，第14卷，
//第3号，第130-137页，
//
//另见http://www.tartarus.org/~martin/PorterStemmer
//第1版为“安达格”，2004年7月
//Christopher McKenzie于2009年8月发布的第2版（大幅修订）
变量词干分析器=（函数（）{
var step2list={
“国家”：“ate”，
“民族的”：“民族的”，
“enci”：“ence”，
“anci”：“ance”，
“izer”：“ize”，
“bli”：“ble”，
“ALI”：“al”，
“entli”：“ent”，
“eli”：“e”，
“ousli”：“ous”，
“化”：“化”，
“吃”；“吃”，
“ator”：“ate”，
“主义”：“al”，
“爱”：“爱”，
“充实”：“充实”，
“ous”：“ous”，
“阿利蒂”：“阿尔”，
“iviti”：“ive”，
“biliti”：“ble”，
“logi”：“日志”
},
步骤3列表={
“证书”：“ic”，
“或”：“或”，
“alize”：“al”，
“iciti”：“ic”，
“ic”：“ic”，
“ful”：“，
“性”：”
},
c=“^aeiou]”，//辅音
v=“[aeiouy]”，//元音
C=C+“[^aeiouy]*”，//辅音序列
V=V+“[aeiou]*”，//元音序列
mgr0=“^（“+C+”）？+V+C，//[C]VC…是m>0
meq1=“^（“+C+”）？+V+C+”（“+V+”）？$”，//[C]VC[V]为m=1
mgr1=“^（“+C+”）？+V+C+V+C，//[C]VCVC…是m>1
s_v=“^（“+C+”）？+v；//词干中的元音
返回函数（w）{
变种茎，
后缀
第一，
重新，
re2，
re3，
re4，
origword=w；
如果（w.length<3）{返回w；}
firstch=w.substr（0,1）；
if（firstch==“y”）{
w=firstch.toUpperCase（）+w.substr（1）；
}
//步骤1a
re=/^（+？）（ss|i）es$/；
re2=/^（+？）（[^s]）s$/；
如果（re.test（w））{w=w.replace（re，“$1$2”）；}
else如果（re2.test（w））{w=w.replace（re2，“$1$2”）；}
//步骤1b
re=/^（+？）eed$/；
re2=/^（+？）（ed | ing）$/；
如果（重新测试（w））{
var fp=重新执行（w）；
re=新的RegExp（mgr0）；
if（重新测试（fp[1]））{
re=/.$/；
w=w。替换（重“”）；
}
}否则如果（re2.试验（w））{
var fp=re2.exec（w）；
stem=fp[1]；
re2=新的RegExp（s_v）；
if（re2.试验（阀杆））{
w=茎；
re2=/（在| bl | iz）$/；
re3=新的RegExp（“（[^aeiouylsz]）\\1$”；
re4=新的RegExp（“^C+v+”[^aeiouwxy]$”）；
如果（re2.test（w））{w=w+“e”；}
else如果（re3.test（w））{re=/.$/；w=w.replace（re，”；}
else如果（re4.test（w））{w=w+“e”；}
}
}
//步骤1c
re=/^（+？）y$/；
如果（重新测试（w））{
var fp=重新执行（w）；
stem=fp[1]；
re=新的RegExp（s_v）；
如果（重新测试（stem））{w=stem+“i”；}
}
//步骤2
re=/^（+？）（国家性的、国家性的、公正性的、公正性的、公正性的、理性的、理性的）；
如果（重新测试（w））{
var fp=重新执行（w）；
stem=fp[1]；
后缀=fp[2]；
re=新的RegExp（mgr0）；
if（重新测试（阀杆））{
w=阀杆+第2步列表[后缀]；
}
}
//步骤3
re=/^（+？）（文字化、文字化、文字化、文字化、文字化）$/；
如果（重新测试（w））{
var fp=重新执行（w）；
stem=fp[1]；
后缀=fp[2]；
re=新的RegExp（mgr0）；
if（重新测试（阀杆））{
w=阀杆+步骤3列表[后缀]；
}
}
//步骤4
re=/^（+？）（al | ance | ence | er | ic | able | ible | ant | ment | ent | ou | ism | ate | iti ous | ive | ize |；
re2=/^（+？）（s|t）（离子）$/；
如果（重新测试（w））{
var fp=重新执行（w）；
stem=fp[1]；
re=新的RegExp（mgr1）；
if（重新测试（阀杆））{
w=茎；
// Porter stemmer in Javascript. Few comments, but it's easy to follow against the rules in the original
// paper, in
//
//  Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
//  no. 3, pp 130-137,
//
// see also http://www.tartarus.org/~martin/PorterStemmer

// Release 1 by 'andargor', Jul 2004
// Release 2 (substantially revised) by Christopher McKenzie, Aug 2009

/**
 * Porter stemmer: strips common English suffixes from a word so that
 * inflected forms ("searched", "hopping") reduce to a shared stem
 * ("search", "hop") that can be compared for equality.
 *
 * All patterns are compiled once inside the closure and reused by every
 * call; none of them carries the `g`/`y` flag, so `test`/`exec` keep no
 * state between calls.
 *
 * @param {string} w - The word to stem. The suffix patterns are lowercase
 *     literals, so the word is expected to be lowercase.
 * @returns {string} The stem. Words shorter than three characters are
 *     returned unchanged.
 */
var stemmer = (function () {
    // Step 2: suffix -> replacement (applied when the remaining stem has m > 0).
    var step2list = {
        "ational" : "ate",
        "tional" : "tion",
        "enci" : "ence",
        "anci" : "ance",
        "izer" : "ize",
        "bli" : "ble",
        "alli" : "al",
        "entli" : "ent",
        "eli" : "e",
        "ousli" : "ous",
        "ization" : "ize",
        "ation" : "ate",
        "ator" : "ate",
        "alism" : "al",
        "iveness" : "ive",
        "fulness" : "ful",
        "ousness" : "ous",
        "aliti" : "al",
        "iviti" : "ive",
        "biliti" : "ble",
        "logi" : "log"
    };

    // Step 3: suffix -> replacement (applied when the remaining stem has m > 0).
    var step3list = {
        "icate" : "ic",
        "ative" : "",
        "alize" : "al",
        "iciti" : "ic",
        "ical" : "ic",
        "ful" : "",
        "ness" : ""
    };

    var c = "[^aeiou]";          // consonant
    var v = "[aeiouy]";          // vowel
    var C = c + "[^aeiouy]*";    // consonant sequence
    var V = v + "[aeiou]*";      // vowel sequence

    // Stem-shape tests, built once from the fragments above.
    var MGR0 = new RegExp("^(" + C + ")?" + V + C);                    // [C]VC... is m>0
    var MEQ1 = new RegExp("^(" + C + ")?" + V + C + "(" + V + ")?$");  // [C]VC[V] is m=1
    var MGR1 = new RegExp("^(" + C + ")?" + V + C + V + C);            // [C]VCVC... is m>1
    var HAS_VOWEL = new RegExp("^(" + C + ")?" + v);                   // vowel in stem
    var CVC = new RegExp("^" + C + v + "[^aeiouwxy]$");                // consonant-seq, vowel, consonant other than w/x/y
    var DOUBLE_CONSONANT = new RegExp("([^aeiouylsz])\\1$");           // doubled final consonant other than l/s/z

    // Per-step suffix matchers; group 1 is always the candidate stem.
    var STEP1A_SSES_IES = /^(.+?)(ss|i)es$/;
    var STEP1A_S = /^(.+?)([^s])s$/;
    var STEP1B_EED = /^(.+?)eed$/;
    var STEP1B_ED_ING = /^(.+?)(ed|ing)$/;
    var STEP1B_AT_BL_IZ = /(at|bl|iz)$/;
    var STEP1C_Y = /^(.+?)y$/;
    var STEP2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
    var STEP3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
    var STEP4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
    var STEP4_ION = /^(.+?)(s|t)(ion)$/;
    var STEP5_E = /^(.+?)e$/;
    var ENDS_LL = /ll$/;
    var LAST_CHAR = /.$/;

    return function (w) {
        var match;
        var stem;
        var startsWithY;

        if (w.length < 3) { return w; }

        // A leading "y" is upper-cased for the duration of the algorithm so
        // the vowel class [aeiouy] does not match it; it is restored at the end.
        startsWithY = w.charAt(0) === "y";
        if (startsWithY) {
            w = "Y" + w.slice(1);
        }

        // Step 1a: plurals ("sses" -> "ss", "ies" -> "i", trailing "s" dropped).
        if (STEP1A_SSES_IES.test(w)) {
            w = w.replace(STEP1A_SSES_IES, "$1$2");
        } else if (STEP1A_S.test(w)) {
            w = w.replace(STEP1A_S, "$1$2");
        }

        // Step 1b: "eed" -> "ee" when m>0; otherwise strip "ed"/"ing" when the
        // stem contains a vowel, then tidy the stem's ending.
        match = STEP1B_EED.exec(w);
        if (match) {
            if (MGR0.test(match[1])) {
                w = w.replace(LAST_CHAR, "");
            }
        } else {
            match = STEP1B_ED_ING.exec(w);
            if (match && HAS_VOWEL.test(match[1])) {
                w = match[1];
                if (STEP1B_AT_BL_IZ.test(w)) {
                    w = w + "e";
                } else if (DOUBLE_CONSONANT.test(w)) {
                    w = w.replace(LAST_CHAR, "");
                } else if (CVC.test(w)) {
                    w = w + "e";
                }
            }
        }

        // Step 1c: final "y" -> "i" when the stem contains a vowel.
        match = STEP1C_Y.exec(w);
        if (match && HAS_VOWEL.test(match[1])) {
            w = match[1] + "i";
        }

        // Step 2: map longer suffixes through step2list when m>0.
        match = STEP2.exec(w);
        if (match && MGR0.test(match[1])) {
            w = match[1] + step2list[match[2]];
        }

        // Step 3: map suffixes through step3list when m>0.
        match = STEP3.exec(w);
        if (match && MGR0.test(match[1])) {
            w = match[1] + step3list[match[2]];
        }

        // Step 4: drop a residual suffix when m>1. The "(s|t)ion" form is only
        // considered when none of the plain suffixes matched, and it keeps the
        // "s"/"t" as part of the stem.
        match = STEP4.exec(w);
        if (match) {
            if (MGR1.test(match[1])) {
                w = match[1];
            }
        } else {
            match = STEP4_ION.exec(w);
            if (match) {
                stem = match[1] + match[2];
                if (MGR1.test(stem)) {
                    w = stem;
                }
            }
        }

        // Step 5: drop a final "e" when m>1, or when m=1 and the stem does not
        // match the CVC shape; then reduce a final "ll" to "l" when m>1.
        match = STEP5_E.exec(w);
        if (match) {
            stem = match[1];
            if (MGR1.test(stem) || (MEQ1.test(stem) && !CVC.test(stem))) {
                w = stem;
            }
        }

        if (ENDS_LL.test(w) && MGR1.test(w)) {
            w = w.replace(LAST_CHAR, "");
        }

        // and turn initial Y back to y
        if (startsWithY) {
            w = "y" + w.slice(1);
        }

        return w;
    };
})();