Javascript 具有不同端点的正则表达式模式匹配_Javascript_Regex

Javascript 具有不同端点的正则表达式模式匹配

javascript regex

Javascript 具有不同端点的正则表达式模式匹配,javascript,regex,Javascript,Regex,我想通过javascript从下面的字符串列表中提取具有特定模式的子字符串但我在设置正则表达式模式方面有问题输入字符串列表搜索？w=tot&DA=YZR&t\u nil\u searchbox=btn&sug=&o=&q=%EB%B9%84%EC%BD%98 搜索？q=%EB%B9%84%EC%BD%98&go=%EC%A0…4%EB%B9%84%EC%BD%98&sc=8-2&sp=-1&sk=&cvid=f05407c5bcb9496990d2874135aee8e9 其中=nexea

我想通过javascript从下面的字符串列表中提取具有特定模式的子字符串

但我在设置正则表达式模式方面有问题

输入字符串列表

search?w=tot&DA=YZR&t__nil_searchbox=btn&sug=&o=&

q=%EB%B9%84%EC%BD%98

search?

q=%EB%B9%84%EC%BD%98

&go=%EC%A0…4%EB%B9%84%EC%BD%98&sc=8-2&sp=-1&sk=&cvid=f05407c5bcb9496990d2874135aee8e9

where=nexearch&

query=%EB%B9%84%EC%BD%98

&sm=top_hty&fbm=0&ie=utf8

预期模式匹配结果

%EB%B9%84%EC%BD%98

用于上述情况

正则表达式

/(query|q)=.*

+此处的附加正则表达式+

它的终点将是换行符（\n）

或

首次出现&

问题:

我应该为额外的正则表达式写什么

你可以测试一下。谢谢。

将第一个捕获组改为非捕获组，然后用一个否定字符类代替 .*：

\b(?:query|q)=([^&\n]*)

将第一个捕获组改为非捕获组，然后用一个否定字符类代替 .*：

\b(?:query|q)=([^&\n]*)

我个人建议另一种方法，使用更程序化的函数来匹配所需的参数值，而不是“简单”的正则表达式。虽然一开始它可能看起来更复杂，但如果您将来需要找到不同的或附加的参数值，它确实允许轻松扩展

也就是说：

/**
 * Extracts the value of the first query-string parameter whose name is one
 * of the requested names.
 *
 * @param {string} haystack - The string to search. Everything after the
 *   first '?' is treated as the query string; if there is no '?', the whole
 *   string is treated as the query string.
 * @param {string[]} needles - The parameter names to look for, e.g.
 *   ['q', 'query']. Names are compared case-insensitively.
 * @returns {string} The (still URL-encoded) value of the first matching
 *   parameter, or '' if no parameter matches or the match has no '='.
 */
function queryGrab(haystack, needles) {
  // Names are compared as whole strings rather than through a generated
  // regular expression: that way 'q' cannot match 'qs' or 'xquery', needles
  // need no regex escaping, and there is no stateful lastIndex to trip over.
  var wanted = needles.map(function(needle) {
    return String(needle).toLowerCase();
  });

  // indexOf() yields -1 when there is no '?', so substring(0) keeps the
  // whole input in that case.
  var pairs = haystack.substring(haystack.indexOf('?') + 1).split('&');

  for (var i = 0; i < pairs.length; i++) {
    var pair = pairs[i],
      separator = pair.indexOf('='),
      // a pair without '=' is a bare name with an empty value:
      name = separator === -1 ? pair : pair.substring(0, separator);

    if (wanted.indexOf(name.toLowerCase()) !== -1) {
      // splitting at the FIRST '=' only, so values that themselves
      // contain '=' are returned intact:
      return separator === -1 ? '' : pair.substring(separator + 1);
    }
  }

  return '';
}

// Demonstration harness only: collect every element carrying
// class="haystack" from the page.
var haystacks = document.querySelectorAll('.haystack');

// Parameter names whose value we want to pull out of each haystack.
var needles = ['q', 'query'];

// Holds the most recently extracted value; assigned inside the loop below.
var retrieved;

// A NodeList is array-like, so borrow Array's forEach to walk it.
[].forEach.call(haystacks, function(element) {
  // Read the element's text once; it is needed both for the lookup
  // and for building the highlighted markup.
  var text = element.textContent;

  retrieved = queryGrab(text, needles);

  // Report the result in the node that follows the element
  // (presumably a text node in the demo markup - verify against the HTML).
  var label = '(found: ' + retrieved + ')';
  element.nextSibling.nodeValue = label;

  // Wrap the first occurrence of the value in a span so it can be styled;
  // '$&' is the replacement pattern for "the matched substring".
  var highlighted = text.replace(retrieved, '<span class="found">$&</span>');
  element.innerHTML = highlighted;
});


search?w=tot&DA=YZR&t__nil_searchbox=btn&sug=&o=&q=%EB%B9%84%EC%BD%98

search?q=%EB%B9%84%EC%BD%98&go=%EC%A0…4%EB%B9%84%EC%BD%98&sc=8-2&sp=-1&sk=&cvid=f05407c5bcb9496990d2874135aee8e9

where=nexearch&query=%EB%B9%84%EC%BD%98&sm=top_hty&fbm=0&ie=utf8

也就是说：

/**
 * Extracts the value of the first query-string parameter whose name is one
 * of the requested names.
 *
 * @param {string} haystack - The string to search. Everything after the
 *   first '?' is treated as the query string; if there is no '?', the whole
 *   string is treated as the query string.
 * @param {string[]} needles - The parameter names to look for, e.g.
 *   ['q', 'query']. Names are compared case-insensitively.
 * @returns {string} The (still URL-encoded) value of the first matching
 *   parameter, or '' if no parameter matches or the match has no '='.
 */
function queryGrab(haystack, needles) {
  // Names are compared as whole strings rather than through a generated
  // regular expression: that way 'q' cannot match 'qs' or 'xquery', needles
  // need no regex escaping, and there is no stateful lastIndex to trip over.
  var wanted = needles.map(function(needle) {
    return String(needle).toLowerCase();
  });

  // indexOf() yields -1 when there is no '?', so substring(0) keeps the
  // whole input in that case.
  var pairs = haystack.substring(haystack.indexOf('?') + 1).split('&');

  for (var i = 0; i < pairs.length; i++) {
    var pair = pairs[i],
      separator = pair.indexOf('='),
      // a pair without '=' is a bare name with an empty value:
      name = separator === -1 ? pair : pair.substring(0, separator);

    if (wanted.indexOf(name.toLowerCase()) !== -1) {
      // splitting at the FIRST '=' only, so values that themselves
      // contain '=' are returned intact:
      return separator === -1 ? '' : pair.substring(separator + 1);
    }
  }

  return '';
}

// Demonstration harness only: collect every element carrying
// class="haystack" from the page.
var haystacks = document.querySelectorAll('.haystack');

// Parameter names whose value we want to pull out of each haystack.
var needles = ['q', 'query'];

// Holds the most recently extracted value; assigned inside the loop below.
var retrieved;

// A NodeList is array-like, so borrow Array's forEach to walk it.
[].forEach.call(haystacks, function(element) {
  // Read the element's text once; it is needed both for the lookup
  // and for building the highlighted markup.
  var text = element.textContent;

  retrieved = queryGrab(text, needles);

  // Report the result in the node that follows the element
  // (presumably a text node in the demo markup - verify against the HTML).
  var label = '(found: ' + retrieved + ')';
  element.nextSibling.nodeValue = label;

  // Wrap the first occurrence of the value in a span so it can be styled;
  // '$&' is the replacement pattern for "the matched substring".
  var highlighted = text.replace(retrieved, '<span class="found">$&</span>');
  element.innerHTML = highlighted;
});


search?w=tot&DA=YZR&t__nil_searchbox=btn&sug=&o=&q=%EB%B9%84%EC%BD%98

search?q=%EB%B9%84%EC%BD%98&go=%EC%A0…4%EB%B9%84%EC%BD%98&sc=8-2&sp=-1&sk=&cvid=f05407c5bcb9496990d2874135aee8e9

where=nexearch&query=%EB%B9%84%EC%BD%98&sm=top_hty&fbm=0&ie=utf8

regexp不是解析这些查询字符串的最佳方法。有库和工具，但如果您想自己做：

/**
 * Parses the query-string portion of a URL into an object mapping
 * parameter names to their (still URL-encoded) values.
 *
 * Relies on Underscore's `_.object` to turn the [name, value] pairs
 * into an object.
 *
 * @param {string} url - A URL or bare query string. Everything after the
 *   first '?' is parsed; if there is no '?', the whole string is parsed.
 * @returns {Object} One property per parameter; a parameter without '='
 *   gets the value ''.
 */
function parseQueryString(url) {
    // indexOf() yields -1 when there is no '?', so substring(0) keeps the
    // whole input instead of failing on a missing second split() element.
    var query = url.substring(url.indexOf('?') + 1);

    var pairs = query.split('&')
        // skip empty segments ('' input, 'a=1&&b=2', trailing '&'):
        .filter(function(str) {
            return str !== '';
        })
        // turn each part into a 2-element [name, value] array, breaking at
        // the FIRST '=' only so values containing '=' are kept whole:
        .map(function(str) {
            var separator = str.indexOf('=');
            return separator === -1
                ? [str, '']
                : [str.substring(0, separator), str.substring(separator + 1)];
        });

    return _.object(pairs);          // build an object from the pairs
}

这使用了 Underscore 的 _.object，它从键/值对数组中创建一个对象；如果不想使用它，也可以用几行代码编写自己的等价实现。

现在，您要找的值就是：
params = parseQueryString(url);
return params.q || params.query;

regexp不是解析这些查询字符串的最佳方法。有库和工具，但如果您想自己做：
/**
 * Parses the query-string portion of a URL into an object mapping
 * parameter names to their (still URL-encoded) values.
 *
 * Relies on Underscore's `_.object` to turn the [name, value] pairs
 * into an object.
 *
 * @param {string} url - A URL or bare query string. Everything after the
 *   first '?' is parsed; if there is no '?', the whole string is parsed.
 * @returns {Object} One property per parameter; a parameter without '='
 *   gets the value ''.
 */
function parseQueryString(url) {
    // indexOf() yields -1 when there is no '?', so substring(0) keeps the
    // whole input instead of failing on a missing second split() element.
    var query = url.substring(url.indexOf('?') + 1);

    var pairs = query.split('&')
        // skip empty segments ('' input, 'a=1&&b=2', trailing '&'):
        .filter(function(str) {
            return str !== '';
        })
        // turn each part into a 2-element [name, value] array, breaking at
        // the FIRST '=' only so values containing '=' are kept whole:
        .map(function(str) {
            var separator = str.indexOf('=');
            return separator === -1
                ? [str, '']
                : [str.substring(0, separator), str.substring(separator + 1)];
        });

    return _.object(pairs);          // build an object from the pairs
}

这使用了 Underscore 的 _.object，它从键/值对数组中创建一个对象；如果不想使用它，也可以用几行代码编写自己的等价实现。
现在，您要找的值就是：
params = parseQueryString(url);
return params.q || params.query;

这将返回以query
结尾的任何参数的值，例如xquery
，这可能不是一件好事。添加单词边界。。现在不会了。我真的不能同意使用这样的正则表达式来解析查询字符串。它太脆了。举一个人为的例子，假设我的查询参数名为foo$query
。这是一个完全有效的参数名。然而，您的regexp将拾取它，就好像它是query
。在regexps中，您正在使用具有特定含义的\b
单词边界，而单词边界在上下文中表示其他含义。我不认为使用regexp解析查询字符串比使用JS、CSS、HTML或任何其他“语言”更有效，是的，查询字符串是一种小语言。至少，您应该将\b
替换为 (?:^|&)
。巧合的是，在您的情况下，这不会拾取结果，因为在query
之前有一个空格，这实际上使它成为一个无效的查询字符串，在这种情况下，不返回任何内容应被视为是正确的行为。这将返回以query
结尾的任何参数的值，例如xquery
，这可能不是一件好事。添加一个单词边界。。现在不会了。我真的不能同意使用这样的正则表达式来解析查询字符串。它太脆了。举一个人为的例子，假设我的查询参数名为foo$query
。这是一个完全有效的参数名。然而，您的regexp将拾取它，就好像它是query
。在regexps中，您正在使用具有特定含义的\b
单词边界，而单词边界在上下文中表示其他含义。我不认为使用regexp解析查询字符串比使用JS、CSS、HTML或任何其他“语言”更有效，是的，查询字符串是一种小语言。至少，您应该将\b
替换为 (?:^|&)
。巧合的是，在您的情况下，这不会得到结果，因为在查询
之前有一个空格，这实际上使它成为一个无效的查询字符串，在这种情况下，不返回任何内容应该被认为是正确的行为。