如何使用Javascript修改节点的内容？_Javascript_Jquery_Html_Regex

如何使用Javascript修改节点的内容？

javascript jquery html regex

如何使用Javascript修改节点的内容？,javascript,jquery,html,regex,Javascript,Jquery,Html,Regex,我需要使用Javascript做三件事：选择类为“foo”的所有节点查找这些节点中以“*”开头的所有单词用将这些单词包围起来，其中xyz是单词本身例如，内容： <ul> <li class="foo"> *abc def *ghi </li> <li class="bar"> abc *def *ghi </li> </ul> *abc def*ghi abc*def*ghi

我需要使用Javascript做三件事：

选择类为“foo”的所有节点

查找这些节点中以“*”开头的所有单词

用 <span class="xyz"> ... </span> 将这些单词包围起来，其中 xyz 是单词本身

例如，内容：

<ul>
  <li class="foo">
    *abc def *ghi
  </li>
  <li class="bar">
    abc *def *ghi
  </li>
</ul>



*abc def*ghi


abc*def*ghi

将成为

<ul>
  <li class="foo">
    <span class="abc">*abc</span> def <span class="ghi">*ghi</span>
  </li>
  <li class="bar">
    abc *def *ghi    <!-- Not part of a node with class "foo", so
  </li>                     no changes made. -->
</ul>



*abc def*ghi


abc*def*ghi

我该如何做到这一点？（附带说明，解决方案也涉及jQuery，但除此之外，我不希望包含任何其他依赖项。）

regexp看起来像这样（sed-ish语法）：

s/\*\(\w+\)\b\(?![^<]*>\)/<span class="\1">*\1<\/span>/g

因此：

$('li.foo').each(function() {
    var html = $(this).html();
    html = html.replace(/\*(\w+)\b(?![^<]*>)/g, '<span class="$1">*$1</span>');
    $(this).html(html);
});

\*(\w+)\b

段是一个重要的部分。它找到一个星号，后跟一个或多个单词字符，后跟某种类型的单词终止符（例如，行尾或空格）。该单词被捕获到

$1

，然后用作文本和输出类

紧接着的部分（

(?![^<]*>)

）是一个负向前瞻（negative lookahead）。它要求匹配之后不能出现右尖括号，除非在它之前先出现一个左尖括号。这可以防止匹配发生在 HTML 标记内部。它无法处理格式错误的 HTML，但格式错误的 HTML 本来就不应该出现。

不需要jQuery：

/**
 * Walk the DOM subtree rooted at `node` and wrap every "*word" token found
 * in a text node in a real <span class="word">*word</span> element.
 *
 * A token is an asterisk at the start of the text, or preceded by a
 * non-word character, followed by one or more word characters (\w).
 * Text inside <script> and <style> elements is left untouched.
 *
 * Running this twice over the same subtree wraps the already-wrapped words
 * again, so call it once per subtree.
 *
 * @param {Node} node  Root of the subtree to process (e.g. `document`).
 * @returns {undefined}
 */
function UE_replacer(node) {

   // just for performance, skip attribute and
   // comment nodes (types 2 and 8, respectively)
   if (node.nodeType === 2 || node.nodeType === 8) return;

   // for text nodes (type 3), wrap words of the
   // form *xyzzy with a span that has class xyzzy
   if (node.nodeType === 3) {
      var parent = node.parentNode;
      if (!parent) return;   // detached text node: nowhere to insert spans

      var text = node.nodeValue;
      var doc = node.ownerDocument;

      // group 1 = the character before the '*' (or '' at the start of the
      // text); it must stay outside the span. group 2 = the word itself.
      var starWord = /(^|\W)\*(\w+)/g;
      var consumed = 0;   // how much of `text` has been emitted so far
      var match;

      while ((match = starWord.exec(text)) !== null) {
         var start = match.index + match[1].length;

         // plain text between the previous token and this one
         if (start > consumed) {
            parent.insertBefore(
               doc.createTextNode(text.slice(consumed, start)), node);
         }

         // markup must be built as elements; assigning an HTML string to
         // nodeValue would only display the tags as literal text
         var span = doc.createElement('span');
         span.className = match[2];
         span.appendChild(doc.createTextNode('*' + match[2]));
         parent.insertBefore(span, node);

         consumed = starWord.lastIndex;
      }

      if (consumed === 0) return;   // no tokens: leave the node as it is

      // reuse the original node for whatever follows the last token
      if (consumed < text.length) {
         node.nodeValue = text.slice(consumed);
      } else {
         parent.removeChild(node);
      }
      return;
   }

   // inserting elements into script or style content would corrupt it
   if (node.nodeType === 1 && /^(script|style)$/i.test(node.nodeName)) return;

   // for all other node types, call this function recursively on all
   // child nodes. Iterate backwards: processing a text node inserts new
   // siblings at or after its own index, so lower indexes stay valid and
   // the freshly created spans are never visited.
   for (var i = node.childNodes.length; i-- > 0;) {
      UE_replacer(node.childNodes[i]);
   }
}

// Kick off the traversal at 'document', the root node of the page,
// so the whole tree below it is processed.
UE_replacer(document);

UE\u replacer=函数（节点）{
//为了提高性能，跳过属性和
//注释节点（分别为类型2和类型8）
if（node.nodeType==2）返回；
if（node.nodeType==8）返回；
//对于文本节点（类型3），将
//表单*xyzy，跨距为xyzy类
if（node.nodeType==3）{
//在实际文本中，nodeValue
//开始和结束的所有字符串（'g'=全局）
//在单词边界（'\b'）上，第一个
//字符为“*”，后跟一个或多个
//多个（“+”=一个或多个“word”字符
//（'\w'=单词字符）。保存所有单词
//角色（帕伦斯就是这么做的）这样
//它们可以在替换字符串中使用
//（“$1”=重复使用保存的字符）。
var text=node.nodeValue.replace(
/\b\*（\w+）\b/g，
'<span class="$1">*$1</span>'   // <== 错误！

不要尝试处理元素的 innerHTML/html()。这永远不会可靠，因为正则表达式的能力不足以解析 HTML。只需遍历文本节点，查找所需内容：
// Visit the children of an element from last to first, descending into
// child elements and handing every text node to wrapWordsInText.
// tagName and className are passed through unchanged.
//
function wrapWordsInDescendants(element, tagName, className) {
    var remaining= element.childNodes.length;
    while (remaining>0) {
        remaining-= 1;
        var current= element.childNodes[remaining];
        if (current.nodeType==1) { // Node.ELEMENT_NODE: go one level deeper
            wrapWordsInDescendants(current, tagName, className);
            continue;
        }
        if (current.nodeType==3) // Node.TEXT_NODE
            wrapWordsInText(current, tagName, className);
    }
}

// Matches a "*word" token. Group 1 is the character in front of the
// asterisk (or the empty string at the start of the text); it is needed to
// rule out asterisks in the middle of a word, but is not part of the token.
//
var starword= /(^|\W)\*\w+\b/g;

// Replace words in a single text node: every "*word" token in the node is
// moved into a new element of type tagName with class className. Only the
// token itself is wrapped; the character before the asterisk stays in the
// surrounding text.
//
// node      - text node to process; it must have a parent node
// tagName   - tag name of the wrapper element, e.g. 'span'
// className - value assigned to the wrapper's className
//
function wrapWordsInText(node, tagName, className) {

    // Get list of *word indexes as [start, end) pairs
    //
    var ixs= [];
    var match;
    starword.lastIndex= 0; // shared /g regex: always scan from the start
    while ((match= starword.exec(node.data))!==null) {
        // match.index points at the prefix character captured by group 1,
        // so skip over it to reach the asterisk
        var start= match.index+match[1].length;
        ixs.push([start, match.index+match[0].length]);
    }

    // Wrap each in the given element. Work from the last token to the first
    // so that the offsets collected above remain valid while node shrinks.
    //
    for (var i= ixs.length; i-->0;) {
        var element= document.createElement(tagName);
        element.className= className;
        node.splitText(ixs[i][1]);                      // cut off the text after the token
        element.appendChild(node.splitText(ixs[i][0])); // move the token into the wrapper
        node.parentNode.insertBefore(element, node.nextSibling);
    }
}

// Run the wrapper over every element that carries the class 'foo'
//
$('.foo').each(function(index, fooElement) {
    wrapWordsInDescendants(fooElement, 'span', 'xyz');
});


// If you're not using jQuery, you'll need the below bits instead of $...

// Fix missing indexOf method on IE
//
// Stand-in for Array.prototype.indexOf; call it with the array as `this`.
// It compares with strict equality, honours the optional fromIndex argument
// (negative values count back from the end) and skips holes in sparse
// arrays. Returns the first matching index, or -1 when there is none.
//
function arrayIndexOfFallback(item, fromIndex) {
    var length= this.length>>>0;
    var start= Number(fromIndex) || 0; // undefined and NaN both mean 0
    start= start<0 ? Math.ceil(start) : Math.floor(start);
    if (start<0)
        start= Math.max(length+start, 0);
    for (var i= start; i<length; i++)
        if (i in this && this[i]===item)
            return i;
    return -1;
}

// Install the stand-in only where the engine has no indexOf of its own
if (![].indexOf) Array.prototype.indexOf= arrayIndexOfFallback;

// Iterating over '*' (all elements) is not fast; if possible, reduce to
// all elements called 'li', or all element inside a certain element etc.
//
// The collection is walked from the end: wrapping adds new elements inside
// the element being processed, i.e. after it in the collection, so the
// indexes still to be visited are not shifted.
//
var elements= document.getElementsByTagName('*');
for (var i= elements.length; i-->0;) {
    var classNames= elements[i].className;

    // SVG elements expose className as an object holding the string in
    // baseVal rather than as a plain string
    if (classNames && typeof classNames.baseVal=='string')
        classNames= classNames.baseVal;
    if (typeof classNames!='string')
        continue;

    // class names may be separated by any whitespace, not only single spaces
    if (classNames.split(/\s+/).indexOf('foo')!=-1)
        wrapWordsInDescendants(elements[i], 'span', 'xyz');
}

//替换文本内容中的单词，递归遍历元素子元素。
//
函数wrapwordsin子体（元素、标记名、类名）{
对于（var i=element.childNodes.length；i-->0；）{
var child=element.childNodes[i]；
if（child.nodeType==1）//Node.ELEMENT\u Node
wrapwordsin子代（子代、标记名、类名）；
else if（child.nodeType==3）//Node.TEXT\u Node
wrapWordsInText（子级、标记名、类名）；
}
}
//替换单个文本节点中的单词
//
函数wrapWordsInText（节点、标记名、类名）{
//获取*单词索引的列表
//
var ixs=[]；
var匹配；
while（match=starword.exec（node.data））
推送（[match.index，match.index+match[0].length]）；
//将每个元素包装到给定的元素中
//
对于（变量i=ixs.length；i-->0；）{
var元素=document.createElement（标记名）；
element.className=className；
node.splitText（ixs[i][1]）；
element.appendChild（node.splitText（ixs[i][0]）；
node.parentNode.insertBefore（元素，节点.nextSibling）；
}
}
var starword=/（^ |\W）\*\W+\b/g；
//使用类“foo”处理所有元素
//
$（'.foo'）。每个（函数（）{
WrapWordsin后代（这是“span”、“xyz”）；
});
//如果不使用jQuery，则需要以下位而不是$。。。
//修复IE上缺少索引的方法
//
if（！[].indexOf）Array.prototype.indexOf=函数（项）{
对于（var i=0；i0；）
if（elements[i].className.split（“”）.indexOf（'foo'）！=-1）
wrapwordsin子体（元素[i]，'span'，'xyz'）；
使用任何库（例如prototype、jquery等）？是的！jQuery很好。我修改了问题以反映这一点。@Unknown Entity，您能否100%确定只有单词会出现在'foo'元素中？没有其他HTML标记隐藏在其中？此外，“foo”是否始终是元素，或者它可能是或或其他什么？在替换字符串中，javascript使用$1而不是\\1.此外，我建议使用text（）而不是html（），因为在
中的某个位置可能有一个与标记或属性同名的类。非常好！@系统暂停，我不相信使用text（）而不是html（）。如果更新text（），它将被转义，不？@strager，你是对的，text（t）将转义新span，因此它应该是html（t）。我正在尝试解决此问题：*abc.使用html（）在提取的文本中包含整个内容--*xyz被替换，导致无效的html。text（）将只生成*abc。@system PAUSE，啊，我明白你的意思。我稍后将处理regexp以满足你的要求。@system PAUSE，更新了我的答案。根据RegexBuddy工作（它使用.NET regexp AFAIK，但希望它也能与JS一起工作）。不完全确定，但这可能会出现以下情况：
@system:你能解释一下你的答案与strager的有什么不同吗？我很难看出两者之间的区别。@未知实体，system PAUSE的方法是迭代DOM节点并在文本节点上执行替换（并根据需要添加跨度）。我的方法只对单个节点中包含的HTML进行操作。两种方法在理论上都应该同样有效。@Unknown Entity，strager做对了，除了
/**
 * Walk the DOM subtree rooted at `node` and wrap every "*word" token found
 * in a text node in a real <span class="word">*word</span> element.
 *
 * A token is an asterisk at the start of the text, or preceded by a
 * non-word character, followed by one or more word characters (\w).
 * Text inside <script> and <style> elements is left untouched.
 *
 * Running this twice over the same subtree wraps the already-wrapped words
 * again, so call it once per subtree.
 *
 * @param {Node} node  Root of the subtree to process (e.g. `document`).
 * @returns {undefined}
 */
function UE_replacer(node) {

   // just for performance, skip attribute and
   // comment nodes (types 2 and 8, respectively)
   if (node.nodeType === 2 || node.nodeType === 8) return;

   // for text nodes (type 3), wrap words of the
   // form *xyzzy with a span that has class xyzzy
   if (node.nodeType === 3) {
      var parent = node.parentNode;
      if (!parent) return;   // detached text node: nowhere to insert spans

      var text = node.nodeValue;
      var doc = node.ownerDocument;

      // group 1 = the character before the '*' (or '' at the start of the
      // text); it must stay outside the span. group 2 = the word itself.
      var starWord = /(^|\W)\*(\w+)/g;
      var consumed = 0;   // how much of `text` has been emitted so far
      var match;

      while ((match = starWord.exec(text)) !== null) {
         var start = match.index + match[1].length;

         // plain text between the previous token and this one
         if (start > consumed) {
            parent.insertBefore(
               doc.createTextNode(text.slice(consumed, start)), node);
         }

         // markup must be built as elements; assigning an HTML string to
         // nodeValue would only display the tags as literal text
         var span = doc.createElement('span');
         span.className = match[2];
         span.appendChild(doc.createTextNode('*' + match[2]));
         parent.insertBefore(span, node);

         consumed = starWord.lastIndex;
      }

      if (consumed === 0) return;   // no tokens: leave the node as it is

      // reuse the original node for whatever follows the last token
      if (consumed < text.length) {
         node.nodeValue = text.slice(consumed);
      } else {
         parent.removeChild(node);
      }
      return;
   }

   // inserting elements into script or style content would corrupt it
   if (node.nodeType === 1 && /^(script|style)$/i.test(node.nodeName)) return;

   // for all other node types, call this function recursively on all
   // child nodes. Iterate backwards: processing a text node inserts new
   // siblings at or after its own index, so lower indexes stay valid and
   // the freshly created spans are never visited.
   for (var i = node.childNodes.length; i-- > 0;) {
      UE_replacer(node.childNodes[i]);
   }
}

// Kick off the traversal at 'document', the root node of the page,
// so the whole tree below it is processed.
UE_replacer(document);

// Visit the children of an element from last to first, descending into
// child elements and handing every text node to wrapWordsInText.
// tagName and className are passed through unchanged.
//
function wrapWordsInDescendants(element, tagName, className) {
    var remaining= element.childNodes.length;
    while (remaining>0) {
        remaining-= 1;
        var current= element.childNodes[remaining];
        if (current.nodeType==1) { // Node.ELEMENT_NODE: go one level deeper
            wrapWordsInDescendants(current, tagName, className);
            continue;
        }
        if (current.nodeType==3) // Node.TEXT_NODE
            wrapWordsInText(current, tagName, className);
    }
}

// Matches a "*word" token. Group 1 is the character in front of the
// asterisk (or the empty string at the start of the text); it is needed to
// rule out asterisks in the middle of a word, but is not part of the token.
//
var starword= /(^|\W)\*\w+\b/g;

// Replace words in a single text node: every "*word" token in the node is
// moved into a new element of type tagName with class className. Only the
// token itself is wrapped; the character before the asterisk stays in the
// surrounding text.
//
// node      - text node to process; it must have a parent node
// tagName   - tag name of the wrapper element, e.g. 'span'
// className - value assigned to the wrapper's className
//
function wrapWordsInText(node, tagName, className) {

    // Get list of *word indexes as [start, end) pairs
    //
    var ixs= [];
    var match;
    starword.lastIndex= 0; // shared /g regex: always scan from the start
    while ((match= starword.exec(node.data))!==null) {
        // match.index points at the prefix character captured by group 1,
        // so skip over it to reach the asterisk
        var start= match.index+match[1].length;
        ixs.push([start, match.index+match[0].length]);
    }

    // Wrap each in the given element. Work from the last token to the first
    // so that the offsets collected above remain valid while node shrinks.
    //
    for (var i= ixs.length; i-->0;) {
        var element= document.createElement(tagName);
        element.className= className;
        node.splitText(ixs[i][1]);                      // cut off the text after the token
        element.appendChild(node.splitText(ixs[i][0])); // move the token into the wrapper
        node.parentNode.insertBefore(element, node.nextSibling);
    }
}

// Run the wrapper over every element that carries the class 'foo'
//
$('.foo').each(function(index, fooElement) {
    wrapWordsInDescendants(fooElement, 'span', 'xyz');
});


// If you're not using jQuery, you'll need the below bits instead of $...

// Fix missing indexOf method on IE
//
// Stand-in for Array.prototype.indexOf; call it with the array as `this`.
// It compares with strict equality, honours the optional fromIndex argument
// (negative values count back from the end) and skips holes in sparse
// arrays. Returns the first matching index, or -1 when there is none.
//
function arrayIndexOfFallback(item, fromIndex) {
    var length= this.length>>>0;
    var start= Number(fromIndex) || 0; // undefined and NaN both mean 0
    start= start<0 ? Math.ceil(start) : Math.floor(start);
    if (start<0)
        start= Math.max(length+start, 0);
    for (var i= start; i<length; i++)
        if (i in this && this[i]===item)
            return i;
    return -1;
}

// Install the stand-in only where the engine has no indexOf of its own
if (![].indexOf) Array.prototype.indexOf= arrayIndexOfFallback;

// Iterating over '*' (all elements) is not fast; if possible, reduce to
// all elements called 'li', or all element inside a certain element etc.
//
// The collection is walked from the end: wrapping adds new elements inside
// the element being processed, i.e. after it in the collection, so the
// indexes still to be visited are not shifted.
//
var elements= document.getElementsByTagName('*');
for (var i= elements.length; i-->0;) {
    var classNames= elements[i].className;

    // SVG elements expose className as an object holding the string in
    // baseVal rather than as a plain string
    if (classNames && typeof classNames.baseVal=='string')
        classNames= classNames.baseVal;
    if (typeof classNames!='string')
        continue;

    // class names may be separated by any whitespace, not only single spaces
    if (classNames.split(/\s+/).indexOf('foo')!=-1)
        wrapWordsInDescendants(elements[i], 'span', 'xyz');
}