PHP:剥离只包含空格的 <p> 和 <span> 标记

PHP剥离<;p>;及<;span>;包含空格的标记,php,regex,Php,Regex,我在PHP变量中保存了一些HTML字符串,其中包含如下字符串 "Some random text <p>&nbsp;<span></span>&nbsp;</p> and the random text continues" "<p>Some random</p> <p>&nbsp;<span></span>&nbsp;</p> and the

我在PHP变量中保存了一些HTML字符串,其中包含如下字符串

"Some random text <p>&nbsp;<span></span>&nbsp;</p> and the random text continues"
"<p>Some random</p> <p>&nbsp;<span></span>&nbsp;</p> and the <span> </span>"
“一些随机文本

,随机文本继续” “一些随机的

和”
如何去掉只包含空格(或 &nbsp;)的 <p> 和 <span> 标记?希望得到类似这样的结果:

"Some random text and the random text continues"
"<p>Some random</p> and the "
“一些随机文本,随机文本继续”
“一些随机的

和”
您需要使用递归:

// Sample input: two lines mixing real content with "blank" <p>/<span> elements.
$data = <<<'EOD'
Some random text <p>&nbsp;<span> </span>&nbsp;</p> and the random text continues
<p>Some random</p> <p>&nbsp;<span></span>&nbsp;</p> and the <span> </span>
EOD;

// Matches a <p> or <span> element whose content is only whitespace, "&nbsp;"
// entities, or further elements matched by this same pattern:
//   <(p|span)>   opening tag; the tag name is captured in group 1
//   (?> ... )*   atomic group, repeated: \s+ | &nbsp; | (?R)
//                (?R) recurses into the whole pattern to handle nested tags
//   </\1>        closing tag, via a backreference to group 1
$pattern = '~<(p|span)>(?>\s+|&nbsp;|(?R))*</\1>~';

$result = preg_replace($pattern, '', $data);

// preg_replace() returns null when the PCRE engine fails (for instance when a
// backtrack or recursion limit is hit); report that instead of silently
// printing nothing.
if ($result === null) {
    throw new RuntimeException('preg_replace() failed with PCRE error code ' . preg_last_error());
}

echo $result;
模式详解:

~              # 模式定界符
<(p|span)>     # 开始标记:标记名被捕获到捕获组1
(?>            # 打开一个原子组:所有必须忽略的内容
    \s+        # 空格
  |            # 或
    &nbsp;     # 不间断空格实体
  |            # 或
    (?R)       # 递归
)*             # 重复原子组
</\1>          # 结束标记:带有对捕获组1的反向引用
~

使用DOMDocument,您可以执行以下操作:

/**
 * Removes elements whose text content is blank from an HTML fragment.
 *
 * The fragment is wrapped in a temporary <div>, parsed with DOMDocument, and
 * every element whose string value is accepted by isEmpty() is detached from
 * the tree. The wrapper <div> itself is never a removal candidate.
 *
 * @param string      $html HTML fragment to clean.
 * @param array|false $tags Tag names the removal is restricted to; a falsy
 *                          value makes every element a candidate.
 *
 * @return string The remaining fragment, serialized without the wrapper <div>.
 */
function removeEmptyTags($html, $tags = false) {
    // Collect libxml parse errors internally while parsing; the previous
    // setting is restored before returning.
    $state = libxml_use_internal_errors(true);

    $dom = new DOMDocument;
    $dom->loadHTML("<div>$html</div>", LIBXML_HTML_NODEFDTD | LIBXML_HTML_NOIMPLIED);

    $xp = new DOMXPath($dom);
    $xp->registerNamespace('php', 'http://php.net/xpath');
    // Expose the PHP function isEmpty() to XPath expressions.
    $xp->registerPHPFunctions('isEmpty');

    // Build an optional "[name()="a" or name()="b"]" filter from the tag list.
    $predicate = '';
    if ($tags) {
        $predicate = '[' . implode(' or ', array_map(function($i) {
            return 'name()="' . $i . '"';
        }, $tags)) . ']';
    }

    // "/*//*" selects only descendants of the root element. A plain "//*"
    // would also select the wrapper <div>; removing it would leave
    // $dom->documentElement null for the serialization loop below.
    $nodeList = $xp->query('/*//*' . $predicate . '[php:functionString("isEmpty", .)]');

    foreach ($nodeList as $node) {
        $node->parentNode->removeChild($node);
    }

    // Serialize the wrapper's children so the wrapper itself is not output.
    $result = '';
    foreach ($dom->documentElement->childNodes as $node) {
        $result .= $dom->saveHTML($node);
    }

    // Drop the errors collected during parsing and restore the caller's
    // libxml error-handling mode.
    libxml_clear_errors();
    libxml_use_internal_errors($state);

    return $result;
}

/**
 * Tells whether a text consists only of whitespace and/or literal "&nbsp;"
 * sequences (matched case-insensitively). An empty string qualifies.
 *
 * @param string $txt Text to inspect.
 *
 * @return bool True only when preg_match() reports a match.
 */
function isEmpty($txt) {
    $blankOnly = '~^(?:\s+|&nbsp;)*$~iu';

    // preg_match() yields 1 on a match, 0 on no match and false on error;
    // only the first case counts as "empty".
    return preg_match($blankOnly, $txt) === 1;
}

// Demo: run removeEmptyTags() on $data, restricted to <p> and <span>, and print the result.
echo removeEmptyTags($data, ['p', 'span']);
函数removemptytags($html,$tags=false){
$state=libxml\u使用\u内部错误(true);
$dom=新的DOMDocument;
$dom->loadHTML($html,LIBXML_html_NODEFDTD | LIBXML_html_NOIMPLIED);
$xp=新的DOMXPath($dom);
$xp->registerNamespace('php','http://php.net/xpath');
$xp->RegisterHPFunctions('isEmpty');
$predicate='';
如果($tags)
$predicate='['.内爆('or',数组_映射(函数$i){
返回'name()='。$i'';
},$tags))。]';
$nodeList=$xp->query('//*'.$predicate'.[php:functionString(“isEmpty”,)]');
foreach($nodelistas$node){
$node->parentNode->removeChild($node);
}
$result='';
foreach($dom->documentElement->childNodes作为$node){
$result.=$dom->saveHTML($node);
}
返回$result;
}
函数isEmpty($txt){
返回preg_match('~^(?:\s+|)*$~iu',$txt)?true:false;
}
echo removemptytags($data,['p','span']);

您需要使用递归:

// Sample input: two lines mixing real content with "blank" <p>/<span> elements.
$data = <<<'EOD'
Some random text <p>&nbsp;<span> </span>&nbsp;</p> and the random text continues
<p>Some random</p> <p>&nbsp;<span></span>&nbsp;</p> and the <span> </span>
EOD;

// Matches a <p> or <span> element whose content is only whitespace, "&nbsp;"
// entities, or further elements matched by this same pattern:
//   <(p|span)>   opening tag; the tag name is captured in group 1
//   (?> ... )*   atomic group, repeated: \s+ | &nbsp; | (?R)
//                (?R) recurses into the whole pattern to handle nested tags
//   </\1>        closing tag, via a backreference to group 1
$pattern = '~<(p|span)>(?>\s+|&nbsp;|(?R))*</\1>~';

$result = preg_replace($pattern, '', $data);

// preg_replace() returns null when the PCRE engine fails (for instance when a
// backtrack or recursion limit is hit); report that instead of silently
// printing nothing.
if ($result === null) {
    throw new RuntimeException('preg_replace() failed with PCRE error code ' . preg_last_error());
}

echo $result;
模式详解:

~              # 模式定界符
<(p|span)>     # 开始标记:标记名被捕获到捕获组1
(?>            # 打开一个原子组:所有必须忽略的内容
    \s+        # 空格
  |            # 或
    &nbsp;     # 不间断空格实体
  |            # 或
    (?R)       # 递归
)*             # 重复原子组
</\1>          # 结束标记:带有对捕获组1的反向引用
~

使用DOMDocument,您可以执行以下操作:

/**
 * Removes elements whose text content is blank from an HTML fragment.
 *
 * The fragment is wrapped in a temporary <div>, parsed with DOMDocument, and
 * every element whose string value is accepted by isEmpty() is detached from
 * the tree. The wrapper <div> itself is never a removal candidate.
 *
 * @param string      $html HTML fragment to clean.
 * @param array|false $tags Tag names the removal is restricted to; a falsy
 *                          value makes every element a candidate.
 *
 * @return string The remaining fragment, serialized without the wrapper <div>.
 */
function removeEmptyTags($html, $tags = false) {
    // Collect libxml parse errors internally while parsing; the previous
    // setting is restored before returning.
    $state = libxml_use_internal_errors(true);

    $dom = new DOMDocument;
    $dom->loadHTML("<div>$html</div>", LIBXML_HTML_NODEFDTD | LIBXML_HTML_NOIMPLIED);

    $xp = new DOMXPath($dom);
    $xp->registerNamespace('php', 'http://php.net/xpath');
    // Expose the PHP function isEmpty() to XPath expressions.
    $xp->registerPHPFunctions('isEmpty');

    // Build an optional "[name()="a" or name()="b"]" filter from the tag list.
    $predicate = '';
    if ($tags) {
        $predicate = '[' . implode(' or ', array_map(function($i) {
            return 'name()="' . $i . '"';
        }, $tags)) . ']';
    }

    // "/*//*" selects only descendants of the root element. A plain "//*"
    // would also select the wrapper <div>; removing it would leave
    // $dom->documentElement null for the serialization loop below.
    $nodeList = $xp->query('/*//*' . $predicate . '[php:functionString("isEmpty", .)]');

    foreach ($nodeList as $node) {
        $node->parentNode->removeChild($node);
    }

    // Serialize the wrapper's children so the wrapper itself is not output.
    $result = '';
    foreach ($dom->documentElement->childNodes as $node) {
        $result .= $dom->saveHTML($node);
    }

    // Drop the errors collected during parsing and restore the caller's
    // libxml error-handling mode.
    libxml_clear_errors();
    libxml_use_internal_errors($state);

    return $result;
}

/**
 * Tells whether a text consists only of whitespace and/or literal "&nbsp;"
 * sequences (matched case-insensitively). An empty string qualifies.
 *
 * @param string $txt Text to inspect.
 *
 * @return bool True only when preg_match() reports a match.
 */
function isEmpty($txt) {
    $blankOnly = '~^(?:\s+|&nbsp;)*$~iu';

    // preg_match() yields 1 on a match, 0 on no match and false on error;
    // only the first case counts as "empty".
    return preg_match($blankOnly, $txt) === 1;
}

// Demo: run removeEmptyTags() on $data, restricted to <p> and <span>, and print the result.
echo removeEmptyTags($data, ['p', 'span']);
函数removemptytags($html,$tags=false){
$state=libxml\u使用\u内部错误(true);
$dom=新的DOMDocument;
$dom->loadHTML($html,LIBXML_html_NODEFDTD | LIBXML_html_NOIMPLIED);
$xp=新的DOMXPath($dom);
$xp->registerNamespace('php','http://php.net/xpath');
$xp->RegisterHPFunctions('isEmpty');
$predicate='';
如果($tags)
$predicate='['.内爆('or',数组_映射(函数$i){
返回'name()='。$i'';
},$tags))。]';
$nodeList=$xp->query('//*'.$predicate'.[php:functionString(“isEmpty”,)]');
foreach($nodelistas$node){
$node->parentNode->removeChild($node);
}
$result='';
foreach($dom->documentElement->childNodes作为$node){
$result.=$dom->saveHTML($node);
}
返回$result;
}
函数isEmpty($txt){
返回preg_match('~^(?:\s+|)*$~iu',$txt)?true:false;
}
echo removemptytags($data,['p','span']);

您需要使用递归:

// Sample input: two lines mixing real content with "blank" <p>/<span> elements.
$data = <<<'EOD'
Some random text <p>&nbsp;<span> </span>&nbsp;</p> and the random text continues
<p>Some random</p> <p>&nbsp;<span></span>&nbsp;</p> and the <span> </span>
EOD;

// Matches a <p> or <span> element whose content is only whitespace, "&nbsp;"
// entities, or further elements matched by this same pattern:
//   <(p|span)>   opening tag; the tag name is captured in group 1
//   (?> ... )*   atomic group, repeated: \s+ | &nbsp; | (?R)
//                (?R) recurses into the whole pattern to handle nested tags
//   </\1>        closing tag, via a backreference to group 1
$pattern = '~<(p|span)>(?>\s+|&nbsp;|(?R))*</\1>~';

$result = preg_replace($pattern, '', $data);

// preg_replace() returns null when the PCRE engine fails (for instance when a
// backtrack or recursion limit is hit); report that instead of silently
// printing nothing.
if ($result === null) {
    throw new RuntimeException('preg_replace() failed with PCRE error code ' . preg_last_error());
}

echo $result;
模式详解:

~              # 模式定界符
<(p|span)>     # 开始标记:标记名被捕获到捕获组1
(?>            # 打开一个原子组:所有必须忽略的内容
    \s+        # 空格
  |            # 或
    &nbsp;     # 不间断空格实体
  |            # 或
    (?R)       # 递归
)*             # 重复原子组
</\1>          # 结束标记:带有对捕获组1的反向引用
~

使用DOMDocument,您可以执行以下操作:

/**
 * Removes elements whose text content is blank from an HTML fragment.
 *
 * The fragment is wrapped in a temporary <div>, parsed with DOMDocument, and
 * every element whose string value is accepted by isEmpty() is detached from
 * the tree. The wrapper <div> itself is never a removal candidate.
 *
 * @param string      $html HTML fragment to clean.
 * @param array|false $tags Tag names the removal is restricted to; a falsy
 *                          value makes every element a candidate.
 *
 * @return string The remaining fragment, serialized without the wrapper <div>.
 */
function removeEmptyTags($html, $tags = false) {
    // Collect libxml parse errors internally while parsing; the previous
    // setting is restored before returning.
    $state = libxml_use_internal_errors(true);

    $dom = new DOMDocument;
    $dom->loadHTML("<div>$html</div>", LIBXML_HTML_NODEFDTD | LIBXML_HTML_NOIMPLIED);

    $xp = new DOMXPath($dom);
    $xp->registerNamespace('php', 'http://php.net/xpath');
    // Expose the PHP function isEmpty() to XPath expressions.
    $xp->registerPHPFunctions('isEmpty');

    // Build an optional "[name()="a" or name()="b"]" filter from the tag list.
    $predicate = '';
    if ($tags) {
        $predicate = '[' . implode(' or ', array_map(function($i) {
            return 'name()="' . $i . '"';
        }, $tags)) . ']';
    }

    // "/*//*" selects only descendants of the root element. A plain "//*"
    // would also select the wrapper <div>; removing it would leave
    // $dom->documentElement null for the serialization loop below.
    $nodeList = $xp->query('/*//*' . $predicate . '[php:functionString("isEmpty", .)]');

    foreach ($nodeList as $node) {
        $node->parentNode->removeChild($node);
    }

    // Serialize the wrapper's children so the wrapper itself is not output.
    $result = '';
    foreach ($dom->documentElement->childNodes as $node) {
        $result .= $dom->saveHTML($node);
    }

    // Drop the errors collected during parsing and restore the caller's
    // libxml error-handling mode.
    libxml_clear_errors();
    libxml_use_internal_errors($state);

    return $result;
}

/**
 * Tells whether a text consists only of whitespace and/or literal "&nbsp;"
 * sequences (matched case-insensitively). An empty string qualifies.
 *
 * @param string $txt Text to inspect.
 *
 * @return bool True only when preg_match() reports a match.
 */
function isEmpty($txt) {
    $blankOnly = '~^(?:\s+|&nbsp;)*$~iu';

    // preg_match() yields 1 on a match, 0 on no match and false on error;
    // only the first case counts as "empty".
    return preg_match($blankOnly, $txt) === 1;
}

// Demo: run removeEmptyTags() on $data, restricted to <p> and <span>, and print the result.
echo removeEmptyTags($data, ['p', 'span']);
函数removemptytags($html,$tags=false){
$state=libxml\u使用\u内部错误(true);
$dom=新的DOMDocument;
$dom->loadHTML($html,LIBXML_html_NODEFDTD | LIBXML_html_NOIMPLIED);
$xp=新的DOMXPath($dom);
$xp->registerNamespace('php','http://php.net/xpath');
$xp->RegisterHPFunctions('isEmpty');
$predicate='';
如果($tags)
$predicate='['.内爆('or',数组_映射(函数$i){
返回'name()='。$i'';
},$tags))。]';
$nodeList=$xp->query('//*'.$predicate'.[php:functionString(“isEmpty”,)]');
foreach($nodelistas$node){
$node->parentNode->removeChild($node);
}
$result='';
foreach($dom->documentElement->childNodes作为$node){
$result.=$dom->saveHTML($node);
}
返回$result;
}
函数isEmpty($txt){
返回preg_match('~^(?:\s+|)*$~iu',$txt)?true:false;
}
echo removemptytags($data,['p','span']);

您需要使用递归:

// Sample input: two lines mixing real content with "blank" <p>/<span> elements.
$data = <<<'EOD'
Some random text <p>&nbsp;<span> </span>&nbsp;</p> and the random text continues
<p>Some random</p> <p>&nbsp;<span></span>&nbsp;</p> and the <span> </span>
EOD;

// Matches a <p> or <span> element whose content is only whitespace, "&nbsp;"
// entities, or further elements matched by this same pattern:
//   <(p|span)>   opening tag; the tag name is captured in group 1
//   (?> ... )*   atomic group, repeated: \s+ | &nbsp; | (?R)
//                (?R) recurses into the whole pattern to handle nested tags
//   </\1>        closing tag, via a backreference to group 1
$pattern = '~<(p|span)>(?>\s+|&nbsp;|(?R))*</\1>~';

$result = preg_replace($pattern, '', $data);

// preg_replace() returns null when the PCRE engine fails (for instance when a
// backtrack or recursion limit is hit); report that instead of silently
// printing nothing.
if ($result === null) {
    throw new RuntimeException('preg_replace() failed with PCRE error code ' . preg_last_error());
}

echo $result;
模式详解:

~              # 模式定界符
<(p|span)>     # 开始标记:标记名被捕获到捕获组1
(?>            # 打开一个原子组:所有必须忽略的内容
    \s+        # 空格
  |            # 或
    &nbsp;     # 不间断空格实体
  |            # 或
    (?R)       # 递归
)*             # 重复原子组
</\1>          # 结束标记:带有对捕获组1的反向引用
~

使用DOMDocument,您可以执行以下操作:

/**
 * Removes elements whose text content is blank from an HTML fragment.
 *
 * The fragment is wrapped in a temporary <div>, parsed with DOMDocument, and
 * every element whose string value is accepted by isEmpty() is detached from
 * the tree. The wrapper <div> itself is never a removal candidate.
 *
 * @param string      $html HTML fragment to clean.
 * @param array|false $tags Tag names the removal is restricted to; a falsy
 *                          value makes every element a candidate.
 *
 * @return string The remaining fragment, serialized without the wrapper <div>.
 */
function removeEmptyTags($html, $tags = false) {
    // Collect libxml parse errors internally while parsing; the previous
    // setting is restored before returning.
    $state = libxml_use_internal_errors(true);

    $dom = new DOMDocument;
    $dom->loadHTML("<div>$html</div>", LIBXML_HTML_NODEFDTD | LIBXML_HTML_NOIMPLIED);

    $xp = new DOMXPath($dom);
    $xp->registerNamespace('php', 'http://php.net/xpath');
    // Expose the PHP function isEmpty() to XPath expressions.
    $xp->registerPHPFunctions('isEmpty');

    // Build an optional "[name()="a" or name()="b"]" filter from the tag list.
    $predicate = '';
    if ($tags) {
        $predicate = '[' . implode(' or ', array_map(function($i) {
            return 'name()="' . $i . '"';
        }, $tags)) . ']';
    }

    // "/*//*" selects only descendants of the root element. A plain "//*"
    // would also select the wrapper <div>; removing it would leave
    // $dom->documentElement null for the serialization loop below.
    $nodeList = $xp->query('/*//*' . $predicate . '[php:functionString("isEmpty", .)]');

    foreach ($nodeList as $node) {
        $node->parentNode->removeChild($node);
    }

    // Serialize the wrapper's children so the wrapper itself is not output.
    $result = '';
    foreach ($dom->documentElement->childNodes as $node) {
        $result .= $dom->saveHTML($node);
    }

    // Drop the errors collected during parsing and restore the caller's
    // libxml error-handling mode.
    libxml_clear_errors();
    libxml_use_internal_errors($state);

    return $result;
}

/**
 * Tells whether a text consists only of whitespace and/or literal "&nbsp;"
 * sequences (matched case-insensitively). An empty string qualifies.
 *
 * @param string $txt Text to inspect.
 *
 * @return bool True only when preg_match() reports a match.
 */
function isEmpty($txt) {
    $blankOnly = '~^(?:\s+|&nbsp;)*$~iu';

    // preg_match() yields 1 on a match, 0 on no match and false on error;
    // only the first case counts as "empty".
    return preg_match($blankOnly, $txt) === 1;
}

// Demo: run removeEmptyTags() on $data, restricted to <p> and <span>, and print the result.
echo removeEmptyTags($data, ['p', 'span']);
函数removemptytags($html,$tags=false){
$state=libxml\u使用\u内部错误(true);
$dom=新的DOMDocument;
$dom->loadHTML($html,LIBXML_html_NODEFDTD | LIBXML_html_NOIMPLIED);
$xp=新的DOMXPath($dom);
$xp->registerNamespace('php','http://php.net/xpath');
$xp->RegisterHPFunctions('isEmpty');
$predicate='';
如果($tags)
$predicate='['.内爆('or',数组_映射(函数$i){
返回'name()='。$i'';
},$tags))。]';
$nodeList=$xp->query('//*'.$predicate'.[php:functionString(“isEmpty”,)]');
foreach($nodelistas$node){
$node->parentNode->removeChild($node);
}
$result='';
foreach($dom->documentElement->childNodes作为$node){
$result.=$dom->saveHTML($node);
}
返回$result;
}
函数isEmpty($txt){
返回preg_match('~^(?:\s+|)*$~iu',$txt)?true:false;
}
echo removemptytags($data,['p','span']);

你试过什么吗?我试过:“#(\s | |)*#”但它只会删除p ta