PHP剥离<;p>;及<;span>;包含空格的标记
我在PHP变量中保存了一些HTML字符串,其中包含如下字符串PHP剥离<;p>;及<;span>;包含空格的标记,php,regex,Php,Regex,我在PHP变量中保存了一些HTML字符串,其中包含如下字符串 "Some random text <p> <span></span> </p> and the random text continues" "<p>Some random</p> <p> <span></span> </p> and the
"Some random text <p> <span></span> </p> and the random text continues"
"<p>Some random</p> <p> <span></span> </p> and the <span> </span>"
“一些随机文本,随机文本继续”
“一些随机的和”
如何去掉包含空格的
和
标记?对这样的事情:
"Some random text and the random text continues"
"<p>Some random</p> and the "
“一些随机文本,随机文本继续”
“一些随机的和”
您需要使用递归:
$data = <<<'EOD'
Some random text <p> <span> </span> </p> and the random text continues
<p>Some random</p> <p> <span></span> </p> and the <span> </span>
EOD;
$pattern = '~<(p|span)>(?>\s+| |(?R))*</\1>~';
$result = preg_replace($pattern, '', $data);
echo $result;
$data=#打开一个原子组:所有必须忽略的内容
\s+#空格
|#或
#
|#或
(?R)#递归
)*#重复原子群
#结束标记:带有对捕获组1的反向引用
~
使用DOMDocument,您可以执行以下操作:
function removeEmptyTags($html, $tags = false) {
$state = libxml_use_internal_errors(true);
$dom = new DOMDocument;
$dom->loadHTML("<div>$html</div>", LIBXML_HTML_NODEFDTD | LIBXML_HTML_NOIMPLIED);
$xp = new DOMXPath($dom);
$xp->registerNamespace('php', 'http://php.net/xpath');
$xp->registerPHPFunctions('isEmpty');
$predicate = '';
if ($tags)
$predicate = '[' . implode(' or ', array_map(function($i) {
return 'name()="' . $i . '"';
}, $tags)) . ']';
$nodeList = $xp->query('//*'. $predicate . '[php:functionString("isEmpty", .)]');
foreach ($nodeList as $node) {
$node->parentNode->removeChild($node);
}
$result = '';
foreach ($dom->documentElement->childNodes as $node) {
$result .= $dom->saveHTML($node);
}
return $result;
}
function isEmpty($txt) {
return preg_match('~^(?:\s+| )*$~iu', $txt) ? true : false;
}
echo removeEmptyTags($data, ['p', 'span']);
函数removemptytags($html,$tags=false){
$state=libxml\u使用\u内部错误(true);
$dom=新的DOMDocument;
$dom->loadHTML($html,LIBXML_html_NODEFDTD | LIBXML_html_NOIMPLIED);
$xp=新的DOMXPath($dom);
$xp->registerNamespace('php','http://php.net/xpath');
$xp->RegisterHPFunctions('isEmpty');
$predicate='';
如果($tags)
$predicate='['.内爆('or',数组_映射(函数$i){
返回'name()='。$i'';
},$tags))。]';
$nodeList=$xp->query('//*'.$predicate'.[php:functionString(“isEmpty”,)]');
foreach($nodelistas$node){
$node->parentNode->removeChild($node);
}
$result='';
foreach($dom->documentElement->childNodes作为$node){
$result.=$dom->saveHTML($node);
}
返回$result;
}
函数isEmpty($txt){
返回preg_match('~^(?:\s+|)*$~iu',$txt)?true:false;
}
echo removemptytags($data,['p','span']);
您需要使用递归:
$data = <<<'EOD'
Some random text <p> <span> </span> </p> and the random text continues
<p>Some random</p> <p> <span></span> </p> and the <span> </span>
EOD;
$pattern = '~<(p|span)>(?>\s+| |(?R))*</\1>~';
$result = preg_replace($pattern, '', $data);
echo $result;
$data=#打开一个原子组:所有必须忽略的内容
\s+#空格
|#或
#
|#或
(?R)#递归
)*#重复原子群
#结束标记:带有对捕获组1的反向引用
~
使用DOMDocument,您可以执行以下操作:
function removeEmptyTags($html, $tags = false) {
$state = libxml_use_internal_errors(true);
$dom = new DOMDocument;
$dom->loadHTML("<div>$html</div>", LIBXML_HTML_NODEFDTD | LIBXML_HTML_NOIMPLIED);
$xp = new DOMXPath($dom);
$xp->registerNamespace('php', 'http://php.net/xpath');
$xp->registerPHPFunctions('isEmpty');
$predicate = '';
if ($tags)
$predicate = '[' . implode(' or ', array_map(function($i) {
return 'name()="' . $i . '"';
}, $tags)) . ']';
$nodeList = $xp->query('//*'. $predicate . '[php:functionString("isEmpty", .)]');
foreach ($nodeList as $node) {
$node->parentNode->removeChild($node);
}
$result = '';
foreach ($dom->documentElement->childNodes as $node) {
$result .= $dom->saveHTML($node);
}
return $result;
}
function isEmpty($txt) {
return preg_match('~^(?:\s+| )*$~iu', $txt) ? true : false;
}
echo removeEmptyTags($data, ['p', 'span']);
函数removemptytags($html,$tags=false){
$state=libxml\u使用\u内部错误(true);
$dom=新的DOMDocument;
$dom->loadHTML($html,LIBXML_html_NODEFDTD | LIBXML_html_NOIMPLIED);
$xp=新的DOMXPath($dom);
$xp->registerNamespace('php','http://php.net/xpath');
$xp->RegisterHPFunctions('isEmpty');
$predicate='';
如果($tags)
$predicate='['.内爆('or',数组_映射(函数$i){
返回'name()='。$i'';
},$tags))。]';
$nodeList=$xp->query('//*'.$predicate'.[php:functionString(“isEmpty”,)]');
foreach($nodelistas$node){
$node->parentNode->removeChild($node);
}
$result='';
foreach($dom->documentElement->childNodes作为$node){
$result.=$dom->saveHTML($node);
}
返回$result;
}
函数isEmpty($txt){
返回preg_match('~^(?:\s+|)*$~iu',$txt)?true:false;
}
echo removemptytags($data,['p','span']);
您需要使用递归:
$data = <<<'EOD'
Some random text <p> <span> </span> </p> and the random text continues
<p>Some random</p> <p> <span></span> </p> and the <span> </span>
EOD;
$pattern = '~<(p|span)>(?>\s+| |(?R))*</\1>~';
$result = preg_replace($pattern, '', $data);
echo $result;
$data=#打开一个原子组:所有必须忽略的内容
\s+#空格
|#或
#
|#或
(?R)#递归
)*#重复原子群
#结束标记:带有对捕获组1的反向引用
~
使用DOMDocument,您可以执行以下操作:
function removeEmptyTags($html, $tags = false) {
$state = libxml_use_internal_errors(true);
$dom = new DOMDocument;
$dom->loadHTML("<div>$html</div>", LIBXML_HTML_NODEFDTD | LIBXML_HTML_NOIMPLIED);
$xp = new DOMXPath($dom);
$xp->registerNamespace('php', 'http://php.net/xpath');
$xp->registerPHPFunctions('isEmpty');
$predicate = '';
if ($tags)
$predicate = '[' . implode(' or ', array_map(function($i) {
return 'name()="' . $i . '"';
}, $tags)) . ']';
$nodeList = $xp->query('//*'. $predicate . '[php:functionString("isEmpty", .)]');
foreach ($nodeList as $node) {
$node->parentNode->removeChild($node);
}
$result = '';
foreach ($dom->documentElement->childNodes as $node) {
$result .= $dom->saveHTML($node);
}
return $result;
}
function isEmpty($txt) {
return preg_match('~^(?:\s+| )*$~iu', $txt) ? true : false;
}
echo removeEmptyTags($data, ['p', 'span']);
函数removemptytags($html,$tags=false){
$state=libxml\u使用\u内部错误(true);
$dom=新的DOMDocument;
$dom->loadHTML($html,LIBXML_html_NODEFDTD | LIBXML_html_NOIMPLIED);
$xp=新的DOMXPath($dom);
$xp->registerNamespace('php','http://php.net/xpath');
$xp->RegisterHPFunctions('isEmpty');
$predicate='';
如果($tags)
$predicate='['.内爆('or',数组_映射(函数$i){
返回'name()='。$i'';
},$tags))。]';
$nodeList=$xp->query('//*'.$predicate'.[php:functionString(“isEmpty”,)]');
foreach($nodelistas$node){
$node->parentNode->removeChild($node);
}
$result='';
foreach($dom->documentElement->childNodes作为$node){
$result.=$dom->saveHTML($node);
}
返回$result;
}
函数isEmpty($txt){
返回preg_match('~^(?:\s+|)*$~iu',$txt)?true:false;
}
echo removemptytags($data,['p','span']);
您需要使用递归:
$data = <<<'EOD'
Some random text <p> <span> </span> </p> and the random text continues
<p>Some random</p> <p> <span></span> </p> and the <span> </span>
EOD;
$pattern = '~<(p|span)>(?>\s+| |(?R))*</\1>~';
$result = preg_replace($pattern, '', $data);
echo $result;
$data=#打开一个原子组:所有必须忽略的内容
\s+#空格
|#或
#
|#或
(?R)#递归
)*#重复原子群
#结束标记:带有对捕获组1的反向引用
~
使用DOMDocument,您可以执行以下操作:
function removeEmptyTags($html, $tags = false) {
$state = libxml_use_internal_errors(true);
$dom = new DOMDocument;
$dom->loadHTML("<div>$html</div>", LIBXML_HTML_NODEFDTD | LIBXML_HTML_NOIMPLIED);
$xp = new DOMXPath($dom);
$xp->registerNamespace('php', 'http://php.net/xpath');
$xp->registerPHPFunctions('isEmpty');
$predicate = '';
if ($tags)
$predicate = '[' . implode(' or ', array_map(function($i) {
return 'name()="' . $i . '"';
}, $tags)) . ']';
$nodeList = $xp->query('//*'. $predicate . '[php:functionString("isEmpty", .)]');
foreach ($nodeList as $node) {
$node->parentNode->removeChild($node);
}
$result = '';
foreach ($dom->documentElement->childNodes as $node) {
$result .= $dom->saveHTML($node);
}
return $result;
}
function isEmpty($txt) {
return preg_match('~^(?:\s+| )*$~iu', $txt) ? true : false;
}
echo removeEmptyTags($data, ['p', 'span']);
函数removemptytags($html,$tags=false){
$state=libxml\u使用\u内部错误(true);
$dom=新的DOMDocument;
$dom->loadHTML($html,LIBXML_html_NODEFDTD | LIBXML_html_NOIMPLIED);
$xp=新的DOMXPath($dom);
$xp->registerNamespace('php','http://php.net/xpath');
$xp->RegisterHPFunctions('isEmpty');
$predicate='';
如果($tags)
$predicate='['.内爆('or',数组_映射(函数$i){
返回'name()='。$i'';
},$tags))。]';
$nodeList=$xp->query('//*'.$predicate'.[php:functionString(“isEmpty”,)]');
foreach($nodelistas$node){
$node->parentNode->removeChild($node);
}
$result='';
foreach($dom->documentElement->childNodes作为$node){
$result.=$dom->saveHTML($node);
}
返回$result;
}
函数isEmpty($txt){
返回preg_match('~^(?:\s+|)*$~iu',$txt)?true:false;
}
echo removemptytags($data,['p','span']);
你试过什么吗?我试过:“#(\s | |)*#”但它只会删除p ta