如何使用 PHP 的 “new DomDocument()” 删除空标签?
我正在使用PHP“newdomdocument()”。我想从指定的效果中删除内容为空的标记。但是下面的代码不起作用。我该怎么做如何使用PHP删除空标记;新的DomDocument()“;?,php,domdocument,Php,Domdocument,我正在使用PHP“newdomdocument()”。我想从指定的效果中删除内容为空的标记。但是下面的代码不起作用。我该怎么做 $html = '<blockquote> <p>Lorem Ipsum has been the industry\'s standard dummy text ever since the 1500s,</p> </blockquote> <p>8</p> <hr> <p>
// Removes <p>, <a>, <strong> and <blockquote> elements that have no visible
// content from an HTML fragment and prints the resulting document.
$html = '<blockquote>
<p>Lorem Ipsum has been the industry\'s standard dummy text ever since the 1500s,</p>
</blockquote>
<p>8</p>
<hr>
<p></p>
<strong></strong>
<a href="" title="Link Name" target="_blank"></a>
<img src="tex.png" />
<span></span>
<ul><li></li></ul>
<ol><li></li></ol>
<em></em>
<u></u>
<s></s>
<blockquote></blockquote>
<p> </p>';

$dom = new DOMDocument();
$dom->loadHTML($html);
$xpath = new DOMXPath($dom);

// Only these elements are deleted when they turn out to be empty.
$removableTags = ['p', 'a', 'strong', 'blockquote'];

// Selects tags to be processed.
$tags_list = $xpath->query("//p|//br|//a|//strong|//img|//ul|//ol|//li|//em|//u|//s|//hr|//blockquote");
foreach ($tags_list as $tag) {
    if (!in_array($tag->tagName, $removableTags, true)) {
        continue;
    }

    // An element that wraps an image has an empty text value but still carries
    // content, so it must not be treated as empty.
    if ($xpath->query('.//img', $tag)->length > 0) {
        continue;
    }

    // Whitespace-only text (spaces, line breaks, non-breaking spaces) counts as
    // empty, so "<p> </p>" is removed as well as "<p></p>".
    if (preg_match('/^[\s\p{Z}]*$/u', $tag->nodeValue) === 1) {
        $tag->parentNode->removeChild($tag);
    }
}

$cleanHtml = $dom->saveHTML();
echo $cleanHtml;
$html='1!'
自16世纪以来,Lorem Ipsum一直是行业标准的虚拟文本
八,
';
$dom=新的DomDocument();
$dom->loadHTML($html);
$xpath=newdomxpath($dom);
//选择要处理的标记。
$tags|u list=$xpath->query(//p |//br |//a |//strong//img |//ul |//ol |//li |//em |//u |//s |//hr |//blockquote);
foreach($tags\u列表为$tag){
//检查并删除内容为空的标记。
if(在数组中($tag->tagName,['p','a','strong','blockquote'])){
如果($tag->nodeValue==“”){
$tag->parentNode->removeChild($tag);
}
}
}
$cleanHtml=$dom->saveHTML();
echo$cleanHtml;
所需结果:
<blockquote>
<p>Lorem Ipsum has been the industry\'s standard dummy text ever since the 1500s,</p>
</blockquote>
<p>8</p>
<hr />
自16世纪以来,Lorem Ipsum一直是行业标准的虚拟文本
八,
我对代码进行了编辑。现在运行顺利。但是我正在尝试删除
标记。
@Tygo我想使用现在可用的$xpath->query
查询来执行此操作。您能编辑您的问题并发布预期的输出吗?@JackFleeting我对代码进行了编辑。现在运行顺利。但我正在尝试删除“”标记。
<?php
// Sanitises an HTML fragment: strips disallowed attributes, drops images and
// links without a usable target, removes elements left without content, and
// finally prints the result restricted to an allow-list of tags.
$html = "<blockquote>
<p>Lorem Ipsum has been the industry's standard dummy text ever since the 1500s,</p>
</blockquote>
<p>8</p>
<hr>
<p></p>
<strong></strong>
<a href=\"\" title=\"Link Name\" target=\"_blank\"></a>
<img src=\"tex.png\" />
<span></span>
<ul><li></li></ul>
<ol><li></li></ol>
<em></em>
<u></u>
<s></s>
<blockquote></blockquote>
<p> </p>";

// Collect libxml parse warnings internally instead of silencing every PHP error.
$previousLibxmlSetting = libxml_use_internal_errors(true);

$dom = new DOMDocument();
// Non-ASCII characters are converted to numeric entities before parsing so the
// parser does not have to guess the input encoding.
$dom->loadHTML(
    mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'),
    LIBXML_HTML_NODEFDTD
);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

$xpath = new DOMXPath($dom);

// Tags that lose every attribute.
$attributeFreeTags = ['p', 'br', 'strong', 'ul', 'ol', 'li', 'em', 'u', 's', 'hr', 'blockquote'];
// Attributes that survive on <img> and <a> respectively.
$allowedImgAttributes = ['src', 'alt'];
$allowedLinkAttributes = ['href', 'target', 'title'];

foreach ($xpath->query('//p|//br|//a|//strong|//img|//ul|//ol|//li|//em|//u|//s|//hr|//blockquote') as $tag) {
    $tagName = $tag->tagName;

    // Plain formatting/structure tags: remove all attributes.
    if (in_array($tagName, $attributeFreeTags, true)) {
        // Iterate over a snapshot because the attribute map changes while removing.
        foreach (iterator_to_array($tag->attributes) as $attribute) {
            $tag->removeAttribute($attribute->name);
        }
        continue;
    }

    // Attribute-less <img>/<a> elements are left to the empty-element pass below.
    if (!$tag->hasAttributes()) {
        continue;
    }

    if ($tagName === 'img') {
        // Drop images whose src is empty or starts with blob:, data: or //:0.
        $src = trim($tag->getAttribute('src'));
        if (preg_match('/(^blob:|^data:|^\/\/:0|^$)/', $src) === 1) {
            $tag->parentNode->removeChild($tag);
            continue;
        }

        // Keep only the allowed image attributes.
        foreach (iterator_to_array($tag->attributes) as $attribute) {
            if (!in_array($attribute->name, $allowedImgAttributes, true)) {
                $tag->removeAttribute($attribute->name);
            }
        }
        continue;
    }

    if ($tagName === 'a') {
        // Drop links whose href is missing or blank.
        if (trim($tag->getAttribute('href')) === '') {
            $tag->parentNode->removeChild($tag);
            continue;
        }

        // Keep only the allowed link attributes.
        foreach (iterator_to_array($tag->attributes) as $attribute) {
            if (!in_array($attribute->name, $allowedLinkAttributes, true)) {
                $tag->removeAttribute($attribute->name);
            }
        }
        // Every remaining link gets the same rel value.
        $tag->setAttribute('rel', 'nofollow noopener');
    }
}

// Remove elements that have no child elements, no attributes and no
// non-whitespace text; <hr> and <br> are meant to be empty and are kept.
foreach ($xpath->query('//*[not(*) and not(@*) and not(text()[normalize-space()])]') as $tag) {
    if (!in_array($tag->tagName, ['hr', 'br'], true)) {
        $tag->parentNode->removeChild($tag);
    }
}

$cleanHtml = $dom->saveHTML();

// Recursively strip elements whose content is only whitespace, <br> tags,
// space-like entities or other such empty elements (e.g. the <ul></ul> that is
// left once its empty <li> has been removed above).
$withoutEmptyElements = preg_replace('~<(\w+)[^>]*>(?>[\p{Z}\p{C}]|<br\b[^>]*>|&(?:(?:nb|thin|zwnb|e[nm])sp|zwnj|#xfeff|#xa0|#160|#65279);|(?R))*</\1>~iu', "", $cleanHtml);
// preg_replace() returns null on failure; keep the previous markup in that case.
if ($withoutEmptyElements !== null) {
    $cleanHtml = $withoutEmptyElements;
}

// Drops every tag outside the allow-list, including the <html>/<body> wrapper.
$cleanHtml = strip_tags($cleanHtml, '<p><br><a><strong><img><ul><ol><li><em><u><s><hr><blockquote>');
echo $cleanHtml;
?>