Warning: file_get_contents(/data/phpspider/zhask/data//catemap/1/php/278.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
PHP DOM XPath问题_Php_Dom_Xpath - Fatal编程技术网

PHP DOM XPath问题

PHP域XPath问题,php,dom,xpath,Php,Dom,Xpath,我试图用html标记解析文本块,但我遇到了一些问题 <?php libxml_use_internal_errors(true); $html = ' <html> <body> <div> Message <b>bold</b>, <s>strike</s> </div> <div> <span clas

我试图用html标记解析文本块,但我遇到了一些问题

<?php
    // Collect libxml parse errors internally instead of emitting them as PHP warnings.
    libxml_use_internal_errors(true);
    // Sample markup: two <div> blocks, the second one wrapping its content in a <span>.
    $html = '
<html>
<body>
    <div>
        Message <b>bold</b>, <s>strike</s>
    </div>
    <div>
        <span class="how">
            <a href="link" title="text">Link</a>, <b> BOLD </b>
        </span>
    </div>
</body>
</html>
    ';

    // Parse the sample markup into a DOM tree.
    // NOTE(review): the three properties below are set before loadHTML(); whether
    // each of them actually influences HTML parsing should be confirmed against
    // the DOMDocument documentation.
    $dom = new DOMDocument();
    $dom->preserveWhiteSpace = false;
    $dom->strictErrorChecking = false;
    $dom->recover = true;
    $dom->loadHTML($html);        

    /**
     * Collects text entries for one element by walking its direct child nodes.
     *
     * For every direct child that is a text node, the inner HTML of the whole
     * $element (not of that child) is appended. An element with several text
     * node children therefore yields the same entry several times.
     *
     * @param DOMNode  $element Node whose childNodes are inspected.
     * @param DOMXPath $xpath   Not used inside this function.
     * @return array List of array('type' => 'text', 'text' => ...) entries.
     */
    function getMessages($element, $xpath)
    {
        $messages = array();

        $children = $element->childNodes;        

        foreach ($children as $child) 
        { 

            if(strtolower($child->nodeName) == 'div')
            {
                // my functions
                // (placeholder: nested <div> children currently add nothing to $messages)
            }
            else
            if ($child->nodeType == XML_TEXT_NODE)
            {
                // Serializes the parent $element, not $child, so the result is
                // identical on every text-node iteration.
                $text = trim(DOMinnerHTML($element));
                // Truthiness check: skips the empty string (and also the string "0").
                if($text)
                {
                    $messages[] = array('type' => 'text', 'text' => $text);
                }
            }
        }

        return $messages;
    }

    /**
     * Returns the serialized HTML of an element's child nodes.
     *
     * Each direct child is deep-copied into its own temporary DOMDocument,
     * serialized with saveHTML() and trimmed before being concatenated.
     *
     * @param DOMNode $element Node whose children are serialized.
     * @return string|null Concatenated markup; null when $element has no children,
     *                     because the accumulator starts as null.
     */
    function DOMinnerHTML($element) 
    {
        $innerHTML = null; 
        $children = $element->childNodes;

        foreach ($children as $child) 
        {
            // A fresh document per child, so saveHTML() outputs only that child.
            $tmp_dom = new DOMDocument(); 
            // importNode(..., true) makes a deep copy owned by the temporary document.
            $tmp_dom->appendChild($tmp_dom->importNode($child, true)); 
            // trim() is applied to each child's markup separately.
            $innerHTML .= trim($tmp_dom->saveHTML()); 
        } 
        return $innerHTML; 
    } 

    // Select every <div> element in the parsed document.
    $xpath = new DOMXPath($dom);
    $messagesXpath = $xpath->query("//div");

    // Collect one result list per matched <div>, stopping after the second match.
    $messages = array();
    $i = 0;
    foreach($messagesXpath as $message)
    {
        $messages[] = getMessages($message, $xpath);
        $i++;
        // The break below is the body of this if (no braces are used).
        if ($i == 2)
        break;
    }

    // Dump the nested result structure for inspection.
    var_dump($messages);  

我认为您误解了正在被迭代的是哪些元素:您先选择了所有 <div>,然后将每个元素传递给 getMessages。但是,在 getMessages 中,您又对每个类型为 XML_TEXT_NODE 的 childNodes 进行迭代,这就是内容被重复复制的原因。

让我们以HTML为例:

<div>
    Message <b>bold</b>, <s>strike</s>
</div>

以上所有的代码对于这个问题都是必要的吗?你能把它减少到相关的部分吗?太多了。顺便说一句,你为什么不使用heredoc? @SalmanPK,这和问题有什么关系?
if ($child->nodeType == XML_TEXT_NODE)
{
    $text = trim(DOMinnerHTML($element));
    if($text)
    {
          $messages[] = array('type' => 'text', 'text' => $text);
    }
}
<div>
    Message <b>bold</b>, <s>strike</s>
</div>
<?php
    // Collect libxml parse errors internally instead of emitting them as PHP warnings.
    libxml_use_internal_errors(true);
    // Sample markup kept in a heredoc; the closing HTML; marker sits at column 0.
    $html = <<<HTML
<html>
<body>
    <div>
        Message <b>bold</b>, <s>strike</s>
    </div>
    <div>
        <span class="how">
            <a href="link" title="text">Link</a>, <b> BOLD </b>
        </span>
    </div>
</body>
</html>
HTML;

    // Parse the sample markup into a DOM tree.
    // NOTE(review): the three properties below are set before loadHTML(); whether
    // each of them actually influences HTML parsing should be confirmed against
    // the DOMDocument documentation.
    $dom = new DOMDocument();
    $dom->preserveWhiteSpace = false;
    $dom->strictErrorChecking = false;
    $dom->recover = true;
    $dom->loadHTML($html);

    /**
     * Builds the list of trimmed inner-HTML strings for a set of nodes.
     *
     * @param iterable $allDivs Nodes to process; only element nodes contribute.
     * @return array One string per element node, in iteration order.
     */
    function getMessages($allDivs) {
        $collected = array();

        foreach ($allDivs as $node) {
            // Anything that is not an element node is skipped.
            if ($node->nodeType != XML_ELEMENT_NODE) {
                continue;
            }

            $markup = DOMinnerHTML($node);
            $collected[] = trim($markup);
        }

        return $collected;
    }

    /**
     * Returns the serialized HTML of an element's child nodes (its "inner HTML").
     *
     * Each direct child is deep-copied into a scratch DOMDocument, serialized
     * with saveHTML() and trimmed before being concatenated, so whitespace at
     * the edges of every child's markup is dropped.
     *
     * @param DOMNode $element Node whose children are serialized.
     * @return string Concatenated markup; an empty string when there are no children.
     */
    function DOMinnerHTML($element) {
        // Start from '' rather than null so callers always receive a string
        // (handing null on to trim() is deprecated as of PHP 8.1).
        $innerHTML = '';

        foreach ($element->childNodes as $child) {
            // A fresh document per child keeps saveHTML() limited to that child.
            $tmp_dom = new DOMDocument();
            $tmp_dom->appendChild($tmp_dom->importNode($child, true));
            $innerHTML .= trim($tmp_dom->saveHTML());
        }

        return $innerHTML;
    }

    // Select every <div> element in the parsed document.
    $xpath = new DOMXPath($dom);
    $messagesXpath = $xpath->query("//div");

    // getMessages() already returns the complete list, so assign it directly.
    // Appending with [] to a not-yet-defined $messages would wrap the list in
    // an extra, needless array level.
    $messages = getMessages($messagesXpath);

    print_r($messages);
?>