Warning: file_get_contents(/data/phpspider/zhask/data//catemap/1/php/255.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Php XPath不检索某些内容_Php_Xpath - Fatal编程技术网

Php XPath不检索某些内容

Php XPath不检索某些内容,php,xpath,Php,Xpath,我是一个新手,试图编写一个爬虫程序,从论坛中获取一些统计数据 这是我的密码: <?php $ch = curl_init(); $timeout = 0; // set to zero for no timeout curl_setopt ($ch, CURLOPT_URL, 'http://m.jeuxvideo.com/forums/42-51-61913988-1-0-1-0-je-code-un-bot-pour-le-forom-je-vous-le-montre-en-act

我是一个新手,试图编写一个爬虫程序,从论坛中获取一些统计数据

这是我的代码:

<?php

/*
 * Downloads one forum topic page and collects, for every post, its author,
 * date and message into $tab, which is then dumped inside a <pre> block.
 *
 * The three node lists are walked by element position, so entry N of $tab is
 * built from the N-th author link, the N-th date block and the N-th message
 * block. NOTE(review): this assumes the page contains exactly one of each per
 * post - confirm against the live markup.
 */

$ch = curl_init();
$timeout = 0; // set to zero for no timeout
curl_setopt($ch, CURLOPT_URL, 'http://m.jeuxvideo.com/forums/42-51-61913988-1-0-1-0-je-code-un-bot-pour-le-forom-je-vous-le-montre-en-action.htm');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
$file_contents = curl_exec($ch);
$curl_error = curl_error($ch); // must be read before the handle is closed
curl_close($ch);

// curl_exec() returns false on failure, and an empty body cannot be parsed.
if (!is_string($file_contents) || $file_contents === '') {
    exit('Unable to download the page. ' . $curl_error);
}

$dom = new DOMDocument;
// Collect parser diagnostics internally instead of emitting them as warnings.
libxml_use_internal_errors(true);
$dom->loadHTML($file_contents);
libxml_clear_errors();

$xpath = new DOMXPath($dom);
$authors = $xpath->query("//div[@class='who-post']/a");
$dates = $xpath->query("//div[@class='date-post']");
$messages = $xpath->query("//div[@class='message  text-enrichi-fmobile  text-crop-fmobile']");

// Always defined, so the dump below works even when nothing matched.
$tab = [];

$count = max($authors->length, $dates->length, $messages->length);
for ($i = 0; $i < $count; $i++) {
    $author = $authors->item($i);
    $date = $dates->item($i);
    $message = $messages->item($i);

    // Serialise the children of the message block back to HTML rather than
    // reading nodeValue: elements without text (such as <img>) have an empty
    // nodeValue and would otherwise disappear from the result.
    $content = '';
    if ($message !== null) {
        foreach ($message->childNodes as $child) {
            $content .= $dom->saveHTML($child);
        }
    }

    $tab[$i] = [
        'author'  => $author !== null ? trim($author->textContent) : '',
        'date'    => $date !== null ? trim($date->textContent) : '',
        'content' => trim($content),
    ];
}

?>
<h1>Participants</h1>
<pre>
<?php
// The data comes from a third-party page, so escape it before printing.
echo htmlspecialchars(print_r($tab, true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
?>
</pre>
$dom->loadHTML($file_contents);
$xpath = new DOMXPath($dom);
// One node list per field; each list is walked in its own loop below.
$posts = $xpath->query("//div[@class='who-post']/a");
$dates = $xpath->query("//div[@class='date-post']");
$contents = $xpath->query("//div[@class='message  text-enrichi-fmobile  text-crop-fmobile']/p");
// In each loop $i advances once per child node, not once per matched element.
$i = 0;
foreach ($posts as $post) {
    $nodes = $post->childNodes;
    foreach ($nodes as $node) {
        $value = trim($node->nodeValue);
        $tab[$i]['author'] = $value;
        $i++;
    }
}
$i = 0;
foreach ($dates as $date) {
    $nodes = $date->childNodes;
    foreach ($nodes as $node) {
        $value = trim($node->nodeValue);
        $tab[$i]['date'] = $value;
        $i++;
    }
}
$i = 0;
foreach ($contents as $content) {
    $nodes = $content->childNodes;
    foreach ($nodes as $node) {
        $value = $node->nodeValue;
        echo $value;
        $tab[$i]['content'] = trim($value);
        $i++;
    }
}
?>
参与者
如您所见,代码不会检索某些内容。例如,我正在尝试从以下位置检索此内容:

第二篇文章是一张图片,我的代码不起作用

另一方面,我想我犯了一些错误,我觉得我的代码很难看


您能帮我吗?

您只需先选择帖子,然后使用以下方法分别获取每个子数据:

  • 使用 DOMXPath::evaluate 结合 XPath 函数 normalize-space 检索纯文本
  • 使用 DOMXPath::query 与 DOMDocument::saveHTML 组合以检索消息段落
代码:


无关提示:抓取网站的HTML本身并不违法,但未经他们同意,你应该避免在自己的应用程序/网站上显示他们的数据。此外,如果他们决定更改HTML结构/CSS类名,你的代码可能会随时失效。

感谢您的编码课程……太棒了,真的!不过还有一个小问题:它仍然无法处理图片。
如果您能给我们一个示例,说明您希望输出是什么样子的,那将非常有帮助。
// Select every post container first, then read each field relative to that
// container, so author, date and message always come from the same post.
$xpath = new DOMXPath($dom);
$postNodes = $xpath->query('//*[@class="post"]');

$posts = [];
foreach ($postNodes as $postNode) {
    // Serialise each message paragraph back to HTML and join them in order.
    $paragraphs = [];
    foreach ($xpath->query('.//*[contains(@class, "message")]/p', $postNode) as $paragraphNode) {
        $paragraphs[] = $dom->saveHTML($paragraphNode);
    }

    // normalize-space() returns the element text with whitespace collapsed.
    $entry = new stdClass();
    $entry->author = $xpath->evaluate('normalize-space(.//*[@class="who-post"])', $postNode);
    $entry->date = $xpath->evaluate('normalize-space(.//*[@class="date-post"])', $postNode);
    $entry->message = implode('', $paragraphs);

    $posts[] = $entry;
}

print_r($posts);