如何使用php DOMDocument从子节点获取文本
我一直在编写一段 PHP 代码,用来从某个站点抓取信息。到目前为止,我已经能够获取 href 属性,但找不到从子节点 span 中获取文本的方法,有人能帮我吗?(标签:php、web-crawler、domdocument)相关的 HTML 如下:
<!-- Sample markup: the anchor carries the href; the title text lives in the child <span>. -->
<a class="js-publication" href="publication/247931167">
<span class="publication-title">An approach for textual authoring</span>
</a>
你可以用
// Load the page while letting libxml collect its parse warnings internally,
// instead of silencing every error with the @ operator. The previous libxml
// setting is restored afterwards so other code is unaffected.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$dom->loadHTMLFile($curPage);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

// Print the href of every publication anchor, one per line.
$anchors = $dom->getElementsByTagName('a');
foreach ($anchors as $element) {
    $class_ = $element->getAttribute('class');
    // Keep only anchors whose class attribute starts with "js-publication".
    // The earlier `0 !== strpos(...)` test was inverted: it accepted every
    // anchor EXCEPT the ones this loop is meant to find.
    if (0 === strpos($class_, 'js-publication')) {
        $href = $element->getAttribute('href');
        // Case-insensitive prefix check on the link target.
        if (0 === stripos($href, 'publication/')) {
            echo $href; // link to the publication
            echo "\n";
        }
    }
}
// Sample markup: a publication anchor wrapping a single <span> that holds the title.
$html = <<<'LOL'
<a class="js-publication" href="publication/247931167">
<span class="publication-title">An approach for textual authoring</span>
</a>
LOL;

// Parse the markup and prepare an XPath evaluator over the resulting document.
$dom = new DOMDocument();
$dom->loadHTML($html);
$xpath = new DOMXpath($dom);

// For each matching anchor print its href followed by its textContent.
// textContent concatenates the text of all descendants, so it yields the
// text of the nested <span> (together with the newlines around it).
$matches = $xpath->query("//a[@class='js-publication']");
for ($i = 0; $i < $matches->length; $i++) {
    $anchor = $matches->item($i);
    echo $anchor->getAttribute('href'), $anchor->textContent;
}
// Output:
// publication/247931167
// An approach for textual authoring
// Direct access to the first match, without a loop. DOMNodeList::item()
// returns null when the query matched nothing, so each result is checked
// before use instead of dereferencing a possibly missing node.
$titleSpan = $xpath->query("//a[@class='js-publication']/span")->item(0);
if ($titleSpan !== null) {
    echo $titleSpan->textContent; // text inside the child <span>
}

$firstAnchor = $xpath->query("//a[@class='js-publication']")->item(0);
if ($firstAnchor !== null) {
    echo $firstAnchor->getAttribute('href'); // link target of the anchor
}