Php 避免使用box xpath和curl的标题
Iam正在编写代码以使用xpath和curl获取web数据 这些代码得到了ulli的包含和使用 但是我不想得到头球 我编写以下代码以避免出现标题,但无法Php 避免使用box xpath和curl的标题,php,html,curl,xpath,domdocument,Php,Html,Curl,Xpath,Domdocument,Iam正在编写代码以使用xpath和curl获取web数据 这些代码得到了ulli的包含和使用 但是我不想得到头球 我编写以下代码以避免出现标题,但无法 if($row->item(0)->tagName != '<ul class="graybg"><li>مدل خودرو</li> <li>مشخصات</li><li>قیمت نمایندگی</li><li>قیمت بازا
if($row->item(0)->tagName != '<ul class="graybg"><li>مدل خودرو</li> <li>مشخصات</li><li>قیمت نمایندگی</li><li>قیمت بازار آزاد</li></ul>')
<代码>如果(<代码>如果($ch = curl_init ("http://www.pedal.ir/price/");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_ENCODING, 'UTF-8');
$page = curl_exec($ch);
$dom = new DOMDocument('1.0', 'utf-8');
libxml_use_internal_errors(true);
$dom->loadHTML($page);
libxml_clear_errors();
$xpath = new DOMXpath($dom);
$data = array();
$table_rows = $xpath- >query('/html/body/div/div[1]/div/div/div/div/div/div/div[2]/ul '); // target the row (the browser rendered <tbody>, but actually it really doesnt have one)
if($table_rows->length <= 0) { // exit if not found
echo 'no table rows found';
exit;
}
foreach($table_rows as $tr) { // foreach row
$row = $tr->childNodes;
if($row->item(0)->tagName != '<ul class="graybg"><li>مدل خودرو</li> <li>مشخصات</li><li>قیمت نمایندگی</li><li>قیمت بازار آزاد</li></ul>') { // avoid headers
$data[] = array(
'moled' =>trim($row->item(0)->nodeValue),
'detail' => trim($row->item(2)->nodeValue),
'pricenama' => trim($row->item(4)->nodeValue),
'pricebaza' => trim($row->item(6)->nodeValue),
);
}
}
echo '<pre>';
print_r($data);;
$ch=curl\u init(“http://www.pedal.ir/price/");
curl_setopt($ch,CURLOPT_RETURNTRANSFER,true);
curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0(Windows;U;Windows NT 5.1;en-US;rv:1.8.1.13)Gecko/20080311 Firefox/2.0.0.13');
curl_setopt($ch,CURLOPT_头,0);
curl_setopt($ch,CURLOPT_编码,'UTF-8');
$page=curl\u exec($ch);
$dom=新的DOMDocument('1.0','utf-8');
libxml\u使用\u内部错误(true);
$dom->loadHTML($page);
libxml_clear_errors();
$xpath=newdomxpath($dom);
$data=array();
$table_rows=$xpath->query('/html/body/div/div[1]/div/div/div/div/div/div/div[2]/ul');//以行为目标(浏览器呈现,但实际上它没有)
如果($table_行->长度子节点;
如果(Ro行-($Ro行->项目(0)中的(($Ro行-($Ro行->项目(0))))->((($Ro行-(($Ro行-(($Ro行-(($Ro行-((($Ro行-<(($Ro行-((($Ro行-<(($Ro行-((((((((($)从从从从从从从从从从从从从从从从从从从从从从从从从从从从从从从从从从从从从从从从)0)0)0))))))->((((((((<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'){//避免标题
$data[]=数组(
“moled”=>trim($row->item(0)->nodeValue),
“细节”=>trim($row->item(2)->nodeValue),
“pricenama”=>trim($row->item(4)->nodeValue),
“pricebaza”=>trim($row->item(6)->nodeValue),
);
}
}
回声';
打印($data);;
您可以将谓词[非(@class)]
添加到xpath中,以筛选出
具有类
属性:
foreach($table_rows as $tr) { // foreach row
$row = $tr->childNodes;
if($row->item(0)->parentNode->getAttribute('class') !== 'graybg') { // avoid headers
$data[] = array(
'moled' =>trim($row->item(0)->nodeValue),
'detail' => trim($row->item(2)->nodeValue),
'pricenama' => trim($row->item(4)->nodeValue),
'pricebaza' => trim($row->item(6)->nodeValue),
);
}
}
无论如何,绝对路径是不可靠的,因为它往往会由于HTML源代码的轻微更改而中断。请尝试在元素的
id
或类
的基础上构建xpath。作为一种替代方法,由于标头具有标识它的不同类,因此可以将其包含在检查中:
foreach($table_rows as $tr) { // foreach row
$row = $tr->childNodes;
if($row->item(0)->parentNode->getAttribute('class') !== 'graybg') { // avoid headers
$data[] = array(
'moled' =>trim($row->item(0)->nodeValue),
'detail' => trim($row->item(2)->nodeValue),
'pricenama' => trim($row->item(4)->nodeValue),
'pricebaza' => trim($row->item(6)->nodeValue),
);
}
}