PHP 使用 cURL 解析 RSS 时得不到正确的结果
我只想得到 'channel' 标签的名称,即 channel。当我用这段脚本解析来自 Google 的 RSS 时,脚本工作正常;但是当我把它用于其他提供商的 RSS 时,它输出的是 '#text',而不是预期的 'channel'。下面是我的脚本,请帮帮我。(标签:php, xml, rss)
// Download the feed with perform_curl() and parse the returned text into a DOM tree.
$url = 'http://ibnlive.in.com/ibnrss/rss/sports/cricket.xml';
// NOTE(review): perform_curl() returns false when cURL cannot be initialised, and an
// array without 'remote_content' when the body is not valid XML; neither case is checked here.
$get = perform_curl($url);
$xml = new DOMDocument();
$xml -> loadXML($get['remote_content']);
// documentElement is the root element of the parsed document.
$fetch = $xml -> documentElement;
// NOTE(review): firstChild is the first child NODE of any type, not the first child
// ELEMENT. If the feed has whitespace or a line break right after the root tag, that
// node is a text node and its nodeName is '#text' rather than 'channel'.
$gettitle = $fetch -> firstChild -> nodeName;
echo $gettitle;
/**
 * Download a feed with cURL and verify that the body is well-formed XML.
 *
 * The body is returned as UTF-8 text. It is only re-encoded (treated as ISO-8859-1)
 * when it is not already valid UTF-8, so UTF-8 feeds are no longer double-encoded.
 *
 * @param string $rss_feed_provider_url URL of the RSS/XML document to fetch.
 *
 * @return array|false False when the cURL session cannot be initialised. Otherwise an array:
 *                     - on success: 'handle' => true, 'remote_content' => the XML text;
 *                     - on failure (transfer failed or body is not well-formed XML):
 *                       'handle' => false, 'content_error' => a description of the problem.
 */
function perform_curl($rss_feed_provider_url){
    $curl_handle = curl_init();

    // Without a cURL session nothing can be fetched; callers get false as before.
    if (!$curl_handle) {
        return false;
    }

    curl_setopt($curl_handle, CURLOPT_URL, $rss_feed_provider_url);
    // Keep the HTTP headers out of the returned body.
    curl_setopt($curl_handle, CURLOPT_HEADER, false);
    // Follow redirects announced through a Location header.
    curl_setopt($curl_handle, CURLOPT_FOLLOWLOCATION, true);
    // Have curl_exec() return the body instead of printing it.
    curl_setopt($curl_handle, CURLOPT_RETURNTRANSFER, true);

    // Blocks until the transfer finishes; yields false when the transfer fails.
    $remote_contents = curl_exec($curl_handle);
    curl_close($curl_handle);

    $return_result = array();
    $invalid_source_message = 'INVALID RSS SOURCE, PLEASE CHECK IF THE SOURCE IS A VALID XML DOCUMENT.';

    // A failed transfer or an empty body can never be a valid XML document.
    if (!is_string($remote_contents) || $remote_contents === '') {
        $return_result['handle'] = false;
        $return_result['content_error'] = $invalid_source_message;
        return $return_result;
    }

    // preg_match() with the /u modifier only succeeds on valid UTF-8 input. Re-encoding
    // text that is already UTF-8 would corrupt every non-ASCII character, so convert
    // only when the check fails.
    if (preg_match('//u', $remote_contents) !== 1) {
        if (function_exists('mb_convert_encoding')) {
            $remote_contents = mb_convert_encoding($remote_contents, 'UTF-8', 'ISO-8859-1');
        } else {
            $remote_contents = utf8_encode($remote_contents);
        }
    }

    // Collect libxml parse errors internally instead of silencing warnings with "@",
    // then restore whatever error mode the caller had configured.
    $previous_error_mode = libxml_use_internal_errors(true);
    $handle = simplexml_load_string($remote_contents);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_error_mode);

    if (is_object($handle)) {
        $return_result['handle'] = true;
        $return_result['remote_content'] = $remote_contents;
        return $return_result;
    }

    $return_result['handle'] = false;
    $return_result['content_error'] = $invalid_source_message;
    return $return_result;
}
它给出一个输出“#text”,而不是给出预期输出的“channel”
这是因为 $fetch->firstChild->nodeType 的值是 3,也就是说第一个子节点是一个文本节点,即只是一段文本。您可以通过以下方式选取 channel 元素:
// Look the element up by tag name instead of by position, so whitespace text nodes
// between tags do not matter. item(0) is the first match.
// NOTE(review): item(0) is null when the document has no <channel> element — confirm the feed has one.
echo $fetch->getElementsByTagName('channel')->item(0)->nodeName;
而用 var_dump 查看这个文本节点的内容,会得到:
string(5) "
"
也就是若干空格加一个换行符——由于 XML 的排版格式,它们恰好出现在 XML 标签之间。
PS:您链接的 RSS 提要未能通过验证。请看一下这份 XML——它是带空白缩进排版输出的,因此解析结果本身是正确的:根节点的第一个子节点确实是文本节点。如果您想更轻松地处理它,我建议使用 SimpleXML,或者对 DOMDocument 使用查询(例如 XPath)来获取感兴趣的标签。下面是使用 SimpleXML 的方法:
// Parse the downloaded XML text with SimpleXML and print the title of the first
// <channel> element. Child elements are addressed by name, so whitespace between
// tags is not an issue.
$xml = new SimpleXMLElement($get['remote_content']);
print $xml->channel[0]->title;
// The same two statements appear a second time below (identical to the snippet above).
$xml = new SimpleXMLElement($get['remote_content']);
print $xml->channel[0]->title;