Php 在1次调用中加载外部XML文件并获取html头信息
我有一个php文件,它从另一个站点获取一个xml文件,然后将该信息放入我的数据库 我遇到的问题是,他们的网站在任何1小时内只允许360个请求,所以我试图对其进行编码,以便在抓取文件时检查标题信息 我让它使用Php 在1次调用中加载外部XML文件并获取html头信息,php,xml,html-head,Php,Xml,Html Head,我有一个php文件,它从另一个站点获取一个xml文件,然后将该信息放入我的数据库 我遇到的问题是,他们的网站在任何1小时内只允许360个请求,所以我试图对其进行编码,以便在抓取文件时检查标题信息 我让它使用 $requesttest = 'http://www.footballwebpages.co.uk/teams.xml'; if($requesttest == NULL) return false; $ch = curl_init($requesttest); curl_setop
/*
    Probe request: fetch the base teams feed once and read its HTTP status
    code before requesting the per-competition feeds further down.
    In the code shown, the downloaded body ($data) is not used afterwards;
    every competition is requested again via simplexml_load_file().
*/
$requesttest = 'http://www.footballwebpages.co.uk/teams.xml';
if($requesttest == NULL) return false;
$ch = curl_init($requesttest);
/* 5 second limit for the whole transfer and for the connection phase */
curl_setopt($ch, CURLOPT_TIMEOUT, 5);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
/* make curl_exec() return the response body instead of printing it */
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$data = curl_exec($ch);
$httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);
/* 429 = Too Many Requests: stop before making any further requests */
if($httpcode == 429){
return 'Try again later, too many requests recieved.';
} else if($httpcode>=200 && $httpcode<300){
/* run code to grab xml file */
/* competition ids that are appended to the feed url as ?comp=<id> */
$comps = array ( 0 => 1, /* Premier_League */
1 => 2 /* Championship */
);
$comps_total = count($comps);
$comps_no = 0;
/*
    One additional HTTP request is made per competition here.
    NOTE(review): in this excerpt $comps_no is never incremented and the
    while block is never closed before the else below - the snippet
    appears to have been shortened.
*/
while ($comps_no < $comps_total) {
$url = 'http://www.footballwebpages.co.uk/teams.xml?comp=' . $comps[$comps_no];
$full_list = simplexml_load_file($url);
/* Code for grabbing and storing info from XML */
} else {
/* any status other than 429 or 2xx is reported as the site being offline */
return 'Football Web Pages Offline';
}
$requesttest='1http://www.footballwebpages.co.uk/teams.xml';
如果($requesttest==NULL),则返回false;
$ch=curl\u init($requesttest);
curl_setopt($ch,CURLOPT_超时,5);
curl_setopt($ch,CURLOPT_CONNECTTIMEOUT,5);
curl_setopt($ch,CURLOPT_RETURNTRANSFER,true);
$data=curl\u exec($ch);
$httpcode=curl\u getinfo($ch,CURLINFO\u HTTP\u代码);
卷曲关闭($ch);
如果($httpcode==429){
return“请稍后再试,收到的请求太多”;
}如果($httpcode>=200&$httpcode 1,/*英超联赛*/
1=>2/*冠军*/
);
$comps_total=计数($comps);
$comps_no=0;
而($comps_no<$comps_total){
$url='1http://www.footballwebpages.co.uk/teams.xml?comp=“.$comps[$comps_no];
$full_list=simplexml_load_文件($url);
/*用于从XML中获取和存储信息的代码*/
}否则{
返回“离线足球网页”;
}
目前,它先请求主“teams”页面,查看是否已达到请求限制,然后再获取每个赛事的xml。问题是,如果在第一次检查时只剩下一个可用请求,那么进入下一步时就会失败。如何在加载xml文件的同时检查HTTP响应头信息,而不必先请求一次页面来检查响应头、再请求一次页面来获取xml文件?
基本上就是:如果一次调用返回的HTTP状态码介于200和300之间,就直接加载该xml文件,这样就不会为了获取一个xml页面而浪费两个请求。您可以使用类似下面的方法:去掉对基本url的第一次调用(因为它是多余的),改为根据函数的返回值来判断是否应继续处理:
<?php
/**
 * Request the teams feed for a single competition and report the outcome.
 *
 * Relies on two globals set up by the calling script: $ch, an initialised
 * curl handle, and $url, the base address of the feed. The competition id
 * is appended to $url as the `comp` query parameter.
 *
 * @param mixed $comp Competition id placed in the query string (default 1).
 * @return object stdClass with two properties:
 *                `xmldata` - whatever curl_exec() returned for the request,
 *                `status`  - the CURLINFO_HTTP_CODE value for that request.
 */
function getxml( $comp=1 ){
    global $ch, $url;

    /* point the shared handle at this competition's feed */
    $endpoint = $url . '?comp=' . $comp;
    curl_setopt( $ch, CURLOPT_URL, $endpoint );

    /* body first, then the status code belonging to that same transfer */
    $result = new stdClass;
    $result->xmldata = curl_exec( $ch );
    $result->status = curl_getinfo( $ch, CURLINFO_HTTP_CODE );

    return $result;
}
/*
    Competition name => id used for the `comp` query parameter.
    All the comps available - more than are actually requested below.
*/
$comps=array(
    'Barclays_Premier_League' => 1,
    'Sky_Bet_Championship' => 2,
    'Sky_Bet_League_One' => 3,
    'Sky_Bet_League_Two' => 4,
    'National_League' => 5,
    'National_League_North' => 6,
    'National_League_South' => 7,
    'Evo-Stik_Southern_League_Premier_Division' => 8,
    'Evo-Stik_Southern_League_Division_One_Central' => 9,
    'Evo-Stik_Southern_League_Division_One_South_&_West' => 10,
    'Ryman_League_Premier_Division' => 11,
    'Ryman_League_Division_One_North' => 12,
    'Ryman_League_Division_One_South' => 13,
    'Evo-Stik_League_Premier_Division' => 14,
    'Evo-Stik_League_Division_One_North' => 15,
    'Evo-Stik_League_Division_One_South' => 16,
    'Scottish_Premiership' => 17,
    'Scottish_Championship' => 18,
    'Scottish_League_One' => 19,
    'Scottish_League_Two' => 20
);
/* only interested in first two */
$comps=array_slice( $comps, 0, 2, true );

/* DOMDocument (rather than SimpleXML) is used to process the xml data */
$dom=new DOMDocument;
/*
    Keep libxml parse errors internal so a malformed response does not
    emit warnings into the page; loadXML() still reports failure by
    returning false. The previous setting is restored after the loop.
*/
$previous_libxml_setting=libxml_use_internal_errors( true );

/* base url */
$url= 'http://www.footballwebpages.co.uk/teams.xml';

/*
    initialise curl request object but
    set the url for each $comp in the function
*/
$ch = curl_init();
curl_setopt( $ch, CURLOPT_TIMEOUT, 5 );
curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, 5 );
curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );

/*
    If there have been too many requests when launching
    the 429 condition should break out of the entire loop -
    thus using only 1 request
*/
foreach( $comps as $key => $comp ){
    $xml=getxml( $comp );

    switch( $xml->status ){
        case 429:
            echo 'Try again later, too many requests recieved.';
            break 2;

        case 200:
            /*
                A 200 with an empty or unparseable body cannot be processed.
                loadXML() rejects an empty string outright, and after a failed
                parse $dom must not be read, so treat this like any other
                unusable response and stop.
            */
            if( !is_string( $xml->xmldata ) || $xml->xmldata==='' || !$dom->loadXML( $xml->xmldata ) ){
                libxml_clear_errors();
                echo 'Football Web Pages Offline';
                break 2;
            }

            /* example of processing xml data */
            /* item(0) is null when the feed has no <competition> element */
            $competition=$dom->getElementsByTagName('competition')->item(0);
            $heading=$competition ? $competition->nodeValue : '';

            /* the feed is remote data: escape it before writing it into HTML */
            echo "\n<h1>".htmlspecialchars( $heading, ENT_QUOTES, 'UTF-8' )."</h1>\n<ul>";

            foreach( $dom->getElementsByTagName('team') as $team ){
                /*
                    Child nodes 1 and 3 are read positionally, as before.
                    NOTE(review): these indexes presumably skip whitespace
                    text nodes between child elements - confirm against the feed.
                */
                $first=$team->childNodes->item(1);
                $second=$team->childNodes->item(3);

                /* only print teams that actually have both nodes */
                if( $first && $second ){
                    echo '<li>'
                        .htmlspecialchars( $first->nodeValue, ENT_QUOTES, 'UTF-8' )
                        .', '
                        .htmlspecialchars( $second->nodeValue, ENT_QUOTES, 'UTF-8' )
                        .'</li>';
                }
            }

            echo "\n</ul>";
            break;

        default:/* If no response or an unknown response exit */
            echo 'Football Web Pages Offline';
            break 2;
    }
}

curl_close( $ch );
/* put libxml error handling back the way it was found */
libxml_use_internal_errors( $previous_libxml_setting );
$dom=$ch=$comps=null;
?>
$data,
“状态”=>$status
);
}
/*所有可用组件-超过指定值*/
$comps=数组(
“巴克莱超级联赛”=>1,
“天空赌王锦标赛”=>2,
“天空打赌联盟第一”=>3,
“天空打赌联盟二号”=>4,
“国家联盟”=>5,
“全国北方联盟”=>6,
“国家联盟南部”=>7,
“Evo-Stik南部联赛超级联赛”=>8,
“Evo-Stikúu Southern戋u League戋Division戋u One戋u Central”=>9,
“Evo-Stik南部联盟分区南部和西部”=>10,
“莱曼联赛超级联赛”=>11,
“莱曼联盟北一区”=>12,
“莱曼联盟分区南部”=>13,
“Evo-Stik联赛超级联赛”=>14,
“Evo-Stik联赛分区一北”=>15,
“Evo-Stik联赛分区南部”=>16,
“苏格兰超级联赛”=>17,
“苏格兰足球锦标赛”=>18,
“苏格兰第一联赛”=>19,
“苏格兰二级联赛”=>20
);
/*只对前两个感兴趣*/
$comps=array\u slice($comps,0,2,true);
/*我不使用simple_xml()-用于处理xml数据*/
$dom=新的DOMDocument;
/*基本url*/
$url='1http://www.footballwebpages.co.uk/teams.xml';
/*
初始化curl请求对象,但
为函数中的每个$comp设置url
*/
$ch=curl_init();
curl_setopt($ch,CURLOPT_超时,5);
curl_setopt($ch,CURLOPT_CONNECTTIMEOUT,5);
curl_setopt($ch,CURLOPT_RETURNTRANSFER,true);
/*
如果启动时请求过多
429条件应该打破整个循环-
因此仅使用1个请求
*/
foreach($comps as$key=>$comp){
$xml=getxml($comp);
开关($xml->status){
案例429:echo“稍后再试,收到的请求太多”;中断2;
案例200:
/*如果一切正常,请处理$xml*/
$dom->loadXML($xml->xmldata);
/*处理xml数据的示例*/
回声'
“.$dom->getElementsByTagName('competition')->item(0)->nodeValue。”
”;
$col=$dom->getElementsByTagName('team');
如果($col){
foreach($col as$team)回显“- ”。$team->childNodes->item(1)->nodeValue.”,“。$team->childNodes->item(3)->nodeValue.”
”;
}
回声'
';
打破
默认值:/*如果没有响应或未知响应退出*/
echo“离线足球网页”;
破口2;
}
}
卷曲关闭($ch);
$dom=$ch=$comps=null;
?>
while ($comps_no < $comps_total) {
~ 这个循环没有递增计数器——它会一直运行下去……而且你也没有闭合这个循环。是的,我删减了代码,因为它很长 :) 完整的代码中是有递增的