Php 在1次调用中加载外部XML文件并获取html头信息_Php_Xml_Html Head

Php 在1次调用中加载外部XML文件并获取html头信息

php xml

Php 在1次调用中加载外部XML文件并获取html头信息,php,xml,html-head,Php,Xml,Html Head,我有一个php文件，它从另一个站点获取一个xml文件，然后将该信息放入我的数据库我遇到的问题是，他们的网站在任何1小时内只允许360个请求，所以我试图对其进行编码，以便在抓取文件时检查标题信息我让它使用 $requesttest = 'http://www.footballwebpages.co.uk/teams.xml'; if($requesttest == NULL) return false; $ch = curl_init($requesttest); curl_setop

我有一个php文件，它从另一个站点获取一个xml文件，然后将该信息放入我的数据库

我遇到的问题是，他们的网站在任意1小时内只允许360个请求，所以我想在抓取文件的同时检查响应头信息（HTTP状态码）。

我目前是这样实现的：

/* Base teams feed; it is requested first only so its HTTP status code can be inspected. */
$requesttest = 'http://www.footballwebpages.co.uk/teams.xml';
if($requesttest == NULL) return false;  
$ch = curl_init($requesttest);  
curl_setopt($ch, CURLOPT_TIMEOUT, 5);  
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);  
/* Make curl_exec() return the response body instead of printing it. */
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);  
/* NOTE(review): $data is not read again anywhere in this excerpt - only the status code is used. */
$data = curl_exec($ch);  
$httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);  
curl_close($ch); 

/* 429 = too many requests; any 2xx status means the probe succeeded. */
if($httpcode == 429){
    return 'Try again later, too many requests recieved.';
} else if($httpcode>=200 && $httpcode<300){
    /* run code to grab xml file */
    $comps = array (    0 => 1, /* Premier_League */
                    1 => 2 /* Championship */ 
                    );
    $comps_total = count($comps);
    $comps_no = 0;

    /*
        Each competition is fetched with its own simplexml_load_file() call,
        i.e. one more request per competition on top of the probe above.
        NOTE(review): as posted, this excerpt never increments $comps_no and
        never closes the while block.
    */
    while ($comps_no < $comps_total) {
        $url = 'http://www.footballwebpages.co.uk/teams.xml?comp=' . $comps[$comps_no];
        $full_list = simplexml_load_file($url);
        /* Code for grabbing and storing info from XML */
} else {
    /* Any status other than 429 or 2xx is reported as the site being offline. */
    return 'Football Web Pages Offline';
}

$requesttest='1http://www.footballwebpages.co.uk/teams.xml';
如果（$requesttest==NULL），则返回false；
$ch=curl\u init（$requesttest）；
curl_setopt（$ch，CURLOPT_超时，5）；
curl_setopt（$ch，CURLOPT_CONNECTTIMEOUT，5）；
curl_setopt（$ch，CURLOPT_RETURNTRANSFER，true）；
$data=curl\u exec（$ch）；
$httpcode=curl\u getinfo（$ch，CURLINFO\u HTTP\u代码）；
卷曲关闭（$ch）；
如果（$httpcode==429）{
return“请稍后再试，收到的请求太多”；
}如果（$httpcode>=200&$httpcode 1，/*英超联赛*/
1=>2/*冠军*/
);
$comps_total=计数（$comps）；
$comps_no=0；
而（$comps_no<$comps_total）{
$url='1http://www.footballwebpages.co.uk/teams.xml?comp=“.$comps[$comps_no]；
$full_list=simplexml_load_文件（$url）；
/*用于从XML中获取和存储信息的代码*/
}否则{
返回“离线足球网页”；
}

目前，它先检查主“teams”页面，确认是否已达到请求限制，然后再获取各项赛事的xml。问题是，如果第一次检查时只剩一个可用请求，那么进入下一阶段时就会失败。如何在加载xml文件的同时检查响应头信息，而不必先请求一次页面检查头信息、再请求一次页面获取xml文件？

基本上就是：在一次调用中，如果HTTP状态码介于200和300之间，就直接加载xml文件，这样就不必为获取一个xml页面而浪费两个请求。

您可以使用类似下面的方法：省去对基本url的第一次调用（它是多余的），改用函数的返回值来判断是否应继续处理：

<?php
    /*
        Fetch the feed for one competition over the shared curl handle.

        $comp   - competition id, appended to the base url as ?comp=<id> (default 1)
        returns - object with two properties:
                    xmldata : whatever curl_exec() returned for the request
                    status  : the HTTP status code curl reports for that request

        Uses the global $ch (curl handle) and $url (base url); both must be
        set before the first call.
    */
    function getxml( $comp=1 ){
        global $ch, $url;

        curl_setopt( $ch, CURLOPT_URL, "{$url}?comp={$comp}" );

        $response = new stdClass;
        $response->xmldata = curl_exec( $ch );
        $response->status  = curl_getinfo( $ch, CURLINFO_HTTP_CODE );

        return $response;
    }
    /* base url of the teams feed; getxml() appends ?comp=<id> to it */
    $url= 'http://www.footballwebpages.co.uk/teams.xml';

    /*
        Every competition the feed offers - more than are used here.
        The list is numbered from 1, so flipping it gives name => id.
    */
    $comps=array_flip( array(
        1 => 'Barclays_Premier_League',
        'Sky_Bet_Championship',
        'Sky_Bet_League_One',
        'Sky_Bet_League_Two',
        'National_League',
        'National_League_North',
        'National_League_South',
        'Evo-Stik_Southern_League_Premier_Division',
        'Evo-Stik_Southern_League_Division_One_Central',
        'Evo-Stik_Southern_League_Division_One_South_&_West',
        'Ryman_League_Premier_Division',
        'Ryman_League_Division_One_North',
        'Ryman_League_Division_One_South',
        'Evo-Stik_League_Premier_Division',
        'Evo-Stik_League_Division_One_North',
        'Evo-Stik_League_Division_One_South',
        'Scottish_Premiership',
        'Scottish_Championship',
        'Scottish_League_One',
        'Scottish_League_Two'
    ) );

    /* keep only the first two entries, preserving their name keys */
    $comps=array_slice( $comps, 0, 2, true );

    /*
        One curl handle is shared by every request; the url itself is
        set per competition inside getxml().
    */
    $ch = curl_init();
    curl_setopt_array( $ch, array(
        CURLOPT_TIMEOUT         => 5,
        CURLOPT_CONNECTTIMEOUT  => 5,
        CURLOPT_RETURNTRANSFER  => true
    ) );

    /* DOMDocument (rather than simple_xml) is used to process the xml data */
    $dom=new DOMDocument;

    /*
        Fetch and render each competition using exactly one request per feed.
        The HTTP status of that same request decides what happens next:
          429   - rate limit reached: report it and leave the whole loop, so
                  only that single request has been spent
          200   - parse the body and print the competition with its teams
          other - no response or an unknown response: report the site as
                  offline and leave the whole loop
    */

    /* Collect xml parse errors internally instead of emitting PHP warnings */
    $libxml_previous = libxml_use_internal_errors( true );

    /* Text of a node, escaped for HTML output; empty string when the node is missing */
    $text = function( $node ){
        return htmlspecialchars( $node ? $node->nodeValue : '', ENT_QUOTES, 'UTF-8' );
    };

    foreach( $comps as $key => $comp ){
        $xml=getxml( $comp );
        switch( $xml->status ){
            case 429: echo 'Try again later, too many requests recieved.'; break 2;
            case 200:
                /*
                    A 200 with an empty, non-string or malformed body cannot be
                    processed (loadXML() rejects an empty string and returns
                    false on bad markup) - treat it like an unknown response.
                */
                if( !is_string( $xml->xmldata ) || $xml->xmldata === '' || !$dom->loadXML( $xml->xmldata ) ){
                    echo 'Football Web Pages Offline';
                    break 2;
                }

                /* example of processing xml data */
                echo '
                <h1>'.$text( $dom->getElementsByTagName('competition')->item(0) ).'</h1>
                    <ul>';

                /*
                    getElementsByTagName() always returns a list (possibly empty),
                    so it can be iterated directly.
                    NOTE(review): child indexes 1 and 3 presumably skip the
                    whitespace text nodes between elements of the feed -
                    confirm against the live xml.
                */
                foreach( $dom->getElementsByTagName('team') as $team ){
                    echo '<li>'.$text( $team->childNodes->item(1) ).', '.$text( $team->childNodes->item(3) ).'</li>';
                }
                echo '
                    </ul>';
            break;
            default:/* If no response or an unknown response exit */
                echo 'Football Web Pages Offline';
            break 2;
        }
    }

    /* Drop collected parse errors and restore the previous libxml error mode */
    libxml_clear_errors();
    libxml_use_internal_errors( $libxml_previous );

    curl_close( $ch ); 
    $dom=$ch=$comps=null;
?>

$data，
“状态”=>$status
);
}
/*所有可用组件-超过指定值*/
$comps=数组（
“巴克莱超级联赛”=>1，
“天空赌王锦标赛”=>2，
“天空打赌联盟第一”=>3，
“天空打赌联盟二号”=>4，
“国家联盟”=>5，
“全国北方联盟”=>6，
“国家联盟南部”=>7，
“Evo-Stik南部联赛超级联赛”=>8，
“Evo-Stikúu Southern戋u League戋Division戋u One戋u Central”=>9，
“Evo-Stik南部联盟分区南部和西部”=>10，
“莱曼联赛超级联赛”=>11，
“莱曼联盟北一区”=>12，
“莱曼联盟分区南部”=>13，
“Evo-Stik联赛超级联赛”=>14，
“Evo-Stik联赛分区一北”=>15，
“Evo-Stik联赛分区南部”=>16，
“苏格兰超级联赛”=>17，
“苏格兰足球锦标赛”=>18，
“苏格兰第一联赛”=>19，
“苏格兰二级联赛”=>20
);
/*只对前两个感兴趣*/
$comps=array\u slice（$comps，0，2，true）；
/*我不使用simple_xml（）-用于处理xml数据*/
$dom=新的DOMDocument；
/*基本url*/
$url='1http://www.footballwebpages.co.uk/teams.xml';
/* 
初始化curl请求对象，但
为函数中的每个$comp设置url
*/
$ch=curl_init（）；
curl_setopt（$ch，CURLOPT_超时，5）；
curl_setopt（$ch，CURLOPT_CONNECTTIMEOUT，5）；
curl_setopt（$ch，CURLOPT_RETURNTRANSFER，true）；
/* 
如果启动时请求过多
429条件应该打破整个循环-
因此仅使用1个请求
*/
foreach（$comps as$key=>$comp）{
$xml=getxml（$comp）；
开关（$xml->status）{
案例429:echo“稍后再试，收到的请求太多”；中断2；
案例200：
/*如果一切正常，请处理$xml*/
$dom->loadXML（$xml->xmldata）；
/*处理xml数据的示例*/
回声'
“.$dom->getElementsByTagName（'competition'）->item（0）->nodeValue。”
”；
$col=$dom->getElementsByTagName（'team'）；
如果（$col）{
foreach（$col as$team）回显“”。$team->childNodes->item（1）->nodeValue.”，“。$team->childNodes->item（3）->nodeValue.””；
}
回声'
'；
打破
默认值：/*如果没有响应或未知响应退出*/
echo“离线足球网页”；
破口2；
}
}
卷曲关闭（$ch）；
$dom=$ch=$comps=null；
?>

while ($comps_no < $comps_total) {

循环没有递增——它会一直循环下去……而且你也没有闭合这个循环。

是的，我把代码删减了，因为它很长：）完整的代码中有递增。