Warning: file_get_contents(/data/phpspider/zhask/data//catemap/1/php/261.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
PHP Curl下载问题_Php_Arrays_Curl - Fatal编程技术网

PHP Curl下载问题

PHP Curl下载问题,php,arrays,curl,Php,Arrays,Curl,我有一个函数,它将url数组作为输入。我已经验证了url是正确的,我可以完美地循环它们。我还使用curl_getinfo验证了curl下载的页面是否正确。但是,curl(html)的输出对于每个页面都是相同的。这是我的代码: $urls = array(); $urls = getpages($mainpage); print_r($urls); foreach($urls as $link) { echo $link. '&l

我有一个函数,它将url数组作为输入。我已经验证了url是正确的,我可以完美地循环它们。我还使用curl_getinfo验证了curl下载的页面是否正确。但是,curl(html)的输出对于每个页面都是相同的。这是我的代码:

          // Gather every page URL reachable from the main page and dump the list for inspection.
          $urls = getpages($mainpage);
          print_r($urls);

          // Fetch the pages one by one: print the URL first, then whatever my_curl() returned for it.
          foreach ($urls as $link) {
              echo $link, '<br><br><br>';

              $circdl = my_curl($link);
              echo $circdl, '<br><br><br>';

              // Drop the downloaded markup before moving on to the next URL.
              $circdl = NULL;
          }
$link 的输出是正确的,curl_getinfo 中的 url 也是如此。我已经用这个循环运行过另一个 url 数组,它们工作得很好,但我怀疑这里的问题在于 url 的格式(符号)。我真的很困惑,为什么这些页面没有按预期下载。

下面是my_curl函数:

 /**
  * Download a URL with cURL while sending browser-like request headers.
  *
  * Redirects are followed, the transfer is limited to 10 seconds, and cookies
  * are read from and written back to cookies.txt inside the directory that
  * drupal_get_path('module', 'mymodule') returns.
  *
  * @param string $url Address to fetch; it is handed to cURL unchanged.
  *
  * @return string|false The response body on success, FALSE on failure
  *                      (a diagnostic is echoed first).
  */
 function my_curl($url)
 {
     $timeout      = 10;
     $error_report = TRUE;

     $curl = curl_init();
     if ($curl === FALSE)
     {
         // Without a handle nothing below can work, so report and bail out early.
         if ($error_report)
         {
             echo "CURL FAIL: $url could not initialise a cURL handle";
         }
         return FALSE;
     }

     $cookiepath = drupal_get_path('module','mymodule'). '/cookies.txt';

     // HEADERS AND OPTIONS APPEAR TO BE A FIREFOX BROWSER REFERRED BY GOOGLE
     $header = array(
         "Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
         "Cache-Control: max-age=0",
         "Connection: keep-alive",
         "Keep-Alive: 300",
         "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7",
         "Accept-Language: en-us,en;q=0.5",
         "Pragma: ", // BROWSERS USUALLY LEAVE BLANK
     );

     // SET THE CURL OPTIONS - SEE http://php.net/manual/en/function.curl-setopt.php
     curl_setopt( $curl, CURLOPT_URL,            $url  );
     curl_setopt( $curl, CURLOPT_USERAGENT,      'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'  );
     curl_setopt( $curl, CURLOPT_HTTPHEADER,     $header  );
     curl_setopt( $curl, CURLOPT_REFERER,        'http://www.google.com'  );
     curl_setopt( $curl, CURLOPT_ENCODING,       'gzip,deflate'  );
     curl_setopt( $curl, CURLOPT_AUTOREFERER,    TRUE  );
     curl_setopt( $curl, CURLOPT_RETURNTRANSFER, TRUE  );
     curl_setopt( $curl, CURLOPT_FOLLOWLOCATION, TRUE  );
     curl_setopt( $curl, CURLOPT_COOKIEFILE,     $cookiepath );
     curl_setopt( $curl, CURLOPT_COOKIEJAR,      $cookiepath );
     curl_setopt( $curl, CURLOPT_TIMEOUT,        $timeout  );

     // RUN THE CURL REQUEST AND GET THE RESULTS
     $htm = curl_exec($curl);

     // ON FAILURE REPORT BOTH THE ERROR NUMBER AND ITS TEXT, THEN THE TRANSFER INFO
     if ($htm === FALSE && $error_report)
     {
         $err = curl_errno($curl);
         $msg = curl_error($curl);
         $inf = curl_getinfo($curl);
         echo "CURL FAIL: $url TIMEOUT=$timeout, CURL_ERRNO=$err, CURL_ERROR=$msg";
         var_dump($inf);
     }

     // The handle is released exactly once, whichever way the request ended.
     curl_close($curl);

     // $htm IS THE XML / HTML STRING ON SUCCESS AND FALSE ON FAILURE
     return $htm;
 }

非常有趣的是,如果我运行这个:

 // Fetching one hard-coded page URL directly, outside the loop.
 echo my_curl('http://www.site.com/savings/viewcircular?promotionId=81498&sneakpeek=&currentPageNumber=2');
输出正确!!??:(


感谢您的帮助!

我发现问题出在传递给我的函数的 url 的编码上。我错误地去掉了编码,并在 url 末尾拼接了一个“人类可读”的结尾,这导致主机无法正确识别页面。我的解决办法是违背自己原本的判断,不再对 url 做编码处理。之后再传入数组时,页面就能正确加载了。感谢所有查看此页面的人。这个问题真的让我困惑了很久。

下面是我的一段代码以供解释:

 /**
  * Build the list of page URLs for a paginated listing.
  *
  * Downloads $url, reads the li.last-page element to learn the last page
  * number and the href of its link, then produces one URL per page number
  * by swapping the page number at the end of that href.
  *
  * Relies on the global $host, which is prepended to every generated link.
  *
  * @param string $url Address of a page containing the pagination markup.
  *
  * @return array URLs for pages 1..last; empty when the download fails or
  *               no positive last page number is found.
  */
 function getpages($url) {
     global $host;

     $circdl = my_curl($url);
     if ($circdl === FALSE) {
         // The download failed, so there is no markup to parse.
         return array();
     }

     $circqp = htmlqp($circdl,'body');

     //Extract last page number and the href of the last-page link
     $lastpagenumber    = $circqp->branch()->find('li[class="last-page"]')->text();
     $lastpagenumberurl = (string) $circqp->branch()->find('li[class="last-page"]')->children('a')->attr('href');

     //Extract page link root: drop the trailing "=<page number>" whatever its
     //digit count (cutting a fixed 2 characters broke for page numbers >= 10)
     $pagelinkroot = preg_replace('/=\d+\z/', '', $lastpagenumberurl);
     $lpn          = intval($lastpagenumber);

     //Generate one link per page, from the first up to the last
     $pagelinks = array();
     for ($i = 1; $i <= $lpn; ++$i) {
         $pagelinks[] = $host . $pagelinkroot . '=' . $i;
     }
     return $pagelinks;
 }
function getpages($url) { global $host; $circdl = my_curl($url); $circqp = htmlqp($circdl,'body'); //Extract last page number $lastpagenumber = $circqp->branch()->find('li[class="last-page"]')->text(); $lastpagenumberurl = $circqp->branch()->find('li[class="last-page"]')->children('a')->attr('href'); //Extract page link root $pagelinkroot = substr_replace($lastpagenumberurl,"",-2); $currentpage = "="; $lpn = intval($lastpagenumber); //Move through the remaining pages $pagelinks = array(); for ($i = 1; $i …

您可以发布 my_curl() 方法的代码吗?它似乎才是包含相关代码的函数。

我刚刚创建了一个包含两个页面的数组,并在循环中运行了它,结果很好。我能看到的唯一区别是$link变量显示的是:而不是这个。我肯定认为这是一个编码问题。
 /**
  * Build the list of page URLs for a paginated listing.
  *
  * Downloads $url, reads the li.last-page element to learn the last page
  * number and the href of its link, then produces one URL per page number
  * by swapping the page number at the end of that href.
  *
  * Relies on the global $host, which is prepended to every generated link.
  *
  * @param string $url Address of a page containing the pagination markup.
  *
  * @return array URLs for pages 1..last; empty when the download fails or
  *               no positive last page number is found.
  */
 function getpages($url) {
     global $host;

     $circdl = my_curl($url);
     if ($circdl === FALSE) {
         // The download failed, so there is no markup to parse.
         return array();
     }

     $circqp = htmlqp($circdl,'body');

     //Extract last page number and the href of the last-page link
     $lastpagenumber    = $circqp->branch()->find('li[class="last-page"]')->text();
     $lastpagenumberurl = (string) $circqp->branch()->find('li[class="last-page"]')->children('a')->attr('href');

     //Extract page link root: drop the trailing "=<page number>" whatever its
     //digit count (cutting a fixed 2 characters broke for page numbers >= 10)
     $pagelinkroot = preg_replace('/=\d+\z/', '', $lastpagenumberurl);
     $lpn          = intval($lastpagenumber);

     //Generate one link per page, from the first up to the last
     $pagelinks = array();
     for ($i = 1; $i <= $lpn; ++$i) {
         $pagelinks[] = $host . $pagelinkroot . '=' . $i;
     }
     return $pagelinks;
 }