Php 从阻止CURL的页面获取HTML

Php 从阻止CURL的页面获取HTML,php,curl,fopen,Php,Curl,Fopen,我被要求从一个页面上抓取某一行,但似乎网站已经阻止了CURL请求 有问题的地点是 我试着改变UserAgent,看看他们是否阻止了它,但似乎没有奏效 我使用的代码如下: <?php $curl_handle=curl_init(); //This is the URL you would like the content grabbed from curl_setopt($curl_handle, CURLOPT_USERAGENT, "Mozilla/5.0"); curl_setop

我被要求从一个页面上抓取某一行,但似乎网站已经阻止了CURL请求

有问题的网站是 http://www.habbo.com/home/Intricat

我试着改变UserAgent,看看他们是否阻止了它,但似乎没有奏效

我使用的代码如下:

<?php

// Build a cURL request for the target page, identifying as a generic browser.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'http://www.habbo.com/home/Intricat');
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0");
// Stop waiting after 2 seconds if the connection cannot be established,
// so the fallback message below is shown instead of the script hanging.
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 2);
// Hand the response back as a string instead of echoing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

$html = curl_exec($ch);
// Release the handle once the transfer is done.
curl_close($ch);

// curl_exec() gives false on failure; an empty body is treated the same way.
// Edit the fallback text as needed, keeping it inside the double quotes.
$fallback = "Sorry, It seems our weather resources are currently unavailable, please check back later.";
print empty($html) ? $fallback : $html;
?>

如果他们阻止了CURL请求,我可以从该页面获取一行代码吗


编辑:在我的服务器上运行 curl -i 时,该站点似乎会先设置一个 cookie?

打开浏览器,复制它实际发送的完整请求头并原样发送。由于请求看起来完全相同,该站点无法判断您是在使用 cURL。
如果站点使用了 Cookie,请把它作为请求头一并附上。

使用浏览器复制它实际发送的完整请求头并原样发送。由于请求看起来完全相同,该站点无法判断您是在使用 cURL。
如果站点使用了 Cookie,请把它作为请求头一并附上。

这是我多年前做的Curl类的剪切粘贴,希望你能自己从中挑选一些宝石

/**
 * Fetch $url with the object's shared cURL handle and return the body.
 *
 * Configures the handle from the object's settings (user agent, cookie
 * file, optional referer, redirect policy), then executes the request,
 * retrying while the transfer fails or yields an empty body. The result of
 * curl_getinfo() for the last attempt is stored in $this->response.
 *
 * @param string $url Address to request.
 * @return string|false Response body; false if the last attempt failed;
 *                      "" if no attempt was made (negative retry_attempts).
 */
function get_url($url)
{
    curl_setopt_array($this->ch, array(
        CURLOPT_URL            => $url,
        CURLOPT_USERAGENT      => $this->user_agent,
        // Read and write the same file so cookies set by one response are
        // sent back on the next request.
        CURLOPT_COOKIEFILE     => $this->cookie_name,
        CURLOPT_COOKIEJAR      => $this->cookie_name,
        CURLOPT_SSL_VERIFYHOST => 2,
        CURLOPT_HEADER         => 0,
        CURLOPT_FOLLOWLOCATION => $this->follow ? 1 : 0,
        CURLOPT_RETURNTRANSFER => 1,
        // "*/*" is the wildcard media range; the previous "*.*" is not a
        // valid Accept value.
        CURLOPT_HTTPHEADER     => array("Accept: text/html,text/vnd.wap.wml,*/*"),
        // NOTE(review): peer verification is disabled so HTTPS works without
        // a CA bundle; this accepts any certificate. Enable it if a CA
        // bundle is available.
        CURLOPT_SSL_VERIFYPEER => FALSE,
    ));
    if (!is_null($this->referer))
    {
        curl_setopt($this->ch, CURLOPT_REFERER, $this->referer);
    }

    $try = 0;
    $result = "";
    // One initial attempt plus up to $this->retry_attempts retries. The
    // check is explicit rather than empty() because a body of "0" is a
    // valid response and must not trigger another request.
    while (($try <= $this->retry_attempts) && ($result === false || $result === ""))
    {
        $try++;
        $result = curl_exec($this->ch);
        // Transfer details of this attempt; http_code 4xx indicates an error.
        $this->response = curl_getinfo($this->ch);
    }

    // Use this URL as the referer for the next request when configured to.
    if ($this->referer_to_last)
    {
        $this->set_referer($url);
    }

    return $result;
}
函数get_url($url) { curl_setopt($this->ch,CURLOPT_URL,$URL); curl\u setopt($this->ch,CURLOPT\u USERAGENT,$this->user\u agent); curl\u setopt($this->ch,CURLOPT\u COOKIEFILE,$this->cookie\u name); curl\u setopt($this->ch,CURLOPT\u COOKIEJAR,$this->cookie\u name); 如果(!为空($this->referer)) { curl\u setopt($this->ch,CURLOPT\u REFERER,$this->REFERER); } curl_setopt($this->ch,CURLOPT_SSL_VERIFYHOST,2); curl_setopt($this->ch,CURLOPT_头,0); 如果($this->follow) { curl_setopt($this->ch,CURLOPT_FOLLOWLOCATION,1); } 其他的 { curl_setopt($this->ch,CURLOPT_FOLLOWLOCATION,0); } curl_setopt($this->ch,CURLOPT_RETURNTRANSFER,1); curl_setopt($this->ch,CURLOPT_HTTPHEADER,array(“Accept:text/html,text/vnd.wap.wml,**)); curl_setopt($this->ch,CURLOPT_SSL_VERIFYPEER,FALSE);//这一行使它在https下工作 $try=0; $result=“”; while($tryretry_尝试次数)&&&(empty($result))//强制重试最多5次 { $try++; $result=curl\u exec($this->ch); $this->response=curl\u getinfo($this->ch); //$response['http_code']4xx是一个错误 } //将引用URL设置为下一页的当前URL。 如果($this->referer\u to\u last)$this->set\u referer($url); 返回$result; }
这是我几年前写的 Curl 类中的一段剪切粘贴,希望你能从中挑出一些有用的东西

/**
 * Fetch $url with the object's shared cURL handle and return the body.
 *
 * Configures the handle from the object's settings (user agent, cookie
 * file, optional referer, redirect policy), then executes the request,
 * retrying while the transfer fails or yields an empty body. The result of
 * curl_getinfo() for the last attempt is stored in $this->response.
 *
 * @param string $url Address to request.
 * @return string|false Response body; false if the last attempt failed;
 *                      "" if no attempt was made (negative retry_attempts).
 */
function get_url($url)
{
    curl_setopt_array($this->ch, array(
        CURLOPT_URL            => $url,
        CURLOPT_USERAGENT      => $this->user_agent,
        // Read and write the same file so cookies set by one response are
        // sent back on the next request.
        CURLOPT_COOKIEFILE     => $this->cookie_name,
        CURLOPT_COOKIEJAR      => $this->cookie_name,
        CURLOPT_SSL_VERIFYHOST => 2,
        CURLOPT_HEADER         => 0,
        CURLOPT_FOLLOWLOCATION => $this->follow ? 1 : 0,
        CURLOPT_RETURNTRANSFER => 1,
        // "*/*" is the wildcard media range; the previous "*.*" is not a
        // valid Accept value.
        CURLOPT_HTTPHEADER     => array("Accept: text/html,text/vnd.wap.wml,*/*"),
        // NOTE(review): peer verification is disabled so HTTPS works without
        // a CA bundle; this accepts any certificate. Enable it if a CA
        // bundle is available.
        CURLOPT_SSL_VERIFYPEER => FALSE,
    ));
    if (!is_null($this->referer))
    {
        curl_setopt($this->ch, CURLOPT_REFERER, $this->referer);
    }

    $try = 0;
    $result = "";
    // One initial attempt plus up to $this->retry_attempts retries. The
    // check is explicit rather than empty() because a body of "0" is a
    // valid response and must not trigger another request.
    while (($try <= $this->retry_attempts) && ($result === false || $result === ""))
    {
        $try++;
        $result = curl_exec($this->ch);
        // Transfer details of this attempt; http_code 4xx indicates an error.
        $this->response = curl_getinfo($this->ch);
    }

    // Use this URL as the referer for the next request when configured to.
    if ($this->referer_to_last)
    {
        $this->set_referer($url);
    }

    return $result;
}
函数get_url($url) { curl_setopt($this->ch,CURLOPT_URL,$URL); curl\u setopt($this->ch,CURLOPT\u USERAGENT,$this->user\u agent); curl\u setopt($this->ch,CURLOPT\u COOKIEFILE,$this->cookie\u name); curl\u setopt($this->ch,CURLOPT\u COOKIEJAR,$this->cookie\u name); 如果(!为空($this->referer)) { curl\u setopt($this->ch,CURLOPT\u REFERER,$this->REFERER); } curl_setopt($this->ch,CURLOPT_SSL_VERIFYHOST,2); curl_setopt($this->ch,CURLOPT_头,0); 如果($this->follow) { curl_setopt($this->ch,CURLOPT_FOLLOWLOCATION,1); } 其他的 { curl_setopt($this->ch,CURLOPT_FOLLOWLOCATION,0); } curl_setopt($this->ch,CURLOPT_RETURNTRANSFER,1); curl_setopt($this->ch,CURLOPT_HTTPHEADER,array(“Accept:text/html,text/vnd.wap.wml,**)); curl_setopt($this->ch,CURLOPT_SSL_VERIFYPEER,FALSE);//这一行使它在https下工作 $try=0; $result=“”; while($tryretry_尝试次数)&&&(empty($result))//强制重试最多5次 { $try++; $result=curl\u exec($this->ch); $this->response=curl\u getinfo($this->ch); //$response['http_code']4xx是一个错误 } //将引用URL设置为下一页的当前URL。 如果($this->referer\u to\u last)$this->set\u referer($url); 返回$result; }
您没有具体说明遇到的是哪种屏蔽。有问题的网站
http://www.habbo.com/home/Intricat
首先检查浏览器是否启用了javascript:

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
    <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
    <meta http-equiv="Content-Script-Type" content="text/javascript">
    <!-- Inline script: sets two cookies, then navigates to the current URL again.
         setCookie(c_name, value, expiredays) writes one cookie with path "/";
         getHostUri() returns the current document location as a string. -->
    <script type="text/javascript">function setCookie(c_name, value, expiredays) {
        // Expiry date is "now" plus expiredays days.
        var exdate = new Date();
        exdate.setDate(exdate.getDate() + expiredays);
        // The value is escape()-encoded; the expires attribute is omitted when expiredays is null.
        document.cookie = c_name + "=" + escape(value) + ((expiredays == null) ? "" : ";expires=" + exdate.toGMTString()) + ";path=/";
    }
    function getHostUri() {
        var loc = document.location;
        return loc.toString();
    }
    // NOTE(review): the value looks like the requesting client's IP address - confirm.
    setCookie('YPF8827340282Jdskjhfiw_928937459182JAX666', '179.222.19.192', 10);
    // Stores the referrer of this page load in a cookie for 10 days.
    setCookie('DOAReferrer', document.referrer, 10);
    // Re-request the same URL, now with the cookies above set.
    location.href = getHostUri();</script>
</head>
<body>
<!-- Shown only when scripting is disabled, i.e. when the script above cannot run. -->
<noscript>This site requires JavaScript and Cookies to be enabled. Please change your browser settings or upgrade your
    browser.
</noscript>
</body>
</html>

函数setCookie(c_名称、值、过期日期){
var exdate=新日期();
exdate.setDate(exdate.getDate()+expiredays);
document.cookie=c_name+“=”+escape(value)+((expiredays==null)?:“expires=“+exdate.togmString())+“path=/”;
}
函数getHostUri(){
var loc=文件位置;
返回loc.toString();
}
setCookie('YPF8827340282Jdskjhfiw_928937459182JAX666','179.222.19.192',10);
setCookie('DoaReferer',document.Referer,10);
location.href=getHostUri();
此站点需要启用 JavaScript 和 Cookie。请更改浏览器设置或升级您的浏览器。

由于 curl 不支持 JavaScript,您需要使用一个支持 JavaScript 的 HTTP 客户端——或者——您需要模仿该脚本,自行创建 cookie 并重新发起对该 URI 的请求。

您没有具体说明遇到的是哪种屏蔽。有问题的网站
http://www.habbo.com/home/Intricat
首先检查浏览器是否启用了javascript:

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
    <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
    <meta http-equiv="Content-Script-Type" content="text/javascript">
    <!-- Inline script: sets two cookies, then navigates to the current URL again.
         setCookie(c_name, value, expiredays) writes one cookie with path "/";
         getHostUri() returns the current document location as a string. -->
    <script type="text/javascript">function setCookie(c_name, value, expiredays) {
        // Expiry date is "now" plus expiredays days.
        var exdate = new Date();
        exdate.setDate(exdate.getDate() + expiredays);
        // The value is escape()-encoded; the expires attribute is omitted when expiredays is null.
        document.cookie = c_name + "=" + escape(value) + ((expiredays == null) ? "" : ";expires=" + exdate.toGMTString()) + ";path=/";
    }
    function getHostUri() {
        var loc = document.location;
        return loc.toString();
    }
    // NOTE(review): the value looks like the requesting client's IP address - confirm.
    setCookie('YPF8827340282Jdskjhfiw_928937459182JAX666', '179.222.19.192', 10);
    // Stores the referrer of this page load in a cookie for 10 days.
    setCookie('DOAReferrer', document.referrer, 10);
    // Re-request the same URL, now with the cookies above set.
    location.href = getHostUri();</script>
</head>
<body>
<!-- Shown only when scripting is disabled, i.e. when the script above cannot run. -->
<noscript>This site requires JavaScript and Cookies to be enabled. Please change your browser settings or upgrade your
    browser.
</noscript>
</body>
</html>

函数setCookie(c_名称、值、过期日期){
var exdate=新日期();
exdate.setDate(exdate.getDate()+expiredays);
document.cookie=c_name+“=”+escape(value)+((expiredays==null)?:“expires=“+exdate.togmString())+“path=/”;
}
函数getHostUri(){
var loc=文件位置;
返回loc.toString();
}
setCookie('YPF8827340282Jdskjhfiw_928937459182JAX666','179.222.19.192',10);
setCookie('DoaReferer',document.Referer,10);
location.href=getHostUri();
此站点需要启用 JavaScript 和 Cookie。请更改浏览器设置或升级您的浏览器。
curl