PHP:使用 hhb_curl 抓取需要 POST 提交的数据

Php 使用hhb_curl刮取POST数据,php,curl,Php,Curl,我要感谢hanshenrik在这方面的出色工作 我试过同样的代码,不幸的是不起作用。我想最近有些东西改变了,它不起作用???任何获取结果的帮助都会很有帮助 $registration_number='R23CCP'; $vehicle_maker='CITROEN'; $ch=hhb_curl_init(); $debugHeaders=array(); $debugCookies=array(); $debugRequest=''; $html=hhb_curl_exec2($ch,

首先,我要感谢 hanshenrik 在这方面所做的出色工作。

我试过同样的代码,可惜它现在不起作用了。我猜最近有些东西发生了变化,导致它失效。如果有人能帮我获取到查询结果,我将非常感激。

// Vehicle to look up on the enquiry form.
$registration_number = 'R23CCP';
$vehicle_maker       = 'CITROEN';
$enquiry_url         = 'https://www.vehicleenquiry.service.gov.uk/Default.aspx';

$ch = hhb_curl_init();

// Filled in (by reference) by every hhb_curl_exec2() call below.
$debugHeaders = array();
$debugCookies = array();
$debugRequest = '';

// Step 1: plain GET to obtain the session cookies and the ASP.NET hidden
// form fields that must be echoed back in the POST.
$html = hhb_curl_exec2($ch, $enquiry_url, $debugHeaders, $debugCookies, $debugRequest);
if (!is_string($html) || $html === '') {
    throw new RuntimeException('The enquiry page returned an empty document');
}

// DOMDocument::loadHTML() is an instance method: calling it statically is
// deprecated and throws an Error on PHP 8, so create the document first.
// The @ only silences libxml warnings about sloppy real-world markup.
$domd = new DOMDocument();
if (!@$domd->loadHTML($html)) {
    throw new RuntimeException('Could not parse the enquiry page as HTML');
}

// Collect the hidden fields, failing with a clear message (instead of a
// "call to a member function on null" fatal) when the page layout changed.
$hidden_fields = array();
foreach (array('__VIEWSTATE', '__VIEWSTATEGENERATOR', '__EVENTVALIDATION') as $field_id) {
    $field_node = $domd->getElementById($field_id);
    if ($field_node === null) {
        throw new RuntimeException('Hidden form field ' . var_export($field_id, true) . ' was not found - the page layout has probably changed');
    }
    $hidden_fields[$field_id] = $field_node->getAttribute('value');
}
$__VIEWSTATE          = $hidden_fields['__VIEWSTATE'];
$__VIEWSTATEGENERATOR = $hidden_fields['__VIEWSTATEGENERATOR'];
$__EVENTVALIDATION    = $hidden_fields['__EVENTVALIDATION'];

var_dump('__VIEWSTATE:',$__VIEWSTATE,'__VIEWSTATEGENERATOR:',$__VIEWSTATEGENERATOR,'__EVENTVALIDATION:',$__EVENTVALIDATION,'headers:',$debugHeaders,'cookies:',$debugCookies,'html:',$html,'request:',$debugRequest,'domd:',$domd);

// Step 2: submit the search form on the same handle, so the cookies from
// step 1 are sent along with the hidden fields.
curl_setopt_array($ch, array(
    CURLOPT_POST       => true,
    CURLOPT_POSTFIELDS => http_build_query(array(
        '__LASTFOCUS'                                     => '',
        '__EVENTTARGET'                                   => '',
        '__VIEWSTATE'                                     => $__VIEWSTATE,
        '__VIEWSTATEGENERATOR'                            => $__VIEWSTATEGENERATOR,
        '__EVENTVALIDATION'                               => $__EVENTVALIDATION,
        'ctl00$MainContent$txtSearchVrm'                  => $registration_number,
        'ctl00$MainContent$MakeTextBox'                   => $vehicle_maker,
        'ctl00$MainContent$txtV5CDocumentReferenceNumber' => '',
        'ctl00$MainContent$butSearch'                     => 'Search',
    ))
));
$html = hhb_curl_exec2($ch, $enquiry_url, $debugHeaders, $debugCookies, $debugRequest);
var_dump('headers:',$debugHeaders,'cookies:',$debugCookies,'html:',$html,'request:',$debugRequest);


/**
 * Create a curl handle preloaded with this file's default options.
 *
 * Unless the caller supplies CURLOPT_COOKIEFILE, a temporary file is created
 * and used as the cookie file; its handle is kept in a function-static array
 * so it stays open (workaround for https://bugs.php.net/bug.php?id=66014).
 *
 * @param array|null $custom_options_array CURLOPT_* => value pairs that are
 *        added to, or override, the defaults. Any empty() value (null, '',
 *        0, array()) means "no overrides".
 * @return mixed whatever curl_init() returned, after curl_setopt_array().
 * @throws InvalidArgumentException when a non-empty, non-array value is given.
 */
function hhb_curl_init($custom_options_array = array())
{
    // curl_init() itself takes a URL string or NULL as its first argument,
    // so tolerate NULL (and every other empty value) here as well.
    if (empty($custom_options_array)) {
        $custom_options_array = array();
    }
    if (!is_array($custom_options_array)) {
        throw new InvalidArgumentException('$custom_options_array must be an array!');
    }

    $settings = array(
        CURLOPT_AUTOREFERER    => true,
        CURLOPT_BINARYTRANSFER => true,
        CURLOPT_COOKIESESSION  => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_FORBID_REUSE   => false,
        CURLOPT_HTTPGET        => true,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT        => 11,
        CURLOPT_ENCODING       => ""
    );

    $callerSuppliesCookieFile = array_key_exists(CURLOPT_COOKIEFILE, $custom_options_array);
    if (!$callerSuppliesCookieFile) {
        // Only create the temporary file when it is really needed.
        static $curl_cookiefiles_arr = array();
        $cookieFileHandle       = tmpfile();
        $curl_cookiefiles_arr[] = $cookieFileHandle;
        $cookieFileMeta         = stream_get_meta_data($cookieFileHandle);
        $settings[CURLOPT_COOKIEFILE] = $cookieFileMeta['uri'];
    }

    // array_merge() would renumber the integer CURLOPT_* keys, so the
    // overrides are copied one by one instead.
    foreach ($custom_options_array as $option => $setting) {
        $settings[$option] = $setting;
    }

    $handle = curl_init();
    curl_setopt_array($handle, $settings);
    return $handle;
}
/**
 * Run a request for $url on an existing curl handle and return the result.
 *
 * A $url without a host component is prefixed with the host of the effective
 * URL of the previous successful call (kept in a function-static cache, so it
 * is shared by every handle). Only the host is cached - no scheme - so such
 * relative URLs are handed to curl without a scheme.
 *
 * @param resource|CurlHandle $ch  curl handle (a resource before PHP 8.0, a
 *                                 CurlHandle object since).
 * @param string              $url absolute URL, or a path relative to the
 *                                 previously requested host.
 * @return mixed the return value of curl_exec() (the response as a string
 *               when CURLOPT_RETURNTRANSFER is enabled).
 * @throws InvalidArgumentException when $ch is not a curl handle or $url is
 *                                  not a string.
 * @throws Exception when curl reports an error, or when the response is empty
 *                   although the status code does not announce an empty body.
 */
function hhb_curl_exec($ch, $url)
{
    static $hhb_curl_domainCache = "";

    // PHP >= 8.0 returns a CurlHandle object from curl_init(); older versions
    // return a resource of type "curl". Accept both. (instanceof against a
    // class that does not exist is simply false, so this is safe on PHP 5/7.)
    $isCurlHandle = (is_resource($ch) && get_resource_type($ch) === 'curl') || ($ch instanceof CurlHandle);
    if (!$isCurlHandle) {
        throw new InvalidArgumentException('$ch must be a curl handle!');
    }
    if (!is_string($url)) {
        throw new InvalidArgumentException('$url must be a string!');
    }

    // No host in the URL: resolve it against the previously visited host.
    if (parse_url($url, PHP_URL_HOST) === null) {
        $separator = (substr($url, 0, 1) !== '/') ? '/' : '';
        $url       = $hhb_curl_domainCache . $separator . $url;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    $html = curl_exec($ch);
    if (curl_errno($ch)) {
        throw new Exception('Curl error (curl_errno=' . curl_errno($ch) . ') on url ' . var_export($url, true) . ': ' . curl_error($ch));
    }
    if ($html === '') {
        // An empty body is only legitimate for bodyless status codes:
        // 204 No Content and 304 Not Modified. 203 is still accepted because
        // earlier versions of this function treated it as "no output".
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        if (!in_array($httpCode, array(203, 204, 304), true)) {
            throw new Exception('Curl returned nothing for ' . var_export($url, true) . ' but HTTP_RESPONSE_CODE was ' . var_export($httpCode, true));
        }
    }

    // curl may have followed "Location:" redirects, so remember the host of
    // the URL that was finally fetched, not the one that was requested.
    $hhb_curl_domainCache = parse_url(curl_getinfo($ch, CURLINFO_EFFECTIVE_URL), PHP_URL_HOST);
    return $html;
}
/**
 * Like hhb_curl_exec(), but additionally reports the response headers, the
 * cookies set by the server and curl's verbose log through the by-reference
 * arguments, and returns the response body without the headers.
 *
 * CURLOPT_VERBOSE and CURLOPT_HEADER are switched on for the handle and are
 * left enabled afterwards; CURLOPT_STDERR is reset to NULL before returning.
 *
 * @param resource|CurlHandle $ch  curl handle (a resource before PHP 8.0, a
 *                                 CurlHandle object since).
 * @param string $url              URL to request, see hhb_curl_exec().
 * @param array  $returnHeaders    (out) header lines of every response that
 *                                 was received (redirects included), in order.
 * @param array  $returnCookies    (out) url-decoded cookie name => value.
 * @param string $verboseDebugInfo (out) curl's verbose log of the transfer;
 *                                 also filled in when the request throws.
 * @return string the response body.
 * @throws InvalidArgumentException when $ch is not a curl handle or $url is
 *                                  not a string.
 * @throws Exception whatever hhb_curl_exec() throws.
 */
function hhb_curl_exec2($ch, $url, &$returnHeaders = array(), &$returnCookies = array(), &$verboseDebugInfo = "")
{
    $returnHeaders    = array();
    $returnCookies    = array();
    $verboseDebugInfo = "";

    // PHP >= 8.0 returns a CurlHandle object from curl_init(); older versions
    // return a resource of type "curl". Accept both.
    $isCurlHandle = (is_resource($ch) && get_resource_type($ch) === 'curl') || ($ch instanceof CurlHandle);
    if (!$isCurlHandle) {
        throw new InvalidArgumentException('$ch must be a curl handle!');
    }
    if (!is_string($url)) {
        throw new InvalidArgumentException('$url must be a string!');
    }

    // curl writes its verbose log to a stream, so capture it in a temp file.
    $verbosefileh = tmpfile();
    $verbosefile  = stream_get_meta_data($verbosefileh);
    $verbosefile  = $verbosefile['uri'];
    curl_setopt($ch, CURLOPT_VERBOSE, 1);
    curl_setopt($ch, CURLOPT_STDERR, $verbosefileh);
    curl_setopt($ch, CURLOPT_HEADER, 1);
    try {
        $html = hhb_curl_exec($ch, $url);
    } finally {
        // Runs on failure too: the log is most useful exactly then, and the
        // handle must not keep pointing at a stream that is about to close.
        $verboseDebugInfo = file_get_contents($verbosefile);
        curl_setopt($ch, CURLOPT_STDERR, NULL);
        fclose($verbosefileh);
    }

    // With CURLOPT_HEADER the output starts with one header block per
    // response (redirects, "100 Continue", ...). CURLINFO_HEADER_SIZE is the
    // combined size of all of them, so it marks where the real body starts.
    $crlf       = "\x0d\x0a";
    $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    if (!is_int($headerSize) || $headerSize <= 0 || $headerSize > strlen($html)) {
        // Fall back to the first blank line; no blank line means no headers.
        $separatorPos = strpos($html, $crlf . $crlf);
        $headerSize   = ($separatorPos === false) ? 0 : $separatorPos + strlen($crlf . $crlf);
    }
    $headersString = (string) substr($html, 0, $headerSize);
    $htmlBody      = (string) substr($html, $headerSize);

    foreach (explode($crlf, $headersString) as $headerLine) {
        if ($headerLine !== '') { // skip the blank lines between header blocks
            $returnHeaders[] = $headerLine;
        }
    }

    // A Set-Cookie header carries exactly one "name=value" pair, optionally
    // followed by ";"-separated attributes (expires, path, HttpOnly, ...),
    // e.g. "Set-Cookie: sessionToken=abc123; Expires=Wed, 09 Jun 2021 10:18:14 GMT;"
    // Only the pair is a cookie; the attributes are ignored.
    $prefix = 'Set-Cookie:';
    foreach ($returnHeaders as $headerLine) {
        if (stripos($headerLine, $prefix) !== 0) {
            continue;
        }
        $cookieString = trim((string) substr($headerLine, strlen($prefix)));
        if ($cookieString === '') {
            continue;
        }
        $semicolonPos = strpos($cookieString, ';');
        $pair         = ($semicolonPos === false) ? $cookieString : substr($cookieString, 0, $semicolonPos);
        $equalsPos    = strpos($pair, '=');
        if ($equalsPos === false) {
            // No "=" at all: keep reporting the name with an empty value.
            $cookieName  = trim($pair);
            $cookieValue = '';
        } else {
            $cookieName  = trim(substr($pair, 0, $equalsPos));
            $cookieValue = trim((string) substr($pair, $equalsPos + 1));
        }
        $returnCookies[urldecode($cookieName)] = urldecode($cookieValue);
    }

    return $htmlBody;
}
?>
$registration_number='R23CCP';
$vehicle_maker='CITROEN';
$ch=hhb_curl_init();
$debugHeaders=array();
$debugCookies=array();
$debugRequest='';
$html=hhb_curl_exec2($ch,'https://www.vehicleenquiry.service.gov.uk/Default.aspx',$debugHeaders,$debugCookies,$debugRequest);
//首先做一个空请求,获取会话id、cookies和奇怪的VIEWSTATE内容。。。
$domd=@DOMDocument::loadHTML($html);
assert(is_object($domd));
$__VIEWSTATE=$domd->getElementById('__VIEWSTATE')->getAttribute('value');
$__VIEWSTATEGENERATOR=$domd->getElementById('__VIEWSTATEGENERATOR')->getAttribute('value');
$__EVENTVALIDATION=$domd->getElementById('__EVENTVALIDATION')->getAttribute('value');
var_dump('__VIEWSTATE:',$__VIEWSTATE,'__VIEWSTATEGENERATOR:',$__VIEWSTATEGENERATOR,'__EVENTVALIDATION:',$__EVENTVALIDATION,'headers:',$debugHeaders,'cookies:',$debugCookies,'html:',$html,'request:',$debugRequest,'domd:',$domd);
//现在提交POST数据
curl_setopt_array($ch,array(
CURLOPT_POST=>true,
CURLOPT_POSTFIELDS=>http_build_query(array(
'__LASTFOCUS'=>'',
'__EVENTTARGET'=>'',
'__VIEWSTATE'=>$__VIEWSTATE,
'__VIEWSTATEGENERATOR'=>$__VIEWSTATEGENERATOR,
'__EVENTVALIDATION'=>$__EVENTVALIDATION,
'ctl00$MainContent$txtSearchVrm'=>$registration_number,
'ctl00$MainContent$MakeTextBox'=>$vehicle_maker,
'ctl00$MainContent$txtV5CDocumentReferenceNumber'=>'',
'ctl00$MainContent$butSearch'=>'Search',
))
));
$html=hhb_curl_exec2($ch,'https://www.vehicleenquiry.service.gov.uk/Default.aspx',$debugHeaders,$debugCookies,$debugRequest);
var_dump('headers:',$debugHeaders,'cookies:',$debugCookies,'html:',$html,'request:',$debugRequest);
function hhb_curl_init($custom_options_array=array())
{
if(empty($custom_options_array)){
$custom_options_array=array();
//对此我感觉有点糟糕。curl_init的argv[1]需要一个字符串(url),或NULL
//至少我也希望允许NULL :/
}
if(!is_array($custom_options_array)){
throw new InvalidArgumentException('$custom_options_array must be an array!');
}
;
$options_array=array(
CURLOPT_AUTOREFERER=>true,
CURLOPT_BINARYTRANSFER=>true,
CURLOPT_COOKIESESSION=>true,
CURLOPT_FOLLOWLOCATION=>true,
CURLOPT_FORBID_REUSE=>false,
CURLOPT_HTTPGET=>true,
CURLOPT_RETURNTRANSFER=>true,
CURLOPT_SSL_VERIFYPEER=>false,
CURLOPT_CONNECTTIMEOUT=>10,
CURLOPT_TIMEOUT=>11,
CURLOPT_ENCODING=>""
//CURLOPT_REFERER=>'example.org',
//CURLOPT_USERAGENT=>'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0'
);
if(!array_key_exists(CURLOPT_COOKIEFILE,$custom_options_array)){
//只能有条件地执行此操作,因为会调用tmpfile()。。
static $curl_cookiefiles_arr=array(); //针对 https://bugs.php.net/bug.php?id=66014 的解决方法
$curl_cookiefiles_arr[]=$options_array[CURLOPT_COOKIEFILE]=tmpfile();
$options_array[CURLOPT_COOKIEFILE]=stream_get_meta_data($options_array[CURLOPT_COOKIEFILE]);
$options_array[CURLOPT_COOKIEFILE]=$options_array[CURLOPT_COOKIEFILE]['uri'];
}
//我们不能使用array_merge(),因为它处理整数键的方式会/可能导致数据损坏
foreach($custom_options_array as $key=>$val){
$options_array[$key]=$val;
}
unset($key,$val,$custom_options_array);
$curl=curl_init();
curl_setopt_array($curl,$options_array);
return $curl;
}
function hhb_curl_exec($ch,$url)
{
static $hhb_curl_domainCache="";
//$hhb_curl_domainCache=&$this->hhb_curl_domainCache;
//$ch=&$this->curlh;
if(!is_resource($ch) || get_resource_type($ch)!=='curl'){
throw new InvalidArgumentException('$ch must be a curl handle!');
}
if(!is_string($url)){
throw new InvalidArgumentException('$url must be a string!');
}
$tmpvar="";
if(parse_url($url,PHP_URL_HOST)===null){
if(substr($url,0,1)!=='/'){
$url=$hhb_curl_domainCache.'/'.$url;
}else{
$url=$hhb_curl_domainCache.$url;
}
}
;
curl_setopt($ch,CURLOPT_URL,$url);
$html=curl_exec($ch);
if(curl_errno($ch)){
throw new Exception('Curl error (curl_errno='.curl_errno($ch).') on url '.var_export($url,true).': '.curl_error($ch));
// echo 'Curl error: ' . curl_error($ch);
}
if($html==='' && 203!=($tmpvar=curl_getinfo($ch,CURLINFO_HTTP_CODE)) /*203是“成功,但没有输出”。。*/){
throw new Exception('Curl returned nothing for '.var_export($url,true).' but HTTP_RESPONSE_CODE was '.var_export($tmpvar,true));
}
;
//记住,curl(通常)会自动跟随“Location: ”http重定向。。
$hhb_curl_domainCache=parse_url(curl_getinfo($ch,CURLINFO_EFFECTIVE_URL),PHP_URL_HOST);
return $html;
}
函数hhb_curl_exec2($ch,$url,&$returnHeaders=array(),&$returnCookies=array(),&$verboseduginfo=“”)
{
$returnHeaders=array();
$returnCookies=array();
$verbosedbugginfo=“”;
如果(!is_resource($ch)| get_resource_type($ch)!='curl'){
抛出新的InvalidArgumentException(“$ch必须是卷曲句柄!”);
}
如果(!是字符串($url)){
抛出新的InvalidArgumentException(“$url必须是字符串!”);
}
$verbosefileh=tmpfile();
$verbosefile=stream\u get\u meta\u数据($verbosefileh);
$verbosefile=$verbosefile['uri'];
curl_setopt($ch,CURLOPT_VERBOSE,1);
curl_setopt($ch,CURLOPT_STDERR,$verbosefileh);
curl_setopt($ch,CURLOPT_头,1);
$html=hhb\u curl\u exec($ch,$url);
$verboseDebugInfo=文件\获取\内容($verbosefile);
curl_setopt($ch,CURLOPT_STDERR,NULL);
fclose($verbosefileh);
取消设置($verbosefile,$verbosefileh);
$headers=array();
$crlf=“\x0d\x0a”;
$thepos=strpos($html,$crlf.$crlf,0);
$headersString=substr($html,0,$thepos);
$headerArr=爆炸($crlf,$HEADERSTRING);
$returnHeaders=$headerArr;
未设置($headerString,$headerArr);
$htmlBody=substr($html,$thepos+4);//应该可以处理utf8/ascii头…utf32?不太确定。。
U