PHP数据刮取和多线程进程数限制

PHP数据刮取和多线程进程数限制,php,parallel-processing,curl-multi,Php,Parallel Processing,Curl Multi,我正在编写一个脚本,它将使用cURL和DOMDocument从另一个站点抓取数据。 我将发布3部分代码来更好地解释我的脚本(抱歉,如果它太长了),但我会检查构建脚本的逻辑是否正确(我从未使用过scrape和multi_cURL),而且我不是专业程序员 作为第一步我已经让一个进程运行起来,并且成功了。 代码是这样的: $urlCurl = '[url to scrap]'; $options = Array( CURLOPT_RETURNTRANSFER => TR


我正在编写一个脚本,它将使用cURL和DOMDocument从另一个站点抓取数据。 我将发布3部分代码来更好地解释我的脚本(抱歉,如果它太长了),但我会检查构建脚本的逻辑是否正确(我从未使用过scrape和multi_cURL),而且我不是专业程序员

作为第一步我已经让一个进程运行起来,并且成功了。 代码是这样的:

$urlCurl = '[url to scrap]';

$options = Array(
            CURLOPT_RETURNTRANSFER => TRUE,  // Setting cURL's option to return the webpage data
            CURLOPT_FOLLOWLOCATION => TRUE,  // Setting cURL to follow 'location' HTTP headers
            CURLOPT_AUTOREFERER => TRUE, // Automatically set the referer where following 'location' HTTP headers
            CURLOPT_CONNECTTIMEOUT => 300,   // Setting the amount of time (in seconds) before the request times out
            CURLOPT_TIMEOUT => 300,  // Setting the maximum amount of time for cURL to execute queries
            CURLOPT_MAXREDIRS => 10, // Setting the maximum number of redirections to follow
            CURLOPT_USERAGENT => "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1a2pre) Gecko/2008073000 Shredder/3.0a2pre ThunderBrowse/3.2.1.8",  // Setting the useragent
            CURLOPT_URL => $urlCurl, // Setting cURL's URL option with the $url variable passed into the function
        );

$chCurl = curl_init($urlCurl);
curl_setopt_array($chCurl, $options);   // Setting cURL's options using the previously assigned array data in $options
$resultChCurl = curl_exec($chCurl);
$html = $resultChCurl;

$dom = new DOMDocument();
$html = $dom->loadHTML($html);

$dom->preserveWhiteSpace = false;

//$result = array();

$tables = $dom->getElementsByTagName('table');
foreach($tables as $table) {
    if ($table->hasAttribute('class')) {
        $rows = $dom->getElementsByTagName("tr");
            foreach($rows as $row) {

                $cols = $row->getElementsByTagName('td'); 
                $aref = $row->getElementsByTagName('a');

                foreach($aref as $profile) {
                    if (($cols->item(0)->nodeValue != "Seller") and ($cols->item(1)->nodeValue != "Ratio") and ($cols->item(2)->nodeValue != "Amount")) {
                        $seller =  $cols->item(0)->nodeValue; 
                        $sellerID = intval(preg_replace('/[^0-9]+/', '', ($profile->getAttribute( 'href' ))), 10);
                        $sellerType = sellerTypeExtract($profile->getAttribute( 'href' ));
                        $currencysell = preg_replace('/[.0-9]/','',($cols->item(1)->nodeValue)); 
                        $amount =  floatval($cols->item(1)->nodeValue);
                        $rate = rateExtractor($cols->item(2)->nodeValue);
                        $currencybought = preg_replace('/[.0-9'.$currencysell.'=]/','',($cols->item(2)->nodeValue));
                        $marketLink = $urlCurl;
                        $server = "servername";
                        $tenGoldref = tenGoldref($rate);
                    }

                    $result[] = array (
                        'seller' => $seller,
                        'sellerprofile'=> $sellerID,
                        'sellertype' => $sellerType,
                        'currencysold' => $currencysell,
                        'amount'=> $amount,
                        'rate'=> $rate,
                        'currencybought'=> $currencybought,
                        'marketLink' => $marketLink,
                        'server' => $server,
                        'tengold' => $tenGoldref,
                        'timeMoment' => $dayTime,
                        'date' => $date,
                        'hour' => $hour
                    );
                }
            } 
        }
    }}

    print_r($result);
 function curl_multi_download(array $urls, callable $callback, array $custom_options = array())
 {
     $time_start_insert = microtime(true);

     // make sure the rolling window isn't greater than the # of urls
     $rolling_window = 50;
     $rolling_window = (sizeof($urls) < $rolling_window) ? sizeof($urls) : $rolling_window;

     $master = curl_multi_init();
     $curl_arr = array();
     $options = array(
         CURLOPT_RETURNTRANSFER => true,
         CURLOPT_FOLLOWLOCATION => true,
         CURLOPT_MAXREDIRS => 5,
     ) + $custom_options;

     // start the first batch of requests
     for ( $i = 0; $i < $rolling_window; $i++ )
     {
        $ch = curl_init();
        $options[CURLOPT_URL] = $urls[$i];
        curl_setopt_array($ch, $options);
        curl_multi_add_handle($master, $ch);
    }
  do
    {
  while(($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM);
  if($execrun != CURLM_OK)
        break;
  // a request was just completed -- find out which one
  while( $done = curl_multi_info_read($master) )
       {
        $info = curl_getinfo($done['handle']);

        // request successful.  process output using the callback function.
        $output = curl_multi_getcontent($done['handle']);
        $extract = call_user_func_array($callback, array($info, $output));

        if (empty($fullArr)) {
            $fullArr = array($extract);
        } else {
            (array_push($fullArr,$extract));
        }

        echo "<br><br>*******************NOW THERE SHOULD BE FULL ARRAY*************************<br>";
        echo 'done';
        echo "<br>";
        print_r($fullArr);
        echo "<br><br>*******************END FULL ARRAY*************************<br>";
        $time_end_insert = microtime(true);
        //dividing with 60 will give the execution time in minutes other wise seconds
        $execution_time_insert = ($time_end_insert - $time_start_insert)/60;
        //execution time of the script
        echo '<br><br><p class="finalTime">++++++++ total execution time: '.round($execution_time_insert,2,PHP_ROUND_HALF_DOWN).' minutes +++++++++++</p><br>';

        if ( isset($urls[$i+1]) )
        {
            // start a new request (it's important to do this before removing the old one)
            $ch = curl_init();
            $options[CURLOPT_URL] = $urls[$i++];  // increment i
            curl_setopt_array($ch, $options);
            curl_multi_add_handle($master, $ch);
        }

        // remove the curl handle that just completed
        curl_multi_remove_handle($master, $done['handle']);
      }
   } while ($running);

   curl_multi_close($master);
   return true;
}

function curl_multi_getcontent_utf8( $ch )
{
   $data = curl_multi_getcontent( $ch );
   if ( !is_string($data) )
      return $data;

   unset($charset);
   $content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);

   /* 1: HTTP Content-Type: header */
   preg_match( '@([\w/+]+)(;\s*charset=(\S+))?@i', $content_type, $matches );
   if ( isset( $matches[3] ) )
       $charset = $matches[3];

   /* 2: <meta> element in the page */
    if ( !isset($charset) )
    {
        preg_match( '@<meta\s+http-equiv="Content-Type"\s+content="([\w/]+)(;\s*charset=([^\s"]+))?@i', $data, $matches );
        if ( isset( $matches[3] ) )
           $charset = $matches[3];
    }

    /* 3: <xml> element in the page */
    if ( !isset($charset) )
    {
    preg_match( '@<\?xml.+encoding="([^\s"]+)@si', $data, $matches );
    if ( isset( $matches[1] ) )
        $charset = $matches[1];
    }

    /* 4: PHP's heuristic detection */
   if ( !isset($charset) )
   {
      $encoding = mb_detect_encoding($data);
      if ($encoding)
        $charset = $encoding;
   }

   /* 5: Default for HTML */
   if ( !isset($charset) )
   {
       if (strstr($content_type, "text/html") === 0)
          $charset = "ISO 8859-1";
   }

   /* Convert it if it is anything but UTF-8 */
   /* You can change "UTF-8"  to "UTF-8//IGNORE" to
   ignore conversion errors and still output something reasonable */
   if ( isset($charset) && strtoupper($charset) != "UTF-8" )
      $data = iconv($charset, 'UTF-8', $data);

   return $data;
}

curl_multi_download(array('[LINKS TO SCRAPE]'), 'process_response');

function process_response( $info, $response ) 
{

if ( $info['http_code'] != 200 )
{
    echo "Error retrieving URL " . $info['url'] . "<br/>";
    return;
};
//var_dump($info);
//var_dump($response);
    $fullArr = array();
    $infos = scrape($response,$dayTime,$date,$hour);
    return $infos;
}

function curl_download( $url )
{
  $ch = curl_init();
  curl_setopt($ch, CURLOPT_URL, $url);
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
  $data = self::curl_exec($ch);
  curl_close($ch);

  return $data;
}

function scrape($data,$dayTime,$date,$hour) {

  $html = $data;
  $dom = new DOMDocument();
  $html = $dom->loadHTML($html);

  $dom->preserveWhiteSpace = false;

  $result = array();

  $tables = $dom->getElementsByTagName('table');
  foreach($tables as $table) {
     if ($table->hasAttribute('class')) {
        $rows = $dom->getElementsByTagName("tr");
            foreach($rows as $row) {

                $cols = $row->getElementsByTagName('td'); 
                $aref = $row->getElementsByTagName('a');

                foreach($aref as $profile) {
                    if (($cols->item(0)->nodeValue != "Seller") and ($cols->item(1)->nodeValue != "Ratio") and ($cols->item(2)->nodeValue != "Amount")) {
                        $seller =  $cols->item(0)->nodeValue; 
                        $sellerID = intval(preg_replace('/[^0-9]+/', '', ($profile->getAttribute( 'href' ))), 10);
                        $sellerType = sellerTypeExtract($profile->getAttribute( 'href' ));
                        $currencysell = preg_replace('/[.0-9]/','',($cols->item(1)->nodeValue)); 
                        $amount =  floatval($cols->item(1)->nodeValue);
                        $rate = rateExtractor($cols->item(2)->nodeValue);
                        $currencybought = preg_replace('/[.0-9'.$currencysell.'=]/','',($cols->item(2)->nodeValue));
                        //$marketLink = $nodes;
                        $server = "servername";
                        $tenGoldref = tenGoldref($rate);
                    }

                    $result = array (
                        'seller' => $seller,
                        'sellerprofile'=> $sellerID,
                        'sellertype' => $sellerType,
                        'currencysold' => $currencysell,
                        'amount'=> $amount,
                        'rate'=> $rate,
                        'currencybought'=> $currencybought,
                        //'marketLink' => $marketLink,
                        'server' => $server,
                        'tengold' => $tenGoldref,
                        'timeMoment' => $dayTime,
                        'date' => $date,
                        'hour' => $hour
                    );
                }
            } 
        }
    }       
    return $result;
 }
为了节省时间,我尝试实现multi_cURL,我从这个简单的脚本开始

function urlBuilder() {

$countryList = array();
for ($i = 1; $i <= 50; $i++) {
    echo "<br>".$i."<br>";
    $origin = "[VARIABLE URL TO SCRAP]";
    if (empty($countryList)) {
        $countryList = array($origin);
    } else {
        (array_push($countryList,$origin));
    }
}
return $countryList;
}

function start() {

   $fullArr = array();

   $nodes = urlBuilder();
   $node_count = count($nodes);

   $curl_arr = array();
   $master = curl_multi_init();

   for($i = 0; $i < $node_count; $i++)
      {
        $url = $nodes[$i];
        $curl_arr[$i] = curl_init($url);
        curl_setopt($curl_arr[$i], CURLOPT_RETURNTRANSFER, true);
        curl_multi_add_handle($master, $curl_arr[$i]);
      } 
   do {
        curl_multi_exec($master,$running);
      } while($running > 0);
   echo "results: ";
   for($i = 0; $i < $node_count; $i++)
     {
        $results = curl_multi_getcontent  ( $curl_arr[$i]  );
        $infos = scrape($results,$nodes[$i],$dayTime,$date,$hour);

        if (empty($fullArr)) {
           $fullArr = array($infos);
        } else {
        (array_push($fullArr,$infos));
        }
   }

   echo "<br><br>*******************NOW THERE SHOULD BE FULL ARRAY*************************<br>";
   echo 'done';
   echo "<br>";
   print_r($fullArr);
   echo "<br><br>*******************END FULL ARRAY*************************<br>";
函数urlBuilder(){
$countryList=array();
(i=1;i=0);
回声“结果:”;
对于($i=0;$i<$node_count;$i++)
{
$results=curl\u multi\u getcontent($curl\u arr[$i]);
$infos=scrap($results,$nodes[$i],$days,$date,$hour);
如果(空($fullArr)){
$fullArr=阵列($infos);
}否则{
(阵列推送($fullArr,$infos));
}
}
echo“

*******************现在应该有完整的数组******************************************
”; 回音“完成”; 回声“
”; 印刷费($fullArr); echo“

*********************************************************************************************
”;
但我遇到的第一个问题是,对于155个并行连接,最后的30/40发送回“NULL”数据(在90%的测试中并非每次都是如此)。 所以第一个问题是:脚本是否因为连接太多而返回空数据?

我想,这就是为什么我试图在代码中实现本教程(),以获得一个好的多连接处理程序,最后的代码是:

$urlCurl = '[url to scrap]';

$options = Array(
            CURLOPT_RETURNTRANSFER => TRUE,  // Setting cURL's option to return the webpage data
            CURLOPT_FOLLOWLOCATION => TRUE,  // Setting cURL to follow 'location' HTTP headers
            CURLOPT_AUTOREFERER => TRUE, // Automatically set the referer where following 'location' HTTP headers
            CURLOPT_CONNECTTIMEOUT => 300,   // Setting the amount of time (in seconds) before the request times out
            CURLOPT_TIMEOUT => 300,  // Setting the maximum amount of time for cURL to execute queries
            CURLOPT_MAXREDIRS => 10, // Setting the maximum number of redirections to follow
            CURLOPT_USERAGENT => "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1a2pre) Gecko/2008073000 Shredder/3.0a2pre ThunderBrowse/3.2.1.8",  // Setting the useragent
            CURLOPT_URL => $urlCurl, // Setting cURL's URL option with the $url variable passed into the function
        );

$chCurl = curl_init($urlCurl);
curl_setopt_array($chCurl, $options);   // Setting cURL's options using the previously assigned array data in $options
$resultChCurl = curl_exec($chCurl);
$html = $resultChCurl;

$dom = new DOMDocument();
$html = $dom->loadHTML($html);

$dom->preserveWhiteSpace = false;

//$result = array();

$tables = $dom->getElementsByTagName('table');
foreach($tables as $table) {
    if ($table->hasAttribute('class')) {
        $rows = $dom->getElementsByTagName("tr");
            foreach($rows as $row) {

                $cols = $row->getElementsByTagName('td'); 
                $aref = $row->getElementsByTagName('a');

                foreach($aref as $profile) {
                    if (($cols->item(0)->nodeValue != "Seller") and ($cols->item(1)->nodeValue != "Ratio") and ($cols->item(2)->nodeValue != "Amount")) {
                        $seller =  $cols->item(0)->nodeValue; 
                        $sellerID = intval(preg_replace('/[^0-9]+/', '', ($profile->getAttribute( 'href' ))), 10);
                        $sellerType = sellerTypeExtract($profile->getAttribute( 'href' ));
                        $currencysell = preg_replace('/[.0-9]/','',($cols->item(1)->nodeValue)); 
                        $amount =  floatval($cols->item(1)->nodeValue);
                        $rate = rateExtractor($cols->item(2)->nodeValue);
                        $currencybought = preg_replace('/[.0-9'.$currencysell.'=]/','',($cols->item(2)->nodeValue));
                        $marketLink = $urlCurl;
                        $server = "servername";
                        $tenGoldref = tenGoldref($rate);
                    }

                    $result[] = array (
                        'seller' => $seller,
                        'sellerprofile'=> $sellerID,
                        'sellertype' => $sellerType,
                        'currencysold' => $currencysell,
                        'amount'=> $amount,
                        'rate'=> $rate,
                        'currencybought'=> $currencybought,
                        'marketLink' => $marketLink,
                        'server' => $server,
                        'tengold' => $tenGoldref,
                        'timeMoment' => $dayTime,
                        'date' => $date,
                        'hour' => $hour
                    );
                }
            } 
        }
    }}

    print_r($result);
 function curl_multi_download(array $urls, callable $callback, array $custom_options = array())
 {
     $time_start_insert = microtime(true);

     // make sure the rolling window isn't greater than the # of urls
     $rolling_window = 50;
     $rolling_window = (sizeof($urls) < $rolling_window) ? sizeof($urls) : $rolling_window;

     $master = curl_multi_init();
     $curl_arr = array();
     $options = array(
         CURLOPT_RETURNTRANSFER => true,
         CURLOPT_FOLLOWLOCATION => true,
         CURLOPT_MAXREDIRS => 5,
     ) + $custom_options;

     // start the first batch of requests
     for ( $i = 0; $i < $rolling_window; $i++ )
     {
        $ch = curl_init();
        $options[CURLOPT_URL] = $urls[$i];
        curl_setopt_array($ch, $options);
        curl_multi_add_handle($master, $ch);
    }
  do
    {
  while(($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM);
  if($execrun != CURLM_OK)
        break;
  // a request was just completed -- find out which one
  while( $done = curl_multi_info_read($master) )
       {
        $info = curl_getinfo($done['handle']);

        // request successful.  process output using the callback function.
        $output = curl_multi_getcontent($done['handle']);
        $extract = call_user_func_array($callback, array($info, $output));

        if (empty($fullArr)) {
            $fullArr = array($extract);
        } else {
            (array_push($fullArr,$extract));
        }

        echo "<br><br>*******************NOW THERE SHOULD BE FULL ARRAY*************************<br>";
        echo 'done';
        echo "<br>";
        print_r($fullArr);
        echo "<br><br>*******************END FULL ARRAY*************************<br>";
        $time_end_insert = microtime(true);
        //dividing with 60 will give the execution time in minutes other wise seconds
        $execution_time_insert = ($time_end_insert - $time_start_insert)/60;
        //execution time of the script
        echo '<br><br><p class="finalTime">++++++++ total execution time: '.round($execution_time_insert,2,PHP_ROUND_HALF_DOWN).' minutes +++++++++++</p><br>';

        if ( isset($urls[$i+1]) )
        {
            // start a new request (it's important to do this before removing the old one)
            $ch = curl_init();
            $options[CURLOPT_URL] = $urls[$i++];  // increment i
            curl_setopt_array($ch, $options);
            curl_multi_add_handle($master, $ch);
        }

        // remove the curl handle that just completed
        curl_multi_remove_handle($master, $done['handle']);
      }
   } while ($running);

   curl_multi_close($master);
   return true;
}

function curl_multi_getcontent_utf8( $ch )
{
   $data = curl_multi_getcontent( $ch );
   if ( !is_string($data) )
      return $data;

   unset($charset);
   $content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);

   /* 1: HTTP Content-Type: header */
   preg_match( '@([\w/+]+)(;\s*charset=(\S+))?@i', $content_type, $matches );
   if ( isset( $matches[3] ) )
       $charset = $matches[3];

   /* 2: <meta> element in the page */
    if ( !isset($charset) )
    {
        preg_match( '@<meta\s+http-equiv="Content-Type"\s+content="([\w/]+)(;\s*charset=([^\s"]+))?@i', $data, $matches );
        if ( isset( $matches[3] ) )
           $charset = $matches[3];
    }

    /* 3: <xml> element in the page */
    if ( !isset($charset) )
    {
    preg_match( '@<\?xml.+encoding="([^\s"]+)@si', $data, $matches );
    if ( isset( $matches[1] ) )
        $charset = $matches[1];
    }

    /* 4: PHP's heuristic detection */
   if ( !isset($charset) )
   {
      $encoding = mb_detect_encoding($data);
      if ($encoding)
        $charset = $encoding;
   }

   /* 5: Default for HTML */
   if ( !isset($charset) )
   {
       if (strstr($content_type, "text/html") === 0)
          $charset = "ISO 8859-1";
   }

   /* Convert it if it is anything but UTF-8 */
   /* You can change "UTF-8"  to "UTF-8//IGNORE" to
   ignore conversion errors and still output something reasonable */
   if ( isset($charset) && strtoupper($charset) != "UTF-8" )
      $data = iconv($charset, 'UTF-8', $data);

   return $data;
}

curl_multi_download(array('[LINKS TO SCRAPE]'), 'process_response');

function process_response( $info, $response ) 
{

if ( $info['http_code'] != 200 )
{
    echo "Error retrieving URL " . $info['url'] . "<br/>";
    return;
};
//var_dump($info);
//var_dump($response);
    $fullArr = array();
    $infos = scrape($response,$dayTime,$date,$hour);
    return $infos;
}

function curl_download( $url )
{
  $ch = curl_init();
  curl_setopt($ch, CURLOPT_URL, $url);
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
  $data = self::curl_exec($ch);
  curl_close($ch);

  return $data;
}

function scrape($data,$dayTime,$date,$hour) {

  $html = $data;
  $dom = new DOMDocument();
  $html = $dom->loadHTML($html);

  $dom->preserveWhiteSpace = false;

  $result = array();

  $tables = $dom->getElementsByTagName('table');
  foreach($tables as $table) {
     if ($table->hasAttribute('class')) {
        $rows = $dom->getElementsByTagName("tr");
            foreach($rows as $row) {

                $cols = $row->getElementsByTagName('td'); 
                $aref = $row->getElementsByTagName('a');

                foreach($aref as $profile) {
                    if (($cols->item(0)->nodeValue != "Seller") and ($cols->item(1)->nodeValue != "Ratio") and ($cols->item(2)->nodeValue != "Amount")) {
                        $seller =  $cols->item(0)->nodeValue; 
                        $sellerID = intval(preg_replace('/[^0-9]+/', '', ($profile->getAttribute( 'href' ))), 10);
                        $sellerType = sellerTypeExtract($profile->getAttribute( 'href' ));
                        $currencysell = preg_replace('/[.0-9]/','',($cols->item(1)->nodeValue)); 
                        $amount =  floatval($cols->item(1)->nodeValue);
                        $rate = rateExtractor($cols->item(2)->nodeValue);
                        $currencybought = preg_replace('/[.0-9'.$currencysell.'=]/','',($cols->item(2)->nodeValue));
                        //$marketLink = $nodes;
                        $server = "servername";
                        $tenGoldref = tenGoldref($rate);
                    }

                    $result = array (
                        'seller' => $seller,
                        'sellerprofile'=> $sellerID,
                        'sellertype' => $sellerType,
                        'currencysold' => $currencysell,
                        'amount'=> $amount,
                        'rate'=> $rate,
                        'currencybought'=> $currencybought,
                        //'marketLink' => $marketLink,
                        'server' => $server,
                        'tengold' => $tenGoldref,
                        'timeMoment' => $dayTime,
                        'date' => $date,
                        'hour' => $hour
                    );
                }
            } 
        }
    }       
    return $result;
 }
函数curl\u multi\u下载(数组$URL,可调用$callback,数组$custom\u options=array())
{
$time\u start\u insert=微时间(真);
//确保滚动窗口不大于URL的#
$rolling_window=50;
$rolling\u window=(sizeof($url)<$rolling\u window)?sizeof($url):$rolling\u window;
$master=curl_multi_init();
$curl_arr=array();
$options=array(
CURLOPT_RETURNTRANSFER=>true,
CURLOPT_FOLLOWLOCATION=>true,
CURLOPT_MAXREDIRS=>5,
)+$custom_选项;
//启动第一批请求
对于($i=0;$i<$rolling_window;$i++)
{
$ch=curl_init();
$options[CURLOPT_URL]=$URL[$i];
curl_setopt_数组($ch$options);
卷曲多加手柄($master,$ch);
}
做
{
而($execrun=curl\u multi\u exec($master,$running))==CURLM\u CALL\u multi\u PERFORM);
如果($execrun!=CURLM_OK)
打破
//一个请求刚刚完成--找出哪一个
而($done=curl\u multi\u info\u read($master))
{
$info=curl_getinfo($done['handle']);
//请求成功。使用回调函数处理输出。
$output=curl\u multi\u getcontent($done['handle']);
$extract=call_user_func_数组($callback,array($info,$output));
如果(空($fullArr)){
$fullArr=数组($extract);
}否则{
(数组推送($fullArr,$extract));
}
echo“

*******************现在应该有完整的数组******************************************
”; 回音“完成”; 回声“
”; 印刷费($fullArr); echo“

*********************************************************************************************
”; $time\u end\u insert=微时间(真); //除以60将给出以分钟或秒为单位的执行时间 $execution\u time\u insert=($time\u end\u insert-$time\u start\u insert)/60; //脚本的执行时间 echo'

+total execution time:'.round($execution\u time\u insert,2,PHP\u round\u HALF\u DOWN)。'minutes+; 如果(isset($URL[$i+1])) { //启动新请求(在删除旧请求之前执行此操作很重要) $ch=curl_init(); $options[CURLOPT_URL]=$URL[$i++];//增量i curl_setopt_数组($ch$options); 卷曲多加手柄($master,$ch); } //移除刚刚完成的卷曲手柄 卷曲多重移除手柄($master,$done['handle'); } }同时($运行); 卷曲多合($master); 返回true; } 函数curl\u multi\u getcontent\u utf8($ch) { $data=curl\u multi\u getcontent($ch); 如果(!是字符串($data)) 返回$data; 未设置($字符集); $content\u type=curl\u getinfo($ch,CURLINFO\u content\u type); /*1:HTTP内容类型:标头*/ preg_match('@([\w/+]+)(;\s*字符集=(\s+)))@i',$content_type,$matches); 如果(isset($matches[3])) $charset=$matches[3]; /*2:页面中的元素*/ 如果(!isset($charset)) { preg_match('@loadHTML($html); $dom->preserveWhiteSpace=false; $result=array(); $tables=$dom->getElementsByTagName('table'); foreach($tables作为$table){ 如果($table->hasAttribute('class')){ $rows=$dom->getElementsByTagName(“tr”); foreach($行作为$行){ $cols=$row->getElementsByTagName('td'); $aref=$row->getElementsByTagName('a'); foreach($aref作为$profile){ 如果($cols->item(0)->nodeValue!=“卖方”)和($cols->item(1)->nodeValue!=“比率”)和($cols->item(2)->nodeValue!=“金额”)){ $seller=$cols->item(0)->nodeValue; $sellerID=intval(preg_replace('/[^0-9]+/','',($profile->getAttribute('href')),10); $sellerType=sellerTypeExtract($profile->getAttribute('href'); $currencysell=preg_replace('/[0-9]/','',($cols->item(1)->nodeValue)); $amount=floatval($cols->item(1)->nodeValue); $rate=rateExtractor($cols->item(2)->nodeValue); $currencybo