PHP数据刮取和多线程进程数限制_Php_Parallel Processing_Curl Multi

PHP数据刮取和多线程进程数限制

php parallel-processing

PHP数据刮取和多线程进程数限制,php,parallel-processing,curl-multi,Php,Parallel Processing,Curl Multi,我正在编写一个脚本，它将使用cURL和DOMDocument从另一个站点抓取数据。我将发布3部分代码来更好地解释我的脚本（抱歉，如果它太长了），但我会检查构建脚本的逻辑是否正确（我从未使用过scrape和multi_cURL），而且我不是专业程序员作为第一步我已经让一个进程运行起来，并且成功了。代码是这样的： $urlCurl = '[url to scrap]'; $options = Array( CURLOPT_RETURNTRANSFER => TR

我正在编写一个脚本，它将使用cURL和DOMDocument从另一个站点抓取数据。我将发布3部分代码来更好地解释我的脚本（抱歉，如果它太长了），但我会检查构建脚本的逻辑是否正确（我从未使用过scrape和multi_cURL），而且我不是专业程序员

作为第一步我已经让一个进程运行起来，并且成功了。代码是这样的：

$urlCurl = '[url to scrap]';

$options = Array(
            CURLOPT_RETURNTRANSFER => TRUE,  // Setting cURL's option to return the webpage data
            CURLOPT_FOLLOWLOCATION => TRUE,  // Setting cURL to follow 'location' HTTP headers
            CURLOPT_AUTOREFERER => TRUE, // Automatically set the referer where following 'location' HTTP headers
            CURLOPT_CONNECTTIMEOUT => 300,   // Setting the amount of time (in seconds) before the request times out
            CURLOPT_TIMEOUT => 300,  // Setting the maximum amount of time for cURL to execute queries
            CURLOPT_MAXREDIRS => 10, // Setting the maximum number of redirections to follow
            CURLOPT_USERAGENT => "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1a2pre) Gecko/2008073000 Shredder/3.0a2pre ThunderBrowse/3.2.1.8",  // Setting the useragent
            CURLOPT_URL => $urlCurl, // Setting cURL's URL option with the $url variable passed into the function
        );

$chCurl = curl_init($urlCurl);
curl_setopt_array($chCurl, $options);   // Setting cURL's options using the previously assigned array data in $options
$resultChCurl = curl_exec($chCurl);
$html = $resultChCurl;

$dom = new DOMDocument();
$html = $dom->loadHTML($html);

$dom->preserveWhiteSpace = false;

//$result = array();

$tables = $dom->getElementsByTagName('table');
foreach($tables as $table) {
    if ($table->hasAttribute('class')) {
        $rows = $dom->getElementsByTagName("tr");
            foreach($rows as $row) {

                $cols = $row->getElementsByTagName('td'); 
                $aref = $row->getElementsByTagName('a');

                foreach($aref as $profile) {
                    if (($cols->item(0)->nodeValue != "Seller") and ($cols->item(1)->nodeValue != "Ratio") and ($cols->item(2)->nodeValue != "Amount")) {
                        $seller =  $cols->item(0)->nodeValue; 
                        $sellerID = intval(preg_replace('/[^0-9]+/', '', ($profile->getAttribute( 'href' ))), 10);
                        $sellerType = sellerTypeExtract($profile->getAttribute( 'href' ));
                        $currencysell = preg_replace('/[.0-9]/','',($cols->item(1)->nodeValue)); 
                        $amount =  floatval($cols->item(1)->nodeValue);
                        $rate = rateExtractor($cols->item(2)->nodeValue);
                        $currencybought = preg_replace('/[.0-9'.$currencysell.'=]/','',($cols->item(2)->nodeValue));
                        $marketLink = $urlCurl;
                        $server = "servername";
                        $tenGoldref = tenGoldref($rate);
                    }

                    $result[] = array (
                        'seller' => $seller,
                        'sellerprofile'=> $sellerID,
                        'sellertype' => $sellerType,
                        'currencysold' => $currencysell,
                        'amount'=> $amount,
                        'rate'=> $rate,
                        'currencybought'=> $currencybought,
                        'marketLink' => $marketLink,
                        'server' => $server,
                        'tengold' => $tenGoldref,
                        'timeMoment' => $dayTime,
                        'date' => $date,
                        'hour' => $hour
                    );
                }
            } 
        }
    }}

    print_r($result);

 function curl_multi_download(array $urls, callable $callback, array $custom_options = array())
 {
     $time_start_insert = microtime(true);

     // make sure the rolling window isn't greater than the # of urls
     $rolling_window = 50;
     $rolling_window = (sizeof($urls) < $rolling_window) ? sizeof($urls) : $rolling_window;

     $master = curl_multi_init();
     $curl_arr = array();
     $options = array(
         CURLOPT_RETURNTRANSFER => true,
         CURLOPT_FOLLOWLOCATION => true,
         CURLOPT_MAXREDIRS => 5,
     ) + $custom_options;

     // start the first batch of requests
     for ( $i = 0; $i < $rolling_window; $i++ )
     {
        $ch = curl_init();
        $options[CURLOPT_URL] = $urls[$i];
        curl_setopt_array($ch, $options);
        curl_multi_add_handle($master, $ch);
    }
  do
    {
  while(($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM);
  if($execrun != CURLM_OK)
        break;
  // a request was just completed -- find out which one
  while( $done = curl_multi_info_read($master) )
       {
        $info = curl_getinfo($done['handle']);

        // request successful.  process output using the callback function.
        $output = curl_multi_getcontent($done['handle']);
        $extract = call_user_func_array($callback, array($info, $output));

        if (empty($fullArr)) {
            $fullArr = array($extract);
        } else {
            (array_push($fullArr,$extract));
        }

        echo "<br><br>*******************NOW THERE SHOULD BE FULL ARRAY*************************<br>";
        echo 'done';
        echo "<br>";
        print_r($fullArr);
        echo "<br><br>*******************END FULL ARRAY*************************<br>";
        $time_end_insert = microtime(true);
        //dividing with 60 will give the execution time in minutes other wise seconds
        $execution_time_insert = ($time_end_insert - $time_start_insert)/60;
        //execution time of the script
        echo '<br><br><p class="finalTime">++++++++ total execution time: '.round($execution_time_insert,2,PHP_ROUND_HALF_DOWN).' minutes +++++++++++</p><br>';

        if ( isset($urls[$i+1]) )
        {
            // start a new request (it's important to do this before removing the old one)
            $ch = curl_init();
            $options[CURLOPT_URL] = $urls[$i++];  // increment i
            curl_setopt_array($ch, $options);
            curl_multi_add_handle($master, $ch);
        }

        // remove the curl handle that just completed
        curl_multi_remove_handle($master, $done['handle']);
      }
   } while ($running);

   curl_multi_close($master);
   return true;
}

function curl_multi_getcontent_utf8( $ch )
{
   $data = curl_multi_getcontent( $ch );
   if ( !is_string($data) )
      return $data;

   unset($charset);
   $content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);

   /* 1: HTTP Content-Type: header */
   preg_match( '@([\w/+]+)(;\s*charset=(\S+))?@i', $content_type, $matches );
   if ( isset( $matches[3] ) )
       $charset = $matches[3];

   /* 2: <meta> element in the page */
    if ( !isset($charset) )
    {
        preg_match( '@<meta\s+http-equiv="Content-Type"\s+content="([\w/]+)(;\s*charset=([^\s"]+))?@i', $data, $matches );
        if ( isset( $matches[3] ) )
           $charset = $matches[3];
    }

    /* 3: <xml> element in the page */
    if ( !isset($charset) )
    {
    preg_match( '@<\?xml.+encoding="([^\s"]+)@si', $data, $matches );
    if ( isset( $matches[1] ) )
        $charset = $matches[1];
    }

    /* 4: PHP's heuristic detection */
   if ( !isset($charset) )
   {
      $encoding = mb_detect_encoding($data);
      if ($encoding)
        $charset = $encoding;
   }

   /* 5: Default for HTML */
   if ( !isset($charset) )
   {
       if (strstr($content_type, "text/html") === 0)
          $charset = "ISO 8859-1";
   }

   /* Convert it if it is anything but UTF-8 */
   /* You can change "UTF-8"  to "UTF-8//IGNORE" to
   ignore conversion errors and still output something reasonable */
   if ( isset($charset) && strtoupper($charset) != "UTF-8" )
      $data = iconv($charset, 'UTF-8', $data);

   return $data;
}

curl_multi_download(array('[LINKS TO SCRAPE]'), 'process_response');

function process_response( $info, $response ) 
{

if ( $info['http_code'] != 200 )
{
    echo "Error retrieving URL " . $info['url'] . "<br/>";
    return;
};
//var_dump($info);
//var_dump($response);
    $fullArr = array();
    $infos = scrape($response,$dayTime,$date,$hour);
    return $infos;
}

function curl_download( $url )
{
  $ch = curl_init();
  curl_setopt($ch, CURLOPT_URL, $url);
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
  $data = self::curl_exec($ch);
  curl_close($ch);

  return $data;
}

function scrape($data,$dayTime,$date,$hour) {

  $html = $data;
  $dom = new DOMDocument();
  $html = $dom->loadHTML($html);

  $dom->preserveWhiteSpace = false;

  $result = array();

  $tables = $dom->getElementsByTagName('table');
  foreach($tables as $table) {
     if ($table->hasAttribute('class')) {
        $rows = $dom->getElementsByTagName("tr");
            foreach($rows as $row) {

                $cols = $row->getElementsByTagName('td'); 
                $aref = $row->getElementsByTagName('a');

                foreach($aref as $profile) {
                    if (($cols->item(0)->nodeValue != "Seller") and ($cols->item(1)->nodeValue != "Ratio") and ($cols->item(2)->nodeValue != "Amount")) {
                        $seller =  $cols->item(0)->nodeValue; 
                        $sellerID = intval(preg_replace('/[^0-9]+/', '', ($profile->getAttribute( 'href' ))), 10);
                        $sellerType = sellerTypeExtract($profile->getAttribute( 'href' ));
                        $currencysell = preg_replace('/[.0-9]/','',($cols->item(1)->nodeValue)); 
                        $amount =  floatval($cols->item(1)->nodeValue);
                        $rate = rateExtractor($cols->item(2)->nodeValue);
                        $currencybought = preg_replace('/[.0-9'.$currencysell.'=]/','',($cols->item(2)->nodeValue));
                        //$marketLink = $nodes;
                        $server = "servername";
                        $tenGoldref = tenGoldref($rate);
                    }

                    $result = array (
                        'seller' => $seller,
                        'sellerprofile'=> $sellerID,
                        'sellertype' => $sellerType,
                        'currencysold' => $currencysell,
                        'amount'=> $amount,
                        'rate'=> $rate,
                        'currencybought'=> $currencybought,
                        //'marketLink' => $marketLink,
                        'server' => $server,
                        'tengold' => $tenGoldref,
                        'timeMoment' => $dayTime,
                        'date' => $date,
                        'hour' => $hour
                    );
                }
            } 
        }
    }       
    return $result;
 }

为了节省时间，我尝试实现multi_cURL，我从这个简单的脚本开始

function urlBuilder() {

$countryList = array();
for ($i = 1; $i <= 50; $i++) {
    echo "<br>".$i."<br>";
    $origin = "[VARIABLE URL TO SCRAP]";
    if (empty($countryList)) {
        $countryList = array($origin);
    } else {
        (array_push($countryList,$origin));
    }
}
return $countryList;
}

function start() {

   $fullArr = array();

   $nodes = urlBuilder();
   $node_count = count($nodes);

   $curl_arr = array();
   $master = curl_multi_init();

   for($i = 0; $i < $node_count; $i++)
      {
        $url = $nodes[$i];
        $curl_arr[$i] = curl_init($url);
        curl_setopt($curl_arr[$i], CURLOPT_RETURNTRANSFER, true);
        curl_multi_add_handle($master, $curl_arr[$i]);
      } 
   do {
        curl_multi_exec($master,$running);
      } while($running > 0);
   echo "results: ";
   for($i = 0; $i < $node_count; $i++)
     {
        $results = curl_multi_getcontent  ( $curl_arr[$i]  );
        $infos = scrape($results,$nodes[$i],$dayTime,$date,$hour);

        if (empty($fullArr)) {
           $fullArr = array($infos);
        } else {
        (array_push($fullArr,$infos));
        }
   }

   echo "<br><br>*******************NOW THERE SHOULD BE FULL ARRAY*************************<br>";
   echo 'done';
   echo "<br>";
   print_r($fullArr);
   echo "<br><br>*******************END FULL ARRAY*************************<br>";

函数urlBuilder（）{
$countryList=array（）；
（i=1；i=0）；
回声“结果：”；
对于（$i=0；$i<$node_count；$i++）
{
$results=curl\u multi\u getcontent（$curl\u arr[$i]）；
$infos=scrap（$results，$nodes[$i]，$days，$date，$hour）；
如果（空（$fullArr））{
$fullArr=阵列（$infos）；
}否则{
（阵列推送（$fullArr，$infos））；
}
}
echo“

*******************现在应该有完整的数组******************************************
”；
回音“完成”；
回声“
”；
印刷费（$fullArr）；
echo“

*********************************************************************************************
”；

但我遇到的第一个问题是，对于155个并行连接，最后的30/40发送回“NULL”数据（在90%的测试中并非每次都是如此）。 所以第一个问题是：脚本是否因为连接太多而返回空数据？

我想，这就是为什么我试图在代码中实现本教程（），以获得一个好的多连接处理程序，最后的代码是：

$urlCurl = '[url to scrap]';

$options = Array(
            CURLOPT_RETURNTRANSFER => TRUE,  // Setting cURL's option to return the webpage data
            CURLOPT_FOLLOWLOCATION => TRUE,  // Setting cURL to follow 'location' HTTP headers
            CURLOPT_AUTOREFERER => TRUE, // Automatically set the referer where following 'location' HTTP headers
            CURLOPT_CONNECTTIMEOUT => 300,   // Setting the amount of time (in seconds) before the request times out
            CURLOPT_TIMEOUT => 300,  // Setting the maximum amount of time for cURL to execute queries
            CURLOPT_MAXREDIRS => 10, // Setting the maximum number of redirections to follow
            CURLOPT_USERAGENT => "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1a2pre) Gecko/2008073000 Shredder/3.0a2pre ThunderBrowse/3.2.1.8",  // Setting the useragent
            CURLOPT_URL => $urlCurl, // Setting cURL's URL option with the $url variable passed into the function
        );

$chCurl = curl_init($urlCurl);
curl_setopt_array($chCurl, $options);   // Setting cURL's options using the previously assigned array data in $options
$resultChCurl = curl_exec($chCurl);
$html = $resultChCurl;

$dom = new DOMDocument();
$html = $dom->loadHTML($html);

$dom->preserveWhiteSpace = false;

//$result = array();

$tables = $dom->getElementsByTagName('table');
foreach($tables as $table) {
    if ($table->hasAttribute('class')) {
        $rows = $dom->getElementsByTagName("tr");
            foreach($rows as $row) {

                $cols = $row->getElementsByTagName('td'); 
                $aref = $row->getElementsByTagName('a');

                foreach($aref as $profile) {
                    if (($cols->item(0)->nodeValue != "Seller") and ($cols->item(1)->nodeValue != "Ratio") and ($cols->item(2)->nodeValue != "Amount")) {
                        $seller =  $cols->item(0)->nodeValue; 
                        $sellerID = intval(preg_replace('/[^0-9]+/', '', ($profile->getAttribute( 'href' ))), 10);
                        $sellerType = sellerTypeExtract($profile->getAttribute( 'href' ));
                        $currencysell = preg_replace('/[.0-9]/','',($cols->item(1)->nodeValue)); 
                        $amount =  floatval($cols->item(1)->nodeValue);
                        $rate = rateExtractor($cols->item(2)->nodeValue);
                        $currencybought = preg_replace('/[.0-9'.$currencysell.'=]/','',($cols->item(2)->nodeValue));
                        $marketLink = $urlCurl;
                        $server = "servername";
                        $tenGoldref = tenGoldref($rate);
                    }

                    $result[] = array (
                        'seller' => $seller,
                        'sellerprofile'=> $sellerID,
                        'sellertype' => $sellerType,
                        'currencysold' => $currencysell,
                        'amount'=> $amount,
                        'rate'=> $rate,
                        'currencybought'=> $currencybought,
                        'marketLink' => $marketLink,
                        'server' => $server,
                        'tengold' => $tenGoldref,
                        'timeMoment' => $dayTime,
                        'date' => $date,
                        'hour' => $hour
                    );
                }
            } 
        }
    }}

    print_r($result);

 function curl_multi_download(array $urls, callable $callback, array $custom_options = array())
 {
     $time_start_insert = microtime(true);

     // make sure the rolling window isn't greater than the # of urls
     $rolling_window = 50;
     $rolling_window = (sizeof($urls) < $rolling_window) ? sizeof($urls) : $rolling_window;

     $master = curl_multi_init();
     $curl_arr = array();
     $options = array(
         CURLOPT_RETURNTRANSFER => true,
         CURLOPT_FOLLOWLOCATION => true,
         CURLOPT_MAXREDIRS => 5,
     ) + $custom_options;

     // start the first batch of requests
     for ( $i = 0; $i < $rolling_window; $i++ )
     {
        $ch = curl_init();
        $options[CURLOPT_URL] = $urls[$i];
        curl_setopt_array($ch, $options);
        curl_multi_add_handle($master, $ch);
    }
  do
    {
  while(($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM);
  if($execrun != CURLM_OK)
        break;
  // a request was just completed -- find out which one
  while( $done = curl_multi_info_read($master) )
       {
        $info = curl_getinfo($done['handle']);

        // request successful.  process output using the callback function.
        $output = curl_multi_getcontent($done['handle']);
        $extract = call_user_func_array($callback, array($info, $output));

        if (empty($fullArr)) {
            $fullArr = array($extract);
        } else {
            (array_push($fullArr,$extract));
        }

        echo "<br><br>*******************NOW THERE SHOULD BE FULL ARRAY*************************<br>";
        echo 'done';
        echo "<br>";
        print_r($fullArr);
        echo "<br><br>*******************END FULL ARRAY*************************<br>";
        $time_end_insert = microtime(true);
        //dividing with 60 will give the execution time in minutes other wise seconds
        $execution_time_insert = ($time_end_insert - $time_start_insert)/60;
        //execution time of the script
        echo '<br><br><p class="finalTime">++++++++ total execution time: '.round($execution_time_insert,2,PHP_ROUND_HALF_DOWN).' minutes +++++++++++</p><br>';

        if ( isset($urls[$i+1]) )
        {
            // start a new request (it's important to do this before removing the old one)
            $ch = curl_init();
            $options[CURLOPT_URL] = $urls[$i++];  // increment i
            curl_setopt_array($ch, $options);
            curl_multi_add_handle($master, $ch);
        }

        // remove the curl handle that just completed
        curl_multi_remove_handle($master, $done['handle']);
      }
   } while ($running);

   curl_multi_close($master);
   return true;
}

function curl_multi_getcontent_utf8( $ch )
{
   $data = curl_multi_getcontent( $ch );
   if ( !is_string($data) )
      return $data;

   unset($charset);
   $content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);

   /* 1: HTTP Content-Type: header */
   preg_match( '@([\w/+]+)(;\s*charset=(\S+))?@i', $content_type, $matches );
   if ( isset( $matches[3] ) )
       $charset = $matches[3];

   /* 2: <meta> element in the page */
    if ( !isset($charset) )
    {
        preg_match( '@<meta\s+http-equiv="Content-Type"\s+content="([\w/]+)(;\s*charset=([^\s"]+))?@i', $data, $matches );
        if ( isset( $matches[3] ) )
           $charset = $matches[3];
    }

    /* 3: <xml> element in the page */
    if ( !isset($charset) )
    {
    preg_match( '@<\?xml.+encoding="([^\s"]+)@si', $data, $matches );
    if ( isset( $matches[1] ) )
        $charset = $matches[1];
    }

    /* 4: PHP's heuristic detection */
   if ( !isset($charset) )
   {
      $encoding = mb_detect_encoding($data);
      if ($encoding)
        $charset = $encoding;
   }

   /* 5: Default for HTML */
   if ( !isset($charset) )
   {
       if (strstr($content_type, "text/html") === 0)
          $charset = "ISO 8859-1";
   }

   /* Convert it if it is anything but UTF-8 */
   /* You can change "UTF-8"  to "UTF-8//IGNORE" to
   ignore conversion errors and still output something reasonable */
   if ( isset($charset) && strtoupper($charset) != "UTF-8" )
      $data = iconv($charset, 'UTF-8', $data);

   return $data;
}

curl_multi_download(array('[LINKS TO SCRAPE]'), 'process_response');

function process_response( $info, $response ) 
{

if ( $info['http_code'] != 200 )
{
    echo "Error retrieving URL " . $info['url'] . "<br/>";
    return;
};
//var_dump($info);
//var_dump($response);
    $fullArr = array();
    $infos = scrape($response,$dayTime,$date,$hour);
    return $infos;
}

function curl_download( $url )
{
  $ch = curl_init();
  curl_setopt($ch, CURLOPT_URL, $url);
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
  $data = self::curl_exec($ch);
  curl_close($ch);

  return $data;
}

function scrape($data,$dayTime,$date,$hour) {

  $html = $data;
  $dom = new DOMDocument();
  $html = $dom->loadHTML($html);

  $dom->preserveWhiteSpace = false;

  $result = array();

  $tables = $dom->getElementsByTagName('table');
  foreach($tables as $table) {
     if ($table->hasAttribute('class')) {
        $rows = $dom->getElementsByTagName("tr");
            foreach($rows as $row) {

                $cols = $row->getElementsByTagName('td'); 
                $aref = $row->getElementsByTagName('a');

                foreach($aref as $profile) {
                    if (($cols->item(0)->nodeValue != "Seller") and ($cols->item(1)->nodeValue != "Ratio") and ($cols->item(2)->nodeValue != "Amount")) {
                        $seller =  $cols->item(0)->nodeValue; 
                        $sellerID = intval(preg_replace('/[^0-9]+/', '', ($profile->getAttribute( 'href' ))), 10);
                        $sellerType = sellerTypeExtract($profile->getAttribute( 'href' ));
                        $currencysell = preg_replace('/[.0-9]/','',($cols->item(1)->nodeValue)); 
                        $amount =  floatval($cols->item(1)->nodeValue);
                        $rate = rateExtractor($cols->item(2)->nodeValue);
                        $currencybought = preg_replace('/[.0-9'.$currencysell.'=]/','',($cols->item(2)->nodeValue));
                        //$marketLink = $nodes;
                        $server = "servername";
                        $tenGoldref = tenGoldref($rate);
                    }

                    $result = array (
                        'seller' => $seller,
                        'sellerprofile'=> $sellerID,
                        'sellertype' => $sellerType,
                        'currencysold' => $currencysell,
                        'amount'=> $amount,
                        'rate'=> $rate,
                        'currencybought'=> $currencybought,
                        //'marketLink' => $marketLink,
                        'server' => $server,
                        'tengold' => $tenGoldref,
                        'timeMoment' => $dayTime,
                        'date' => $date,
                        'hour' => $hour
                    );
                }
            } 
        }
    }       
    return $result;
 }

函数curl\u multi\u下载（数组$URL，可调用$callback，数组$custom\u options=array（））
{
$time\u start\u insert=微时间（真）；
//确保滚动窗口不大于URL的#
$rolling_window=50；
$rolling\u window=（sizeof（$url）<$rolling\u window）？sizeof（$url）：$rolling\u window；
$master=curl_multi_init（）；
$curl_arr=array（）；
$options=array(
CURLOPT_RETURNTRANSFER=>true，
CURLOPT_FOLLOWLOCATION=>true，
CURLOPT_MAXREDIRS=>5，
)+$custom_选项；
//启动第一批请求
对于（$i=0；$i<$rolling_window；$i++）
{
$ch=curl_init（）；
$options[CURLOPT_URL]=$URL[$i]；
curl_setopt_数组（$ch$options）；
卷曲多加手柄（$master，$ch）；
}
做
{
而（$execrun=curl\u multi\u exec（$master，$running））==CURLM\u CALL\u multi\u PERFORM）；
如果（$execrun！=CURLM_OK）
打破
//一个请求刚刚完成--找出哪一个
而（$done=curl\u multi\u info\u read（$master））
{
$info=curl_getinfo（$done['handle']）；
//请求成功。使用回调函数处理输出。
$output=curl\u multi\u getcontent（$done['handle']）；
$extract=call_user_func_数组（$callback，array（$info，$output））；
如果（空（$fullArr））{
$fullArr=数组（$extract）；
}否则{
（数组推送（$fullArr，$extract））；
}
echo“

*******************现在应该有完整的数组******************************************
”；
回音“完成”；
回声“
”；
印刷费（$fullArr）；
echo“

*********************************************************************************************
”；
$time\u end\u insert=微时间（真）；
//除以60将给出以分钟或秒为单位的执行时间
$execution\u time\u insert=（$time\u end\u insert-$time\u start\u insert）/60；
//脚本的执行时间
echo'

+total execution time:'.round（$execution\u time\u insert，2，PHP\u round\u HALF\u DOWN）。'minutes+；
如果（isset（$URL[$i+1]））
{
//启动新请求（在删除旧请求之前执行此操作很重要）
$ch=curl_init（）；
$options[CURLOPT_URL]=$URL[$i++]；//增量i
curl_setopt_数组（$ch$options）；
卷曲多加手柄（$master，$ch）；
}
//移除刚刚完成的卷曲手柄
卷曲多重移除手柄（$master，$done['handle'）；
}
}同时（$运行）；
卷曲多合（$master）；
返回true；
}
函数curl\u multi\u getcontent\u utf8（$ch）
{
$data=curl\u multi\u getcontent（$ch）；
如果（！是字符串（$data））
返回$data；
未设置（$字符集）；
$content\u type=curl\u getinfo（$ch，CURLINFO\u content\u type）；
/*1:HTTP内容类型：标头*/
preg_match（'@（[\w/+]+）（；\s*字符集=（\s+）））@i'，$content_type，$matches）；
如果（isset（$matches[3]））
$charset=$matches[3]；
/*2：页面中的元素*/
如果（！isset（$charset））
{
preg_match（'@loadHTML（$html）；
$dom->preserveWhiteSpace=false；
$result=array（）；
$tables=$dom->getElementsByTagName（'table'）；
foreach（$tables作为$table）{
如果（$table->hasAttribute（'class'））{
$rows=$dom->getElementsByTagName（“tr”）；
foreach（$行作为$行）{
$cols=$row->getElementsByTagName（'td'）；
$aref=$row->getElementsByTagName（'a'）；
foreach（$aref作为$profile）{
如果（$cols->item（0）->nodeValue！=“卖方”）和（$cols->item（1）->nodeValue！=“比率”）和（$cols->item（2）->nodeValue！=“金额”））{
$seller=$cols->item（0）->nodeValue；
$sellerID=intval（preg_replace（'/[^0-9]+/'，''，（$profile->getAttribute（'href'）），10）；
$sellerType=sellerTypeExtract（$profile->getAttribute（'href'）；
$currencysell=preg_replace（'/[0-9]/'，''，（$cols->item（1）->nodeValue））；
$amount=floatval（$cols->item（1）->nodeValue）；
$rate=rateExtractor（$cols->item（2）->nodeValue）；
$currencybo