最近写个程序需要抓取多个远程页面,如果按照正常的 php 程序需要一个页面抓完再抓下一个页面,但一个页面的抓取时间又比较长,如果能像 js 一样用 ajax 效果就好了。查了下,发现可以使用 curl 的多线程(curl_multi)来模拟并发访问,可以加快访问的速度。
/**
 * Fetch several URLs concurrently with the curl_multi API.
 *
 * One easy handle is created per URL and all of them are driven together by
 * a single multi handle. Each finished transfer is handed to callback(),
 * which is NOT defined in this block and must be provided elsewhere.
 *
 * @param array $urls    URLs to fetch. Responses are keyed by URL, so a URL
 *                       listed more than once yields a single entry.
 * @param mixed $delay   Passed through unchanged as the second argument of
 *                       callback(); this function does not interpret it.
 * @param int   $timeout Per-request timeout in seconds (CURLOPT_TIMEOUT).
 *                       Defaults to 1, the value that used to be hard-coded.
 *
 * @return array Map of URL => array('info' => curl_getinfo() data,
 *               'error' => curl_error() text, 'results' => callback() return).
 *               Transfers that never completed (multi-stack error) are absent.
 */
function rolling_curl($urls, $delay, $timeout = 1)
{
    // curl handles are objects (CurlHandle) on PHP 8+ and resources before
    // that. Objects cannot be cast to string, so derive an integer key that
    // works for both representations.
    $handle_id = function ($handle) {
        return is_object($handle) ? spl_object_id($handle) : (int) $handle;
    };

    $queue   = curl_multi_init();
    $pending = array(); // handle id => array('handle' => ..., 'url' => ...)

    foreach ($urls as $url) {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_NOSIGNAL, true);
        curl_multi_add_handle($queue, $ch);
        $pending[$handle_id($ch)] = array('handle' => $ch, 'url' => $url);
    }

    $responses = array();
    $active    = 0;

    do {
        // Older libcurl versions ask to be called again immediately.
        do {
            $code = curl_multi_exec($queue, $active);
        } while ($code == CURLM_CALL_MULTI_PERFORM);

        if ($code != CURLM_OK) {
            break;
        }

        // Drain every transfer that has finished since the last pass.
        while ($done = curl_multi_info_read($queue)) {
            $handle = $done['handle'];
            $id     = $handle_id($handle);

            $info    = curl_getinfo($handle);
            $error   = curl_error($handle);
            $results = callback(curl_multi_getcontent($handle), $delay);
            $responses[$pending[$id]['url']] = compact('info', 'error', 'results');

            curl_multi_remove_handle($queue, $handle);
            curl_close($handle);
            unset($pending[$id]);
        }

        // Block until there is socket activity. A return of -1 means select
        // could not wait at all; sleep briefly so the loop does not spin.
        if ($active > 0 && curl_multi_select($queue, 0.5) === -1) {
            usleep(1000);
        }
    } while ($active);

    // Only non-empty when the loop was left early on a multi-stack error:
    // release the transfers that never completed.
    foreach ($pending as $entry) {
        curl_multi_remove_handle($queue, $entry['handle']);
        curl_close($entry['handle']);
    }

    curl_multi_close($queue);

    return $responses;
}
这个函数可以直接使用(函数内部会调用 callback($content, $delay) 来处理每个页面的内容,使用前需要保证该函数已定义)。$delay 建议设成 5,单位是毫秒。
// Example: fetch three sites concurrently; 5 is handed to rolling_curl() as $delay.
$urls = array(
    "http://www.cnn.com/",
    "http://www.canada.com/",
    "http://www.yahoo.com/",
);
rolling_curl($urls, 5);
你判断是否有货的条件是怎么写的?
kimsufi是判断购买页面的源代码是否有提交按钮(没货的时候是没有提交按钮的)