Rolling cURL: PHP并发最佳实践
原文：http://blogread.cn/it/article.php?id=5501
在实际项目或者自己编写小工具(比如新闻聚合,商品价格监控,比价)的过程中, 通常需要从第3方网站或者API接口获取数据, 在需要处理1个URL队列时, 为了提高性能, 可以采用cURL提供的curl_multi_*族函数实现简单的并发.
本文将探讨两种具体的实现方法, 并对不同的方法做简单的性能对比.
1. 经典cURL并发机制及其存在的问题
经典的cURL实现机制在网上很容易找到, 比如参考PHP在线手册的如下实现方式:
/**
 * Fetch a list of URLs concurrently with curl_multi, wait until ALL transfers
 * have finished, and only then run callback() on each response body.
 *
 * @param array $urls  URLs to request. A URL that appears more than once is
 *                     requested only once, because results are keyed by URL.
 * @param int   $delay Forwarded unchanged as the second argument of callback().
 *
 * @return array Map of URL => value returned by callback() for that URL's body.
 */
function classic_curl($urls, $delay)
{
    // Nothing to fetch: skip creating a multi handle at all.
    if (empty($urls)) {
        return array();
    }

    $queue = curl_multi_init();
    $map = array();

    foreach ($urls as $url) {
        // A second handle for the same URL would overwrite $map[$url] and the
        // first handle would never be removed or closed.
        if (isset($map[$url])) {
            continue;
        }

        // create cURL resources
        $ch = curl_init();

        // set URL and other appropriate options
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_TIMEOUT, 1);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_NOSIGNAL, true);

        // add handle
        curl_multi_add_handle($queue, $ch);
        $map[$url] = $ch;
    }

    $active = null;

    // execute the handles
    do {
        $mrc = curl_multi_exec($queue, $active);
    } while ($mrc === CURLM_CALL_MULTI_PERFORM);

    while ($active > 0 && $mrc === CURLM_OK) {
        // curl_multi_select() may return -1 immediately. Skipping
        // curl_multi_exec() in that case would leave $active unchanged and
        // spin forever, so back off briefly and always drive the transfers.
        if (curl_multi_select($queue, 0.5) === -1) {
            usleep(1000);
        }

        do {
            $mrc = curl_multi_exec($queue, $active);
        } while ($mrc === CURLM_CALL_MULTI_PERFORM);
    }

    $responses = array();
    foreach ($map as $url => $ch) {
        $responses[$url] = callback(curl_multi_getcontent($ch), $delay);
        curl_multi_remove_handle($queue, $ch);
        curl_close($ch);
    }

    curl_multi_close($queue);

    return $responses;
}
首先将所有的URL压入并发队列, 然后执行并发过程, 等待所有请求接收完之后进行数据的解析等后续处理. 在实际的处理过程中, 受网络传输的影响, 部分URL的内容会优先于其他URL返回, 但是经典cURL并发必须等待最慢的那个URL返回之后才开始处理, 等待也就意味着CPU的空闲和浪费. 如果URL队列很短, 这种空闲和浪费还处在可接受的范围, 但如果队列很长, 这种等待和浪费将变得不可接受.
2. 改进的Rolling cURL并发方式
仔细分析不难发现经典cURL并发还存在优化的空间, 优化的方式是当某个URL请求完毕之后尽可能快的去处理它, 边处理边等待其他的URL返回, 而不是等待那个最慢的接口返回之后才开始处理等工作, 从而避免CPU的空闲和浪费. 闲话不多说, 下面贴上具体的实现:
/**
 * Fetch a list of URLs concurrently with curl_multi and run callback() on each
 * response as soon as that transfer completes, instead of waiting for the
 * slowest one.
 *
 * @param array $urls  URLs to request.
 * @param int   $delay Forwarded unchanged as the second argument of callback().
 *
 * @return array Map of URL => array('info' => curl_getinfo() result,
 *               'error' => curl_error() string, 'results' => callback() value).
 *               URLs whose transfer never completed (multi-stack error) are absent.
 */
function rolling_curl($urls, $delay)
{
    // Nothing to fetch: skip creating a multi handle at all.
    if (empty($urls)) {
        return array();
    }

    $queue = curl_multi_init();

    // handle key => array('handle' => cURL handle, 'url' => requested URL).
    // Entries are dropped as transfers complete, so whatever is left at the
    // end still needs to be released.
    $map = array();

    foreach ($urls as $url) {
        $ch = curl_init();

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_TIMEOUT, 1);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_NOSIGNAL, true);

        curl_multi_add_handle($queue, $ch);

        // cURL handles are resources before PHP 8 and CurlHandle objects from
        // PHP 8 on; objects cannot be cast to string, so derive the key per type.
        $key = is_object($ch) ? spl_object_id($ch) : (int) $ch;
        $map[$key] = array('handle' => $ch, 'url' => $url);
    }

    $responses = array();
    $active = 0;

    do {
        do {
            $code = curl_multi_exec($queue, $active);
        } while ($code === CURLM_CALL_MULTI_PERFORM);

        if ($code !== CURLM_OK) {
            break;
        }

        // a request was just completed -- find out which one
        while ($done = curl_multi_info_read($queue)) {
            $ch = $done['handle'];
            $key = is_object($ch) ? spl_object_id($ch) : (int) $ch;

            // get the info and content returned on the request
            $info = curl_getinfo($ch);
            $error = curl_error($ch);
            $results = callback(curl_multi_getcontent($ch), $delay);
            $responses[$map[$key]['url']] = compact('info', 'error', 'results');

            // remove the curl handle that just completed
            curl_multi_remove_handle($queue, $ch);
            curl_close($ch);
            unset($map[$key]);
        }

        // Block for data in / output; error handling is done by curl_multi_exec.
        // A -1 return means select could not wait, so sleep briefly instead of
        // spinning the CPU.
        if ($active > 0 && curl_multi_select($queue, 0.5) === -1) {
            usleep(1000);
        }
    } while ($active);

    // Release any handles left behind when the loop ended on a multi error.
    foreach ($map as $entry) {
        curl_multi_remove_handle($queue, $entry['handle']);
        curl_close($entry['handle']);
    }

    curl_multi_close($queue);

    return $responses;
}
3. 两种并发实现的性能对比
改进前后的性能对比试验在LINUX主机上进行, 测试时使用的并发队列如下:
http://item.taobao.com/item.htm?id=14392877692
http://item.taobao.com/item.htm?id=16231676302
http://item.taobao.com/item.htm?id=17037160462
http://item.taobao.com/item.htm?id=5522416710
http://item.taobao.com/item.htm?id=16551116403
http://item.taobao.com/item.htm?id=14088310973
简要说明下实验设计的原则和性能测试结果的格式: 为保证结果的可靠, 每组实验重复20次, 在单次实验中, 给定相同的接口URL集合, 分别测量Classic(指经典的并发机制)和Rolling(指改进后的并发机制)两种并发机制的耗时(秒为单位), 耗时短者胜出(Winner), 并计算节省的时间(Excellence, 秒为单位)以及性能提升比例(Excel. %). 为了尽量贴近真实的请求而又保持实验的简单, 在对返回结果的处理上只是做了简单的正则表达式匹配, 而没有进行其他复杂的操作. 另外, 为了确定结果处理回调对性能对比测试结果的影响, 可以使用usleep模拟现实中比较复杂的数据处理逻辑(如提取, 分词, 写入文件或数据库等).
性能测试中用到的回调函数为:
/**
 * Result-processing callback used by the benchmark: runs one regular-expression
 * match over the response body, then sleeps to simulate heavier processing.
 *
 * @param string|null $data  Response body to scan.
 * @param int         $delay Time to sleep, in microseconds (passed to usleep()).
 *
 * @return array array('data' => $data as received, 'matches' => preg_match_all() captures).
 */
function callback($data, $delay)
{
    // NOTE(review): the pattern has no opening tag before the capture group; it
    // looks like a leading "<h3>" was stripped when the article was extracted
    // from HTML -- confirm against the original post before relying on it.
    // Cast only for the regex so a null body does not trigger a deprecation
    // notice; the returned 'data' is left exactly as received.
    preg_match_all('/(.+)<\/h3>/iU', (string) $data, $matches);
    usleep($delay);

    return compact('data', 'matches');
}
// Benchmark result from the article, data-processing callback with no delay:
// Rolling cURL is slightly faster, but the performance gain is not significant.
数据处理回调延迟5毫秒: Rolling Curl完胜, 性能提升40%左右.
通过上面的性能对比, 在处理URL队列并发的应用场景中Rolling cURL应该是更佳的选择, 并发量非常大(1000+)时, 可以控制并发队列的最大长度, 比如20, 每当1个URL返回并处理完毕之后立即加入1个尚未请求的URL到队列中, 这样写出来的代码会更加健壮, 不至于并发数太大而卡死或崩溃.
详细的实现请参考: http://code.google.com/p/rolling-curl/
推荐信息
- 【视频播放】Jplayer视频播放器的使用
- memcache内存原理
- Memcache技术分享：介绍、使用、存储、算法、优化....
- php常用正则表达式
- php性能监测模块XHProf
- 让CI框架支持service层
- 使用PHP生成带LOGO的个性化二维码图像
- 关于CodeIgniter你可能不知道的5个知识点
- Memcache 毫秒级超时及其他常见问题汇总
- [PHP笔记]PHPQuery一个处理DOM的利器
热门信息
- nohup: redirecting stderr to stdou....
- 使用log_format为Nginx服务器设置更详细的日志格式
- jquery easyUI--dataGrid-Json
- [原创]仿Google Reader、新浪微博、腾讯微博的....
- 利用Keepalived+mysql构建高可用MySQL双主自动....
- Nginx+keepalived实现负载均衡和双机热备高可用
- jquery实现页面加载进度条
- Rolling cURL: PHP并发最佳实践
- codeigniter 路由终极优化(url rewrite)
- linux下设置ssh无密码登录
最近更新
- PHP获取用户的真实IP，并判断是否内网IP
- PHP 错误日志 error_log
- 利用bigpipe机制实现页面模块的异步渲染 chunked技术
- php控制文件下载速度
- js + php 读取、播放视频流 兼容firefox，c....
- 【视频播放】Jplayer视频播放器的使用
- UNICODE 与 UTF-8 的关系
- memcache内存原理
- Memcache技术分享：介绍、使用、存储、算法、优化....
- php使用mb_detect_encoding检测字符串编码
评论