http://goo.gl/R4EeIW
如果用網頁介面跑到一半會中斷,請檢查 Apache 的 httpd.conf 及 httpd-mpm.conf
...
Timeout 300000
KeepAlive On
MaxKeepAliveRequests 100
KeepAliveTimeout 0
...
# default 150
ThreadsPerChild 350
# default 0
MaxRequestsPerChild 10000
# 8MB
ThreadStackSize 8388608
...
// Crawl the iPeen search-result pages and print the captured fields of every
// entry linked from them, one record per entry.
//
// NOTE(review): both patterns below look truncated (the literal "<tag"
// fragments appear to have been stripped when this snippet was published).
// As written, $regexp1 defines one capture group while the loop reads groups
// 1-3, and $regexp2 defines three while the loop reads groups 1-4. Restore the
// original patterns before relying on the output; until then the groups that
// do not exist are printed as empty strings.

// Lift the execution-time limit and raise the memory ceiling for a long crawl.
ini_set('max_execution_time', 0);
ini_set('memory_limit', '2G');

// Send output to the client as soon as it is produced. Only close an output
// buffer when one is actually open, instead of silencing the notice with "@".
if (ob_get_level() > 0) {
    ob_end_flush();
}
ob_implicit_flush();

$page = 2; // number of search-result pages to walk through

$regexp1 = '/(?:[\s\S]*?)]*>([\s\S]*?)<\/a>(?:[\s\S]*?)<\/h3>/i';
$regexp2 = '/(?:[\s\S]*?)longitude"\s+?content="(.*)"\s?\/?>(?:[\s\S]*?)]+?data-action="up_small_classify"[^>]*?>([\s\S]*?)<\/a>(?:[\s\S]*?)]+?data-action="up_address"[^>]*?>([\s\S]*?)<\/a>/i';

$www = 'http://www.ipeen.com.tw';
$www_page = $www.'/search/taiwan/000/4-7-0-0/?p=';

for ($i = 1; $i <= $page; $i++) {
    $listing = get_html($www_page.$i);
    if (!is_string($listing)) {
        // The result page could not be downloaded; move on to the next one.
        continue;
    }

    // preg_match_all() yields 0 for "no match" and false on a PCRE error;
    // in both cases there is nothing to iterate over.
    if (!preg_match_all($regexp1, $listing, $t)) {
        continue;
    }

    foreach ($t[1] as $key => $url) {
        $info = [];
        $detail = get_html($www.$url);
        if (is_string($detail)) {
            preg_match($regexp2, $detail, $info);
        }

        // Give every group read below a default so that a failed download or
        // a partial match prints blanks instead of raising undefined-offset
        // warnings. The record layout itself is unchanged.
        $info += ['', '', '', '', ''];
        $group2 = isset($t[2][$key]) ? $t[2][$key] : '';
        $group3 = isset($t[3][$key]) ? $t[3][$key] : '';

        echo trim($group3)."(".$group2.")\n".trim($info[3])."\n".trim($info[4])."\n".$info[1].", ".$info[2]."\n=================================".PHP_EOL;
    }
}

/**
 * Download a URL with an HTTP GET request and return the response body.
 *
 * The request follows up to 200 redirects and sends a fixed User-Agent header.
 *
 * @param string $url URL to fetch.
 *
 * @return string|false|null The response body, false when file_get_contents()
 *                           fails, or null when $url is empty.
 */
function get_html($url = '')
{
    if (empty($url)) {
        return null;
    }

    $opts = stream_context_create([
        'http' => [
            'method'        => 'GET',
            'max_redirects' => 200,
            'header'        => "User-Agent: 安全衛士360+hao123\r\n",
        ],
    ]);

    return file_get_contents($url, false, $opts);
}
沒有留言:
張貼留言