http://goo.gl/R4EeIW
如果用網頁介面跑到一半會中斷,檢查Apache的 httpd.conf 及 httpd-mpm.conf
... Timeout 300000 KeepAlive On MaxKeepAliveRequests 100 KeepAliveTimeout 0 ...ThreadsPerChild 350 #default 150 MaxRequestsPerChild 10000 #default 0 ThreadStackSize 8388608 #8MB <--- fmodule="" preg_match=""> ... --->
// Long-running crawl: remove the execution time limit and raise the memory cap.
ini_set('max_execution_time', 0);
ini_set('memory_limit', '2G');

// Stream output as it is produced. ob_end_flush() raises a notice when no
// buffer is active, so check the nesting level instead of silencing it with @.
if (ob_get_level() > 0) {
    ob_end_flush();
}
ob_implicit_flush();

// Number of search-result pages to walk (?p=1 .. ?p=$page).
$page = 2;

// NOTE(review): both patterns look like they lost literal HTML tag fragments
// when this script was published (e.g. the stray "]*>" at the start of
// $regexp1), and the echo below reads more capture groups than are visible
// here. They are kept byte-for-byte; verify them against the live markup.
$regexp1 = '/(?:[\s\S]*?)]*>([\s\S]*?)<\/a>(?:[\s\S]*?)<\/h3>/i';
$regexp2 = '/(?:[\s\S]*?)longitude"\s+?content="(.*)"\s?\/?>(?:[\s\S]*?)]+?data-action="up_small_classify"[^>]*?>([\s\S]*?)<\/a>(?:[\s\S]*?)]+?data-action="up_address"[^>]*?>([\s\S]*?)<\/a>/i';

$www = 'http://www.ipeen.com.tw';
$www_page = $www.'/search/taiwan/000/4-7-0-0/?p=';

for ($i = 1; $i <= $page; $i++) {
    // get_html() gives null/false when nothing could be downloaded; skip the
    // page rather than feeding a non-string to the regex engine.
    $listing = get_html($www_page.$i);
    if (!is_string($listing)) {
        continue;
    }

    // preg_match_all() returns 0 for "no match" and false on a PCRE error
    // (e.g. backtrack limit); in both cases there is nothing to iterate.
    $t = [];
    if (!preg_match_all($regexp1, $listing, $t) || empty($t[1])) {
        continue;
    }

    foreach ($t[1] as $key => $url) {
        // Group 1 of $regexp1 is used as the path of the detail page.
        $info = [];
        $detail = get_html($www.$url);
        if (is_string($detail)) {
            preg_match($regexp2, $detail, $info);
        }
        // Pad groups 0-4 so a failed fetch or a partial match prints blanks
        // instead of undefined-offset warnings; existing captures win.
        $info += array_fill(0, 5, '');

        // Groups 2 and 3 of the listing match may be absent (see note above).
        $heading = isset($t[3][$key]) ? trim($t[3][$key]) : '';
        $suffix = isset($t[2][$key]) ? $t[2][$key] : '';

        echo $heading."(".$suffix.")\n".trim($info[3])."\n".trim($info[4])."\n".$info[1].", ".$info[2]."\n=================================".PHP_EOL;
    }
}
/**
 * Download the content behind a URL using a GET request.
 *
 * The request is sent with a fixed User-Agent header and may follow up to
 * 200 redirects.
 *
 * @param string $url Address to fetch; an empty value short-circuits.
 * @return string|false|null Null when $url is empty, otherwise the result of
 *                           file_get_contents() (the content, or false on failure).
 */
function get_html($url='')
{
    if (!empty($url)) {
        $httpOptions = [
            'method' => 'GET',
            'max_redirects' => 200,
            'header' => "User-Agent: 安全衛士360+hao123\r\n",
        ];
        $context = stream_context_create(['http' => $httpOptions]);

        return file_get_contents($url, false, $context);
    }

    return null;
}
沒有留言:
張貼留言