http://goo.gl/R4EeIW
如果用網頁介面跑到一半會中斷，檢查 Apache 的 httpd.conf 及 httpd-mpm.conf
...
Timeout 300000
KeepAlive On
MaxKeepAliveRequests 100
KeepAliveTimeout 0
...
<IfModule mpm_winnt_module>
    # default 150
    ThreadsPerChild 350
    # default 0
    MaxRequestsPerChild 10000
    # 8MB <--- for preg_match
    ThreadStackSize 8388608
    ...
</IfModule>
<?php
/**
 * Crawl the iPeen search-result pages and print one record per shop.
 *
 * For every result page 1..$page the script extracts each shop link, fetches
 * the shop page, and echoes the shop name, shop id, two link texts and the
 * latitude/longitude found in the page's meta tags, followed by a separator.
 */

// The crawl is long-running and holds whole pages in memory, so lift the limits.
ini_set('max_execution_time', 0);
ini_set('memory_limit', '2G');

// Push output to the client as it is produced; errors are suppressed because
// there may be no active output buffer to flush.
@ob_end_flush();
@ob_implicit_flush();

// Number of search-result pages to crawl.
$page = 2;

// Search-result page, one match per shop heading:
//   [1] relative shop URL (starts with /shop/), [2] numeric shop id,
//   [3] inner content of the shop link (printed as the shop name).
$regexp1 = '/<h3\s+id="shop_h3_\d+"\s+class="name">(?:[\s\S]*?)<a\s+href="(\/shop\/(\d+)[^"]+)(?:[\s\S]*?)[^>]*>([\s\S]*?)<\/a>(?:[\s\S]*?)<\/h3>/i';

// Shop page:
//   [1] latitude and [2] longitude from the place:location meta tags,
//   [3] text of the link with data-action="up_small_classify" (presumably the category),
//   [4] text of the link with data-action="up_address" (presumably the address).
// The content captures use [^"]* so they cannot run past the closing quote.
$regexp2 = '/<meta\s+property="place:location:latitude"\s+?content="([^"]*)"\s?\/?>(?:[\s\S]*?)longitude"\s+?content="([^"]*)"\s?\/?>(?:[\s\S]*?)<a[^>]+?data-action="up_small_classify"[^>]*?>([\s\S]*?)<\/a>(?:[\s\S]*?)<a[^>]+?data-action="up_address"[^>]*?>([\s\S]*?)<\/a>/i';

$www      = 'http://www.ipeen.com.tw';
$www_page = $www . '/search/taiwan/000/4-7-0-0/?p=';

for ($i = 1; $i <= $page; $i++) {
    $listing = get_html($www_page . $i);

    // Skip result pages that could not be downloaded or contain no shops.
    if (!is_string($listing) || !preg_match_all($regexp1, $listing, $t)) {
        continue;
    }

    foreach ($t[1] as $key => $url) {
        $detail = get_html($www . $url);

        // When the shop page is unavailable or does not match, still print the
        // record with empty detail fields instead of reading undefined offsets.
        if (!is_string($detail) || preg_match($regexp2, $detail, $info) !== 1) {
            $info = ['', '', '', '', ''];
        }

        echo trim($t[3][$key]) . "(" . $t[2][$key] . ")\n"
            . trim($info[3]) . "\n"
            . trim($info[4]) . "\n"
            . $info[1] . ", " . $info[2]
            . "\n=================================" . PHP_EOL;
    }
}

/**
 * Download a URL with an HTTP GET request.
 *
 * @param string $url Absolute URL to fetch.
 *
 * @return string|false|null The response body, false when file_get_contents()
 *                           fails, or null when $url is empty.
 */
function get_html($url = '')
{
    // The request options never change, so the stream context is built once.
    static $context = null;

    if (empty($url)) {
        return null;
    }

    if ($context === null) {
        $context = stream_context_create([
            'http' => [
                'method'        => 'GET',
                'max_redirects' => 200,
                'header'        => "User-Agent: 安全衛士360+hao123\r\n",
            ],
        ]);
    }

    return file_get_contents($url, false, $context);
}