(猜測是舊系統的網頁編碼為 UTF-8,但送出表單時的 charset 為 BIG5;瀏覽器在 BIG5 中找不到對應字元時,會改以 &#NNNNN; 形式的數值字元參照送出,因而造成資料中出現這類字串)
// Demo: turn decimal numeric character references (e.g. "&#34320;") embedded in a
// string back into the characters they stand for, using two approaches, and
// print the elapsed wall-clock milliseconds of each.
// The sample input mixes plain text with references to U+8610..U+861A.
String name = "王&#34320;老&#34321;先&#34322;生&#34323;王&#34324;老&#34325;先&#34326;生&#34327;王&#34328;老&#34320;先&#34329;生&#34330;";

// Decimal references of 1 to 7 digits: 7 digits is enough for the largest code
// point (1114111) and always fits in an int, so Integer.parseInt cannot overflow.
Pattern regex = Pattern.compile("&#(\\d{1,7});");
StringBuffer sb = new StringBuffer();

long time1 = System.currentTimeMillis();

// Approach 1: read the string as HTML with HTMLDocument/HTMLEditorKit and
// print the resulting document text.
HTMLDocument doc = new HTMLDocument();
new HTMLEditorKit().read(new StringReader(name), doc, 0);
System.out.println(doc.getText(0, doc.getLength()));

long time2 = System.currentTimeMillis();
System.out.println(time2 - time1); // 60 ms

// Approach 2: regular expression.
Matcher match = regex.matcher(name);
while (match.find()) {
    int codePoint = Integer.parseInt(match.group(1));
    // Leave the reference untouched when the number is not a Unicode code point;
    // Character.toChars would throw IllegalArgumentException for it.
    String decoded = Character.isValidCodePoint(codePoint)
            ? new String(Character.toChars(codePoint))
            : match.group();
    // quoteReplacement keeps a decoded '$' or '\' from being interpreted as a
    // group reference / escape by appendReplacement.
    match.appendReplacement(sb, Matcher.quoteReplacement(decoded));
}
match.appendTail(sb);
System.out.println(sb);

long time3 = System.currentTimeMillis();
System.out.println(time3 - time2); // 1 ms