使用搜索引擎搜索时,输入的字符通常会先经过 URL 编码再提交查询。本文给出了在 Java 中检测 URL 是否采用 UTF-8 编码,以及对 URL 进行 UTF-8 编码和解码的代码;使用时只需修改 main 函数中的 url,然后直接运行即可。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
package com.rain.demo; import java.io.UnsupportedEncodingException; public class UTF8{ /** * Utf8URL编码 * @param s * @return */ public static final String Utf8URLencode(String text) { StringBuffer result = new StringBuffer(); for (int i = 0; i < text.length(); i++) { char c = text.charAt(i); if (c >= 0 && c <= 255) { result.append(c); }else { byte[] b = new byte[0]; try { b = Character.toString(c).getBytes("UTF-8"); }catch (Exception ex) { } for (int j = 0; j < b.length; j++) { int k = b[j]; if (k < 0) k += 256; result.append("%" + Integer.toHexString(k).toUpperCase()); } } } return result.toString(); } /** * Utf8URL解码 * @param text * @return */ public static final String Utf8URLdecode(String text) { String result = ""; int p = 0; if (text!=null && text.length()>0){ text = text.toLowerCase(); p = text.indexOf("%e"); if (p == -1) return text; while (p != -1) { result += text.substring(0, p); text = text.substring(p, text.length()); if (text == "" || text.length() < 9) return result; result += CodeToWord(text.substring(0, 9)); text = text.substring(9, text.length()); p = text.indexOf("%e"); } } return result + text; } /** * utf8URL编码转字符 * @param text * @return */ private static final String CodeToWord(String text) { String result; if (Utf8codeCheck(text)) { byte[] code = new byte[3]; code[0] = (byte) (Integer.parseInt(text.substring(1, 3), 16) - 256); code[1] = (byte) (Integer.parseInt(text.substring(4, 6), 16) - 256); code[2] = (byte) (Integer.parseInt(text.substring(7, 9), 16) - 256); try { result = new String(code, "UTF-8"); }catch (UnsupportedEncodingException ex) { result = null; } } else { result = text; } return result; } /** * 编码是否有效 * @param text * @return */ private static final boolean Utf8codeCheck(String text){ String sign = ""; if (text.startsWith("%e")) for (int i = 0, p = 0; p != -1; i++) { p = text.indexOf("%", p); if (p != -1) p++; sign += p; } return sign.equals("147-1"); } /** * 判断是否Utf8Url编码 * @param text * @return */ public static final boolean 
isUtf8Url(String text) { text = text.toLowerCase(); int p = text.indexOf("%"); if (p != -1 && text.length() - p > 9) { text = text.substring(p, p + 9); } return Utf8codeCheck(text); } /** * 测试 * @param args */ public static void main(String[] args) { String url; url = "http://www.google.com/search?hl=zh-CN&newwindow=1&q=%E4%B8%AD%E5%9B%BD%E5%A4%A7%E7%99%BE%E7%A7%91%E5%9C%A8%E7%BA%BF%E5%85%A8%E6%96%87%E6%A3%80%E7%B4%A2&btnG=%E6%90%9C%E7%B4%A2&lr="; if(CharTools.isUtf8Url(url)){ System.out.println(CharTools.Utf8URLdecode(url)); } url = "http://www.google.com/search?hl=zh-cn&newwindow=1&q=中国大百科在线全文检索&btng=搜索&lr="; if(!CharTools.isUtf8Url(url)){ System.out.println(CharTools.Utf8URLencode(url)); } } } |