使用Java对UTF-8 URL进行编码、解码以及编码方式的判断

在用搜索引擎搜索时，输入的字符通常会先经过URL编码，然后再提交查询。本文给出一段Java代码，用于检测URL是否采用UTF-8编码，并对UTF-8 URL进行编码和解码；需要时修改main函数中的url，然后直接运行即可。

package com.rain.demo;
import java.io.UnsupportedEncodingException;
/**
 * Helpers for percent-encoding the non-ASCII characters of a URL as UTF-8, decoding such
 * sequences back to characters, and detecting whether a URL carries UTF-8 percent-encoding.
 *
 * <p>Only multi-byte UTF-8 sequences (2 to 4 bytes) are touched. ASCII characters and ASCII
 * escapes such as {@code %20} or {@code %26} are always passed through unchanged, so the
 * structure of the URL (separators, parameter names, letter case) is preserved.
 */
public class UTF8 {

  /** Upper-case hexadecimal digits used when emitting {@code %XX} escapes. */
  private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

  /**
   * Percent-encodes every non-ASCII character of {@code text} as its UTF-8 byte sequence.
   *
   * <p>ASCII characters (including {@code '%'}) are copied unchanged. The text is walked by
   * code point rather than by {@code char}, so a surrogate pair is encoded as one 4-byte
   * sequence. An unpaired surrogate has no UTF-8 form and is emitted as {@code %3F}, which is
   * what the UTF-8 encoder substitutes for it.
   *
   * @param text the text to encode; may be {@code null}
   * @return the encoded text, or the empty string when {@code text} is {@code null}
   */
  public static final String Utf8URLencode(String text) {
    if (text == null) {
      return "";
    }
    StringBuilder result = new StringBuilder(text.length());
    int i = 0;
    while (i < text.length()) {
      int codePoint = text.codePointAt(i);
      i += Character.charCount(codePoint);
      if (codePoint < 0x80) {
        result.append((char) codePoint);
        continue;
      }
      byte[] bytes =
          new String(Character.toChars(codePoint))
              .getBytes(java.nio.charset.StandardCharsets.UTF_8);
      for (byte b : bytes) {
        // Always two hex digits per byte; the masks also drop the sign extension of the byte.
        result.append('%').append(HEX_DIGITS[(b >> 4) & 0xF]).append(HEX_DIGITS[b & 0xF]);
      }
    }
    return result.toString();
  }

  /**
   * Replaces every well-formed percent-encoded multi-byte UTF-8 sequence in {@code text} with
   * the character it encodes.
   *
   * <p>Everything else is copied through exactly as written: letter case is preserved, ASCII
   * escapes stay escaped, and truncated or malformed sequences are left as literal text
   * instead of being dropped or raising an exception.
   *
   * @param text the text to decode; may be {@code null}
   * @return the decoded text, or the empty string when {@code text} is {@code null} or empty
   */
  public static final String Utf8URLdecode(String text) {
    if (text == null || text.isEmpty()) {
      return "";
    }
    StringBuilder result = new StringBuilder(text.length());
    int i = 0;
    while (i < text.length()) {
      char c = text.charAt(i);
      int codePoint = (c == '%') ? decodeSequenceAt(text, i) : -1;
      if (codePoint < 0) {
        result.append(c);
        i++;
      } else {
        result.appendCodePoint(codePoint);
        // Each encoded byte occupies three characters ("%XX").
        i += 3 * utf8Length(codePoint);
      }
    }
    return result.toString();
  }

  /**
   * Tells whether {@code text} contains UTF-8 percent-encoding.
   *
   * <p>Returns {@code true} when at least one percent-encoded multi-byte UTF-8 sequence is
   * present and every percent-encoded byte with the high bit set belongs to a well-formed
   * sequence. ASCII escapes such as {@code %20} and stray {@code '%'} characters are ignored.
   * A URL whose high bytes do not form valid UTF-8 (for example one encoded with a legacy
   * double-byte charset) therefore yields {@code false}.
   *
   * @param text the URL or URL fragment to inspect; may be {@code null}
   * @return {@code true} if the percent-encoded bytes of {@code text} look like UTF-8
   */
  public static final boolean isUtf8Url(String text) {
    if (text == null) {
      return false;
    }
    boolean found = false;
    int i = text.indexOf('%');
    while (i != -1) {
      int next = i + 1;
      if (percentByteAt(text, i) >= 0x80) {
        int codePoint = decodeSequenceAt(text, i);
        if (codePoint < 0) {
          return false;
        }
        found = true;
        // Skip the continuation bytes that were just validated.
        next = i + 3 * utf8Length(codePoint);
      }
      i = text.indexOf('%', next);
    }
    return found;
  }

  /**
   * Decodes the percent-encoded multi-byte UTF-8 sequence that starts at {@code pos}.
   *
   * @return the decoded code point, or {@code -1} when no well-formed 2 to 4 byte sequence
   *     starts at {@code pos} (bad hex digits, truncated input, wrong continuation byte,
   *     overlong form, surrogate, or a value above U+10FFFF)
   */
  private static int decodeSequenceAt(String text, int pos) {
    int lead = percentByteAt(text, pos);
    int length;
    if (lead >= 0xC2 && lead <= 0xDF) {
      length = 2;
    } else if (lead >= 0xE0 && lead <= 0xEF) {
      length = 3;
    } else if (lead >= 0xF0 && lead <= 0xF4) {
      length = 4;
    } else {
      return -1;
    }
    // Keep only the payload bits of the lead byte: 5, 4 or 3 bits for 2, 3 or 4 byte forms.
    int codePoint = lead & (0xFF >> (length + 1));
    for (int k = 1; k < length; k++) {
      int b = percentByteAt(text, pos + 3 * k);
      // A missing byte is reported as -1, which also fails this continuation-byte test.
      if ((b & 0xC0) != 0x80) {
        return -1;
      }
      codePoint = (codePoint << 6) | (b & 0x3F);
    }
    boolean surrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
    // A length mismatch means the value was written in an overlong form.
    if (surrogate || codePoint > 0x10FFFF || utf8Length(codePoint) != length) {
      return -1;
    }
    return codePoint;
  }

  /** Returns the number of UTF-8 bytes for a code point that is at least U+0080. */
  private static int utf8Length(int codePoint) {
    if (codePoint < 0x800) {
      return 2;
    }
    return codePoint < 0x10000 ? 3 : 4;
  }

  /**
   * Reads the {@code %XX} escape at {@code pos}.
   *
   * @return the byte value 0 to 255, or {@code -1} when {@code pos} does not hold a complete
   *     escape with two hexadecimal digits
   */
  private static int percentByteAt(String text, int pos) {
    if (pos < 0 || pos + 2 >= text.length() || text.charAt(pos) != '%') {
      return -1;
    }
    int high = hexValue(text.charAt(pos + 1));
    int low = hexValue(text.charAt(pos + 2));
    return (high < 0 || low < 0) ? -1 : (high << 4) | low;
  }

  /** Returns the value of an ASCII hexadecimal digit of either case, or {@code -1}. */
  private static int hexValue(char c) {
    if (c >= '0' && c <= '9') {
      return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
      return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
      return c - 'A' + 10;
    }
    return -1;
  }

  /**
   * Demo: decodes a percent-encoded search URL, then encodes the same URL written with raw
   * Chinese characters.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String url =
        "http://www.google.com/search?hl=zh-CN&newwindow=1&q=%E4%B8%AD%E5%9B%BD%E5%A4%A7%E7%99%BE%E7%A7%91%E5%9C%A8%E7%BA%BF%E5%85%A8%E6%96%87%E6%A3%80%E7%B4%A2&btnG=%E6%90%9C%E7%B4%A2&lr=";
    if (isUtf8Url(url)) {
      System.out.println(Utf8URLdecode(url));
    }
    // The Chinese query text is spelled with Unicode escapes so that this source compiles
    // the same way whatever source encoding the compiler assumes.
    url =
        "http://www.google.com/search?hl=zh-cn&newwindow=1&q="
            + "\u4E2D\u56FD\u5927\u767E\u79D1\u5728\u7EBF\u5168\u6587\u68C0\u7D22"
            + "&btng=\u641C\u7D22&lr=";
    if (!isUtf8Url(url)) {
      System.out.println(Utf8URLencode(url));
    }
  }
}

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

package com.rain.demo;

import java.io.UnsupportedEncodingException;

/**
 * Helpers for percent-encoding the non-ASCII characters of a URL as UTF-8, decoding such
 * sequences back to characters, and detecting whether a URL carries UTF-8 percent-encoding.
 *
 * <p>Only multi-byte UTF-8 sequences (2 to 4 bytes) are touched. ASCII characters and ASCII
 * escapes such as {@code %20} or {@code %26} are always passed through unchanged, so the
 * structure of the URL (separators, parameter names, letter case) is preserved.
 */
public class UTF8 {

  /** Upper-case hexadecimal digits used when emitting {@code %XX} escapes. */
  private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

  /**
   * Percent-encodes every non-ASCII character of {@code text} as its UTF-8 byte sequence.
   *
   * <p>ASCII characters (including {@code '%'}) are copied unchanged. The text is walked by
   * code point rather than by {@code char}, so a surrogate pair is encoded as one 4-byte
   * sequence. An unpaired surrogate has no UTF-8 form and is emitted as {@code %3F}, which is
   * what the UTF-8 encoder substitutes for it.
   *
   * @param text the text to encode; may be {@code null}
   * @return the encoded text, or the empty string when {@code text} is {@code null}
   */
  public static final String Utf8URLencode(String text) {
    if (text == null) {
      return "";
    }
    StringBuilder result = new StringBuilder(text.length());
    int i = 0;
    while (i < text.length()) {
      int codePoint = text.codePointAt(i);
      i += Character.charCount(codePoint);
      if (codePoint < 0x80) {
        result.append((char) codePoint);
        continue;
      }
      byte[] bytes =
          new String(Character.toChars(codePoint))
              .getBytes(java.nio.charset.StandardCharsets.UTF_8);
      for (byte b : bytes) {
        // Always two hex digits per byte; the masks also drop the sign extension of the byte.
        result.append('%').append(HEX_DIGITS[(b >> 4) & 0xF]).append(HEX_DIGITS[b & 0xF]);
      }
    }
    return result.toString();
  }

  /**
   * Replaces every well-formed percent-encoded multi-byte UTF-8 sequence in {@code text} with
   * the character it encodes.
   *
   * <p>Everything else is copied through exactly as written: letter case is preserved, ASCII
   * escapes stay escaped, and truncated or malformed sequences are left as literal text
   * instead of being dropped or raising an exception.
   *
   * @param text the text to decode; may be {@code null}
   * @return the decoded text, or the empty string when {@code text} is {@code null} or empty
   */
  public static final String Utf8URLdecode(String text) {
    if (text == null || text.isEmpty()) {
      return "";
    }
    StringBuilder result = new StringBuilder(text.length());
    int i = 0;
    while (i < text.length()) {
      char c = text.charAt(i);
      int codePoint = (c == '%') ? decodeSequenceAt(text, i) : -1;
      if (codePoint < 0) {
        result.append(c);
        i++;
      } else {
        result.appendCodePoint(codePoint);
        // Each encoded byte occupies three characters ("%XX").
        i += 3 * utf8Length(codePoint);
      }
    }
    return result.toString();
  }

  /**
   * Tells whether {@code text} contains UTF-8 percent-encoding.
   *
   * <p>Returns {@code true} when at least one percent-encoded multi-byte UTF-8 sequence is
   * present and every percent-encoded byte with the high bit set belongs to a well-formed
   * sequence. ASCII escapes such as {@code %20} and stray {@code '%'} characters are ignored.
   * A URL whose high bytes do not form valid UTF-8 (for example one encoded with a legacy
   * double-byte charset) therefore yields {@code false}.
   *
   * @param text the URL or URL fragment to inspect; may be {@code null}
   * @return {@code true} if the percent-encoded bytes of {@code text} look like UTF-8
   */
  public static final boolean isUtf8Url(String text) {
    if (text == null) {
      return false;
    }
    boolean found = false;
    int i = text.indexOf('%');
    while (i != -1) {
      int next = i + 1;
      if (percentByteAt(text, i) >= 0x80) {
        int codePoint = decodeSequenceAt(text, i);
        if (codePoint < 0) {
          return false;
        }
        found = true;
        // Skip the continuation bytes that were just validated.
        next = i + 3 * utf8Length(codePoint);
      }
      i = text.indexOf('%', next);
    }
    return found;
  }

  /**
   * Decodes the percent-encoded multi-byte UTF-8 sequence that starts at {@code pos}.
   *
   * @return the decoded code point, or {@code -1} when no well-formed 2 to 4 byte sequence
   *     starts at {@code pos} (bad hex digits, truncated input, wrong continuation byte,
   *     overlong form, surrogate, or a value above U+10FFFF)
   */
  private static int decodeSequenceAt(String text, int pos) {
    int lead = percentByteAt(text, pos);
    int length;
    if (lead >= 0xC2 && lead <= 0xDF) {
      length = 2;
    } else if (lead >= 0xE0 && lead <= 0xEF) {
      length = 3;
    } else if (lead >= 0xF0 && lead <= 0xF4) {
      length = 4;
    } else {
      return -1;
    }
    // Keep only the payload bits of the lead byte: 5, 4 or 3 bits for 2, 3 or 4 byte forms.
    int codePoint = lead & (0xFF >> (length + 1));
    for (int k = 1; k < length; k++) {
      int b = percentByteAt(text, pos + 3 * k);
      // A missing byte is reported as -1, which also fails this continuation-byte test.
      if ((b & 0xC0) != 0x80) {
        return -1;
      }
      codePoint = (codePoint << 6) | (b & 0x3F);
    }
    boolean surrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
    // A length mismatch means the value was written in an overlong form.
    if (surrogate || codePoint > 0x10FFFF || utf8Length(codePoint) != length) {
      return -1;
    }
    return codePoint;
  }

  /** Returns the number of UTF-8 bytes for a code point that is at least U+0080. */
  private static int utf8Length(int codePoint) {
    if (codePoint < 0x800) {
      return 2;
    }
    return codePoint < 0x10000 ? 3 : 4;
  }

  /**
   * Reads the {@code %XX} escape at {@code pos}.
   *
   * @return the byte value 0 to 255, or {@code -1} when {@code pos} does not hold a complete
   *     escape with two hexadecimal digits
   */
  private static int percentByteAt(String text, int pos) {
    if (pos < 0 || pos + 2 >= text.length() || text.charAt(pos) != '%') {
      return -1;
    }
    int high = hexValue(text.charAt(pos + 1));
    int low = hexValue(text.charAt(pos + 2));
    return (high < 0 || low < 0) ? -1 : (high << 4) | low;
  }

  /** Returns the value of an ASCII hexadecimal digit of either case, or {@code -1}. */
  private static int hexValue(char c) {
    if (c >= '0' && c <= '9') {
      return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
      return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
      return c - 'A' + 10;
    }
    return -1;
  }

  /**
   * Demo: decodes a percent-encoded search URL, then encodes the same URL written with raw
   * Chinese characters.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String url =
        "http://www.google.com/search?hl=zh-CN&newwindow=1&q=%E4%B8%AD%E5%9B%BD%E5%A4%A7%E7%99%BE%E7%A7%91%E5%9C%A8%E7%BA%BF%E5%85%A8%E6%96%87%E6%A3%80%E7%B4%A2&btnG=%E6%90%9C%E7%B4%A2&lr=";
    if (isUtf8Url(url)) {
      System.out.println(Utf8URLdecode(url));
    }
    // The Chinese query text is spelled with Unicode escapes so that this source compiles
    // the same way whatever source encoding the compiler assumes.
    url =
        "http://www.google.com/search?hl=zh-cn&newwindow=1&q="
            + "\u4E2D\u56FD\u5927\u767E\u79D1\u5728\u7EBF\u5168\u6587\u68C0\u7D22"
            + "&btng=\u641C\u7D22&lr=";
    if (!isUtf8Url(url)) {
      System.out.println(Utf8URLencode(url));
    }
  }
}

在用搜索引擎搜索时，输入的字符通常会先经过URL编码，然后再提交查询。本文给出一段Java代码，用于检测URL是否采用UTF-8编码，并对UTF-8 URL进行编码和解码；需要时修改main函数中的url，然后直接运行即可。

发表回复 取消回复

发表回复取消回复