在Java中将字符串转换为URL标准字符串
我有一个字符串,例如:在Java中将字符串转换为URL标准字符串,java,Java,我有一个字符串,例如: Cerepedia, una apliación web Cerepedia,unaaplicacionweb 我想将其转换为URL有效的内容,例如: Cerepedia, una apliación web Cerepedia,unaaplicacionweb 注意:特殊字符转换和空格删除 顺便问一下,URL中允许使用逗号吗?您看过了吗?这似乎是你所需要的。尽管特殊字符将被转换为转义实体,而不是从其“特殊”属性中剥离。请在下面的类中尝试convertNonAsc
Cerepedia, una apliación web
Cerepedia,unaaplicacionweb
我想将其转换为URL有效的内容,例如:
Cerepedia, una apliación web
Cerepedia,unaaplicacionweb
注意:特殊字符转换和空格删除
顺便问一下,URL中允许使用逗号吗?您看过 java.net.URLEncoder 了吗?这似乎正是您所需要的,不过它会把特殊字符转换为转义序列,而不是去掉它们的“特殊”属性(例如重音符号)。请尝试下面这个类中的 convertNonAscii() 方法:
/**
 * Utility for replacing accented Latin letters with their unaccented ASCII
 * counterparts, using a fixed one-to-one lookup table.
 *
 * <p>Only the precomposed characters listed in {@link #UNICODE} are replaced;
 * every other character is copied to the result unchanged.
 */
public class AsciiUtils {
    /**
     * ASCII replacements. The character at index {@code i} replaces the
     * character at index {@code i} of {@link #UNICODE}, so both strings must
     * keep exactly the same length and row layout.
     */
    private static final String PLAIN_ASCII =
          "AaEeIiOoUu"    // grave
        + "AaEeIiOoUuYy"  // acute
        + "AaEeIiOoUuYy"  // circumflex
        + "AaEeIiOoUuYy"  // tilde
        + "AaEeIiOoUuYy"  // umlaut
        + "Aa"            // ring
        + "Cc"            // cedilla
        + "Nn"            // n tilde (spanish)
        ;

    /**
     * Accented characters, position for position aligned with
     * {@link #PLAIN_ASCII}.
     */
    private static final String UNICODE =
          "\u00C0\u00E0\u00C8\u00E8\u00CC\u00EC\u00D2\u00F2\u00D9\u00F9"
        + "\u00C1\u00E1\u00C9\u00E9\u00CD\u00ED\u00D3\u00F3\u00DA\u00FA\u00DD\u00FD"
        + "\u00C2\u00E2\u00CA\u00EA\u00CE\u00EE\u00D4\u00F4\u00DB\u00FB\u0176\u0177"
        // Tilde row: A a E e I i O o U u Y y with tilde. It must not repeat the
        // circumflex row, otherwise indexOf() never reaches it and tilde
        // letters are left unconverted.
        + "\u00C3\u00E3\u1EBC\u1EBD\u0128\u0129\u00D5\u00F5\u0168\u0169\u1EF8\u1EF9"
        + "\u00C4\u00E4\u00CB\u00EB\u00CF\u00EF\u00D6\u00F6\u00DC\u00FC\u0178\u00FF"
        + "\u00C5\u00E5"
        + "\u00C7\u00E7"
        + "\u00D1\u00F1"
        ;

    // Static helpers only: the private constructor prevents instantiation.
    private AsciiUtils() {
    }

    /**
     * Replaces every accented character found in the lookup table with its
     * ASCII equivalent.
     *
     * @param s the string to convert; must not be {@code null}
     * @return a string of the same length in which each character listed in
     *         the table has been replaced and all other characters are kept
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public static String convertNonAscii(String s) {
        int n = s.length();
        // The result always has exactly n characters, so size the buffer once.
        StringBuilder b = new StringBuilder(n);
        for (int i = 0; i < n; i++) {
            char c = s.charAt(i);
            int pos = UNICODE.indexOf(c);
            b.append(pos > -1 ? PLAIN_ASCII.charAt(pos) : c);
        }
        return b.toString();
    }
}
公共类asciuitils{
/**
*包含UNICODE中一对一映射的所有字符的列表。
*/
私有静态最终字符串纯ASCII=
“aaeeiiouu”//grave
+“aaeeiiouuyy”//acute
+“aaeeiiouuyy”//扬抑
+“aaeeiiouuyy”//tilde
+“aaeeioouuyy”//umlaut
+“Aa”//环
+“抄送”//cedilla
+“Nn”//n蒂尔德(西班牙语)
;
/**
*实际重音值,对应一个ASCII值
*/
私有静态最终字符串UNICODE=
“\u00C0\u00E0\u00C8\u00E8\u00CC\u00EC\u00D2\u00F2\u00D9\u00F9”
+“\u00C1\u00E1\u00C9\u00E9\u00CD\u00ED\u00D3\u00F3\u00DA\u00FA\u00DD\u00FD”
+“\u00C2\u00E2\u00CA\u00EA\u00CE\u00EE\u00D4\u00F4\u00DB\u00FB\u0176\u0177”
+“\u00C2\u00E2\u00CA\u00EA\u00CE\u00EE\u00D4\u00F4\u00DB\u00FB\u0176\u0177”
+“\u00C4\u00E4\u00CB\u00EB\u00CF\u00EF\u00D6\u00F6\u00DC\u00FC\u0178\u00FF”
+“\u00C5\u00E5”
+“\u00C7\u00E7”
+“\u00D1\u00F1”
;
//私有构造函数,不能实例化!
专用AsciiUtils(){
}
/**
*从字符串中删除重音符号并替换为等效的ascii
*@param将字符串设置为englishify
*@返回不带法语和西班牙语内容的字符串。
*/
公共静态字符串转换(字符串s){
StringBuilder b=新的StringBuilder();
int n=s.长度();
对于(int i=0;i-1){
b、 附加(纯ASCII.charAt(pos));
}否则{
b、 附加(c);
}
}
返回b.toString();
}
}
URLEncoder 会用 + 代替空格。Don 发布的 AsciiUtils 类不会删除空格,但下面这个函数可用于此目的:
/**
 * Removes every space character (U+0020) from a string.
 *
 * <p>Only the plain space is removed; tabs, line breaks and other
 * whitespace characters are kept.
 *
 * @param s the string to strip; must not be {@code null}
 * @return {@code s} with all space characters removed
 * @throws NullPointerException if {@code s} is {@code null}
 */
public static String removeSpaces(String s) {
    // A single literal replacement runs in one linear pass, instead of
    // re-copying the partial result on every token as repeated String
    // concatenation in a loop would.
    return s.replace(" ", "");
}
注意:Don 的解决方案适用于写在代码中的字符串,但不适用于来自 UTF-8 编码文件的字符串。下面是我目前最好的解决方案:先使用 URLEncoder 编码,然后再替换其中的十六进制转义字符:
String s = "Cerepedia, una apliación web";
String ENCODING= "uft-8";
String encoded_s = URLEncoder.encode(s,ENCODING); // Cerepedia+una+aplicaci%C3%83%C2%B3n+web
String s_hexa_free = EncodingTableUtils.replaceHexa(,ENCODING)); // Cerepedia+una+aplicacion+web
EncodingTableUtils 类(编码表)
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
/**
 * Lookup tables that map percent-escaped accented Spanish letters, as found in
 * URL-encoded strings, to plain ASCII letters.
 *
 * <p>Each table is insertion-ordered so that replacements are applied in a
 * fixed, predictable order.
 */
public class EncodingTableUtils {
    /** Replacements used when the encoding name is {@code "iso-8859-1"}. */
    public final static HashMap<String, String> iso88591 = new LinkedHashMap<String, String>();
    static {
        // Two-byte UTF-8 escapes; retained so strings that were handled
        // before are still handled the same way.
        iso88591.put("%C3%A1", "a"); // á
        iso88591.put("%C3%81", "A"); // Á
        iso88591.put("%C3%A9", "e"); // é
        iso88591.put("%C3%89", "E"); // É
        iso88591.put("%C3%AD", "i"); // í
        iso88591.put("%C3%8D", "I"); // Í
        iso88591.put("%C3%93", "O"); // Ó
        iso88591.put("%C3%B3", "o"); // ó
        iso88591.put("%C3%BA", "u"); // ú
        iso88591.put("%C3%9A", "U"); // Ú
        iso88591.put("%C3%91", "N"); // Ñ
        iso88591.put("%C3%B1", "n"); // ñ
        // Single-byte escapes, which is what an ISO-8859-1 URL encoding of
        // these letters actually looks like.
        iso88591.put("%E1", "a"); // á
        iso88591.put("%C1", "A"); // Á
        iso88591.put("%E9", "e"); // é
        iso88591.put("%C9", "E"); // É
        iso88591.put("%ED", "i"); // í
        iso88591.put("%CD", "I"); // Í
        iso88591.put("%D3", "O"); // Ó
        iso88591.put("%F3", "o"); // ó
        iso88591.put("%FA", "u"); // ú
        iso88591.put("%DA", "U"); // Ú
        iso88591.put("%D1", "N"); // Ñ
        iso88591.put("%F1", "n"); // ñ
    }

    /** Replacements used when the encoding name is {@code "utf-8"}. */
    public final static HashMap<String, String> utf8 = new LinkedHashMap<String, String>();
    static {
        // Double-encoded forms: UTF-8 bytes that were misread as windows-1252
        // and then URL-encoded as UTF-8 again. Listed first because they are
        // the longest keys. Á and Í both collapse to the same sequence
        // (%C3%83%EF%BF%BD) in this form and cannot be told apart, so they
        // are deliberately not listed.
        utf8.put("%C3%83%C2%A1", "a");    // á
        utf8.put("%C3%83%C2%A9", "e");    // é
        utf8.put("%C3%83%E2%80%B0", "E"); // É
        utf8.put("%C3%83%C2%AD", "i");    // í
        utf8.put("%C3%83%E2%80%9C", "O"); // Ó
        utf8.put("%C3%83%C2%B3", "o");    // ó
        utf8.put("%C3%83%C2%BA", "u");    // ú
        utf8.put("%C3%83%C5%A1", "U");    // Ú
        utf8.put("%C3%83%E2%80%98", "N"); // Ñ
        utf8.put("%C3%83%C2%B1", "n");    // ñ
        // Regular two-byte UTF-8 escapes.
        utf8.put("%C3%A1", "a"); // á
        utf8.put("%C3%81", "A"); // Á
        utf8.put("%C3%A9", "e"); // é
        utf8.put("%C3%89", "E"); // É
        utf8.put("%C3%AD", "i"); // í
        utf8.put("%C3%8D", "I"); // Í
        utf8.put("%C3%93", "O"); // Ó
        utf8.put("%C3%B3", "o"); // ó
        utf8.put("%C3%BA", "u"); // ú
        utf8.put("%C3%9A", "U"); // Ú
        utf8.put("%C3%91", "N"); // Ñ
        utf8.put("%C3%B1", "n"); // ñ
    }

    /** Maps a lower-case encoding name to its replacement table. */
    public final static HashMap<String, HashMap<String, String>> enc_table =
            new HashMap<String, HashMap<String, String>>();
    static {
        enc_table.put("iso-8859-1", iso88591);
        enc_table.put("utf-8", utf8);
    }

    /**
     * Replaces percent-escaped accented letters with their plain ASCII
     * equivalents.
     * <p>Example: á, escaped as %C3%A1, is replaced with a</p>
     *
     * @param s   usually a string coming from URLEncoder.encode
     * @param enc encoding name, UTF-8 or ISO-8859-1 (case-insensitive)
     * @return {@code s} with every escape listed in the table for {@code enc}
     *         replaced, or the empty string when {@code enc} has no table
     * @throws NullPointerException if {@code enc} is {@code null}, or if
     *         {@code s} is {@code null} and {@code enc} has a table
     */
    public static String convertHexaDecimal(String s, String enc) {
        // Locale.ROOT keeps the lookup stable under locales with special
        // casing rules (e.g. Turkish, where "I" lower-cases to a dotless i).
        HashMap<String, String> characters = enc_table.get(enc.toLowerCase(Locale.ROOT));
        if (characters == null) {
            return "";
        }
        for (Map.Entry<String, String> entry : characters.entrySet()) {
            // Literal replacement: the keys are plain text, not patterns.
            s = s.replace(entry.getKey(), entry.getValue());
        }
        return s;
    }
}
import java.util.HashMap;
导入java.util.Iterator;
导入java.util.Set;
公共类编码表{
public final static HashMap iso88591=新HashMap();
静止的{
iso88591.put(“%C3%A1”,“a”);//
iso88591.put(“%C3%81”,“A”);//Á
iso88591.put(“%C3%A9”,“e”);//
iso88591.put(“%C3%89”,“E”);//É
iso88591.put(“%C3%AD”,“i”);//i
iso88591.put(“%C3%8D”,“I”);//Í
iso88591.put(“%C3%93”,“O”);//Ó
iso88591.put(“%C3%B3”,“o”);//
iso88591.put(“%C3%BA”,“u”);//ú
iso88591.put(“%C3%9A”,“U”);//Ú
iso88591.put(“%C3%91”,“N”);//
iso88591.put(“%C3%B1”,“n”);//ñ
}
public final static HashMap utf8=新HashMap();
静止的{
utf8.put(“%C3%83%C2%A1”,“a”);//
utf8.put(“%C3%83%EF%BF”,“A”);//Á
utf8.put(“%BD%C3%83%C2”,“e”);//
utf8.put(“%A9%C3%83%E2”,“E”);//É
utf8.put(“%80%B0%C3%83”,“i”);//i
utf8.put(“%C2%AD%C3%83”,“I”);//Í
utf8.put(“%EF%BF%BD%C3”,“O”);//Ó
utf8.put(“%C3%83%C2%B3”,“o”);//
utf8.put(“%83%E2%80%9C”,“u”);//ú
utf8.put(“%C3%83%C2%BA”,“U”);//Ú
utf8.put(“%C3%83%C5%A1”,“N”);//
utf8.put(“%C3%83%E2%80”,“n”);//ñ
}
public final static HashMap enc_table=new HashMap();
静止的{
附件表格put(“iso-8859-1”,iso88591);
附件表格put(“utf-8”,utf8);
}
/**
*将十六进制字符替换为等效的英文字符,而不是特殊字符
*示例:áHexa:%C3%A1被替换为
*@param s通常是来自URLEncode.encode的字符串
*@param enc编码UTF-8或ISO-8850-1
*/
公共静态字符串转换器十六进制(字符串s、字符串enc){
HashMap characters=(HashMap)enc_table.get(enc.toLowerCase());
如果(字符==null)返回“”;
Set keys=characters.keySet();
Iterator it=keys.Iterator();
while(it.hasNext()){
String key=(String)it.next();
字符串regex=EscapeChars.forRegex(键);
字符串替换=(字符串)字符。获取(键);
s=s.replaceAll(正则表达式,替换);
}
返回s;
}
}
EscapeChars 类(转义工具类)
/**
 * Helper for embedding literal text inside a regular expression.
 */
public final class EscapeChars {
    /** Every character that {@link #forRegex(String)} prefixes with a backslash. */
    private static final String ESCAPED_CHARACTERS = ".\\?*+&:{}[]()^$|";

    /**
     * Replace characters having special meaning in regular expressions
     * with their escaped equivalents, preceded by a '\' character.
     *
     * <P>The escaped characters include :
     *<ul>
     *<li>.
     *<li>\
     *<li>?, * , and +
     *<li>&amp;
     *<li>:
     *<li>{ and }
     *<li>[ and ]
     *<li>( and )
     *<li>^ and $
     *<li>|
     *</ul>
     *
     * @param aRegexFragment the literal text to escape; must not be {@code null}
     * @return the text with each listed character preceded by a backslash and
     *         all other characters copied unchanged
     * @throws NullPointerException if {@code aRegexFragment} is {@code null}
     */
    public static String forRegex(String aRegexFragment){
        final int length = aRegexFragment.length();
        final StringBuilder result = new StringBuilder(length + 8);
        // Walk the string by index rather than with a CharacterIterator: the
        // iterator's end marker is the character U+FFFF, so an input that
        // contains that character would be cut short.
        for (int index = 0; index < length; index++) {
            final char character = aRegexFragment.charAt(index);
            if (ESCAPED_CHARACTERS.indexOf(character) >= 0) {
                result.append('\\');
            }
            result.append(character);
        }
        return result.toString();
    }
}
公共最终类逃逸卡{
/**
*替换正则表达式中具有特殊含义的字符
*使用其转义等价项,前面带有“\”字符。
*
*转义字符包括:
*
*- 。
*
- \
*
- ?,*,和+
*
- &
*
- :
*
- {and}
*
- [和]
*
- (和)
*
- ^$
*
*/
用于正则表达式的公共静态字符串(字符串aRegexFragment){
最终StringBuilder结果=新建StringBuilder();
最终StringCharacterIterator迭代器=新StringCharacterIterator(aRegexFragment);
char character=iterator.current();
while(character!=CharacterIterator.DONE){
/*
*所有文字都需要将反斜杠加倍。
*/
如果(字符=='。){
结果。追加(“\\”);