Javascript 将带破折号字符的punycode转换为Unicode
我需要将punycodeJavascript 将带破折号字符的punycode转换为Unicode,javascript,unicode,punycode,Javascript,Unicode,Punycode,我需要将punycodeNIATO-OTABD转换为nñIñatoñ 前几天我发现了,但是如果中间有一个破折号,PyyCo码转换就不起作用了。 有没有解决“破折号”问题的建议?我花时间创建了下面的punycode。它基于RFC3492中的C代码。要与域名一起使用,您必须从输入/输出到解码/编码中删除/添加xn-- utf16类是从JavaScripts内部字符表示转换为unicode并返回的必要条件 还有ToASCII和ToUnicode函数,可以更容易地在少量编码的IDN和ASCII之间转换
NIATO-OTABD
转换为nñIñatoñ
前几天我找到了一个实现,但是如果字符串中间有一个破折号,punycode转换就不起作用了。
有没有解决“破折号”问题的建议?我花时间创建了下面的punycode。它基于RFC3492中的C代码。要与域名一起使用,您必须从输入/输出到解码/编码中删除/添加
xn--
utf16类
是从JavaScripts内部字符表示转换为unicode并返回的必要条件
还有ToASCII
和ToUnicode
函数,可以更容易地在Unicode形式的IDN与其punycode编码的ASCII形式之间转换
//Javascript Punycode converter derived from example in RFC3492.
//This implementation is created by some@domain.name and released into public domain
var punycode = new function Punycode() {
    // Converts to and from Punycode (RFC 3492), the encoding used by IDN.
    //
    // punycode.ToASCII(domain)
    //     Returns a Punycode representation of "domain". Only labels that
    //     contain characters outside [A-Za-z0-9-] are converted, so it does
    //     not matter if the domain is already plain ASCII.
    //
    // punycode.ToUnicode(domain)
    //     Converts the Punycode labels (those starting with "xn--", in any
    //     letter case) of a domain name to Unicode. Other labels are left
    //     untouched, so it does not matter if the domain was already decoded.
    //
    // punycode.encode(input, preserveCase) / punycode.decode(input, preserveCase)
    //     Raw Punycode conversion of a single string, without the "xn--" prefix.
    //
    // punycode.utf16.decode(string) / punycode.utf16.encode(codePoints)
    //     Conversion between JavaScript strings (UTF-16 code units) and arrays
    //     of Unicode code points.

    // Captured so the public methods keep working when they are detached from
    // the object or when the object is stored under a different name.
    var self = this;

    this.utf16 = {
        // Turns a string into an array of code points, combining surrogate pairs.
        // Throws RangeError on an unpaired or misordered surrogate.
        decode: function (input) {
            var output = [], i = 0, len = input.length, value, extra;
            while (i < len) {
                value = input.charCodeAt(i++);
                if ((value & 0xF800) === 0xD800) {
                    // At end of input charCodeAt yields NaN, which fails the
                    // low-surrogate test below and is reported as illegal.
                    extra = input.charCodeAt(i++);
                    if (((value & 0xFC00) !== 0xD800) || ((extra & 0xFC00) !== 0xDC00)) {
                        throw new RangeError("UTF-16(decode): Illegal UTF-16 sequence");
                    }
                    value = ((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000;
                }
                output.push(value);
            }
            return output;
        },
        // Turns an array of code points into a string.
        // Throws RangeError for surrogate code points and for values outside
        // the range 0..0x10FFFF.
        encode: function (input) {
            var output = [], i = 0, len = input.length, value;
            while (i < len) {
                value = input[i++];
                // Explicit range comparison: a bit mask on the low 16 bits would
                // also reject valid astral code points such as U+1D800.
                if (value < 0 || value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF)) {
                    throw new RangeError("UTF-16(encode): Illegal UTF-16 value");
                }
                if (value > 0xFFFF) {
                    value -= 0x10000;
                    output.push(String.fromCharCode(((value >>> 10) & 0x3FF) | 0xD800));
                    value = 0xDC00 | (value & 0x3FF);
                }
                output.push(String.fromCharCode(value));
            }
            return output.join("");
        }
    };

    // Default parameters (RFC 3492, section 5)
    var initial_n = 0x80;
    var initial_bias = 72;
    var delimiter = "\x2D";
    var base = 36;
    var damp = 700;
    var tmin = 1;
    var tmax = 26;
    var skew = 38;
    var maxint = 0x7FFFFFFF;

    // is_upper(cp) tells whether cp is an ASCII uppercase letter.
    function is_upper(cp) {
        return cp >= 65 && cp <= 90;
    }

    // to_upper_cp(cp) returns the uppercase form of code point cp, or cp itself
    // when upper-casing does not yield exactly one code point (e.g. U+00DF).
    // Works on whole code points so astral characters are not truncated.
    function to_upper_cp(cp) {
        var upper = self.utf16.decode(self.utf16.encode([cp]).toUpperCase());
        return upper.length === 1 ? upper[0] : cp;
    }

    // decode_digit(cp) returns the numeric value of a basic code point (for use
    // in representing integers) in the range 0 to base-1, or base if cp does
    // not represent a value. Ranges are tested explicitly because JavaScript
    // has no unsigned arithmetic: "cp - 48 < 10" is also true for cp < 48.
    function decode_digit(cp) {
        if (cp >= 48 && cp <= 57) { return cp - 22; }   // '0'..'9' -> 26..35
        if (cp >= 65 && cp <= 90) { return cp - 65; }   // 'A'..'Z' -> 0..25
        if (cp >= 97 && cp <= 122) { return cp - 97; }  // 'a'..'z' -> 0..25
        return base;
    }

    // encode_digit(d, flag) returns the basic code point whose value (when used
    // for representing integers) is d, which needs to be in the range 0 to
    // base-1. The lowercase form is used unless flag is nonzero, in which case
    // the uppercase form is used. The behavior is undefined if flag is nonzero
    // and digit d has no uppercase form.
    function encode_digit(d, flag) {
        // 0..25 map to ASCII a..z or A..Z
        // 26..35 map to ASCII 0..9
        var cp = d < 26 ? d + 97 : d + 22;
        return flag ? cp - 32 : cp;
    }

    // Bias adaptation function (RFC 3492, section 6.1)
    function adapt(delta, numpoints, firsttime) {
        var k;
        delta = firsttime ? Math.floor(delta / damp) : (delta >> 1);
        delta += Math.floor(delta / numpoints);
        for (k = 0; delta > (((base - tmin) * tmax) >> 1); k += base) {
            delta = Math.floor(delta / (base - tmin));
        }
        return Math.floor(k + (base - tmin + 1) * delta / (delta + skew));
    }

    // encode_basic(bcp, flag) forces a basic code point to lowercase if flag is
    // falsy, uppercase if flag is truthy, and returns the resulting code point.
    // The code point is unchanged if it is caseless.
    function encode_basic(bcp, flag) {
        if (bcp >= 97 && bcp <= 122) { bcp -= 32; }
        if (!flag && is_upper(bcp)) { bcp += 32; }
        return bcp;
    }

    // Main decode: converts a Punycode string (without "xn--") to Unicode.
    // When preserveCase is truthy, an uppercase basic character, or an
    // uppercase final digit of an encoded delta, upper-cases the corresponding
    // output character.
    // Throws RangeError on malformed input or arithmetic overflow.
    this.decode = function (input, preserveCase) {
        var output = [];
        var case_flags = [];
        var input_length = input.length;
        var n, out, i, bias, basic, j, ic, oldi, w, k, digit, t, len, cp;

        // Initialize the state:
        n = initial_n;
        i = 0;
        bias = initial_bias;

        // Handle the basic code points: let basic be the number of input code
        // points before the last delimiter, or 0 if there is none, then copy
        // the first basic code points to the output.
        basic = input.lastIndexOf(delimiter);
        if (basic < 0) { basic = 0; }
        for (j = 0; j < basic; ++j) {
            cp = input.charCodeAt(j);
            if (cp >= 0x80) {
                throw new RangeError("Illegal input >= 0x80");
            }
            if (preserveCase) { case_flags[output.length] = is_upper(cp); }
            output.push(cp);
        }

        // Main decoding loop: start just after the last delimiter if any basic
        // code points were copied; start at the beginning otherwise.
        for (ic = basic > 0 ? basic + 1 : 0; ic < input_length; ) {
            // ic is the index of the next character to be consumed.
            // Decode a generalized variable-length integer into delta, which
            // gets added to i. The overflow checking is easier if we increase
            // i as we go, then subtract off its starting value at the end to
            // obtain delta.
            for (oldi = i, w = 1, k = base; ; k += base) {
                if (ic >= input_length) {
                    throw new RangeError("punycode_bad_input(1)");
                }
                digit = decode_digit(input.charCodeAt(ic++));
                if (digit >= base) {
                    throw new RangeError("punycode_bad_input(2)");
                }
                if (digit > Math.floor((maxint - i) / w)) {
                    throw new RangeError("punycode_overflow(1)");
                }
                i += digit * w;
                t = k <= bias ? tmin : k >= bias + tmax ? tmax : k - bias;
                if (digit < t) { break; }
                if (w > Math.floor(maxint / (base - t))) {
                    throw new RangeError("punycode_overflow(2)");
                }
                w *= (base - t);
            }

            out = output.length + 1;
            bias = adapt(i - oldi, out, oldi === 0);

            // i was supposed to wrap around from out to 0, incrementing n each
            // time, so we'll fix that now:
            if (Math.floor(i / out) > maxint - n) {
                throw new RangeError("punycode_overflow(3)");
            }
            n += Math.floor(i / out);
            i %= out;

            // Insert n at position i of the output. The case of the last
            // consumed digit determines the uppercase flag.
            if (preserveCase) {
                case_flags.splice(i, 0, is_upper(input.charCodeAt(ic - 1)));
            }
            output.splice(i, 0, n);
            i++;
        }

        if (preserveCase) {
            for (i = 0, len = output.length; i < len; i++) {
                if (case_flags[i]) {
                    output[i] = to_upper_cp(output[i]);
                }
            }
        }
        return self.utf16.encode(output);
    };

    // Main encode: converts a Unicode string to Punycode (without "xn--").
    // The input is always lower-cased first. When preserveCase is truthy, the
    // original case is recorded in the case of the basic characters and of the
    // final digit of each encoded delta.
    // Throws RangeError on invalid UTF-16 input or arithmetic overflow.
    this.encode = function (input, preserveCase) {
        var n, delta, h, b, bias, j, m, q, k, t, ijv, case_flags, cps, input_length;
        var output = [];

        if (preserveCase) {
            // Preserve case, step 1 of 2: code points of the unaltered string
            case_flags = self.utf16.decode(input);
        }
        // Convert the lower-cased input from UTF-16 to code points
        cps = self.utf16.decode(input.toLowerCase());
        input_length = cps.length;
        if (preserveCase) {
            // Preserve case, step 2 of 2: true where lower-casing changed the
            // code point
            for (j = 0; j < input_length; j++) {
                case_flags[j] = cps[j] !== case_flags[j];
            }
        }

        // Initialize the state:
        n = initial_n;
        delta = 0;
        bias = initial_bias;

        // Handle the basic code points:
        for (j = 0; j < input_length; ++j) {
            if (cps[j] < 0x80) {
                output.push(
                    String.fromCharCode(
                        case_flags ? encode_basic(cps[j], case_flags[j]) : cps[j]
                    )
                );
            }
        }

        // h is the number of code points that have been handled, b is the
        // number of basic code points
        h = b = output.length;
        if (b > 0) { output.push(delimiter); }

        // Main encoding loop:
        while (h < input_length) {
            // All non-basic code points < n have been handled already.
            // Find the next larger one:
            for (m = maxint, j = 0; j < input_length; ++j) {
                ijv = cps[j];
                if (ijv >= n && ijv < m) { m = ijv; }
            }

            // Increase delta enough to advance the decoder's <n,i> state to
            // <m,0>, but guard against overflow:
            if (m - n > Math.floor((maxint - delta) / (h + 1))) {
                throw new RangeError("punycode_overflow (1)");
            }
            delta += (m - n) * (h + 1);
            n = m;

            for (j = 0; j < input_length; ++j) {
                ijv = cps[j];
                if (ijv < n) {
                    // Overflow is raised, not returned, so callers never
                    // receive an Error object where they expect a string.
                    if (++delta > maxint) {
                        throw new RangeError("punycode_overflow(2)");
                    }
                }
                if (ijv === n) {
                    // Represent delta as a generalized variable-length integer:
                    for (q = delta, k = base; ; k += base) {
                        t = k <= bias ? tmin : k >= bias + tmax ? tmax : k - bias;
                        if (q < t) { break; }
                        output.push(String.fromCharCode(encode_digit(t + (q - t) % (base - t), 0)));
                        q = Math.floor((q - t) / (base - t));
                    }
                    output.push(String.fromCharCode(encode_digit(q, preserveCase && case_flags[j] ? 1 : 0)));
                    bias = adapt(delta, h + 1, h === b);
                    delta = 0;
                    ++h;
                }
            }
            ++delta;
            ++n;
        }
        return output.join("");
    };

    // Converts each dot-separated label that contains a character outside
    // [A-Za-z0-9-] to "xn--" + Punycode; other labels are returned unchanged.
    this.ToASCII = function (domain) {
        var domain_array = domain.split(".");
        var out = [];
        for (var i = 0; i < domain_array.length; ++i) {
            var s = domain_array[i];
            out.push(
                s.match(/[^A-Za-z0-9-]/) ?
                    "xn--" + self.encode(s) :
                    s
            );
        }
        return out.join(".");
    };

    // Decodes each dot-separated label that starts with the ACE prefix "xn--"
    // (matched case-insensitively); other labels are returned unchanged.
    this.ToUnicode = function (domain) {
        var domain_array = domain.split(".");
        var out = [];
        for (var i = 0; i < domain_array.length; ++i) {
            var s = domain_array[i];
            out.push(
                s.match(/^xn--/i) ?
                    self.decode(s.slice(4)) :
                    s
            );
        }
        return out.join(".");
    };
}();
//从RFC3492中的示例派生的Javascript Punycode转换器。
//此实现是由创建的some@domain.name并发布到公共领域
var punycode=新函数punycode(){
//此对象与IDN中使用的少量代码进行转换
//
//punycode.ToASCII(域)
//
//返回“域”的微小编码表示形式。
//它只转换域名中
//具有非ASCII字符。即,如果
//您使用一个已经是ASCII格式的域来调用它。
//
//punycode.ToUnicode(域)
//
//将少量编码的域名转换为unicode。
//它只转换域名中编码很少的部分。
//也就是说,如果你用一根绳子来称呼它,那没关系
//已经转换为unicode的。
//
//
此.utf16={
//utf16类是从javascripts内部字符表示转换为unicode并返回所必需的。
解码:功能(输入){
变量输出=[],i=0,len=input.length,value,extra;
而(我>>10)和0x3FF)| 0xD800);
值=0xDC00 |(值&0x3FF);
}
push(String.fromCharCode(value));
}
返回output.join(“”);
}
}
//默认参数
var初始值=0x80;
var初始偏差=72;
var分隔符=“\x2D”;
var基数=36;
var-damp=700;
var tmin=1;
var tmax=26;
var-skew=38;
var maxint=0x7FFFFFFF;
//解码数字(cp)返回基本代码的数值
//0到0范围内的点(用于表示整数)
//base-1,或如果cp不表示值,则为base。
功能解码数字(cp){
返回cp-48<10?cp-22:cp-65<26?cp-65:cp-97<26?cp-97:base;
}
//encode_数字(d,标志)返回其值为
//(用于表示整数时)是d,需要在
//范围0到base-1。除非标记为
//非零,在这种情况下使用大写形式。行为
//如果标志为非零且数字d没有大写形式,则未定义。
函数编码\位(d,标志){
返回d+22+75*(d<26)-(标志!=0)>1);
delta+=数学楼层(delta/numpoints);
对于(k=0;delta>((基准-tmin)*tmax)>>1);k+=base){
delta=数学地板(delta/(基底-tmin));
}
返回数学层(k+(基-tmin+1)*delta/(delta+skew));
}
//encode_basic(bcp,标志)如果标志为零,则强制基本代码点为小写,
//大写if标志为非零,并返回结果代码点。
//如果没有大小写,则代码点不变。
//如果bcp不是基本代码点,则该行为未定义。
函数编码_基本(bcp,标志){
bcp-=(bcp-97<26)0?基本+1:0;ic<输入长度;){
//ic是要使用的下一个字符的索引,
//将广义变长整数解码为增量,
//它被添加到i中。溢出检查更容易
//如果我们在前进中增加i,那么减去它的起始值
//值以获取增量。
对于(oldi=i,w=1,k=base;;k+=base){
如果(ic>=输入长度){
抛出范围错误(“punycode_bad_input(1)”;
}
数字=解码数字(输入.charCodeAt(ic++);
如果(数字>=基数){
抛出范围错误(“punycode_bad_input(2)”;
}
如果(数字>数学楼层((maxint-i)/w)){
抛出范围错误(“punycode_溢出(1)”;
}
i+=数字*w;
t=k=bias+tmax?tmax:k-bias;
如果(数字数学层(maxint/(base-t))){
抛出范围错误(“punycode_溢出(2)”;
}
w*=(基-t);
}
输出=输出长度+1;
偏差=适应(i-oldi,out,oldi==0);
//我应该从外面绕到0,
//每次递增n,因此我们现在将修复该问题:
if(数学楼层(i/out)>maxint-n){
抛出范围错误(“punycode_溢出(3)”;
}
n+=数学楼层(输入/输出);
i%=输出;
//在输出的位置i处插入n:
//最后一个字符的大小写确定