Javascript UTF-8 ArrayBuffer与字符串之间的转换
我有一个Javascript UTF-8 ArrayBuffer与字符串之间的转换,javascript,string,utf-8,arraybuffer,Javascript,String,Utf 8,Arraybuffer,我有一个ArrayBuffer,其中包含一个使用UTF-8编码的字符串,我找不到一种标准方法将这种ArrayBuffer转换为JSstring(我知道它是使用UTF-16编码的) 我在很多地方看过这段代码,但我不知道它如何处理任何长度超过1字节的UTF-8代码点 return String.fromCharCode.apply(null, new Uint8Array(data)); 类似地,我找不到从字符串转换为UTF-8编码的ArrayBuffer的标准方法,如果您在浏览器中执行此操作,则
ArrayBuffer
,其中包含一个使用UTF-8编码的字符串,我找不到一种标准方法将这种ArrayBuffer
转换为JSstring
(我知道它是使用UTF-16编码的)
我在很多地方看过这段代码,但我不知道它如何处理任何长度超过1字节的UTF-8代码点
return String.fromCharCode.apply(null, new Uint8Array(data));
类似地,我找不到从
字符串
转换为UTF-8编码的ArrayBuffer
的标准方法,如果您在浏览器中执行此操作,则没有内置字符编码库,但您可以
通过以下方式:
/**
 * Left-pads a string with a single "0" when it is shorter than two characters.
 *
 * @param {string} n - The string to pad (anything exposing `length` that can be concatenated).
 * @returns {string} `n` unchanged when its length is at least 2, otherwise "0" + n.
 */
function pad(n) {
    return n.length < 2 ? "0" + n : n;
}
var数组=新的UINT8数组(数据);
var str=“”;
对于(变量i=0,len=array.length;i
下面是一个解码3字节UTF-8单元的演示:
警告:escape 和 unescape 已从 Web 标准中移除。下面这段代码应该可用:
// http://www.onicos.com/staff/iz/amuse/javascript/expert/utf.txt
/* utf.js - UTF-8 <=> UTF-16 conversion
*
* Copyright (C) 1999 Masanao Izumo <iz@onicos.co.jp>
* Version: 1.0
* LastModified: Dec 25 1999
* This library is free. You can redistribute it and/or modify it.
*/
/**
 * Decodes an array of UTF-8 bytes into a JavaScript (UTF-16) string.
 *
 * Handles 1-, 2-, 3- and 4-byte sequences. Code points above U+FFFF
 * (4-byte sequences) are emitted as a surrogate pair.
 *
 * Bytes that do not start a recognised sequence (stray continuation bytes
 * 0x80-0xBF, lead bytes 0xF8-0xFF, or a 4-byte lead that is not followed by
 * three continuation bytes encoding U+10000..U+10FFFF) are skipped.
 *
 * @param {ArrayLike<number>} array - Byte values (0-255), e.g. a Uint8Array.
 * @returns {string} The decoded string.
 */
function Utf8ArrayToStr(array) {
    const len = array.length;
    let out = "";
    let i = 0;

    while (i < len) {
        const c = array[i++];
        let char2;
        let char3;
        let char4;

        switch (c >> 4) {
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                // 0xxxxxxx
                out += String.fromCharCode(c);
                break;
            case 12: case 13:
                // 110x xxxx   10xx xxxx
                char2 = array[i++];
                out += String.fromCharCode(((c & 0x1F) << 6) | (char2 & 0x3F));
                break;
            case 14:
                // 1110 xxxx  10xx xxxx  10xx xxxx
                char2 = array[i++];
                char3 = array[i++];
                out += String.fromCharCode(((c & 0x0F) << 12) |
                                           ((char2 & 0x3F) << 6) |
                                           ((char3 & 0x3F) << 0));
                break;
            case 15:
                // 1111 0xxx  10xx xxxx  10xx xxxx  10xx xxxx
                // Only consume the trailing bytes when the whole sequence is
                // well-formed; otherwise just the lead byte is skipped.
                if ((c & 0x08) === 0 && i + 2 < len) {
                    char2 = array[i];
                    char3 = array[i + 1];
                    char4 = array[i + 2];
                    const allContinuation =
                        (char2 & 0xC0) === 0x80 &&
                        (char3 & 0xC0) === 0x80 &&
                        (char4 & 0xC0) === 0x80;
                    const codePoint = ((c & 0x07) << 18) |
                                      ((char2 & 0x3F) << 12) |
                                      ((char3 & 0x3F) << 6) |
                                      (char4 & 0x3F);
                    if (allContinuation && codePoint >= 0x10000 && codePoint <= 0x10FFFF) {
                        i += 3;
                        // fromCodePoint emits the UTF-16 surrogate pair.
                        out += String.fromCodePoint(codePoint);
                    }
                }
                break;
        }
    }

    return out;
}
//http://www.onicos.com/staff/iz/amuse/javascript/expert/utf.txt
/*utf.js-utf-8 utf-16转换
*
*版权所有(C)1999 Masanao Izumo
*版本:1.0
*最后修改日期:1999年12月25日
*这个图书馆是免费的。您可以重新分发和/或修改它。
*/
函数Utf8ArrayToStr(数组){
var out,i,len,c;
var char2,char3;
out=“”;
len=数组长度;
i=0;
而(我>4)
{
案例0:案例1:案例2:案例3:案例4:案例5:案例6:案例7:
//0xxxxxxx
out+=String.fromCharCode(c);
打破
案例12:案例13:
//110x xxxx 10x xxxx
char2=数组[i++];
out+=String.fromCharCode(((c&0x1F)Github上有一个polyfill for over:。这对于节点或浏览器来说很容易,自述文件建议如下:
// TextEncoder and TextDecoder are constructors: calling them without `new`
// throws a TypeError, so both are instantiated explicitly here.
// `encoding` and the input `string` are expected to be supplied by the caller.
var uint8array = new TextEncoder(encoding).encode(string);
var string = new TextDecoder(encoding).decode(uint8array);
如果我记得的话,'utf-8'
是您需要的编码
,当然您需要包装您的缓冲区:
// Build a Uint8Array from the buffer so it can be handed to a decoder.
// NOTE(review): presumably utf8buffer is the ArrayBuffer holding the UTF-8 data — confirm at the call site.
var uint8array = new Uint8Array(utf8buffer);
希望它对您的效果和对我的效果一样好。使用和
程序员寻求从字节数组转换为字符串的主要问题是unicode字符的UTF-8编码(压缩)。此代码将帮助您:
/**
 * Decodes an array of UTF-8 bytes into a JavaScript (UTF-16) string.
 *
 * Code units are accumulated in a bounded array and flushed in chunks so
 * that String.fromCharCode.apply never receives an oversized argument list.
 *
 * If the input ends in the middle of a multi-byte sequence, the text decoded
 * so far is kept and a single U+FFFD replacement character is appended for
 * the incomplete sequence.
 *
 * @param {ArrayLike<number>} strBytes - Byte values, e.g. a Uint8Array.
 * @returns {string} The decoded string.
 * @throws {RangeError} If a decoded value is not a valid Unicode code point.
 */
var getString = function (strBytes) {
    const MAX_SIZE = 0x4000;
    const length = strBytes.length;
    const codeUnits = [];
    let result = '';
    let index = 0;

    while (index < length) {
        let codePoint = Number(strBytes[index++]);
        let trailing = 0; // number of continuation bytes that follow the lead byte

        if (codePoint === (codePoint & 0x7F)) {
            // Single-byte (ASCII) value: used as-is.
        } else if (0xF0 === (codePoint & 0xF0)) {
            codePoint ^= 0xF0;
            trailing = 3;
        } else if (0xE0 === (codePoint & 0xE0)) {
            codePoint ^= 0xE0;
            trailing = 2;
        } else if (0xC0 === (codePoint & 0xC0)) {
            codePoint ^= 0xC0;
            trailing = 1;
        }

        if (index + trailing > length) {
            // Input ends inside a multi-byte sequence: mark it and stop,
            // keeping everything decoded up to this point.
            codeUnits.push(0xFFFD);
            break;
        }

        for (let k = 0; k < trailing; k++) {
            codePoint = (codePoint << 6) | (strBytes[index++] ^ 0x80);
        }

        if (!Number.isFinite(codePoint) || codePoint < 0 || codePoint > 0x10FFFF || Math.floor(codePoint) !== codePoint) {
            throw new RangeError('Invalid code point: ' + codePoint);
        }

        if (codePoint <= 0xFFFF) {
            codeUnits.push(codePoint);
        } else {
            // Split an astral code point into a UTF-16 surrogate pair.
            codePoint -= 0x10000;
            const highSurrogate = (codePoint >> 10) | 0xD800;
            const lowSurrogate = (codePoint % 0x400) | 0xDC00;
            codeUnits.push(highSurrogate, lowSurrogate);
        }

        if (codeUnits.length > MAX_SIZE) {
            result += String.fromCharCode.apply(null, codeUnits);
            codeUnits.length = 0;
        }
    }

    // Always flush what is left, regardless of where the loop stopped.
    result += String.fromCharCode.apply(null, codeUnits);
    return result;
}
var getString=function(strBytes){
var MAX_SIZE=0x4000;
var codeUnits=[];
替代变量;
替代变量;
var指数=-1;
var结果=“”;
而(++指数 codePoint=(codePoint方法从对象中readAsArrayBuffer和readAsText将Blob对象转换为ArrayBuffer或DOMString异步
例如,可以从原始文本或字节数组创建 Blob 类型的对象
// Wrap the text in a Blob so that a FileReader can read it back as an ArrayBuffer.
// NOTE(review): `text` is not defined in this snippet — presumably supplied by surrounding code.
let blob = new Blob([text], { type: "text/plain" });
let reader = new FileReader();
// onload handler: event.target.result is bound to a local here and not used further.
reader.onload = event =>
{
let buffer = event.target.result;
};
// Start the read; the result is delivered through the onload handler above.
reader.readAsArrayBuffer(blob);
我认为最好把它封装成类似 Promise 的形式:
/**
 * Reads `text` through a Blob and a FileReader and hands the resulting
 * bytes to a callback.
 *
 * @param {string} text - Text placed into a "text/plain" Blob.
 * @returns {{done: function(function(Uint8Array): void): void}} Handle whose
 *     `done(callback)` registers the function that receives the bytes when
 *     the reader's onload fires. Until one is registered a no-op is used.
 */
function textToByteArray(text)
{
    const payload = new Blob([text], { type: "text/plain" });
    const fileReader = new FileReader();

    // Replaced by whatever the caller passes to done().
    let onBytes = () => { };

    fileReader.onload = ({ target }) =>
    {
        onBytes(new Uint8Array(target.result));
    };
    fileReader.readAsArrayBuffer(payload);

    return {
        done: (callback) => { onBytes = callback; }
    };
}
/**
 * Reads `bytes` through a Blob and a FileReader as text and hands the
 * resulting string to a callback.
 *
 * @param {*} bytes - Data placed into an "application/octet-stream" Blob.
 * @param {string} [encoding] - When truthy, forwarded to readAsText as the
 *     second argument; otherwise readAsText is called with the Blob only.
 * @returns {{done: function(function(string): void): void}} Handle whose
 *     `done(callback)` registers the function that receives the text when
 *     the reader's onload fires. Until one is registered a no-op is used.
 */
function byteArrayToText(bytes, encoding)
{
    const payload = new Blob([bytes], { type: "application/octet-stream" });
    const fileReader = new FileReader();

    // Replaced by whatever the caller passes to done().
    let onText = () => { };

    fileReader.onload = ({ target }) =>
    {
        onText(target.result);
    };

    // Only pass the encoding along when the caller actually supplied one.
    const readArgs = encoding ? [payload, encoding] : [payload];
    fileReader.readAsText(...readArgs);

    return {
        done: (callback) => { onText = callback; }
    };
}
let text = "\uD83D\uDCA9 = \u2661";
textToByteArray(text).done(bytes =>
{
console.log(bytes);
byteArrayToText(bytes, 'UTF-8').done(text =>
{
console.log(text); // If you don't want to use any external polyfill library, you can use this function provided by the Mozilla Developer Network website:
/**
 * Decodes an array of UTF-8 bytes into a JavaScript (UTF-16) string.
 *
 * The lead byte selects the sequence length (one to six bytes, following the
 * original layout). A multi-byte branch is only taken when enough bytes
 * remain; otherwise the byte is passed through as a single value.
 *
 * Code points above U+FFFF are emitted as a surrogate pair via
 * String.fromCodePoint. Any computed value that is not a valid Unicode
 * code point (e.g. from five/six-byte forms or malformed continuation bytes)
 * becomes U+FFFD.
 *
 * @param {ArrayLike<number>} aBytes - Byte values (0-255), e.g. a Uint8Array.
 * @returns {string} The decoded string.
 */
function utf8ArrayToString(aBytes) {
    const nLen = aBytes.length;
    let sView = "";

    for (let nIdx = 0; nIdx < nLen; nIdx++) {
        const nPart = aBytes[nIdx];
        const nCodePoint =
            nPart > 251 && nPart < 254 && nIdx + 5 < nLen ? /* six bytes */
                /* (nPart - 252 << 30) may be not so safe in ECMAScript! So...: */
                (nPart - 252) * 1073741824 + (aBytes[++nIdx] - 128 << 24) + (aBytes[++nIdx] - 128 << 18) + (aBytes[++nIdx] - 128 << 12) + (aBytes[++nIdx] - 128 << 6) + aBytes[++nIdx] - 128
            : nPart > 247 && nPart < 252 && nIdx + 4 < nLen ? /* five bytes */
                (nPart - 248 << 24) + (aBytes[++nIdx] - 128 << 18) + (aBytes[++nIdx] - 128 << 12) + (aBytes[++nIdx] - 128 << 6) + aBytes[++nIdx] - 128
            : nPart > 239 && nPart < 248 && nIdx + 3 < nLen ? /* four bytes */
                (nPart - 240 << 18) + (aBytes[++nIdx] - 128 << 12) + (aBytes[++nIdx] - 128 << 6) + aBytes[++nIdx] - 128
            : nPart > 223 && nPart < 240 && nIdx + 2 < nLen ? /* three bytes */
                (nPart - 224 << 12) + (aBytes[++nIdx] - 128 << 6) + aBytes[++nIdx] - 128
            : nPart > 191 && nPart < 224 && nIdx + 1 < nLen ? /* two bytes */
                (nPart - 192 << 6) + aBytes[++nIdx] - 128
            : /* one byte */
                nPart;

        // String.fromCharCode would truncate anything above 0xFFFF to 16 bits;
        // fromCodePoint emits a proper surrogate pair but throws on values
        // outside the Unicode range, so those are replaced instead.
        const isValid = Number.isInteger(nCodePoint) && nCodePoint >= 0 && nCodePoint <= 0x10FFFF;
        sView += isValid ? String.fromCodePoint(nCodePoint) : "\uFFFD";
    }

    return sView;
}
// Demo: decode a literal byte sequence (it contains three-byte sequences led by 226) and log the result.
let str = utf8ArrayToString([50,72,226,130,130,32,43,32,79,226,130,130,32,226,135,140,32,50,72,226,130,130,79]);
// Must show 2H₂ + O₂ ⇌ 2H₂O
console.log(str);
函数text到字节数组(text)
{
设blob=newblob([text],{type:“text/plain”});
let reader=new FileReader();
let done=function(){};
reader.onload=事件=>
{
完成(新的Uint8Array(event.target.result));
};
reader.readAsArrayBuffer(blob);
返回{done:function(callback){done=callback;}}
}
函数byteArrayToText(字节,编码)
{
设blob=newblob([bytes],{type:“application/octet stream”});
let reader=new FileReader();
let done=function(){};
reader.onload=事件=>
{
完成(事件、目标、结果);
};
if(编码){reader.readAsText(blob,编码);}else{reader.readAsText(blob);}
返回{done:function(callback){done=callback;}}
}
let text=“\uD83D\uDCA9=\u2661”;
textToByteArray(text).done(字节=>
{
console.log(字节);
byteArrayToText(字节,'UTF-8')。完成(文本=>
{
console.log(text);//如果不想使用任何外部polyfill库,可以使用以下提供的函数:
函数utf8ArrayToString(aBytes){
var sView=“”;
for(var nPart,nLen=aBytes.length,nIdx=0;nIdx251&&nPart<254&&nIdx+5 /*(nPart-252这类问题的最新答案(使用现在的方法)如下:哇,我从来没有看到UTF-8阵列缓冲与字符串对话!只是开玩笑:)@LightStyle谢谢,完全错过了那个拼写错误!:pvar uintArray=new Uint8Array(“String.split”(“”).map(函数(char){return char.charCodeAt(0);}))
这就是你需要的,我可以在回答中解释,否则我只能保留注释;)您发布的一行代码将把0x00–0xFF范围内的字节解码为相应的Unicode代码点U+0000–U+00FF。换句话说,它不能表示整个Unicode范围内的任何地方。但是,恰好Unicode代码点U+0000–U+00FF与ISO 8859-1(拉丁语1)完全对应,所以您所写的实际上是一个ISO 8859-1解码器。LightStyle的oneliner是与问题中的解码器相对应的编码器。换句话说,它是一个ISO 8859-1编码器。atob/btoa
do base64编码/解码,如果您传递一个诚实的utf8字节数组,它将无法工作:它计划只与n编码字符串,否则它将无法工作,因为btoa
和atob
转换。我可能应该指定,但是ArrayBuffer
中的UTF-8字符串来自一个用不同编程语言编写的单独程序,该程序生成纯UTF-8字符串,正如Esailija所说,我不能将其用作它执行base64 encoding.Done。对于stringToUint
函数也是如此,只需删除
// Round trip: encode a string to bytes with TextEncoder, decode it back with
// TextDecoder (constructed without arguments), then log both values.
var uint8array = new TextEncoder("utf-8").encode("Plain Text");
var string = new TextDecoder().decode(uint8array);
console.log(uint8array ,string )
/**
 * Decodes an array of UTF-8 bytes into a JavaScript (UTF-16) string.
 *
 * Code units are accumulated in a bounded array and flushed in chunks so
 * that String.fromCharCode.apply never receives an oversized argument list.
 *
 * If the input ends in the middle of a multi-byte sequence, the text decoded
 * so far is kept and a single U+FFFD replacement character is appended for
 * the incomplete sequence.
 *
 * @param {ArrayLike<number>} strBytes - Byte values, e.g. a Uint8Array.
 * @returns {string} The decoded string.
 * @throws {RangeError} If a decoded value is not a valid Unicode code point.
 */
var getString = function (strBytes) {
    const MAX_SIZE = 0x4000;
    const length = strBytes.length;
    const codeUnits = [];
    let result = '';
    let index = 0;

    while (index < length) {
        let codePoint = Number(strBytes[index++]);
        let trailing = 0; // number of continuation bytes that follow the lead byte

        if (codePoint === (codePoint & 0x7F)) {
            // Single-byte (ASCII) value: used as-is.
        } else if (0xF0 === (codePoint & 0xF0)) {
            codePoint ^= 0xF0;
            trailing = 3;
        } else if (0xE0 === (codePoint & 0xE0)) {
            codePoint ^= 0xE0;
            trailing = 2;
        } else if (0xC0 === (codePoint & 0xC0)) {
            codePoint ^= 0xC0;
            trailing = 1;
        }

        if (index + trailing > length) {
            // Input ends inside a multi-byte sequence: mark it and stop,
            // keeping everything decoded up to this point.
            codeUnits.push(0xFFFD);
            break;
        }

        for (let k = 0; k < trailing; k++) {
            codePoint = (codePoint << 6) | (strBytes[index++] ^ 0x80);
        }

        if (!Number.isFinite(codePoint) || codePoint < 0 || codePoint > 0x10FFFF || Math.floor(codePoint) !== codePoint) {
            throw new RangeError('Invalid code point: ' + codePoint);
        }

        if (codePoint <= 0xFFFF) {
            codeUnits.push(codePoint);
        } else {
            // Split an astral code point into a UTF-16 surrogate pair.
            codePoint -= 0x10000;
            const highSurrogate = (codePoint >> 10) | 0xD800;
            const lowSurrogate = (codePoint % 0x400) | 0xDC00;
            codeUnits.push(highSurrogate, lowSurrogate);
        }

        if (codeUnits.length > MAX_SIZE) {
            result += String.fromCharCode.apply(null, codeUnits);
            codeUnits.length = 0;
        }
    }

    // Always flush what is left, regardless of where the loop stopped.
    result += String.fromCharCode.apply(null, codeUnits);
    return result;
}
// Wrap the text in a Blob so that a FileReader can read it back as an ArrayBuffer.
// NOTE(review): `text` is not defined in this snippet — presumably supplied by surrounding code.
let blob = new Blob([text], { type: "text/plain" });
let reader = new FileReader();
// onload handler: event.target.result is bound to a local here and not used further.
reader.onload = event =>
{
let buffer = event.target.result;
};
// Start the read; the result is delivered through the onload handler above.
reader.readAsArrayBuffer(blob);
/**
 * Reads `text` through a Blob and a FileReader and hands the resulting
 * bytes to a callback.
 *
 * @param {string} text - Text placed into a "text/plain" Blob.
 * @returns {{done: function(function(Uint8Array): void): void}} Handle whose
 *     `done(callback)` registers the function that receives the bytes when
 *     the reader's onload fires. Until one is registered a no-op is used.
 */
function textToByteArray(text)
{
    const payload = new Blob([text], { type: "text/plain" });
    const fileReader = new FileReader();

    // Replaced by whatever the caller passes to done().
    let onBytes = () => { };

    fileReader.onload = ({ target }) =>
    {
        onBytes(new Uint8Array(target.result));
    };
    fileReader.readAsArrayBuffer(payload);

    return {
        done: (callback) => { onBytes = callback; }
    };
}
/**
 * Reads `bytes` through a Blob and a FileReader as text and hands the
 * resulting string to a callback.
 *
 * @param {*} bytes - Data placed into an "application/octet-stream" Blob.
 * @param {string} [encoding] - When truthy, forwarded to readAsText as the
 *     second argument; otherwise readAsText is called with the Blob only.
 * @returns {{done: function(function(string): void): void}} Handle whose
 *     `done(callback)` registers the function that receives the text when
 *     the reader's onload fires. Until one is registered a no-op is used.
 */
function byteArrayToText(bytes, encoding)
{
    const payload = new Blob([bytes], { type: "application/octet-stream" });
    const fileReader = new FileReader();

    // Replaced by whatever the caller passes to done().
    let onText = () => { };

    fileReader.onload = ({ target }) =>
    {
        onText(target.result);
    };

    // Only pass the encoding along when the caller actually supplied one.
    const readArgs = encoding ? [payload, encoding] : [payload];
    fileReader.readAsText(...readArgs);

    return {
        done: (callback) => { onText = callback; }
    };
}
let text = "\uD83D\uDCA9 = \u2661";
textToByteArray(text).done(bytes =>
{
console.log(bytes);
byteArrayToText(bytes, 'UTF-8').done(text =>
{
console.log(text); // If you don't want to use any external polyfill library, you can use this function provided by the Mozilla Developer Network website:
/**
 * Decodes an array of UTF-8 bytes into a JavaScript (UTF-16) string.
 *
 * The lead byte selects the sequence length (one to six bytes, following the
 * original layout). A multi-byte branch is only taken when enough bytes
 * remain; otherwise the byte is passed through as a single value.
 *
 * Code points above U+FFFF are emitted as a surrogate pair via
 * String.fromCodePoint. Any computed value that is not a valid Unicode
 * code point (e.g. from five/six-byte forms or malformed continuation bytes)
 * becomes U+FFFD.
 *
 * @param {ArrayLike<number>} aBytes - Byte values (0-255), e.g. a Uint8Array.
 * @returns {string} The decoded string.
 */
function utf8ArrayToString(aBytes) {
    const nLen = aBytes.length;
    let sView = "";

    for (let nIdx = 0; nIdx < nLen; nIdx++) {
        const nPart = aBytes[nIdx];
        const nCodePoint =
            nPart > 251 && nPart < 254 && nIdx + 5 < nLen ? /* six bytes */
                /* (nPart - 252 << 30) may be not so safe in ECMAScript! So...: */
                (nPart - 252) * 1073741824 + (aBytes[++nIdx] - 128 << 24) + (aBytes[++nIdx] - 128 << 18) + (aBytes[++nIdx] - 128 << 12) + (aBytes[++nIdx] - 128 << 6) + aBytes[++nIdx] - 128
            : nPart > 247 && nPart < 252 && nIdx + 4 < nLen ? /* five bytes */
                (nPart - 248 << 24) + (aBytes[++nIdx] - 128 << 18) + (aBytes[++nIdx] - 128 << 12) + (aBytes[++nIdx] - 128 << 6) + aBytes[++nIdx] - 128
            : nPart > 239 && nPart < 248 && nIdx + 3 < nLen ? /* four bytes */
                (nPart - 240 << 18) + (aBytes[++nIdx] - 128 << 12) + (aBytes[++nIdx] - 128 << 6) + aBytes[++nIdx] - 128
            : nPart > 223 && nPart < 240 && nIdx + 2 < nLen ? /* three bytes */
                (nPart - 224 << 12) + (aBytes[++nIdx] - 128 << 6) + aBytes[++nIdx] - 128
            : nPart > 191 && nPart < 224 && nIdx + 1 < nLen ? /* two bytes */
                (nPart - 192 << 6) + aBytes[++nIdx] - 128
            : /* one byte */
                nPart;

        // String.fromCharCode would truncate anything above 0xFFFF to 16 bits;
        // fromCodePoint emits a proper surrogate pair but throws on values
        // outside the Unicode range, so those are replaced instead.
        const isValid = Number.isInteger(nCodePoint) && nCodePoint >= 0 && nCodePoint <= 0x10FFFF;
        sView += isValid ? String.fromCodePoint(nCodePoint) : "\uFFFD";
    }

    return sView;
}
// Demo: decode a literal byte sequence (it contains three-byte sequences led by 226) and log the result.
let str = utf8ArrayToString([50,72,226,130,130,32,43,32,79,226,130,130,32,226,135,140,32,50,72,226,130,130,79]);
// Must show 2H₂ + O₂ ⇌ 2H₂O
console.log(str);