Javascript UTF-8 ArrayBuffer与字符串之间的转换_Javascript_String_Utf 8_Arraybuffer

Javascript UTF-8 ArrayBuffer与字符串之间的转换

javascript string utf-8

Javascript UTF-8 ArrayBuffer与字符串之间的转换,javascript,string,utf-8,arraybuffer,Javascript,String,Utf 8,Arraybuffer,我有一个ArrayBuffer，其中包含一个使用UTF-8编码的字符串，我找不到一种标准方法将这种ArrayBuffer转换为JSstring（我知道它是使用UTF-16编码的）我在很多地方看过这段代码，但我不知道它如何处理任何长度超过1字节的UTF-8代码点 return String.fromCharCode.apply(null, new Uint8Array(data)); 类似地，我找不到从字符串转换为UTF-8编码的ArrayBuffer的标准方法，如果您在浏览器中执行此操作，则

我有一个

ArrayBuffer

，其中包含一个使用UTF-8编码的字符串，我找不到一种标准方法将这种

ArrayBuffer

转换为JS

string

（我知道它是使用UTF-16编码的）

我在很多地方看过这段代码，但我不知道它如何处理任何长度超过1字节的UTF-8代码点

return String.fromCharCode.apply(null, new Uint8Array(data));

类似地，我找不到从

字符串

转换为UTF-8编码的

ArrayBuffer

的标准方法，如果您在浏览器中执行此操作，则没有内置字符编码库，但您可以通过以下方式：

function pad(n) {
    return n.length < 2 ? "0" + n : n;
}
var array = new Uint8Array(data);
var str = "";
对于（变量i=0，len=array.length；i


下面是一个解码3字节UTF-8单元的演示：
警告：已从web标准中删除escape和unescape。
这应该可以：
// http://www.onicos.com/staff/iz/amuse/javascript/expert/utf.txt

/* utf.js - UTF-8 <=> UTF-16 convertion
 *
 * Copyright (C) 1999 Masanao Izumo <iz@onicos.co.jp>
 * Version: 1.0
 * LastModified: Dec 25 1999
 * This library is free.  You can redistribute it and/or modify it.
 */

/**
 * Decodes an array of UTF-8 encoded bytes into a JavaScript (UTF-16) string.
 *
 * Handles 1-, 2-, 3- and 4-byte sequences. Code points above U+FFFF are
 * emitted as a surrogate pair. Bytes that cannot start a sequence
 * (0x80-0xBF and 0xF8-0xFF) are skipped. Continuation bytes are not
 * validated; a missing continuation byte at the end of the input
 * contributes zero bits.
 *
 * @param {ArrayLike<number>} array - Byte values (0-255), e.g. a Uint8Array.
 * @returns {string} The decoded string.
 */
function Utf8ArrayToStr(array) {
  var out, i, len, c;
  var char2, char3, char4, codePoint;

  out = "";
  len = array.length;
  i = 0;
  while (i < len) {
    c = array[i++];
    // The high nibble of the lead byte determines the sequence length.
    switch (c >> 4)
    {
      case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
        // 0xxxxxxx
        out += String.fromCharCode(c);
        break;
      case 12: case 13:
        // 110x xxxx   10xx xxxx
        char2 = array[i++];
        out += String.fromCharCode(((c & 0x1F) << 6) | (char2 & 0x3F));
        break;
      case 14:
        // 1110 xxxx  10xx xxxx  10xx xxxx
        char2 = array[i++];
        char3 = array[i++];
        out += String.fromCharCode(((c & 0x0F) << 12) |
                                   ((char2 & 0x3F) << 6) |
                                   ((char3 & 0x3F) << 0));
        break;
      case 15:
        // 1111 0xxx  10xx xxxx  10xx xxxx  10xx xxxx
        if (c & 0x08) {
          // 0xF8-0xFF never start a UTF-8 sequence; skip the byte.
          break;
        }
        char2 = array[i++];
        char3 = array[i++];
        char4 = array[i++];
        codePoint = ((c & 0x07) << 18) |
                    ((char2 & 0x3F) << 12) |
                    ((char3 & 0x3F) << 6) |
                    (char4 & 0x3F);
        if (codePoint > 0x10FFFF) {
          // Beyond the Unicode range: emit the replacement character.
          out += "\uFFFD";
        } else if (codePoint > 0xFFFF) {
          // Astral code point: split into a UTF-16 surrogate pair.
          codePoint -= 0x10000;
          out += String.fromCharCode(0xD800 | (codePoint >> 10),
                                     0xDC00 | (codePoint & 0x3FF));
        } else {
          out += String.fromCharCode(codePoint);
        }
        break;
    }
  }
  return out;
}

//http://www.onicos.com/staff/iz/amuse/javascript/expert/utf.txt
/*utf.js-utf-8 utf-16转换
*
*版权所有（C）1999 Masanao Izumo
*版本：1.0
*最后修改日期：1999年12月25日
*本库是免费的。您可以重新分发和/或修改它。
*/
函数Utf8ArrayToStr（数组）{
var out，i，len，c；
var char2，char3；
out=“”；
len=数组长度；
i=0；
而（我>4）
{ 
案例0:案例1:案例2:案例3:案例4:案例5:案例6:案例7:
//0xxxxxxx
out+=String.fromCharCode（c）；
打破
案例12：案例13：
//110x xxxx 10x xxxx
char2=数组[i++]；
out+=String.fromCharCode（（（c&0x1F）Github上有一个polyfill for over:。这对于节点或浏览器来说很容易，自述文件建议如下：
// TextEncoder and TextDecoder are constructors: they must be invoked with
// `new` (calling them as plain functions throws a TypeError).
// NOTE(review): `encoding` and `string` look like placeholders for the
// caller's own values — confirm before running this snippet as-is.
var uint8array = new TextEncoder(encoding).encode(string);
var string = new TextDecoder(encoding).decode(uint8array);

如果我没记错的话，'utf-8'
就是您需要的编码
。当然，您还需要先把缓冲区包装成 Uint8Array：
var uint8array = new Uint8Array(utf8buffer);

希望它对您的效果和对我的效果一样好。
使用和
程序员寻求从字节数组转换为字符串的主要问题是unicode字符的UTF-8编码（压缩）。此代码将帮助您：
/**
 * Decodes an array of UTF-8 encoded bytes into a JavaScript (UTF-16) string.
 *
 * Code units are buffered and converted in chunks so that
 * String.fromCharCode.apply is never called with an excessively long
 * argument list.
 *
 * A multi-byte sequence that is cut off by the end of the input is decoded
 * as U+FFFD (replacement character), and everything decoded before it is
 * still returned.
 *
 * @param {ArrayLike<number>} strBytes - Byte values (0-255).
 * @returns {string} The decoded string.
 * @throws {RangeError} If a decoded value is not a valid Unicode code point.
 */
var getString = function (strBytes) {

    var MAX_SIZE = 0x4000;
    var REPLACEMENT_CHAR = 0xFFFD;
    var codeUnits = [];
    var highSurrogate;
    var lowSurrogate;
    var index = -1;
    var length = strBytes.length;

    var result = '';

    while (++index < length) {
        var codePoint = Number(strBytes[index]);
        // Number of continuation bytes announced by the lead byte.
        var extra = 0;

        if (codePoint === (codePoint & 0x7F)) {
            // Single-byte (ASCII) value: used as-is.
        } else if (0xF0 === (codePoint & 0xF0)) {
            codePoint ^= 0xF0;
            extra = 3;
        } else if (0xE0 === (codePoint & 0xE0)) {
            codePoint ^= 0xE0;
            extra = 2;
        } else if (0xC0 === (codePoint & 0xC0)) {
            codePoint ^= 0xC0;
            extra = 1;
        }

        if (index + extra >= length) {
            // The sequence runs past the end of the input: substitute the
            // replacement character and consume the remaining bytes.
            codePoint = REPLACEMENT_CHAR;
            index = length - 1;
        } else {
            while (extra-- > 0) {
                codePoint = (codePoint << 6) | (strBytes[++index] ^ 0x80);
            }
        }

        if (!isFinite(codePoint) || codePoint < 0 || codePoint > 0x10FFFF || Math.floor(codePoint) !== codePoint)
            throw new RangeError('Invalid code point: ' + codePoint);

        if (codePoint <= 0xFFFF)
            codeUnits.push(codePoint);
        else {
            // Astral code point: split into a UTF-16 surrogate pair.
            codePoint -= 0x10000;
            highSurrogate = (codePoint >> 10) | 0xD800;
            lowSurrogate = (codePoint % 0x400) | 0xDC00;
            codeUnits.push(highSurrogate, lowSurrogate);
        }
        if (codeUnits.length > MAX_SIZE) {
            result += String.fromCharCode.apply(null, codeUnits);
            codeUnits.length = 0;
        }
    }

    // Flush whatever is still buffered, regardless of where the loop stopped.
    if (codeUnits.length > 0) {
        result += String.fromCharCode.apply(null, codeUnits);
    }

    return result;
}

var getString=function（strBytes）{
var MAX_SIZE=0x4000；
var codeUnits=[]；
替代变量；
替代变量；
var指数=-1；
var结果=“”；
而（++指数codePoint=（codePoint方法从对象中readAsArrayBuffer和readAsText将Blob对象转换为ArrayBuffer或DOMString异步
例如，可以从原始文本或字节数组创建Blob对象类型
// Wrap `text` in a Blob labelled as plain text.
// NOTE(review): `text` is not defined in this snippet — presumably a string
// supplied by the surrounding code; confirm.
let blob = new Blob([text], { type: "text/plain" });

// FileReader delivers its result asynchronously through the `onload` handler.
let reader = new FileReader();
reader.onload = event =>
{
    // The read result is taken from the load event; it is only stored in a
    // local variable here and not used further.
    let buffer = event.target.result;
};
// Start reading the Blob's contents as an ArrayBuffer.
reader.readAsArrayBuffer(blob);

我认为最好把它封装成类似 Promise 的形式：
/**
 * Asynchronously converts text into a Uint8Array by reading a Blob built
 * from the text through a FileReader.
 *
 * @param {string} text - Content placed into the Blob.
 * @returns {{done: function(function(Uint8Array): void): void}} An object
 *     whose `done(callback)` registers the function to call with the bytes
 *     once the read finishes. Until a callback is registered, a no-op is used.
 */
function textToByteArray(text)
{
    const source = new Blob([text], { type: "text/plain" });
    const fileReader = new FileReader();
    let onComplete = () => {};

    fileReader.onload = function (loadEvent)
    {
        const payload = new Uint8Array(loadEvent.target.result);
        onComplete(payload);
    };
    fileReader.readAsArrayBuffer(source);

    const handle = {
        done(callback)
        {
            onComplete = callback;
        },
    };
    return handle;
}

/**
 * Asynchronously decodes bytes into text by reading a Blob built from the
 * bytes through FileReader.readAsText.
 *
 * @param {*} bytes - Data placed into the Blob (e.g. a Uint8Array).
 * @param {string} [encoding] - Encoding label forwarded to readAsText; when
 *     falsy, readAsText is called without an encoding argument.
 * @returns {{done: function(function(string): void): void}} An object whose
 *     `done(callback)` registers the function to call with the decoded text
 *     once the read finishes. Until a callback is registered, a no-op is used.
 */
function byteArrayToText(bytes, encoding)
{
    const source = new Blob([bytes], { type: "application/octet-stream" });
    const fileReader = new FileReader();
    let onComplete = () => {};

    fileReader.onload = function (loadEvent)
    {
        onComplete(loadEvent.target.result);
    };

    if (encoding)
    {
        fileReader.readAsText(source, encoding);
    }
    else
    {
        fileReader.readAsText(source);
    }

    const handle = {
        done(callback)
        {
            onComplete = callback;
        },
    };
    return handle;
}

let text = "\uD83D\uDCA9 = \u2661";
textToByteArray(text).done(bytes =>
{
    console.log(bytes);
    byteArrayToText(bytes, 'UTF-8').done(text => 
    {
        console.log(text); // If you don't want to use any external polyfill library, you can use this function provided by the Mozilla Developer Network website:



/**
 * Decodes an array of UTF-8 encoded bytes into a JavaScript (UTF-16) string.
 *
 * The lead byte selects a sequence length of one to six bytes (the legacy
 * five- and six-byte forms are still parsed). Decoded values above U+FFFF
 * are emitted as a surrogate pair; values beyond U+10FFFF, which are not
 * Unicode code points, are emitted as U+FFFD. A lead byte without enough
 * following bytes is passed through as a single character.
 *
 * @param {ArrayLike<number>} aBytes - Byte values (0-255).
 * @returns {string} The decoded string.
 */
function utf8ArrayToString(aBytes) {
    var sView = "";
    
    for (var nPart, nCode, nLen = aBytes.length, nIdx = 0; nIdx < nLen; nIdx++) {
        nPart = aBytes[nIdx];
        
        nCode =
            nPart > 251 && nPart < 254 && nIdx + 5 < nLen ? /* six bytes */
                /* (nPart - 252 << 30) may be not so safe in ECMAScript! So...: */
                (nPart - 252) * 1073741824 + (aBytes[++nIdx] - 128 << 24) + (aBytes[++nIdx] - 128 << 18) + (aBytes[++nIdx] - 128 << 12) + (aBytes[++nIdx] - 128 << 6) + aBytes[++nIdx] - 128
            : nPart > 247 && nPart < 252 && nIdx + 4 < nLen ? /* five bytes */
                (nPart - 248 << 24) + (aBytes[++nIdx] - 128 << 18) + (aBytes[++nIdx] - 128 << 12) + (aBytes[++nIdx] - 128 << 6) + aBytes[++nIdx] - 128
            : nPart > 239 && nPart < 248 && nIdx + 3 < nLen ? /* four bytes */
                (nPart - 240 << 18) + (aBytes[++nIdx] - 128 << 12) + (aBytes[++nIdx] - 128 << 6) + aBytes[++nIdx] - 128
            : nPart > 223 && nPart < 240 && nIdx + 2 < nLen ? /* three bytes */
                (nPart - 224 << 12) + (aBytes[++nIdx] - 128 << 6) + aBytes[++nIdx] - 128
            : nPart > 191 && nPart < 224 && nIdx + 1 < nLen ? /* two bytes */
                (nPart - 192 << 6) + aBytes[++nIdx] - 128
            : /* nPart < 127 ? */ /* one byte */
                nPart;

        if (nCode > 0x10FFFF) {
            /* Not a Unicode code point: substitute the replacement character. */
            sView += "\uFFFD";
        } else if (nCode > 0xFFFF) {
            /* String.fromCharCode keeps only 16 bits, so build the surrogate pair by hand. */
            nCode -= 0x10000;
            sView += String.fromCharCode(0xD800 + (nCode >> 10), 0xDC00 + (nCode & 0x3FF));
        } else {
            sView += String.fromCharCode(nCode);
        }
    }
    
    return sView;
}

// Usage example: decode a literal array of UTF-8 byte values and log the result.
let str = utf8ArrayToString([50,72,226,130,130,32,43,32,79,226,130,130,32,226,135,140,32,50,72,226,130,130,79]);

// Must show 2H₂ + O₂ ⇌ 2H₂O
console.log(str);
函数text到字节数组（text）
{
设blob=newblob（[text]，{type:“text/plain”}）；
let reader=new FileReader（）；
let done=function（）{}；
reader.onload=事件=>
{
完成（新的Uint8Array（event.target.result））；
};
reader.readAsArrayBuffer（blob）；
返回{done:function（callback）{done=callback；}}
}
函数byteArrayToText（字节，编码）
{
设blob=newblob（[bytes]，{type:“application/octet stream”}）；
let reader=new FileReader（）；
let done=function（）{}；
reader.onload=事件=>
{
完成（事件、目标、结果）；
};
if（编码）{reader.readAsText（blob，编码）；}else{reader.readAsText（blob）；}
返回{done:function（callback）{done=callback；}}
}
let text=“\uD83D\uDCA9=\u2661”；
textToByteArray（text）.done（字节=>
{
console.log（字节）；
byteArrayToText（字节，'UTF-8'）。完成（文本=>
{
console.log（text）；//如果不想使用任何外部polyfill库，可以使用以下提供的函数：

函数utf8ArrayToString（aBytes）{
var sView=“”；
for（var nPart，nLen=aBytes.length，nIdx=0；nIdx251&&nPart<254&&nIdx+5/*（nPart-252这类问题的最新答案（使用现在的方法）如下：
哇，我从来没有看到UTF-8阵列缓冲与字符串对话！只是开玩笑：）@LightStyle谢谢，完全错过了那个拼写错误！：pvar uintArray=new Uint8Array（“String.split”（“”）.map（函数（char）{return char.charCodeAt（0）；}））
这就是你需要的，我可以在回答中解释，否则我只能保留注释；）您发布的一行代码将把0x00–0xFF范围内的字节解码为相应的Unicode代码点U+0000–U+00FF。换句话说，它不能表示整个Unicode范围内的任何地方。但是，恰好Unicode代码点U+0000–U+00FF与ISO 8859-1（拉丁语1）完全对应，所以您所写的实际上是一个ISO 8859-1解码器。LightStyle的oneliner是与问题中的解码器相对应的编码器。换句话说，它是一个ISO 8859-1编码器。atob/btoa
do base64编码/解码，如果您传递一个诚实的utf8字节数组，它将无法工作：它计划只与n编码字符串，否则它将无法工作，因为btoa
和atob
转换。我可能应该指定，但是ArrayBuffer
中的UTF-8字符串来自一个用不同编程语言编写的单独程序，该程序生成纯UTF-8字符串，正如Esailija所说，我不能将其用作它执行base64 encoding.Done。对于stringToUint函数也是如此，只需删除
// Round trip with the built-in Encoding API: encode a string into a
// Uint8Array, decode it back with a default-constructed TextDecoder,
// then log both values.
var uint8array = new TextEncoder("utf-8").encode("Plain Text");
var string = new TextDecoder().decode(uint8array);
console.log(uint8array ,string )

/**
 * Decodes an array of UTF-8 encoded bytes into a JavaScript (UTF-16) string.
 *
 * Code units are buffered and converted in chunks so that
 * String.fromCharCode.apply is never called with an excessively long
 * argument list.
 *
 * A multi-byte sequence that is cut off by the end of the input is decoded
 * as U+FFFD (replacement character), and everything decoded before it is
 * still returned.
 *
 * @param {ArrayLike<number>} strBytes - Byte values (0-255).
 * @returns {string} The decoded string.
 * @throws {RangeError} If a decoded value is not a valid Unicode code point.
 */
var getString = function (strBytes) {

    var MAX_SIZE = 0x4000;
    var REPLACEMENT_CHAR = 0xFFFD;
    var codeUnits = [];
    var highSurrogate;
    var lowSurrogate;
    var index = -1;
    var length = strBytes.length;

    var result = '';

    while (++index < length) {
        var codePoint = Number(strBytes[index]);
        // Number of continuation bytes announced by the lead byte.
        var extra = 0;

        if (codePoint === (codePoint & 0x7F)) {
            // Single-byte (ASCII) value: used as-is.
        } else if (0xF0 === (codePoint & 0xF0)) {
            codePoint ^= 0xF0;
            extra = 3;
        } else if (0xE0 === (codePoint & 0xE0)) {
            codePoint ^= 0xE0;
            extra = 2;
        } else if (0xC0 === (codePoint & 0xC0)) {
            codePoint ^= 0xC0;
            extra = 1;
        }

        if (index + extra >= length) {
            // The sequence runs past the end of the input: substitute the
            // replacement character and consume the remaining bytes.
            codePoint = REPLACEMENT_CHAR;
            index = length - 1;
        } else {
            while (extra-- > 0) {
                codePoint = (codePoint << 6) | (strBytes[++index] ^ 0x80);
            }
        }

        if (!isFinite(codePoint) || codePoint < 0 || codePoint > 0x10FFFF || Math.floor(codePoint) !== codePoint)
            throw new RangeError('Invalid code point: ' + codePoint);

        if (codePoint <= 0xFFFF)
            codeUnits.push(codePoint);
        else {
            // Astral code point: split into a UTF-16 surrogate pair.
            codePoint -= 0x10000;
            highSurrogate = (codePoint >> 10) | 0xD800;
            lowSurrogate = (codePoint % 0x400) | 0xDC00;
            codeUnits.push(highSurrogate, lowSurrogate);
        }
        if (codeUnits.length > MAX_SIZE) {
            result += String.fromCharCode.apply(null, codeUnits);
            codeUnits.length = 0;
        }
    }

    // Flush whatever is still buffered, regardless of where the loop stopped.
    if (codeUnits.length > 0) {
        result += String.fromCharCode.apply(null, codeUnits);
    }

    return result;
}

// Wrap `text` in a Blob labelled as plain text.
// NOTE(review): `text` is not defined in this snippet — presumably a string
// supplied by the surrounding code; confirm.
let blob = new Blob([text], { type: "text/plain" });

// FileReader delivers its result asynchronously through the `onload` handler.
let reader = new FileReader();
reader.onload = event =>
{
    // The read result is taken from the load event; it is only stored in a
    // local variable here and not used further.
    let buffer = event.target.result;
};
// Start reading the Blob's contents as an ArrayBuffer.
reader.readAsArrayBuffer(blob);

/**
 * Asynchronously converts text into a Uint8Array by reading a Blob built
 * from the text through a FileReader.
 *
 * @param {string} text - Content placed into the Blob.
 * @returns {{done: function(function(Uint8Array): void): void}} An object
 *     whose `done(callback)` registers the function to call with the bytes
 *     once the read finishes. Until a callback is registered, a no-op is used.
 */
function textToByteArray(text)
{
    const source = new Blob([text], { type: "text/plain" });
    const fileReader = new FileReader();
    let onComplete = () => {};

    fileReader.onload = function (loadEvent)
    {
        const payload = new Uint8Array(loadEvent.target.result);
        onComplete(payload);
    };
    fileReader.readAsArrayBuffer(source);

    const handle = {
        done(callback)
        {
            onComplete = callback;
        },
    };
    return handle;
}

/**
 * Asynchronously decodes bytes into text by reading a Blob built from the
 * bytes through FileReader.readAsText.
 *
 * @param {*} bytes - Data placed into the Blob (e.g. a Uint8Array).
 * @param {string} [encoding] - Encoding label forwarded to readAsText; when
 *     falsy, readAsText is called without an encoding argument.
 * @returns {{done: function(function(string): void): void}} An object whose
 *     `done(callback)` registers the function to call with the decoded text
 *     once the read finishes. Until a callback is registered, a no-op is used.
 */
function byteArrayToText(bytes, encoding)
{
    const source = new Blob([bytes], { type: "application/octet-stream" });
    const fileReader = new FileReader();
    let onComplete = () => {};

    fileReader.onload = function (loadEvent)
    {
        onComplete(loadEvent.target.result);
    };

    if (encoding)
    {
        fileReader.readAsText(source, encoding);
    }
    else
    {
        fileReader.readAsText(source);
    }

    const handle = {
        done(callback)
        {
            onComplete = callback;
        },
    };
    return handle;
}

let text = "\uD83D\uDCA9 = \u2661";
textToByteArray(text).done(bytes =>
{
    console.log(bytes);
    byteArrayToText(bytes, 'UTF-8').done(text => 
    {
        console.log(text); // If you don't want to use any external polyfill library, you can use this function provided by the Mozilla Developer Network website:



/**
 * Decodes an array of UTF-8 encoded bytes into a JavaScript (UTF-16) string.
 *
 * The lead byte selects a sequence length of one to six bytes (the legacy
 * five- and six-byte forms are still parsed). Decoded values above U+FFFF
 * are emitted as a surrogate pair; values beyond U+10FFFF, which are not
 * Unicode code points, are emitted as U+FFFD. A lead byte without enough
 * following bytes is passed through as a single character.
 *
 * @param {ArrayLike<number>} aBytes - Byte values (0-255).
 * @returns {string} The decoded string.
 */
function utf8ArrayToString(aBytes) {
    var sView = "";
    
    for (var nPart, nCode, nLen = aBytes.length, nIdx = 0; nIdx < nLen; nIdx++) {
        nPart = aBytes[nIdx];
        
        nCode =
            nPart > 251 && nPart < 254 && nIdx + 5 < nLen ? /* six bytes */
                /* (nPart - 252 << 30) may be not so safe in ECMAScript! So...: */
                (nPart - 252) * 1073741824 + (aBytes[++nIdx] - 128 << 24) + (aBytes[++nIdx] - 128 << 18) + (aBytes[++nIdx] - 128 << 12) + (aBytes[++nIdx] - 128 << 6) + aBytes[++nIdx] - 128
            : nPart > 247 && nPart < 252 && nIdx + 4 < nLen ? /* five bytes */
                (nPart - 248 << 24) + (aBytes[++nIdx] - 128 << 18) + (aBytes[++nIdx] - 128 << 12) + (aBytes[++nIdx] - 128 << 6) + aBytes[++nIdx] - 128
            : nPart > 239 && nPart < 248 && nIdx + 3 < nLen ? /* four bytes */
                (nPart - 240 << 18) + (aBytes[++nIdx] - 128 << 12) + (aBytes[++nIdx] - 128 << 6) + aBytes[++nIdx] - 128
            : nPart > 223 && nPart < 240 && nIdx + 2 < nLen ? /* three bytes */
                (nPart - 224 << 12) + (aBytes[++nIdx] - 128 << 6) + aBytes[++nIdx] - 128
            : nPart > 191 && nPart < 224 && nIdx + 1 < nLen ? /* two bytes */
                (nPart - 192 << 6) + aBytes[++nIdx] - 128
            : /* nPart < 127 ? */ /* one byte */
                nPart;

        if (nCode > 0x10FFFF) {
            /* Not a Unicode code point: substitute the replacement character. */
            sView += "\uFFFD";
        } else if (nCode > 0xFFFF) {
            /* String.fromCharCode keeps only 16 bits, so build the surrogate pair by hand. */
            nCode -= 0x10000;
            sView += String.fromCharCode(0xD800 + (nCode >> 10), 0xDC00 + (nCode & 0x3FF));
        } else {
            sView += String.fromCharCode(nCode);
        }
    }
    
    return sView;
}

// Usage example: decode a literal array of UTF-8 byte values and log the result.
let str = utf8ArrayToString([50,72,226,130,130,32,43,32,79,226,130,130,32,226,135,140,32,50,72,226,130,130,79]);

// Must show 2H₂ + O₂ ⇌ 2H₂O
console.log(str);