Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/string/5.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Javascript UTF-8 ArrayBuffer与字符串之间的转换_Javascript_String_Utf 8_Arraybuffer - Fatal编程技术网

Javascript UTF-8 ArrayBuffer与字符串之间的转换

Javascript UTF-8 ArrayBuffer与字符串之间的转换,javascript,string,utf-8,arraybuffer,Javascript,String,Utf 8,Arraybuffer,我有一个ArrayBuffer,其中包含一个使用UTF-8编码的字符串,我找不到一种标准方法将这种ArrayBuffer转换为JSstring(我知道它是使用UTF-16编码的) 我在很多地方看过这段代码,但我不知道它如何处理任何长度超过1字节的UTF-8代码点 return String.fromCharCode.apply(null, new Uint8Array(data)); 类似地,我找不到从字符串转换为UTF-8编码的ArrayBuffer的标准方法,如果您在浏览器中执行此操作,则

我有一个
ArrayBuffer
,其中包含一个使用UTF-8编码的字符串,我找不到一种标准方法将这种
ArrayBuffer
转换为JS
string
(我知道它是使用UTF-16编码的)

我在很多地方看过这段代码,但我不知道它如何处理任何长度超过1字节的UTF-8代码点

return String.fromCharCode.apply(null, new Uint8Array(data));

类似地,我找不到从
字符串
转换为UTF-8编码的
ArrayBuffer
的标准方法,如果您在浏览器中执行此操作,则没有内置字符编码库,但您可以 通过以下方式:

function pad(n) {
    return n.length < 2 ? "0" + n : n;
}
var array = new Uint8Array(data);
var str = "";
对于(变量i=0,len=array.length;i
下面是一个解码3字节UTF-8单元的演示:

警告:已从web标准中删除escape和unescape。

这应该可以:

// http://www.onicos.com/staff/iz/amuse/javascript/expert/utf.txt

/* utf.js - UTF-8 <=> UTF-16 convertion
 *
 * Copyright (C) 1999 Masanao Izumo <iz@onicos.co.jp>
 * Version: 1.0
 * LastModified: Dec 25 1999
 * This library is free.  You can redistribute it and/or modify it.
 */

/**
 * Decodes an array-like of UTF-8 bytes into a JavaScript (UTF-16) string.
 *
 * One-, two-, three- and four-byte sequences are decoded; four-byte
 * sequences are emitted as a UTF-16 surrogate pair. Bytes that cannot start
 * a sequence (0x80-0xBF and 0xF8-0xFF) are skipped without producing output.
 * Continuation bytes are not validated; a missing trailing byte counts as 0.
 *
 * @param {ArrayLike<number>} array - UTF-8 encoded bytes (e.g. a Uint8Array).
 * @returns {string} The decoded string.
 */
function Utf8ArrayToStr(array) {
  var out, i, len, c;
  var char2, char3, char4, codePoint;

  out = "";
  len = array.length;
  i = 0;
  while (i < len) {
    c = array[i++];
    // The high nibble of the lead byte identifies the sequence length.
    switch (c >> 4)
    {
      case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
        // 0xxxxxxx
        out += String.fromCharCode(c);
        break;
      case 12: case 13:
        // 110x xxxx   10xx xxxx
        char2 = array[i++];
        out += String.fromCharCode(((c & 0x1F) << 6) | (char2 & 0x3F));
        break;
      case 14:
        // 1110 xxxx  10xx xxxx  10xx xxxx
        char2 = array[i++];
        char3 = array[i++];
        out += String.fromCharCode(((c & 0x0F) << 12) |
                                   ((char2 & 0x3F) << 6) |
                                   ((char3 & 0x3F) << 0));
        break;
      case 15:
        // 1111 0xxx  10xx xxxx  10xx xxxx  10xx xxxx
        // Leads 0xF8-0xFF are not valid UTF-8; skip them like other stray bytes.
        if ((c & 0x08) === 0) {
          char2 = array[i++];
          char3 = array[i++];
          char4 = array[i++];
          codePoint = ((c & 0x07) << 18) |
                      ((char2 & 0x3F) << 12) |
                      ((char3 & 0x3F) << 6) |
                      (char4 & 0x3F);
          // Only supplementary-plane values are representable as a surrogate pair.
          if (codePoint > 0xFFFF && codePoint <= 0x10FFFF) {
            codePoint -= 0x10000;
            out += String.fromCharCode(0xD800 | (codePoint >> 10),
                                       0xDC00 | (codePoint & 0x3FF));
          }
        }
        break;
    }
  }
  return out;
}
//http://www.onicos.com/staff/iz/amuse/javascript/expert/utf.txt
/*utf.js-utf-8 utf-16转换
*
*版权所有(C)1999 Masanao Izumo
*版本:1.0
*最后修改日期:1999年12月25日
*这个图书馆是免费的。您可以重新分发和/或修改它。
*/
函数Utf8ArrayToStr(数组){
var out,i,len,c;
var char2,char3;
out=“”;
len=数组长度;
i=0;
而(我>4)
{ 
案例0:案例1:案例2:案例3:案例4:案例5:案例6:案例7:
//0xxxxxxx
out+=String.fromCharCode(c);
打破
案例12:案例13:
//110x xxxx 10x xxxx
char2=数组[i++];
out+=String.fromCharCode(((c&0x1F)Github上有一个polyfill for over:。这对于节点或浏览器来说很容易,自述文件建议如下:

// TextEncoder and TextDecoder are constructors and must be invoked with `new`;
// calling them as plain functions throws a TypeError.
// `encoding` and `string` are placeholders supplied by the caller.
var uint8array = new TextEncoder(encoding).encode(string);
var string = new TextDecoder(encoding).decode(uint8array);
如果我记得的话,
'utf-8'
是您需要的
编码
,当然您需要包装您的缓冲区:

var uint8array = new Uint8Array(utf8buffer);
希望它对您的效果和对我的效果一样好。

使用和


程序员寻求从字节数组转换为字符串的主要问题是unicode字符的UTF-8编码(压缩)。此代码将帮助您:

/**
 * Decodes an array-like of UTF-8 bytes into a JavaScript string.
 *
 * Code points above U+FFFF are emitted as surrogate pairs. Output is
 * accumulated in chunks so String.fromCharCode.apply is never handed an
 * oversized argument list. A multi-byte sequence cut off by the end of the
 * input decodes to a single U+FFFD instead of discarding the pending output.
 *
 * @param {ArrayLike<number>} strBytes - UTF-8 encoded bytes.
 * @returns {string} The decoded string.
 * @throws {RangeError} If a decoded value is not an integer in 0..0x10FFFF.
 */
var getString = function (strBytes) {

    var MAX_SIZE = 0x4000;
    var codeUnits = [];
    var highSurrogate;
    var lowSurrogate;
    var length = strBytes.length;
    var index = -1;

    var result = '';

    while (++index < length) {
        var codePoint = Number(strBytes[index]);
        var extraBytes = 0;

        // Strip the lead-byte marker bits and note how many continuation bytes follow.
        if (codePoint === (codePoint & 0x7F)) {
            // Single-byte (ASCII) value: used as is.
        } else if (0xF0 === (codePoint & 0xF0)) {
            codePoint ^= 0xF0;
            extraBytes = 3;
        } else if (0xE0 === (codePoint & 0xE0)) {
            codePoint ^= 0xE0;
            extraBytes = 2;
        } else if (0xC0 === (codePoint & 0xC0)) {
            codePoint ^= 0xC0;
            extraBytes = 1;
        }

        if (extraBytes > 0 && index + extraBytes >= length) {
            // Truncated trailing sequence: substitute U+FFFD and consume the rest.
            codePoint = 0xFFFD;
            index = length - 1;
        } else {
            while (extraBytes-- > 0) {
                // XOR with 0x80 clears the continuation marker bit.
                codePoint = (codePoint << 6) | (strBytes[++index] ^ 0x80);
            }
        }

        if (!isFinite(codePoint) || codePoint < 0 || codePoint > 0x10FFFF || Math.floor(codePoint) !== codePoint)
            throw new RangeError('Invalid code point: ' + codePoint);

        if (codePoint <= 0xFFFF)
            codeUnits.push(codePoint);
        else {
            codePoint -= 0x10000;
            highSurrogate = (codePoint >> 10) | 0xD800;
            lowSurrogate = (codePoint % 0x400) | 0xDC00;
            codeUnits.push(highSurrogate, lowSurrogate);
        }

        // Flush periodically to keep the apply() argument list bounded.
        if (codeUnits.length > MAX_SIZE) {
            result += String.fromCharCode.apply(null, codeUnits);
            codeUnits.length = 0;
        }
    }

    // Final flush happens after the loop so it cannot be skipped.
    result += String.fromCharCode.apply(null, codeUnits);

    return result;
};
var getString=function(strBytes){
var MAX_SIZE=0x4000;
var codeUnits=[];
替代变量;
替代变量;
var指数=-1;
var结果=“”;
而(++指数codePoint=(codePoint方法从对象中readAsArrayBufferreadAsText将Blob对象转换为ArrayBuffer或DOMString异步

例如,可以从原始文本或字节数组创建Blob对象类型

// Wrap the text in a Blob so it can be handed to a FileReader.
let blob = new Blob([text], { type: "text/plain" });

// FileReader works asynchronously: the result is delivered to the onload handler.
let reader = new FileReader();
reader.onload = event =>
{
    // The value produced by the readAsArrayBuffer call below.
    let buffer = event.target.result;
};
reader.readAsArrayBuffer(blob);
我认为最好把它封装成类似 Promise 的形式:

/**
 * Asynchronously converts text into a Uint8Array by reading a Blob through
 * a FileReader.
 *
 * @param {string} text - Text to convert.
 * @returns {{done: function(function(Uint8Array): void): void}} Handle whose
 *   done(callback) registers the function that receives the bytes.
 */
function textToByteArray(text)
{
    const source = new Blob([text], { type: "text/plain" });
    const fileReader = new FileReader();

    // Replaced by the caller through done(); a no-op until then.
    let notify = function() { };

    fileReader.onload = ({ target }) =>
    {
        const bytes = new Uint8Array(target.result);
        notify(bytes);
    };
    fileReader.readAsArrayBuffer(source);

    return {
        done: function(callback) { notify = callback; }
    };
}

/**
 * Asynchronously decodes bytes into text by reading a Blob through a
 * FileReader.
 *
 * @param {*} bytes - Data placed into the Blob (e.g. a Uint8Array).
 * @param {string} [encoding] - Encoding label passed to readAsText; when
 *   falsy, readAsText is called without one.
 * @returns {{done: function(function(string): void): void}} Handle whose
 *   done(callback) registers the function that receives the text.
 */
function byteArrayToText(bytes, encoding)
{
    const source = new Blob([bytes], { type: "application/octet-stream" });
    const fileReader = new FileReader();

    // Replaced by the caller through done(); a no-op until then.
    let notify = function() { };

    fileReader.onload = ({ target }) =>
    {
        notify(target.result);
    };

    if (!encoding)
    {
        fileReader.readAsText(source);
    }
    else
    {
        fileReader.readAsText(source, encoding);
    }

    return {
        done: function(callback) { notify = callback; }
    };
}

let text = "\uD83D\uDCA9 = \u2661";
textToByteArray(text).done(bytes =>
{
    console.log(bytes);
    byteArrayToText(bytes, 'UTF-8').done(text => 
    {
        console.log(text); // If you don't want to use any external polyfill library, you can use this function provided by the Mozilla Developer Network website:

/**
 * Decodes an array-like of UTF-8 bytes into a JavaScript string.
 *
 * Code points above U+FFFF are emitted as a UTF-16 surrogate pair (passing
 * them straight to String.fromCharCode would truncate them to 16 bits).
 * Obsolete five- and six-byte forms, and any value beyond U+10FFFF, decode
 * to U+FFFD. A lead byte without enough following bytes, or a byte that is
 * not a recognized lead, is emitted as a single character code.
 *
 * @param {ArrayLike<number>} aBytes - UTF-8 encoded bytes.
 * @returns {string} The decoded string.
 */
function utf8ArrayToString(aBytes) {
    var sView = "";

    for (var nPart, nExtra, nCode, nLen = aBytes.length, nIdx = 0; nIdx < nLen; nIdx++) {
        nPart = aBytes[nIdx];

        /* Continuation bytes announced by the lead byte, and the lead's payload bits. */
        if (nPart > 251 && nPart < 254) { /* six bytes */
            nExtra = 5; nCode = nPart - 252;
        } else if (nPart > 247 && nPart < 252) { /* five bytes */
            nExtra = 4; nCode = nPart - 248;
        } else if (nPart > 239 && nPart < 248) { /* four bytes */
            nExtra = 3; nCode = nPart - 240;
        } else if (nPart > 223 && nPart < 240) { /* three bytes */
            nExtra = 2; nCode = nPart - 224;
        } else if (nPart > 191 && nPart < 224) { /* two bytes */
            nExtra = 1; nCode = nPart - 192;
        } else { /* one byte */
            nExtra = 0; nCode = nPart;
        }

        if (nExtra > 0 && nIdx + nExtra < nLen) {
            /* Multiplication instead of << keeps six-byte values out of 32-bit overflow. */
            while (nExtra-- > 0) {
                nCode = nCode * 64 + (aBytes[++nIdx] - 128);
            }
        } else {
            /* Not enough bytes left: fall back to the raw byte. */
            nCode = nPart;
        }

        if (nCode > 0x10FFFF) {
            sView += "\uFFFD";
        } else if (nCode > 0xFFFF) {
            nCode -= 0x10000;
            sView += String.fromCharCode(0xD800 + (nCode >> 10), 0xDC00 + (nCode & 0x3FF));
        } else {
            sView += String.fromCharCode(nCode);
        }
    }

    return sView;
}

// Demo: decode a sample byte sequence that mixes one-byte and three-byte UTF-8 characters.
let str = utf8ArrayToString([50,72,226,130,130,32,43,32,79,226,130,130,32,226,135,140,32,50,72,226,130,130,79]);

// Must show 2H₂ + O₂ ⇌ 2H₂O
console.log(str);
函数text到字节数组(text)
{
设blob=newblob([text],{type:“text/plain”});
let reader=new FileReader();
let done=function(){};
reader.onload=事件=>
{
完成(新的Uint8Array(event.target.result));
};
reader.readAsArrayBuffer(blob);
返回{done:function(callback){done=callback;}}
}
函数byteArrayToText(字节,编码)
{
设blob=newblob([bytes],{type:“application/octet stream”});
let reader=new FileReader();
let done=function(){};
reader.onload=事件=>
{
完成(事件、目标、结果);
};
if(编码){reader.readAsText(blob,编码);}else{reader.readAsText(blob);}
返回{done:function(callback){done=callback;}}
}
let text=“\uD83D\uDCA9=\u2661”;
textToByteArray(text).done(字节=>
{
console.log(字节);
byteArrayToText(字节,'UTF-8')。完成(文本=>
{

console.log(text);//如果不想使用任何外部polyfill库,可以使用以下提供的函数:

函数utf8ArrayToString(aBytes){
var sView=“”;
for(var nPart,nLen=aBytes.length,nIdx=0;nIdx251&&nPart<254&&nIdx+5/*(nPart-252这类问题的最新答案(使用现在的方法)如下:

哇,我从来没有看到UTF-8阵列缓冲与字符串对话!只是开玩笑:)@LightStyle谢谢,完全错过了那个拼写错误!:p
var uintArray = new Uint8Array("string".split("").map(function(char) { return char.charCodeAt(0); }));
这就是你需要的,我可以在回答中解释,否则我只能保留注释;)您发布的一行代码将把0x00–0xFF范围内的字节解码为相应的Unicode代码点U+0000–U+00FF。换句话说,它不能表示整个Unicode范围内的任何地方。但是,恰好Unicode代码点U+0000–U+00FF与ISO 8859-1(拉丁语1)完全对应,所以您所写的实际上是一个ISO 8859-1解码器。LightStyle的oneliner是与问题中的解码器相对应的编码器。换句话说,它是一个ISO 8859-1编码器。
atob/btoa
do base64编码/解码,如果您传递一个诚实的utf8字节数组,它将无法工作:它计划只与n编码字符串,否则它将无法工作,因为
btoa
atob
转换。我可能应该指定,但是
ArrayBuffer
中的UTF-8字符串来自一个用不同编程语言编写的单独程序,该程序生成纯UTF-8字符串,正如Esailija所说,我不能将其用作它执行base64 encoding.Done。对于
stringToUint
函数也是如此,只需删除
// Round-trip a string through the built-in encoder and decoder:
// text -> UTF-8 bytes -> text.
var uint8array = new TextEncoder("utf-8").encode("Plain Text"),
    string = new TextDecoder().decode(uint8array);

console.log(uint8array, string);
/**
 * Decodes an array-like of UTF-8 bytes into a JavaScript string.
 *
 * Code points above U+FFFF are emitted as surrogate pairs. Output is
 * accumulated in chunks so String.fromCharCode.apply is never handed an
 * oversized argument list. A multi-byte sequence cut off by the end of the
 * input decodes to a single U+FFFD instead of discarding the pending output.
 *
 * @param {ArrayLike<number>} strBytes - UTF-8 encoded bytes.
 * @returns {string} The decoded string.
 * @throws {RangeError} If a decoded value is not an integer in 0..0x10FFFF.
 */
var getString = function (strBytes) {

    var MAX_SIZE = 0x4000;
    var codeUnits = [];
    var highSurrogate;
    var lowSurrogate;
    var length = strBytes.length;
    var index = -1;

    var result = '';

    while (++index < length) {
        var codePoint = Number(strBytes[index]);
        var extraBytes = 0;

        // Strip the lead-byte marker bits and note how many continuation bytes follow.
        if (codePoint === (codePoint & 0x7F)) {
            // Single-byte (ASCII) value: used as is.
        } else if (0xF0 === (codePoint & 0xF0)) {
            codePoint ^= 0xF0;
            extraBytes = 3;
        } else if (0xE0 === (codePoint & 0xE0)) {
            codePoint ^= 0xE0;
            extraBytes = 2;
        } else if (0xC0 === (codePoint & 0xC0)) {
            codePoint ^= 0xC0;
            extraBytes = 1;
        }

        if (extraBytes > 0 && index + extraBytes >= length) {
            // Truncated trailing sequence: substitute U+FFFD and consume the rest.
            codePoint = 0xFFFD;
            index = length - 1;
        } else {
            while (extraBytes-- > 0) {
                // XOR with 0x80 clears the continuation marker bit.
                codePoint = (codePoint << 6) | (strBytes[++index] ^ 0x80);
            }
        }

        if (!isFinite(codePoint) || codePoint < 0 || codePoint > 0x10FFFF || Math.floor(codePoint) !== codePoint)
            throw new RangeError('Invalid code point: ' + codePoint);

        if (codePoint <= 0xFFFF)
            codeUnits.push(codePoint);
        else {
            codePoint -= 0x10000;
            highSurrogate = (codePoint >> 10) | 0xD800;
            lowSurrogate = (codePoint % 0x400) | 0xDC00;
            codeUnits.push(highSurrogate, lowSurrogate);
        }

        // Flush periodically to keep the apply() argument list bounded.
        if (codeUnits.length > MAX_SIZE) {
            result += String.fromCharCode.apply(null, codeUnits);
            codeUnits.length = 0;
        }
    }

    // Final flush happens after the loop so it cannot be skipped.
    result += String.fromCharCode.apply(null, codeUnits);

    return result;
};
// Wrap the text in a Blob so it can be handed to a FileReader.
let blob = new Blob([text], { type: "text/plain" });

// FileReader works asynchronously: the result is delivered to the onload handler.
let reader = new FileReader();
reader.onload = event =>
{
    // The value produced by the readAsArrayBuffer call below.
    let buffer = event.target.result;
};
reader.readAsArrayBuffer(blob);
/**
 * Asynchronously converts text into a Uint8Array by reading a Blob through
 * a FileReader.
 *
 * @param {string} text - Text to convert.
 * @returns {{done: function(function(Uint8Array): void): void}} Handle whose
 *   done(callback) registers the function that receives the bytes.
 */
function textToByteArray(text)
{
    const source = new Blob([text], { type: "text/plain" });
    const fileReader = new FileReader();

    // Replaced by the caller through done(); a no-op until then.
    let notify = function() { };

    fileReader.onload = ({ target }) =>
    {
        const bytes = new Uint8Array(target.result);
        notify(bytes);
    };
    fileReader.readAsArrayBuffer(source);

    return {
        done: function(callback) { notify = callback; }
    };
}

/**
 * Asynchronously decodes bytes into text by reading a Blob through a
 * FileReader.
 *
 * @param {*} bytes - Data placed into the Blob (e.g. a Uint8Array).
 * @param {string} [encoding] - Encoding label passed to readAsText; when
 *   falsy, readAsText is called without one.
 * @returns {{done: function(function(string): void): void}} Handle whose
 *   done(callback) registers the function that receives the text.
 */
function byteArrayToText(bytes, encoding)
{
    const source = new Blob([bytes], { type: "application/octet-stream" });
    const fileReader = new FileReader();

    // Replaced by the caller through done(); a no-op until then.
    let notify = function() { };

    fileReader.onload = ({ target }) =>
    {
        notify(target.result);
    };

    if (!encoding)
    {
        fileReader.readAsText(source);
    }
    else
    {
        fileReader.readAsText(source, encoding);
    }

    return {
        done: function(callback) { notify = callback; }
    };
}

let text = "\uD83D\uDCA9 = \u2661";
textToByteArray(text).done(bytes =>
{
    console.log(bytes);
    byteArrayToText(bytes, 'UTF-8').done(text => 
    {
        console.log(text); // If you don't want to use any external polyfill library, you can use this function provided by the Mozilla Developer Network website:

/**
 * Decodes an array-like of UTF-8 bytes into a JavaScript string.
 *
 * Code points above U+FFFF are emitted as a UTF-16 surrogate pair (passing
 * them straight to String.fromCharCode would truncate them to 16 bits).
 * Obsolete five- and six-byte forms, and any value beyond U+10FFFF, decode
 * to U+FFFD. A lead byte without enough following bytes, or a byte that is
 * not a recognized lead, is emitted as a single character code.
 *
 * @param {ArrayLike<number>} aBytes - UTF-8 encoded bytes.
 * @returns {string} The decoded string.
 */
function utf8ArrayToString(aBytes) {
    var sView = "";

    for (var nPart, nExtra, nCode, nLen = aBytes.length, nIdx = 0; nIdx < nLen; nIdx++) {
        nPart = aBytes[nIdx];

        /* Continuation bytes announced by the lead byte, and the lead's payload bits. */
        if (nPart > 251 && nPart < 254) { /* six bytes */
            nExtra = 5; nCode = nPart - 252;
        } else if (nPart > 247 && nPart < 252) { /* five bytes */
            nExtra = 4; nCode = nPart - 248;
        } else if (nPart > 239 && nPart < 248) { /* four bytes */
            nExtra = 3; nCode = nPart - 240;
        } else if (nPart > 223 && nPart < 240) { /* three bytes */
            nExtra = 2; nCode = nPart - 224;
        } else if (nPart > 191 && nPart < 224) { /* two bytes */
            nExtra = 1; nCode = nPart - 192;
        } else { /* one byte */
            nExtra = 0; nCode = nPart;
        }

        if (nExtra > 0 && nIdx + nExtra < nLen) {
            /* Multiplication instead of << keeps six-byte values out of 32-bit overflow. */
            while (nExtra-- > 0) {
                nCode = nCode * 64 + (aBytes[++nIdx] - 128);
            }
        } else {
            /* Not enough bytes left: fall back to the raw byte. */
            nCode = nPart;
        }

        if (nCode > 0x10FFFF) {
            sView += "\uFFFD";
        } else if (nCode > 0xFFFF) {
            nCode -= 0x10000;
            sView += String.fromCharCode(0xD800 + (nCode >> 10), 0xDC00 + (nCode & 0x3FF));
        } else {
            sView += String.fromCharCode(nCode);
        }
    }

    return sView;
}

// Demo: decode a sample byte sequence that mixes one-byte and three-byte UTF-8 characters.
let str = utf8ArrayToString([50,72,226,130,130,32,43,32,79,226,130,130,32,226,135,140,32,50,72,226,130,130,79]);

// Must show 2H₂ + O₂ ⇌ 2H₂O
console.log(str);