Firefox addon readString正在抛出类型错误:格式错误的UTF-8

Firefox addon readString正在抛出类型错误:格式错误的UTF-8,firefox-addon,jsctypes,Firefox Addon,Jsctypes,此函数返回一个ctypes.unsigned\u char.array(),我确实读取了其中的字符串。它正在获得windows的标题。问题是有时它会抛出类型错误 try { console.error('straight readString on XWindowGetProperty data:', rez_GP.data.readString()); } catch (ex) { console.error('ex on straight readString:', ex);

此函数返回一个
ctypes.unsigned_char.array()
,我对它调用 readString() 来读取其中的字符串。它用于获取窗口的标题。问题是有时它会抛出 TypeError:

// Attempt the direct readString() call on the property data and log the
// outcome; any exception it raises is logged instead of propagating.
try {
    const decodedTitle = rez_GP.data.readString();
    console.error('straight readString on XWindowGetProperty data:', decodedTitle);
} catch (readErr) {
    console.error('ex on straight readString:', readErr);
}
请注意
rez_GP.data.readString()

例如,此实例:
TypeError:偏移量48处格式错误的UTF-8字符序列
。在这种情况下,窗口标题是
在基本模板处编辑js macosx/bootstrap.js·Noitidart/js macosx-Mozilla Firefox
第48个偏移量是您看到的点字符,它的字符代码是
183
。如何在此缓冲区上执行
readString()
,而不出现此错误


谢谢

readString
需要UTF-8编码的字符串。这对于
_NET_WM_NAME
返回的字符串是成立的,但对于
WM_NAME
返回的字符串则不一定成立。
我找到了一种方法,即使不是utf-8也能正确读取字符串,但我不确定这是最好的方法还是推荐的方法。不过,这是可行的,我必须将其转换为
unsigned_char
(必须是这个类型,而不是
char
或
jschar
),然后对每个字符码调用 String.fromCharCode:

/**
 * Reads a C string through a js-ctypes pointer, falling back to a byte-wise
 * read when the data is not valid UTF-8.
 *
 * Unless `jschar` is truthy, `stringPtr.readString()` is tried first. If it
 * throws the "malformed UTF-8 character sequence at offset N" error, the
 * memory is re-read as an array of `unsigned char` and every byte is turned
 * into the character with the same code via `String.fromCharCode`
 * (so byte 183 becomes "·").
 *
 * @param {Object} stringPtr - ctypes pointer to the string data; must provide
 *     `readString()` and be castable with `ctypes.cast`.
 * @param {number} [known_len] - Number of bytes the byte-wise path may read.
 *     When falsy (omitted or 0) up to 500 bytes are scanned. The length cannot
 *     be derived from the pointer itself, so it has to be supplied by the
 *     caller. Reading stops early at the first 0 byte (null terminator).
 * @param {boolean} [jschar] - Third argument. When truthy, `readString()` is
 *     skipped and the byte-wise path is used directly.
 * @returns {string} The decoded string.
 * @throws Rethrows any exception from `readString()` that is not the
 *     malformed-UTF-8 error.
 */
function readAsChar8ThenAsChar16(stringPtr, known_len, jschar) {
    var MALFORMED_UTF8_PREFIX = 'malformed UTF-8 character sequence at offset ';
    var ASSUMED_MAX_LEN = 500;

    // Byte-wise reader: one unsigned byte -> one character code.
    var readBytesAsCharCodes = function() {
        var maxLen = known_len ? known_len : ASSUMED_MAX_LEN;
        // MUST cast to unsigned_char (not ctypes.char or ctypes.jschar): with
        // char, bytes above 127 come back as negative values (183 reads as
        // -73), and with jschar the bytes are paired into unrelated 16-bit
        // characters and even the null terminator is no longer seen as 0.
        var ptrAsArr = ctypes.cast(stringPtr, ctypes.unsigned_char.array(maxLen).ptr).contents;
        console.info('ptrAsArr.length:', ptrAsArr.length);
        var charCode = [];
        var fromCharCode = [];
        // If known_len is exact, the terminator sits at index known_len and is
        // never visited, because the loop only runs while i < known_len.
        for (var i = 0; i < ptrAsArr.length; i++) {
            var thisUnsignedCharCode = ptrAsArr.addressOfElement(i).contents;
            if (thisUnsignedCharCode === 0) {
                // reached null terminator, stop reading
                console.log('reached null terminator, at pos: ', i);
                break;
            }
            charCode.push(thisUnsignedCharCode);
            fromCharCode.push(String.fromCharCode(thisUnsignedCharCode));
        }
        console.info('charCode:', charCode);
        console.info('fromCharCode:', fromCharCode);
        var char16_val = fromCharCode.join('');
        console.info('char16_val:', char16_val);
        return char16_val;
    };

    if (jschar) {
        return readBytesAsCharCodes();
    }

    try {
        var char8_val = stringPtr.readString();
        console.info('stringPtr.readString():', char8_val);
        return char8_val;
    } catch (ex) {
        // Standard replacement for the non-standard `catch (ex if ...)` form:
        // only the malformed-UTF-8 error triggers the fallback, everything
        // else is rethrown untouched.
        var isMalformedUtf8 = Boolean(ex) &&
            typeof ex.message === 'string' &&
            ex.message.indexOf(MALFORMED_UTF8_PREFIX) === 0;
        if (!isMalformedUtf8) {
            throw ex;
        }
        console.warn('ex of offset utf8 read error when trying to do readString so using alternative method, ex:', ex);
        return readBytesAsCharCodes();
    }
}
函数readAsChar8ThenAsChar16(stringPtr,已知长度,jschar){
//当读取为jschar时,它假定最大长度为500
//stringPtr是char或jschar,如果您确实知道它的jschar,请将第二个参数传递为true
//如果传递了known_len,则不进行假设,在数组中的known_len位置,我们将看到一个空字符
//我试着从stringPtr中了解\u len,但这是不可能的,已经知道了,我尝试了以下方法:
//“stringPtr.contents.toString()”“95”
//“stringPtr.toString()”“ctypes.unsigned_char.ptr(ctypes.UInt64(“0x7f73d5c87650”)”
//正如我们看到的,这两个都不是77,这是“master·Noitidart/_scratchpad-Mozilla Firefox上的_scratchpad/entehold.js”的示例
//尝试在stringPtr上读取字符串,如果失败,则将作为jschar读取
var readJSCharString=函数(){
var假设最大值=已知值?已知值:500;
var ptrAsArr=ctypes.cast(stringPtr,ctypes.unsigned_char.array(假设为_max_len).ptr).contents;//必须强制转换为unsigned char(不是ctypes.jschar或ctypes.char)否则我不会得到外来字符,因为它们是负值,我应该一直读到找到一个0,它是空终止符,它的无符号字符代码为0//可以通过读取如下字符串来测试:“\u scratchpad/entehold.js at master·Noitidart/\u scratchpad-Mozilla Firefox”在js数组位置36处(因此,如果从1开始计数为37),我们看到183,在77处,如果转换为无符号字符,我们看到字符代码为0,如果转换为字符,我们看到-73,位置36,但位置77仍然为0,如果转换为jschar,我们在除空格外的所有位置都看到中文字符,即使空终止符是中文字符
console.info('ptrAsArr.length:',ptrAsArr.length);
//log('debug-msg::dataCasted:',dataCasted,uneval(dataCasted),dataCasted.toString());
var charCode=[];
var fromCharCode=[]

对于(var i=0;我是否使用XA_WM_NAME atom?我使用了
WS_NAME
atom。它也被
XGetWMName
便利函数使用,我在
XGetWMName
中也遇到了readString问题。非常感谢您的见解,我之前不知道readString有这个要求。那么对于非UTF-8编码的字符串,我该怎么做呢?感谢您提供关于这些atom的额外信息 :) 使用
_NET_WM_NAME
atom。谢谢@paa,但这个窗口并没有什么特别的,我只是用它作为一个例子。但是
_NET_WM_NAME
在很多窗口中返回空白,而
WM_NAME
则不返回空白,因此我必须使用
XGetWMName
的便利函数来处理这种情况。MDN上有一条说明是这样写的:“注意:源C字符串假定为UTF-8编码,并假定以null结尾。如果您需要转换不符合这些要求的字符串,则需要自己转换。”也就是说我必须自己来做。你知道该怎么做吗?嘿@paa,我在这里发布了另一个解决方案,我想出了一种读取它的方法,你能评论一下吗?比如这是不是一个好方法,或者它可能存在什么问题。