Firefox addon readString正在抛出类型错误:格式错误的UTF-8
此函数返回一个Firefox addon readString正在抛出类型错误:格式错误的UTF-8,firefox-addon,jsctypes,Firefox Addon,Jsctypes,此函数返回一个ctypes.unsigned\u char.array(),我确实读取了其中的字符串。它正在获得windows的标题。问题是有时它会抛出类型错误 try { console.error('straight readString on XWindowGetProperty data:', rez_GP.data.readString()); } catch (ex) { console.error('ex on straight readString:', ex);
ctypes.unsigned_char.array()
,我从中读取字符串,用来获取窗口的标题。问题是有时它会抛出 TypeError
// Minimal reproduction: decode the property data directly with readString()
// and log the resulting string.
try {
console.error('straight readString on XWindowGetProperty data:', rez_GP.data.readString());
} catch (ex) {
// readString() threw; log the exception object instead of the decoded string.
console.error('ex on straight readString:', ex);
}
请注意rez_GP.data.readString()
例如,此实例抛出:TypeError: malformed UTF-8 character sequence at offset 48(偏移量 48 处的 UTF-8 字符序列格式错误)
。在这种情况下,窗口标题是在基本模板处编辑js macosx/bootstrap.js·Noitidart/js macosx-Mozilla Firefox
偏移量 48 处正是您看到的中间点字符(·),它的字符代码是 183
。如何在此缓冲区上执行 readString()
而不出现此错误?
谢谢
readString
需要 UTF-8 编码的字符串。这对于 _NET_WM_NAME
返回的字符串是成立的,但对于 WM_NAME 则不成立。
我找到了一种方法,即使字符串不是 UTF-8 编码也能正确读取,但我不确定这是不是最好的或推荐的做法。不过它确实可行:我必须先将其强制转换为 unsigned_char
(必须是这个类型,不能是 char
或 jschar
),然后用 String.fromCharCode 把每个字符代码转换成字符:
/**
 * Reads a null-terminated C string from a js-ctypes pointer, tolerating data
 * that is not valid UTF-8.
 *
 * The fast path is `stringPtr.readString()`, which requires UTF-8. If that
 * throws the "malformed UTF-8" TypeError (or if `jschar` is truthy, in which
 * case the fast path is skipped entirely), the buffer is re-read byte by byte
 * as unsigned chars and every byte is mapped through String.fromCharCode,
 * i.e. it is decoded as Latin-1. Reading stops at the first 0 byte or after
 * the assumed length, whichever comes first.
 *
 * @param {*} stringPtr - js-ctypes pointer to the string data; must expose
 *     readString() and be castable with ctypes.cast.
 * @param {number} [known_len] - Number of bytes to inspect. When falsy
 *     (omitted or 0) a maximum of 500 bytes is assumed. The length cannot be
 *     derived from the pointer itself, so the caller has to supply it.
 * @param {boolean} [jschar] - Third argument; pass true to skip readString()
 *     and go straight to the byte-wise fallback.
 * @returns {string} The decoded string, without the null terminator.
 * @throws Any error raised by readString() other than the malformed-UTF-8
 *     error is rethrown unchanged.
 */
function readAsChar8ThenAsChar16(stringPtr, known_len, jschar) {
  var UTF8_ERROR_PREFIX = 'malformed UTF-8 character sequence at offset ';
  var DEFAULT_MAX_LEN = 500;

  // Byte-wise fallback reader.
  var readJSCharString = function() {
    var assumption_max_len = known_len ? known_len : DEFAULT_MAX_LEN;
    // MUST cast to unsigned char (not ctypes.char or ctypes.jschar): with a
    // signed char, bytes >= 0x80 come back negative (e.g. 183 reads as -73),
    // and with jschar two bytes are fused into one unit, so even the null
    // terminator is no longer seen as 0.
    var ptrAsArr = ctypes.cast(stringPtr, ctypes.unsigned_char.array(assumption_max_len).ptr).contents;
    console.info('ptrAsArr.length:', ptrAsArr.length);

    var charCode = [];
    var fromCharCode = [];
    // When known_len is exact the terminator sits at index known_len and is
    // never visited, because the loop only runs while i < known_len.
    for (var i = 0; i < ptrAsArr.length; i++) {
      var thisUnsignedCharCode = ptrAsArr.addressOfElement(i).contents;
      if (thisUnsignedCharCode === 0) {
        // reached null terminator, break
        console.log('reached null terminator, at pos: ', i);
        break;
      }
      charCode.push(thisUnsignedCharCode);
      fromCharCode.push(String.fromCharCode(thisUnsignedCharCode));
    }
    console.info('charCode:', charCode);
    console.info('fromCharCode:', fromCharCode);

    var char16_val = fromCharCode.join('');
    console.info('char16_val:', char16_val);
    return char16_val;
  };

  if (jschar) {
    return readJSCharString();
  }

  try {
    var char8_val = stringPtr.readString();
    console.info('stringPtr.readString():', char8_val);
    return char8_val;
  } catch (ex) {
    // Standard replacement for the Mozilla-only `catch (ex if ...)` clause:
    // only the malformed-UTF-8 error triggers the fallback, everything else
    // propagates to the caller exactly as before.
    var isUtf8Error = Boolean(ex) && typeof ex.message === 'string' &&
        ex.message.indexOf(UTF8_ERROR_PREFIX) === 0;
    if (!isUtf8Error) {
      throw ex;
    }
    console.warn('ex of offset utf8 read error when trying to do readString so using alternative method, ex:', ex);
    return readJSCharString();
  }
}
函数readAsChar8ThenAsChar16(stringPtr,已知长度,jschar){
//当读取为jschar时,它假定最大长度为500
//stringPtr是char或jschar,如果您确实知道它的jschar,请将第二个参数传递为true
//如果传递了known_len,则不进行假设,在数组中的known_len位置,我们将看到一个空字符
//我试着从stringPtr中了解\u len,但这是不可能的,已经知道了,我尝试了以下方法:
//“stringPtr.contents.toString()”“95”
//“stringPtr.toString()”“ctypes.unsigned_char.ptr(ctypes.UInt64(“0x7f73d5c87650”)”
//正如我们看到的,这两个都不是77,这是“master·Noitidart/_scratchpad-Mozilla Firefox上的_scratchpad/entehold.js”的示例
//尝试在stringPtr上读取字符串,如果失败,则将作为jschar读取
var readJSCharString=函数(){
var假设最大值=已知值?已知值:500;
var ptrAsArr=ctypes.cast(stringPtr,ctypes.unsigned_char.array(假设为_max_len).ptr).contents;//必须强制转换为unsigned char(不是ctypes.jschar或ctypes.char)否则我不会得到外来字符,因为它们是负值,我应该一直读到找到一个0,它是空终止符,它的无符号字符代码为0//可以通过读取如下字符串来测试:“\u scratchpad/entehold.js at master·Noitidart/\u scratchpad-Mozilla Firefox”在js数组位置36处(因此,如果从1开始计数为37),我们看到183,在77处,如果转换为无符号字符,我们看到字符代码为0,如果转换为字符,我们看到-73,位置36,但位置77仍然为0,如果转换为jschar,我们在除空格外的所有位置都看到中文字符,即使空终止符是中文字符
console.info('ptrAsArr.length:',ptrAsArr.length);
//log('debug-msg::dataCasted:',dataCasted,uneval(dataCasted),dataCasted.toString());
var charCode=[];
var fromCharCode=[]
对于(var i=0;我是否使用XA_WM_NAME atom?我使用了WS_NAME
atom。它也被XGetWMName
便利函数使用,我在XGetWMName
中也遇到了readString问题。非常感谢您的见解,我之前不了解readString的这一点。那么对于非UTF-8编码的情况,我该怎么做呢?也感谢您提供的关于这些atom的额外信息 :) 使用_NET_WM_NAME
atom。谢谢@paa,但这个窗口并没有什么特别之处,我只是用它作为一个例子。但是_NET_WM_NAME
在很多窗口中返回空白,而WM_NAME
则不返回空白,因此我必须使用XGetWMName
这个便利函数来处理这种情况。MDN上有一条说明是这样写的:“注意:源C字符串假定为UTF-8,并假定以null结尾。如果您需要转换不符合这些要求的字符串,则需要自己转换。”也就是说我必须自己做转换。你知道该怎么做吗?嘿@paa,我在这里发布了另一个解决方案,我想出了一种读取它的方法,你能评论一下吗?比如这是不是一个好方法,或者它是否可能存在一些问题。