Delphi writeln的明显副作用(“;:width”;说明符导致输出中出现问号)
我有以下代码(RAD Studio XE2、Windows 7 x64): 当Delphi writeln的明显副作用(“;:width”;说明符导致输出中出现问号),delphi,io,console-application,delphi-xe2,side-effects,Delphi,Io,Console Application,Delphi Xe2,Side Effects,我有以下代码(RAD Studio XE2、Windows 7 x64): 当{$DEFINE BOO}指令被关闭时,我有以下(预期)输出(为了可读性,空格被点替换): 当指令打开时,我有以下(意外)输出: 而非预期 // empty line here ..a 当我将const-ENGLISH\u-ALPHABET更改为const-ENGLISH\u-ALPHABET:AnsiString时,预期输出将毫无疑问地打印出来。当:3格式被删除或更改为:1时,没有问号。当输出被重定向到文件时(通过
{$DEFINE BOO}
指令被关闭时,我有以下(预期)输出(为了可读性,空格被点替换):
当指令打开时,我有以下(意外)输出:
而非预期
// empty line here
..a
当我将const-ENGLISH\u-ALPHABET
更改为const-ENGLISH\u-ALPHABET:AnsiString
时,预期输出将毫无疑问地打印出来。当:3
格式被删除或更改为:1
时,没有问号。当输出被重定向到文件时(通过AssignFile(输出,'boo.log')
或从命令行),不再有问号
这种行为的正确解释是什么?这是RTL中一个相当奇怪的错误。对
write
的调用解析为对\u WriteWChar
的调用。此功能的实现方式如下:
function _WriteWChar(var t: TTextRec; c: WideChar; width: Integer): Pointer;
begin
if width <= 1 then
result := _Write0WChar(t, c)
else
begin
if t.UTF16Buffer[0] <> #0 then
begin
_Write0WChar(t, '?');
t.UTF16Buffer[0] := #0;
end;
_WriteSpaces(t, width - 1);
Result := _Write0WChar(t, c);
end;
end;
type
TTextRec = packed record
....
MBCSLength: ShortInt;
MBCSBufPos: Byte;
case Integer of
0: (MBCSBuffer: array[0..5] of _AnsiChr);
1: (UTF16Buffer: array[0..2] of WideChar);
end;
因此,您的第一个调用writeln
,解决方法如下:
function _WriteLn(var t: TTextRec): Pointer;
begin
if (t.Flags and tfCRLF) <> 0 then
_Write0Char(t, _AnsiChr(cCR));
Result := _Write0Char(t, _AnsiChr(cLF));
_Flush(t);
end;
但是当执行\u WriteWChar
时,它会不分青红皂白地查看t.UTF16Buffer
。在TTextRec
中声明如下:
function _WriteWChar(var t: TTextRec; c: WideChar; width: Integer): Pointer;
begin
if width <= 1 then
result := _Write0WChar(t, c)
else
begin
if t.UTF16Buffer[0] <> #0 then
begin
_Write0WChar(t, '?');
t.UTF16Buffer[0] := #0;
end;
_WriteSpaces(t, width - 1);
Result := _Write0WChar(t, c);
end;
end;
type
TTextRec = packed record
....
MBCSLength: ShortInt;
MBCSBufPos: Byte;
case Integer of
0: (MBCSBuffer: array[0..5] of _AnsiChr);
1: (UTF16Buffer: array[0..2] of WideChar);
end;
因此,MBCSBuffer
和UTF16Buffer
共享相同的存储
错误在于\u WriteWChar
不应在未首先检查缓冲区长度的情况下查看t.UTF16Buffer
的内容。由于TTextRec
没有UTF16Length
而无法立即实现的事情。相反,如果t.UTF16Buffer
包含有意义的内容,则约定其长度由-t.MBCSLength
给出
因此,\u WriteWChar
可能应该是:
function _WriteWChar(var t: TTextRec; c: WideChar; width: Integer): Pointer;
begin
if width <= 1 then
result := _Write0WChar(t, c)
else
begin
if (t.MBCSLength < 0) and (t.UTF16Buffer[0] <> #0) then
begin
_Write0WChar(t, '?');
t.UTF16Buffer[0] := #0;
end;
_WriteSpaces(t, width - 1);
Result := _Write0WChar(t, c);
end;
end;
我提交了。在XE4中也是一样,首先在填充缓冲区中填充一个[LF]字符,并将其转换为
?
。Strange.WriteLn不知道Unicode,我觉得这就是问题所在。字符串常量是默认的Unicode字符串类型,对WriteLn的调用无法正确解释它。您可以通过将常量的定义更改为ENGLISH_ALPHABET:AnsiString='abcdefghijklmnopqrstuvwxyz'来测试这一点代码>。如果我的怀疑是对的,那么问题应该会消失。@KenWhite:是的,当我将字符串类型更改为AnsiString
时,没有问号(顺便说一句,这篇文章中提到过)。谢谢但仍然无法理解“记忆效应”。@KenWhite我认为Write
和Writeln
确实可以识别Unicode。它们可以接受UTF-16缓冲区。实现中只有一个bug。解决方法:Write(sLineBreak)代码>而不是Writeln
。感谢您的分析!我不知道如何制定质量控制报告-你能提交它吗?你不需要字符串常量,只要aChar='a'代码>只是要发布相同的答案!任何宽度说明符>1都会产生错误。@当然,我会提交报告。@LURD谢谢。我会确保在报告中提交一个真正最小的SSCCE。知道XE7中是否修复了这个问题吗?顺便说一句,这个补丁效果很好,谢谢@DavidHeffernan。
t.MBCSLength := 0;
t.MBCSBufPos := 0;
type
TTextRec = packed record
....
MBCSLength: ShortInt;
MBCSBufPos: Byte;
case Integer of
0: (MBCSBuffer: array[0..5] of _AnsiChr);
1: (UTF16Buffer: array[0..2] of WideChar);
end;
function _WriteWChar(var t: TTextRec; c: WideChar; width: Integer): Pointer;
begin
if width <= 1 then
result := _Write0WChar(t, c)
else
begin
if (t.MBCSLength < 0) and (t.UTF16Buffer[0] <> #0) then
begin
_Write0WChar(t, '?');
t.UTF16Buffer[0] := #0;
end;
_WriteSpaces(t, width - 1);
Result := _Write0WChar(t, c);
end;
end;
{$APPTYPE CONSOLE}
uses
Windows;
procedure PatchCode(Address: Pointer; const NewCode; Size: Integer);
var
OldProtect: DWORD;
begin
if VirtualProtect(Address, Size, PAGE_EXECUTE_READWRITE, OldProtect) then
begin
Move(NewCode, Address^, Size);
FlushInstructionCache(GetCurrentProcess, Address, Size);
VirtualProtect(Address, Size, OldProtect, @OldProtect);
end;
end;
type
PInstruction = ^TInstruction;
TInstruction = packed record
Opcode: Byte;
Offset: Integer;
end;
procedure RedirectProcedure(OldAddress, NewAddress: Pointer);
var
NewCode: TInstruction;
begin
NewCode.Opcode := $E9;//jump relative
NewCode.Offset := NativeInt(NewAddress)-NativeInt(OldAddress)-SizeOf(NewCode);
PatchCode(OldAddress, NewCode, SizeOf(NewCode));
end;
var
_Write0WChar: function(var t: TTextRec; c: WideChar): Pointer;
function _Write0WCharAddress: Pointer;
asm
MOV EAX, offset System.@Write0WChar
end;
function _WriteWCharAddress: Pointer;
asm
MOV EAX, offset System.@WriteWChar
end;
function _WriteWChar(var t: TTextRec; c: WideChar; width: Integer): Pointer;
var
i: Integer;
begin
if width <= 1 then
result := _Write0WChar(t, c)
else
begin
if (t.MBCSLength < 0) and (t.UTF16Buffer[0] <> #0) then
begin
_Write0WChar(t, '?');
t.UTF16Buffer[0] := #0;
end;
for i := 1 to width - 1 do
_Write0WChar(t, ' ');
Result := _Write0WChar(t, c);
end;
end;
const
ENGLISH_ALPHABET = 'abcdefghijklmnopqrstuvwxyz';
begin
@_Write0WChar := _Write0WCharAddress;
RedirectProcedure(_WriteWCharAddress, @_WriteWChar);
writeln;
write(ENGLISH_ALPHABET[1]:3);
end.