Delphi writeln的明显副作用(“;:width”;说明符导致输出中出现问号)

Delphi writeln的明显副作用(“;:width”;说明符导致输出中出现问号),delphi,io,console-application,delphi-xe2,side-effects,Delphi,Io,Console Application,Delphi Xe2,Side Effects,我有以下代码(RAD Studio XE2、Windows 7 x64): 当{$DEFINE BOO}指令被关闭时,我有以下(预期)输出(为了可读性,空格被点替换): 当指令打开时,我有以下(意外)输出: 而非预期 // empty line here ..a 当我将const-ENGLISH\u-ALPHABET更改为const-ENGLISH\u-ALPHABET:AnsiString时,预期输出将毫无疑问地打印出来。当:3格式被删除或更改为:1时,没有问号。当输出被重定向到文件时(通过

我有以下代码(RAD Studio XE2、Windows 7 x64):

{$DEFINE BOO}
指令被关闭时,我有以下(预期)输出(为了可读性,空格被点替换):

当指令打开时,我有以下(意外)输出:

而非预期

// empty line here
..a
当我将
const-ENGLISH\u-ALPHABET
更改为
const-ENGLISH\u-ALPHABET:AnsiString
时,预期输出将毫无疑问地打印出来。当
:3
格式被删除或更改为
:1
时,没有问号。当输出被重定向到文件时(通过
AssignFile(输出,'boo.log')
或从命令行),不再有问号


这种行为的正确解释是什么?

这是RTL中一个相当奇怪的错误。对
write
的调用解析为对
\u WriteWChar
的调用。此功能的实现方式如下:

function _WriteWChar(var t: TTextRec; c: WideChar; width: Integer): Pointer;
begin
  if width <= 1 then
    result := _Write0WChar(t, c)
  else
  begin
    if t.UTF16Buffer[0] <> #0 then
    begin
      _Write0WChar(t, '?');
      t.UTF16Buffer[0] := #0;
    end;

    _WriteSpaces(t, width - 1);
    Result := _Write0WChar(t, c);
  end;
end;
type
  TTextRec = packed record 
    ....
    MBCSLength: ShortInt;
    MBCSBufPos: Byte;
    case Integer of
      0: (MBCSBuffer: array[0..5] of _AnsiChr);
      1: (UTF16Buffer: array[0..2] of WideChar);
  end;
因此,您的第一个调用
writeln
,解决方法如下:

function _WriteLn(var t: TTextRec): Pointer;
begin
  if (t.Flags and tfCRLF) <> 0 then
    _Write0Char(t, _AnsiChr(cCR));
  Result := _Write0Char(t, _AnsiChr(cLF));
  _Flush(t);
end;
但是当执行
\u WriteWChar
时,它会不分青红皂白地查看
t.UTF16Buffer
。在
TTextRec
中声明如下:

function _WriteWChar(var t: TTextRec; c: WideChar; width: Integer): Pointer;
begin
  if width <= 1 then
    result := _Write0WChar(t, c)
  else
  begin
    if t.UTF16Buffer[0] <> #0 then
    begin
      _Write0WChar(t, '?');
      t.UTF16Buffer[0] := #0;
    end;

    _WriteSpaces(t, width - 1);
    Result := _Write0WChar(t, c);
  end;
end;
type
  TTextRec = packed record 
    ....
    MBCSLength: ShortInt;
    MBCSBufPos: Byte;
    case Integer of
      0: (MBCSBuffer: array[0..5] of _AnsiChr);
      1: (UTF16Buffer: array[0..2] of WideChar);
  end;
因此,
MBCSBuffer
UTF16Buffer
共享相同的存储

错误在于
\u WriteWChar
不应在未首先检查缓冲区长度的情况下查看
t.UTF16Buffer
的内容。由于
TTextRec
没有
UTF16Length
而无法立即实现的事情。相反,如果
t.UTF16Buffer
包含有意义的内容,则约定其长度由
-t.MBCSLength
给出

因此,
\u WriteWChar
可能应该是:

function _WriteWChar(var t: TTextRec; c: WideChar; width: Integer): Pointer;
begin
  if width <= 1 then
    result := _Write0WChar(t, c)
  else
  begin
    if (t.MBCSLength < 0) and (t.UTF16Buffer[0] <> #0) then
    begin
      _Write0WChar(t, '?');
      t.UTF16Buffer[0] := #0;
    end;

    _WriteSpaces(t, width - 1);
    Result := _Write0WChar(t, c);
  end;
end;

我提交了。

在XE4中也是一样,首先在填充缓冲区中填充一个[LF]字符,并将其转换为
。Strange.WriteLn不知道Unicode,我觉得这就是问题所在。字符串常量是默认的Unicode字符串类型,对WriteLn的调用无法正确解释它。您可以通过将常量的定义更改为
ENGLISH_ALPHABET:AnsiString='abcdefghijklmnopqrstuvwxyz'来测试这一点。如果我的怀疑是对的,那么问题应该会消失。@KenWhite:是的,当我将字符串类型更改为
AnsiString
时,没有问号(顺便说一句,这篇文章中提到过)。谢谢但仍然无法理解“记忆效应”。@KenWhite我认为
Write
Writeln
确实可以识别Unicode。它们可以接受UTF-16缓冲区。实现中只有一个bug。解决方法:
Write(sLineBreak)而不是
Writeln
。感谢您的分析!我不知道如何制定质量控制报告-你能提交它吗?你不需要字符串常量,只要
aChar='a'只是要发布相同的答案!任何宽度说明符>1都会产生错误。@当然,我会提交报告。@LURD谢谢。我会确保在报告中提交一个真正最小的SSCCE。知道XE7中是否修复了这个问题吗?顺便说一句,这个补丁效果很好,谢谢@DavidHeffernan。
t.MBCSLength := 0;
t.MBCSBufPos := 0;
type
  TTextRec = packed record 
    ....
    MBCSLength: ShortInt;
    MBCSBufPos: Byte;
    case Integer of
      0: (MBCSBuffer: array[0..5] of _AnsiChr);
      1: (UTF16Buffer: array[0..2] of WideChar);
  end;
function _WriteWChar(var t: TTextRec; c: WideChar; width: Integer): Pointer;
begin
  if width <= 1 then
    result := _Write0WChar(t, c)
  else
  begin
    if (t.MBCSLength < 0) and (t.UTF16Buffer[0] <> #0) then
    begin
      _Write0WChar(t, '?');
      t.UTF16Buffer[0] := #0;
    end;

    _WriteSpaces(t, width - 1);
    Result := _Write0WChar(t, c);
  end;
end;
{$APPTYPE CONSOLE}

uses
  Windows;

procedure PatchCode(Address: Pointer; const NewCode; Size: Integer);
var
  OldProtect: DWORD;
begin
  if VirtualProtect(Address, Size, PAGE_EXECUTE_READWRITE, OldProtect) then
  begin
    Move(NewCode, Address^, Size);
    FlushInstructionCache(GetCurrentProcess, Address, Size);
    VirtualProtect(Address, Size, OldProtect, @OldProtect);
  end;
end;

type
  PInstruction = ^TInstruction;
  TInstruction = packed record
    Opcode: Byte;
    Offset: Integer;
  end;

procedure RedirectProcedure(OldAddress, NewAddress: Pointer);
var
  NewCode: TInstruction;
begin
  NewCode.Opcode := $E9;//jump relative
  NewCode.Offset := NativeInt(NewAddress)-NativeInt(OldAddress)-SizeOf(NewCode);
  PatchCode(OldAddress, NewCode, SizeOf(NewCode));
end;

var
  _Write0WChar: function(var t: TTextRec; c: WideChar): Pointer;

function _Write0WCharAddress: Pointer;
asm
  MOV     EAX, offset System.@Write0WChar
end;

function _WriteWCharAddress: Pointer;
asm
  MOV     EAX, offset System.@WriteWChar
end;

function _WriteWChar(var t: TTextRec; c: WideChar; width: Integer): Pointer;
var
  i: Integer;
begin
  if width <= 1 then
    result := _Write0WChar(t, c)
  else
  begin
    if (t.MBCSLength < 0) and (t.UTF16Buffer[0] <> #0) then
    begin
      _Write0WChar(t, '?');
      t.UTF16Buffer[0] := #0;
    end;

    for i := 1 to width - 1 do
      _Write0WChar(t, ' ');
    Result := _Write0WChar(t, c);
  end;
end;

const
  ENGLISH_ALPHABET = 'abcdefghijklmnopqrstuvwxyz';

begin
  @_Write0WChar := _Write0WCharAddress;
  RedirectProcedure(_WriteWCharAddress, @_WriteWChar);

  writeln;
  write(ENGLISH_ALPHABET[1]:3);
end.