如何修复iText中的孤立标点符号

如何修复iText中的孤立标点符号,itext,Itext,我在另一个问题中看到,另一位用户遇到的问题与我们面临的问题类似。该问题的回答是:从iText 5开始,DefaultSplitCharacter已经将汉字考虑在内。我们正在使用iText 5.5.6,但仍然可以看到问题 据我所知,DefaultSplitCharacter工作正常,但问题似乎是ColumnText类允许行以这些标点符号开头 然而,第三行和第五行都以标点符号开头,如图所示 我可以简单地在适当的位置添加一些换行符,使其看起来正确,但这意味着如果文本在内部被重新翻译,我的修复可能不再有效。有人知道如何确保iT……

我在另一个问题中看到,另一位用户遇到的问题与我们面临的问题类似。该问题的回答是:从iText 5开始,DefaultSplitCharacter已经将汉字考虑在内。我们正在使用iText 5.5.6,但仍然可以看到问题

据我所知,DefaultSplitCharacter工作正常,但问题似乎是ColumnText类允许行以这些标点符号开头

然而,第三行和第五行都以标点符号开头,如图所示


我可以简单地在适当的位置添加一些换行符,使其看起来正确,但这意味着如果文本在内部被重新翻译,我的修复可能不再有效。有人知道如何确保iText不会让行以这些标点字符开头吗?

要在亚洲语言中正确断行,您需要编写自己的SplitCharacter实现。这是一个很好的断行规则参考(原链接缺失)。另一个参考的链接同样缺失

由于我是针对日语实现的,这里贴出我为日语与英语混排的文本编写的示例代码。参考上面的资料,可以很容易地把这段代码修改为适用于中文

下面是一个显示正在使用的JapaneseSplitCharacter的片段:

  // Build a chunk from the Asian text and font (both are placeholders in this snippet).
  Chunk chunk = new Chunk(<asian text>,<asian font>);
  // Replace the chunk's split-character strategy with the custom Japanese implementation.
  chunk.setSplitCharacter(JapaneseSplitCharacter.SplitCharacter);
  // Wrap the chunk in a paragraph.
  Paragraph paragraph = new Paragraph(chunk);  
希望这有帮助。

我正在使用iTextSharp。 我根据k.f.的示例写了一个SplitCharacter实现:

/// <summary>
/// Split-character strategy for CJK text that keeps punctuation attached to the
/// text it belongs to: a line may not break right before closing punctuation,
/// nor right after opening punctuation. Characters in the Unicode
/// <see cref="UnicodeCategory.OtherLetter"/> category are otherwise breakable,
/// and everything else is delegated to <c>DefaultSplitCharacter</c>.
/// </summary>
public class CJKSplitCharacter : ISplitCharacter
{
    /// <summary>Shared instance for callers that do not need their own.</summary>
    public static ISplitCharacter Default = new CJKSplitCharacter();

    // Fallback strategy for characters this class has no special rule for.
    private static readonly ISplitCharacter defaultSplit = new DefaultSplitCharacter();

    /// <summary>
    /// Decides whether a line break is allowed after the character at <paramref name="current"/>.
    /// </summary>
    /// <param name="start">Start position in the array; only forwarded to the fallback strategy.</param>
    /// <param name="current">Position of the character being examined.</param>
    /// <param name="end">End position of the text; treated here as an exclusive bound for the look-ahead.</param>
    /// <param name="cc">Characters to examine.</param>
    /// <param name="ck">Optional chunks, one per character; may be <c>null</c>.</param>
    /// <returns><c>true</c> if the text may be split after the current character.</returns>
    public bool IsSplitCharacter(int start, int current, int end, char[] cc, PdfChunk[] ck)
    {
        char charCurrent = GetChar(current, cc, ck);

        // Look ahead only inside the logical text: the array may be larger than
        // the text being examined, so the array length alone is not a safe bound.
        int next = current + 1;
        if (next < end && next < cc.Length)
        {
            // Never break right before closing punctuation, so that no line
            // starts with it.
            if (IsCloseChar(GetChar(next, cc, ck)))
            {
                return false;
            }

            // Closing punctuation that is not followed by more closing
            // punctuation is a valid break opportunity.
            if (IsCloseChar(charCurrent))
            {
                return true;
            }
        }

        // Never break right after opening punctuation, so that no line ends with it.
        if (IsOpenChar(charCurrent))
        {
            return false;
        }

        // OtherLetter is the category of CJK ideographs and kana (it also
        // contains letters of other caseless scripts): break after each of them.
        if (Char.GetUnicodeCategory(charCurrent) == UnicodeCategory.OtherLetter)
        {
            return true;
        }

        return defaultSplit.IsSplitCharacter(start, current, end, cc, ck);
    }

    /// <summary>
    /// Returns the character at <paramref name="position"/>, mapped through the
    /// matching chunk's <c>GetUnicodeEquivalent</c> when a chunk is available.
    /// </summary>
    private static char GetChar(int position, char[] cc, PdfChunk[] ck)
    {
        // No chunk information at all: use the raw character.
        if (ck == null || ck.Length == 0)
        {
            return cc[position];
        }

        // Positions beyond the chunk array reuse its last entry.
        PdfChunk chunk = ck[Math.Min(position, ck.Length - 1)];
        if (chunk == null)
        {
            return cc[position];
        }

        return (char)chunk.GetUnicodeEquivalent(cc[position]);
    }

    /// <summary>
    /// True for punctuation that must stay attached to the preceding character.
    /// </summary>
    private static bool IsCloseChar(char c)
    {
        switch (Char.GetUnicodeCategory(c))
        {
            case UnicodeCategory.ClosePunctuation:        // right bracket/brace, e.g. ) ]
            case UnicodeCategory.FinalQuotePunctuation:   // right quote, e.g. ”
            case UnicodeCategory.OtherPunctuation:        // other punctuation, e.g. , 。
                return true;
            default:
                return false;
        }
    }

    /// <summary>
    /// True for punctuation that must stay attached to the following character.
    /// </summary>
    private static bool IsOpenChar(char c)
    {
        switch (Char.GetUnicodeCategory(c))
        {
            case UnicodeCategory.OpenPunctuation:           // left bracket/brace, e.g. ( [
            case UnicodeCategory.InitialQuotePunctuation:   // left quote, e.g. “
                return true;
            default:
                return false;
        }
    }
}
/// <summary>
/// Split-character strategy for CJK text that keeps punctuation attached to the
/// text it belongs to: a line may not break right before closing punctuation,
/// nor right after opening punctuation. Characters in the Unicode
/// <see cref="UnicodeCategory.OtherLetter"/> category are otherwise breakable,
/// and everything else is delegated to <c>DefaultSplitCharacter</c>.
/// </summary>
public class CJKSplitCharacter : ISplitCharacter
{
    /// <summary>Shared instance for callers that do not need their own.</summary>
    public static ISplitCharacter Default = new CJKSplitCharacter();

    // Fallback strategy for characters this class has no special rule for.
    private static readonly ISplitCharacter defaultSplit = new DefaultSplitCharacter();

    /// <summary>
    /// Decides whether a line break is allowed after the character at <paramref name="current"/>.
    /// </summary>
    /// <param name="start">Start position in the array; only forwarded to the fallback strategy.</param>
    /// <param name="current">Position of the character being examined.</param>
    /// <param name="end">End position of the text; treated here as an exclusive bound for the look-ahead.</param>
    /// <param name="cc">Characters to examine.</param>
    /// <param name="ck">Optional chunks, one per character; may be <c>null</c>.</param>
    /// <returns><c>true</c> if the text may be split after the current character.</returns>
    public bool IsSplitCharacter(int start, int current, int end, char[] cc, PdfChunk[] ck)
    {
        char charCurrent = GetChar(current, cc, ck);

        // Look ahead only inside the logical text: the array may be larger than
        // the text being examined, so the array length alone is not a safe bound.
        int next = current + 1;
        if (next < end && next < cc.Length)
        {
            // Never break right before closing punctuation, so that no line
            // starts with it.
            if (IsCloseChar(GetChar(next, cc, ck)))
            {
                return false;
            }

            // Closing punctuation that is not followed by more closing
            // punctuation is a valid break opportunity.
            if (IsCloseChar(charCurrent))
            {
                return true;
            }
        }

        // Never break right after opening punctuation, so that no line ends with it.
        if (IsOpenChar(charCurrent))
        {
            return false;
        }

        // OtherLetter is the category of CJK ideographs and kana (it also
        // contains letters of other caseless scripts): break after each of them.
        if (Char.GetUnicodeCategory(charCurrent) == UnicodeCategory.OtherLetter)
        {
            return true;
        }

        return defaultSplit.IsSplitCharacter(start, current, end, cc, ck);
    }

    /// <summary>
    /// Returns the character at <paramref name="position"/>, mapped through the
    /// matching chunk's <c>GetUnicodeEquivalent</c> when a chunk is available.
    /// </summary>
    private static char GetChar(int position, char[] cc, PdfChunk[] ck)
    {
        // No chunk information at all: use the raw character.
        if (ck == null || ck.Length == 0)
        {
            return cc[position];
        }

        // Positions beyond the chunk array reuse its last entry.
        PdfChunk chunk = ck[Math.Min(position, ck.Length - 1)];
        if (chunk == null)
        {
            return cc[position];
        }

        return (char)chunk.GetUnicodeEquivalent(cc[position]);
    }

    /// <summary>
    /// True for punctuation that must stay attached to the preceding character.
    /// </summary>
    private static bool IsCloseChar(char c)
    {
        switch (Char.GetUnicodeCategory(c))
        {
            case UnicodeCategory.ClosePunctuation:        // right bracket/brace, e.g. ) ]
            case UnicodeCategory.FinalQuotePunctuation:   // right quote, e.g. ”
            case UnicodeCategory.OtherPunctuation:        // other punctuation, e.g. , 。
                return true;
            default:
                return false;
        }
    }

    /// <summary>
    /// True for punctuation that must stay attached to the following character.
    /// </summary>
    private static bool IsOpenChar(char c)
    {
        switch (Char.GetUnicodeCategory(c))
        {
            case UnicodeCategory.OpenPunctuation:           // left bracket/brace, e.g. ( [
            case UnicodeCategory.InitialQuotePunctuation:   // left quote, e.g. “
                return true;
            default:
                return false;
        }
    }
}