Java 使用PDFBox插入空字符让我们考虑这个代码： public class Test1{ public static void CreatePdf(String src) throws IOException, COSVisitorException{ PDRectangle rec= new PDRectangle(400,400); PDDocument document= null; document = new PDDocument(); PDPage page = new PDPage(rec); document.addPage(page); PDDocumentInformation info=document.getDocumentInformation(); PDStream stream= new PDStream(document); info.setAuthor("PdfBox"); info.setCreator("Pdf"); info.setSubject("Stéganographie"); info.setTitle("Stéganographie dans les documents PDF"); info.setKeywords("Stéganographie, pdf"); content= new PDPageContentStream(document, page, true, false ); font= PDType1Font.HELVETICA; String hex = "4C0061f"; // shows "La" //Notice that we have 00 between 4C and 61 where 00 =null character StringBuilder sb = new StringBuilder(); for (int count = 0; count < hex.length() - 1; count += 2) { String output = hex.substring(count, (count + 2)); int decimal = Integer.parseInt(output, 16); StringBuilder ae= sb.append((char)decimal); } String tt=sb.toString(); content.beginText(); content.setFont(font, 12); content.appendRawCommands("15 385 Td\n"); content.appendRawCommands("("+tt+")"+"Tj\n"); content.endText(); content.close(); document.save("doc.pdf"); document.close(); } 公共类Test1{ publicstaticvoidcreatepdf（stringsrc）抛出IOException、COSVisitorException{ PDRectangle rec=新的PDRectangle（400400）； PDDocument文件=null；文档=新的PDDocument（）； PDPage=新的PDPage（rec）；文件。添加页（第页）； PDDocumentInformation=document.getDocumentInformation（）； PDStream stream=新的PDStream（文件）；信息集作者（“PdfBox”）； info.setCreator（“Pdf”）； info.setSubject（“圣加诺文字”）； info.setTitle（“Stéganographie dans les documents PDF”）； info.setKeywords（“Stéganographie，pdf”）；内容=新的PDPageContentStream（文档、页面、真、假）； font=PDType1Font.HELVETICA；字符串hex=“4C0061f”；//显示“La” //注意，我们有介于4C和61之间的00，其中00=空字符 StringBuilder sb=新的StringBuilder（）；对于（int count=0；count，即PDF浏览器必须携带自己的信息的标准的14个字体之一。浏览者带来的信息相互矛盾，PDF浏览者可能会倾向于自己的信息_Java_Pdf_Pdfbox

Java 使用PDFBox插入空字符让我们考虑这个代码： public class Test1{ public static void CreatePdf(String src) throws IOException, COSVisitorException{ PDRectangle rec= new PDRectangle(400,400); PDDocument document= null; document = new PDDocument(); PDPage page = new PDPage(rec); document.addPage(page); PDDocumentInformation info=document.getDocumentInformation(); PDStream stream= new PDStream(document); info.setAuthor("PdfBox"); info.setCreator("Pdf"); info.setSubject("Stéganographie"); info.setTitle("Stéganographie dans les documents PDF"); info.setKeywords("Stéganographie, pdf"); content= new PDPageContentStream(document, page, true, false ); font= PDType1Font.HELVETICA; String hex = "4C0061f"; // shows "La" //Notice that we have 00 between 4C and 61 where 00 =null character StringBuilder sb = new StringBuilder(); for (int count = 0; count < hex.length() - 1; count += 2) { String output = hex.substring(count, (count + 2)); int decimal = Integer.parseInt(output, 16); StringBuilder ae= sb.append((char)decimal); } String tt=sb.toString(); content.beginText(); content.setFont(font, 12); content.appendRawCommands("15 385 Td\n"); content.appendRawCommands("("+tt+")"+"Tj\n"); content.endText(); content.close(); document.save("doc.pdf"); document.close(); } 公共类Test1{ publicstaticvoidcreatepdf（stringsrc）抛出IOException、COSVisitorException{ PDRectangle rec=新的PDRectangle（400400）； PDDocument文件=null；文档=新的PDDocument（）； PDPage=新的PDPage（rec）；文件。添加页（第页）； PDDocumentInformation=document.getDocumentInformation（）； PDStream stream=新的PDStream（文件）；信息集作者（“PdfBox”）； info.setCreator（“Pdf”）； info.setSubject（“圣加诺文字”）； info.setTitle（“Stéganographie dans les documents PDF”）； info.setKeywords（“Stéganographie，pdf”）；内容=新的PDPageContentStream（文档、页面、真、假）； font=PDType1Font.HELVETICA；字符串hex=“4C0061f”；//显示“La” //注意，我们有介于4C和61之间的00，其中00=空字符 StringBuilder sb=新的StringBuilder（）；对于（int count=0；count，即PDF浏览器必须携带自己的信息的标准的14个字体之一。浏览者带来的信息相互矛盾，PDF浏览者可能会倾向于自己的信息

java pdf

Java 使用PDFBox插入空字符让我们考虑这个代码： public class Test1{ public static void CreatePdf(String src) throws IOException, COSVisitorException{ PDRectangle rec= new PDRectangle(400,400); PDDocument document= null; document = new PDDocument(); PDPage page = new PDPage(rec); document.addPage(page); PDDocumentInformation info=document.getDocumentInformation(); PDStream stream= new PDStream(document); info.setAuthor("PdfBox"); info.setCreator("Pdf"); info.setSubject("Stéganographie"); info.setTitle("Stéganographie dans les documents PDF"); info.setKeywords("Stéganographie, pdf"); content= new PDPageContentStream(document, page, true, false ); font= PDType1Font.HELVETICA; String hex = "4C0061f"; // shows "La" //Notice that we have 00 between 4C and 61 where 00 =null character StringBuilder sb = new StringBuilder(); for (int count = 0; count < hex.length() - 1; count += 2) { String output = hex.substring(count, (count + 2)); int decimal = Integer.parseInt(output, 16); StringBuilder ae= sb.append((char)decimal); } String tt=sb.toString(); content.beginText(); content.setFont(font, 12); content.appendRawCommands("15 385 Td\n"); content.appendRawCommands("("+tt+")"+"Tj\n"); content.endText(); content.close(); document.save("doc.pdf"); document.close(); } 公共类Test1{ publicstaticvoidcreatepdf（stringsrc）抛出IOException、COSVisitorException{ PDRectangle rec=新的PDRectangle（400400）； PDDocument文件=null；文档=新的PDDocument（）； PDPage=新的PDPage（rec）；文件。添加页（第页）； PDDocumentInformation=document.getDocumentInformation（）； PDStream stream=新的PDStream（文件）；信息集作者（“PdfBox”）； info.setCreator（“Pdf”）； info.setSubject（“圣加诺文字”）； info.setTitle（“Stéganographie dans les documents PDF”）； info.setKeywords（“Stéganographie，pdf”）；内容=新的PDPageContentStream（文档、页面、真、假）； font=PDType1Font.HELVETICA；字符串hex=“4C0061f”；//显示“La” //注意，我们有介于4C和61之间的00，其中00=空字符 StringBuilder sb=新的StringBuilder（）；对于（int 
count=0；count，即PDF浏览器必须携带自己的信息的标准的14个字体之一。浏览者带来的信息相互矛盾，PDF浏览者可能会倾向于自己的信息,java,pdf,pdfbox,Java,Pdf,Pdfbox,我的问题是：为什么PDF文档中的“00”被空格代替，而不是空字符？注意，这个空字符的宽度是0.0，但它在PDF文档中显示为空格！因此我得到：“La”而不是“La” 为什么PDF文档中的“00”被空格替换为非空字符如果查看PDF，您会发现用于文本的字体定义为： 9 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 及我添加了

我的问题是：为什么PDF文档中的“00”被空格代替，而不是空字符？注意，这个空字符的宽度是0.0，但它在PDF文档中显示为空格！因此我得到的是“L a”，而不是“La”

为什么PDF文档中的“00”被替换成了空格，而不是空字符？

如果查看PDF，您会发现用于文本的字体定义为：

9 0 obj
<<
/Type /Font
/Subtype /Type1
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
>>
endobj

及

我添加了以下代码：

// Read the Helvetica AFM metrics from the PDFBox resources and use them to
// give the font explicit FirstChar/LastChar/Widths entries, so that character
// codes the AFM does not list (e.g. 0x00) get an explicit width of 0.
InputStream afmStream = ResourceLoader.loadResource("org/apache/pdfbox/resources/afm/Helvetica.afm");
FontMetric afmMetrics;
try
{
    AFMParser afmParser = new AFMParser(afmStream);
    afmParser.parse();
    afmMetrics = afmParser.getResult();
}
finally
{
    // The stream is only needed while parsing; close it so it does not leak.
    if (afmStream != null)
        afmStream.close();
}
// newWidths is indexed by character code: newWidths.get(code) is the width
// of that code.
List<Float> newWidths = new ArrayList<Float>();
for (CharMetric charMetric : afmMetrics.getCharMetrics())
{
    // Entries with a negative code cannot be used as a list index; skip them
    // (presumably glyphs without a code in the font's built-in encoding).
    if (charMetric.getCharacterCode() < 0)
        continue;
    // Grow the list with zero widths until the current code is a valid index;
    // codes never set below therefore keep a width of 0.
    while (charMetric.getCharacterCode() >= newWidths.size())
        newWidths.add(0f);
    newWidths.set(charMetric.getCharacterCode(), charMetric.getWx());
}
// The widths list starts at code 0 and covers every code up to the highest
// one found in the AFM data.
font.setFirstChar(0);
font.setLastChar(newWidths.size() - 1);
font.setWidths(newWidths);

显示“La”，没有间隙。如果我正确解释您之前（现在已删除）关于1C和1D的问题，这将有助于您继续

PPS:关于评论中的问题：

你能告诉我这个方法的所有缺点吗？为什么这个方法不匹配重音字符，例如（Lé），你的代码只匹配没有重音的字符，但是当我们有重音时，我们得到的是Lé而不是Le..我只想知道你的代码的缺点是什么：）

我不能全部说出来（因为我真的不太了解字体问题），但从本质上讲，上述方法有点不完整

如开头所述，您使用的字体采用WinAnsiEncoding编码，其中32（十进制）以下的代码没有映射到任何内容。通过添加FirstChar、LastChar和Widths条目，我们尝试为代码低于32的字符定义零宽度

尽管如此，我们既没有为这些代码提供编码信息（编码仍然是纯粹的WinAnsiEncoding），也没有考虑字体本身是否包含这些代码的任何信息。此外，让事情更加不可控的是，我们讨论的是**Helvetica**，即标准14种字体之一，PDF查看器必须自带这些字体的信息。当我们提供的信息与查看器自带的信息相互矛盾时，PDF查看器很可能会优先采用它自己的信息

为什么特别是重音字符会出现问题？我不确定。不过，我猜这与以下情况有关：字体通常不会把重音字符作为单独的实体提供，而是把重音符号和不带重音的字符组合起来。查看器内部使用的字体很可能把这类组合用的字符信息映射在32以下的代码点上，因此，当显式使用的32以下代码与字体隐式使用的这类代码同时出现时，显示就会变得古怪

基本上，我建议大家不要这样做。对于普通的PDF文档来说，根本没有必要这样做

在您的案例中，正如您将文档命名为“Stéganographie dans les documents PDF”，您显然希望以某种方式在PDF中隐藏信息。使用不可见、不可打印的字符似乎是一种方法；因此，您可以朝着这个方向进行试验。但PDF确实提供了多得多的方法，可以把任意数量的信息放进PDF文件中而不直接可见

因此，根据您的具体目标，我认为其他方法可能会更安全地隐藏信息，例如私有的PieceInfo部分，或其他一些字典中的自定义条目
为什么PDF文档中的“00”被替换成了空格，而不是空字符？
如果查看PDF，您会发现用于文本的字体定义为：

9 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj
及
我添加了以下代码：

// Read the Helvetica AFM metrics from the PDFBox resources and use them to
// give the font explicit FirstChar/LastChar/Widths entries, so that character
// codes the AFM does not list (e.g. 0x00) get an explicit width of 0.
InputStream afmStream = ResourceLoader.loadResource("org/apache/pdfbox/resources/afm/Helvetica.afm");
FontMetric afmMetrics;
try
{
    AFMParser afmParser = new AFMParser(afmStream);
    afmParser.parse();
    afmMetrics = afmParser.getResult();
}
finally
{
    // The stream is only needed while parsing; close it so it does not leak.
    if (afmStream != null)
        afmStream.close();
}
// newWidths is indexed by character code: newWidths.get(code) is the width
// of that code.
List<Float> newWidths = new ArrayList<Float>();
for (CharMetric charMetric : afmMetrics.getCharMetrics())
{
    // Entries with a negative code cannot be used as a list index; skip them
    // (presumably glyphs without a code in the font's built-in encoding).
    if (charMetric.getCharacterCode() < 0)
        continue;
    // Grow the list with zero widths until the current code is a valid index;
    // codes never set below therefore keep a width of 0.
    while (charMetric.getCharacterCode() >= newWidths.size())
        newWidths.add(0f);
    newWidths.set(charMetric.getCharacterCode(), charMetric.getWx());
}
// The widths list starts at code 0 and covers every code up to the highest
// one found in the AFM data.
font.setFirstChar(0);
font.setLastChar(newWidths.size() - 1);
font.setWidths(newWidths);
显示“La”，没有间隙。如果我正确解释您之前（现在已删除）关于1C和1D的问题，这将有助于您继续
PPS:关于评论中的问题：
你能告诉我这个方法的所有缺点吗？为什么这个方法不匹配重音字符，例如（Lé），你的代码只匹配没有重音的字符，但是当我们有重音时，我们得到的是Lé而不是Le..我只想知道你的代码的缺点是什么：）
我不能全部说出来（因为我真的不太了解字体问题），但从本质上讲，上述方法有点不完整
如开头所述，您使用的字体带有winansienceoding，其中32（十进制）以下的代码不会映射到任何内容。通过添加FirstChar，LastCharString hex = "4C0461f";
public class Test4 { public static final String src="..."; public static void CreatePdf(String src) throws IOException, COSVisitorException{ PDRectangle rec= new PDRectangle(400,400); PDDocument document=null; document= new PDDocument(); PDPage page= new PDPage(rec); document.addPage(page); PDPageContentStream canvas= new PDPageContentStream(document,page,true,false); PDFont font= PDType1Font.HELVETICA; String hex = "4C1D61f"; InputStream afmStream = ResourceLoader.loadResource("org/apache/pdfbox/resources/afm/Helvetica.afm"); AFMParser afmParser = new AFMParser(afmStream); afmParser.parse(); FontMetric afmMetrics = afmParser.getResult(); List<Float> newWidths = new ArrayList<Float>(); for (CharMetric charMetric : afmMetrics.getCharMetrics()) { if (charMetric.getCharacterCode() < 0) continue; while (charMetric.getCharacterCode() >= newWidths.size()) newWidths.add(0f); newWidths.set(charMetric.getCharacterCode(), charMetric.getWx()); } font.setFirstChar(0); font.setLastChar(newWidths.size() - 1); font.setWidths(newWidths); StringBuilder sb = new StringBuilder(); for (int count = 0; count < hex.length() - 1; count += 2) { String output = hex.substring(count, (count + 2)); int decimal = Integer.parseInt(output, 16); StringBuilder ae= sb.append((char)decimal); } String tt=sb.toString(); canvas.beginText(); canvas.setFont(font, 12); canvas.appendRawCommands("15 385 Td\n"); canvas.appendRawCommands("("+tt+")"+"Tj\n"); canvas.endText(); canvas.close(); document.save("doc.pdf"); document.close(); } /** * @param args the command line arguments */ public static void main(String[] args) throws IOException, COSVisitorException { // TODO code application logic here Test4 tes= new Test4(); tes.CreatePdf(src); } }