Java:如何使用 PDFBox 获取 PDF 中每个 Tj/TJ 操作符及其中每个字符的位置?

Java:如何使用 PDFBox 获取 PDF 中每个 Tj/TJ 操作符及其中每个字符的位置?,java,pdf,pdfbox,Java,Pdf,Pdfbox,我需要控制打印文本的位置。我需要按顺序打印每个 Tj/TJ。我有 Tj 或 TJ 操作符的 COSString 对象。如何获取 PDF 中每个字符的 x、y 位置? 内容流如下所示 C 的位置为 (72, 633.8289) h 的位置为 (88.7903125, 633.8289) a 的位置为 (101.7059375, 633.8289) 如何使用 PDFBox 的类获得这些位置?我尝试过 writeString(String, List<TextPosition>) 或 processTextPosition(TextPositio

我需要控制打印文本的位置。我需要按顺序打印每个 Tj/TJ。我有 Tj 或 TJ 操作符的 COSString 对象。如何获取 PDF 中每个字符的 x、y 位置?

内容流如下所示

C 的位置为 (72, 633.8289)

h 的位置为 (88.7903125, 633.8289)

a 的位置为 (101.7059375, 633.8289)

如何使用PDFBOX类获得这些位置。我试了一些

writeString(String, List<TextPosition>) or processTextPosition(TextPosition)
即 writeString(String, List<TextPosition>) 或 processTextPosition(TextPosition)

这样我可以得到文本行,但得不到位置。能否提供示例代码,用来获取每个 Tj/TJ 操作符中每个字符的位置?

我们需要重写(override)PDFStreamEngine 中所有与位置计算相关的方法。谢谢 @Tilman Hausherr 和 @mkl。如果我的答案有误,请帮忙更正。再次感谢。

import java.awt.geom.GeneralPath;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import org.apache.pdfbox.contentstream.PDContentStream;
import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.OperatorProcessor;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDFontFactory;
import org.apache.pdfbox.pdmodel.graphics.blend.BlendMode;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDTransparencyGroup;
import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
import org.apache.pdfbox.pdmodel.graphics.state.PDTextState;
import org.apache.pdfbox.util.Matrix;
import org.apache.pdfbox.util.Vector;
public class PDStreamengine extends PDFStreamEngine {
// Operator processors keyed by operator name. Filled by addOperators and read by
// processOperator. The map is static, so it is shared by every engine instance.
public static Map<String, OperatorProcessor> operators = new HashMap<String, OperatorProcessor>(80);

// Current text matrix; cleared by initPage and advanced after every glyph in showText.
private Matrix textMatrix;
// Current text line matrix; cleared by initPage.
private Matrix textLineMatrix;

// Graphics state stack maintained by this class; getGraphicsState() returns its top element.
private Stack<PDGraphicsState> graphicsStack = new Stack<PDGraphicsState>();

// Resources of the stream currently being processed (see pushResources / popResources).
private PDResources resources;
// Page passed to the last initPage call.
private PDPage currentPage;
// Matrix in effect at the start of the stream currently being processed.
private Matrix initialMatrix;
// Per-glyph position records for the most recent showTextStrings call. showText appends
// one list of four values per glyph: text matrix entries (2,0) and (2,1) before and
// after the glyph's advance.
public static ArrayList<ArrayList<Double>> chars;
// Text rendering matrix of each glyph, in the same order as chars.
public static ArrayList<Matrix> charmatrixs ;
// Unicode text of each glyph (as returned by the font), in the same order as chars.
public static ArrayList<String> tjchars;


/**
 * Resets the engine for the given page and, when the page has a content stream,
 * runs that stream through the operator processors.
 *
 * @param page the page to process
 * @throws IOException if the content stream cannot be processed
 */
@Override
public void processPage(PDPage page) throws IOException
{
    initPage(page);

    // pages without contents only need the state reset above
    if (!page.hasContents())
    {
        return;
    }
    processStream(page);
}


/**
 * Prepares the engine state for a new page: remembers the page, starts a fresh
 * graphics stack seeded with the page's crop box, and clears text and resource state.
 *
 * @param page the page about to be processed
 * @throws IllegalArgumentException if {@code page} is null
 */
private void initPage(PDPage page)
{
    if (null == page)
    {
        throw new IllegalArgumentException("Page cannot be null");
    }

    // nothing from a previously processed page may survive
    textMatrix = null;
    textLineMatrix = null;
    resources = null;

    currentPage = page;

    // start over with a single graphics state built from the crop box
    graphicsStack.clear();
    PDGraphicsState initialState = new PDGraphicsState(page.getCropBox());
    graphicsStack.push(initialState);

    initialMatrix = page.getMatrix();
}

/**
 * Processes a content stream (page or form) with its own resources, graphics stack
 * and initial matrix, then puts the caller's state back.
 *
 * <p>The restore step runs in a {@code finally} block so that an exception thrown while
 * processing the operators cannot leave the engine pointing at the nested stream's
 * graphics stack, initial matrix or resources.
 *
 * @param contentStream the stream to process
 * @throws IOException if an operator of the stream cannot be processed
 */
public void processStream(PDContentStream contentStream) throws IOException
{
    PDResources parent = pushResources(contentStream);
    Stack<PDGraphicsState> savedStack = saveGraphicsStacks();
    Matrix parentMatrix = initialMatrix;

    try
    {
        // transform the CTM using the stream's matrix
        getGraphicsState().getCurrentTransformationMatrix().concatenate(contentStream.getMatrix());

        // the stream's initial matrix includes the parent CTM, e.g. this allows a scaled form
        initialMatrix = getGraphicsState().getCurrentTransformationMatrix().clone();

        // clip to bounding box
        PDRectangle bbox = contentStream.getBBox();
        clipToRect(bbox);

        processStreamOperators(contentStream);
    }
    finally
    {
        // undo everything in reverse order, even when processing failed
        initialMatrix = parentMatrix;
        restoreGraphicsStacks(savedStack);
        popResources(parent);
    }
}


/**
 * Selects the resources to use while processing {@code contentStream} and returns the
 * ones that were active before, so the caller can hand them back to popResources.
 *
 * @param contentStream the stream about to be processed
 * @return the previously active resources (may be null)
 */
private PDResources pushResources(PDContentStream contentStream)
{
    PDResources previous = resources;
    PDResources own = contentStream.getResources();

    if (own != null)
    {
        // the stream brings its own resources
        resources = own;
    }
    else if (resources == null)
    {
        // no stream resources and nothing to inherit: use the page's resources
        resources = currentPage.getResources();
    }
    // otherwise keep inheriting from the parent stream; this is not in the PDF spec,
    // but the file from PDFBOX-1359 does this and works in Acrobat

    // resources are required in PDF, so never leave the field null
    if (resources == null)
    {
        resources = new PDResources();
    }
    return previous;
}

/**
 * Intersects the current clipping path with the given rectangle, transformed by the
 * current transformation matrix. Does nothing when the rectangle is null.
 *
 * @param rectangle the rectangle to clip to, may be null
 */
private void clipToRect(PDRectangle rectangle)
{
    if (rectangle == null)
    {
        return;
    }
    PDGraphicsState state = getGraphicsState();
    Matrix ctm = state.getCurrentTransformationMatrix();
    GeneralPath transformed = rectangle.transform(ctm);
    state.intersectClippingPath(transformed);
}


/**
 * Parses the content stream token by token. Operands are collected until an operator
 * token arrives, which is then dispatched together with the collected operands.
 *
 * @param contentStream the stream whose operators are processed
 * @throws IOException if parsing or processing an operator fails
 */
private void processStreamOperators(PDContentStream contentStream) throws IOException
{
    PDFStreamParser parser = new PDFStreamParser(contentStream);
    // NOTE(review): instantiated only for its side effects; presumably this registers
    // the operator processors - confirm against ProcessClasses
    new ProcessClasses();

    List<COSBase> operands = new ArrayList<COSBase>();
    for (Object token = parser.parseNextToken(); token != null; token = parser.parseNextToken())
    {
        if (token instanceof Operator)
        {
            processOperator((Operator) token, operands);
            // each operator gets its own operand list
            operands = new ArrayList<COSBase>();
            continue;
        }

        // indirect references are stored as the object they point to
        COSBase operand = (token instanceof COSObject)
                ? ((COSObject) token).getObject()
                : (COSBase) token;
        operands.add(operand);
    }
}

/**
 * Makes the given resources current again; counterpart of pushResources.
 *
 * @param parentResources the resources returned by the matching pushResources call
 */
private void popResources(PDResources parentResources)
{
    this.resources = parentResources;
}


 /**
  * Dispatches one operator to the processor registered under its name in
  * {@code operators}. The operator and its operands are printed to standard output
  * before processing. Operators without a processor are reported through
  * unsupportedOperator.
  *
  * @param operator the operator to process
  * @param operands the operands collected for the operator
  * @throws IOException if the error handler rethrows a processing failure
  */
 protected void processOperator(Operator operator, List<COSBase> operands) throws IOException
    {
        OperatorProcessor handler = operators.get(operator.getName());
        if (handler == null)
        {
            unsupportedOperator(operator, operands);
            return;
        }

        handler.setContext(this);
        try
        {
            // trace every handled operator together with its operands
            System.out.println(operator);
            System.out.println(operands);
            handler.process(operator, operands);
        }
        catch (IOException failure)
        {
            operatorException(operator, operands, failure);
        }
    }
 
 
 /**
  * Replaces the graphics stack with a new one that holds a clone of the current top
  * state, and returns the stack that was active before.
  *
  * @return the previous graphics stack, to be passed to restoreGraphicsStacks
  */
 protected final Stack<PDGraphicsState> saveGraphicsStacks()
    {
        Stack<PDGraphicsState> previous = graphicsStack;
        graphicsStack = new Stack<PDGraphicsState>();
        PDGraphicsState topCopy = previous.peek().clone();
        graphicsStack.push(topCopy);
        return previous;
    }
 
 /**
  * @return the graphics state on top of this engine's graphics stack
  */
 @Override
 public PDGraphicsState getGraphicsState()
    {
        PDGraphicsState current = graphicsStack.peek();
        return current;
    }
 
 
 /**
  * Binds the processor to this engine and registers it in the shared
  * {@code operators} map under the processor's own name.
  *
  * @param op the operator processor to register
  */
 public void addOperators(OperatorProcessor op)
    {
        op.setContext(this);
        String operatorName = op.getName();
        operators.put(operatorName, op);
    }
 
 /**
  * Makes the given stack the active graphics stack again; counterpart of
  * saveGraphicsStacks.
  *
  * @param snapshot the stack returned by the matching saveGraphicsStacks call
  */
 protected final void restoreGraphicsStacks(Stack<PDGraphicsState> snapshot)
    {
        this.graphicsStack = snapshot;
    }
    
    /**
     * Reports how many graphics states are currently on this engine's stack.
     *
     * @return the number of entries in the graphics stack
     */
    public int getGraphicsStackSize()
    {
        int depth = graphicsStack.size();
        return depth;
    }


    /**
     * Gives access to the current text line matrix.
     *
     * @return the text line matrix, or null when none has been set since initPage
     */
    public Matrix getTextLineMatrix()
    {
        return this.textLineMatrix;
    }

    /**
     * Replaces the current text line matrix.
     *
     * @param value the new text line matrix
     */
    public void setTextLineMatrix(Matrix value)
    {
        this.textLineMatrix = value;
    }

    /**
     * Gives access to the current text matrix.
     *
     * @return the text matrix, or null when none has been set since initPage
     */
    public Matrix getTextMatrix()
    {
        return this.textMatrix;
    }

    /**
     * Replaces the current text matrix.
     *
     * @param value the new text matrix
     */
    public void setTextMatrix(Matrix value)
    {
        this.textMatrix = value;
    }
    
    /**
     * @return the resources selected for the stream currently being processed
     */
    public PDResources getResources()
    {
        return this.resources;
    }
    
    
    /**
     * Duplicates the graphics state on top of the stack and pushes the copy, so that
     * later changes can be undone with restoreGraphicsState.
     */
    public void saveGraphicsState()
    {
        PDGraphicsState copy = graphicsStack.peek().clone();
        graphicsStack.push(copy);
    }

    /**
     * Discards the graphics state on top of the stack, making the state saved before
     * it current again.
     */
    public void restoreGraphicsState()
    {
        // the popped state is intentionally dropped
        this.graphicsStack.pop();
    }
    
    /**
     * Shifts the text matrix by the given amounts; used for the numeric entries of a
     * TJ array.
     *
     * @param tx translation passed as the first argument of the translate matrix
     * @param ty translation passed as the second argument of the translate matrix
     * @throws IOException declared for overriding implementations; not thrown here
     */
    protected void applyTextAdjustment(float tx, float ty) throws IOException
    {
        Matrix shift = Matrix.getTranslateInstance(tx, ty);
        textMatrix.concatenate(shift);
    }
    
    
    /**
     * Processes the content of a form XObject, provided a page is being processed and
     * the form's stream is not empty.
     *
     * @param form the form XObject to show
     * @throws IOException if the form's content stream cannot be processed
     * @throws IllegalStateException if no page has been initialised
     */
    public void showForm(PDFormXObject form) throws IOException
    {
        if (currentPage == null)
        {
            throw new IllegalStateException("No current page, call " +
                    "#processChildStream(PDContentStream, PDPage) instead");
        }

        // empty form streams are silently skipped
        boolean hasContent = form.getCOSObject().getLength() > 0;
        if (!hasContent)
        {
            return;
        }
        processStream(form);
    }
    
    
    
    /**
     * Called when a single string of text is to be shown.
     *
     * <p>The string is wrapped in a one-element array and handed to
     * {@link #showTextStrings(COSArray)}. That method is the one that creates the
     * per-operator glyph lists and publishes them, so each single-string operator is
     * recorded as its own entry. Calling showText directly would append the glyphs to
     * whatever lists the previous showTextStrings call left behind, or fail on null
     * lists if none ran before.
     *
     * @param string the encoded text
     * @throws IOException if there was an error showing the text
     */
    public void showTextString(byte[] string) throws IOException
    {
        COSArray singleString = new COSArray();
        singleString.add(new COSString(string));
        showTextStrings(singleString);
    }
    
    /**
     * Shows a transparency group by processing its content stream.
     *
     * @param form the transparency group to show
     * @throws IOException if the group cannot be processed
     */
    @Override
    public void showTransparencyGroup(PDTransparencyGroup form) throws IOException
    {
        // a group is shown simply by running its content stream
        this.processTransparencyGroup(form);
    }
    
    /**
     * Processes the content stream of a transparency group with its own resources,
     * graphics stack and initial matrix, then puts the caller's state back.
     *
     * <p>The graphics stack saved by saveGraphicsStacks is restored with this class's
     * restoreGraphicsStacks. The inherited restoreGraphicsStack does not touch this
     * class's {@code graphicsStack} field, so using it would leave the group's
     * graphics state active for all content that follows. The restore step runs in a
     * {@code finally} block so a failing operator cannot leave the engine in the
     * group's state either.
     *
     * @param group the transparency group to process
     * @throws IOException if an operator of the group cannot be processed
     * @throws IllegalStateException if no page has been initialised
     */
    @Override
    protected void processTransparencyGroup(PDTransparencyGroup group) throws IOException
    {
        if (currentPage == null)
        {
            throw new IllegalStateException("No current page, call " +
                    "#processChildStream(PDContentStream, PDPage) instead");
        }

        PDResources parent = pushResources(group);
        Stack<PDGraphicsState> savedStack = saveGraphicsStacks();
        Matrix parentMatrix = initialMatrix;

        try
        {
            // the stream's initial matrix includes the parent CTM, e.g. this allows a scaled form
            initialMatrix = getGraphicsState().getCurrentTransformationMatrix().clone();

            // transform the CTM using the stream's matrix
            getGraphicsState().getCurrentTransformationMatrix().concatenate(group.getMatrix());

            // Before execution of the transparency group XObject's content stream,
            // the current blend mode in the graphics state shall be initialized to Normal,
            // the current stroking and nonstroking alpha constants to 1.0, and the current soft mask to None.
            getGraphicsState().setBlendMode(BlendMode.NORMAL);
            getGraphicsState().setAlphaConstant(1);
            getGraphicsState().setNonStrokeAlphaConstant(1);
            getGraphicsState().setSoftMask(null);

            // clip to bounding box
            clipToRect(group.getBBox());

            processStreamOperators(group);
        }
        finally
        {
            initialMatrix = parentMatrix;

            // restore the stack of THIS class, matching saveGraphicsStacks() above
            restoreGraphicsStacks(savedStack);
            popResources(parent);
        }
    }
    
    
    /**
     * Handles an array of strings and numeric adjustments (TJ operation). Strings are
     * shown through showText; numbers shift the text matrix. The static glyph lists are
     * recreated at the start of every call and, when glyphs were recorded, published
     * to Horizontalparsing under the next {@code tj_ycount} index.
     *
     * @param array the array of strings and numbers to show
     * @throws IOException if an element is neither a number nor a string, or showing
     *                     text fails
     */
    @Override
    public void showTextStrings(COSArray array) throws IOException
    {
        PDTextState textState = getGraphicsState().getTextState();
        float fontSize = textState.getFontSize();
        float horizontalScaling = textState.getHorizontalScaling() / 100f;
        PDFont font = textState.getFont();

        // every call starts with empty recording lists
        chars = new ArrayList<ArrayList<Double>>();
        charmatrixs = new ArrayList<Matrix>();
        tjchars = new ArrayList<String>();

        boolean vertical = font != null && font.isVertical();

        for (COSBase element : array)
        {
            if (element instanceof COSString)
            {
                showText(((COSString) element).getBytes());
                continue;
            }
            if (!(element instanceof COSNumber))
            {
                throw new IOException("Unknown type in array for TJ operation:" + element);
            }

            // numeric entry: a displacement in thousandths, scaled by the font size
            float advance = -((COSNumber) element).floatValue() / 1000 * fontSize;
            if (vertical)
            {
                applyTextAdjustment(0, advance);
            }
            else
            {
                applyTextAdjustment(advance * horizontalScaling, 0);
            }
        }

        // nothing was drawn, so there is nothing to publish
        if (chars.isEmpty() || charmatrixs.isEmpty())
        {
            return;
        }
        Horizontalparsing.poscharobj.put(Horizontalparsing.tj_ycount, chars);
        Horizontalparsing.txtposmatrix.put(Horizontalparsing.tj_ycount, charmatrixs);
        Horizontalparsing.wordobj.put(Horizontalparsing.tj_ycount, tjchars);
        Horizontalparsing.tj_ycount +=1;
    }
    
    /**
     * Decodes the given string code by code with the current font, shows each glyph and
     * advances the text matrix by the glyph's displacement plus character and word
     * spacing.
     *
     * <p>For every glyph this method also appends to the static recording lists:
     * the text rendering matrix goes to {@code charmatrixs}, the Unicode text to
     * {@code tjchars}, and a four-element list to {@code chars} holding text matrix
     * entries (2,0) and (2,1) before and after the advance (presumably the x/y
     * translation of the text matrix - confirm against Matrix). The lists are created
     * in showTextStrings; this method only appends to them.
     *
     * @param string the encoded text
     * @throws IOException if decoding a code or showing a glyph fails
     */
    @Override
     protected void showText(byte[] string) throws IOException
        {
            PDGraphicsState state = getGraphicsState();
            PDTextState textState = state.getTextState();

            // get the current font
            PDFont font = textState.getFont();
            if (font == null)
            {
               // no font selected: fall back to the default font (warning log disabled)
               // LOG.warn("No current font, will use default");
                font = PDFontFactory.createDefaultFont();
            }

            float fontSize = textState.getFontSize();
            float horizontalScaling = textState.getHorizontalScaling() / 100f;
            float charSpacing = textState.getCharacterSpacing();

            // put the text state parameters into matrix form
            // (the trailing numbers are the implied third column of the 3x3 matrix)
            Matrix parameters = new Matrix(
                    fontSize * horizontalScaling, 0, // 0
                    0, fontSize,                     // 0
                    0, textState.getRise());         // 1

            // read the stream until it is empty
            InputStream in = new ByteArrayInputStream(string);
            while (in.available() > 0)
            {
                // decode a character; the number of consumed bytes is the code length
                int before = in.available();
                int code = font.readCode(in);
                int codeLength = before - in.available();
                String unicode = font.toUnicode(code);
                // To record char positions: filled with two values before and two after the advance
                ArrayList<Double> pstnchar = new ArrayList<Double>();

                // Word spacing shall be applied to every occurrence of the single-byte character code
                // 32 in a string when using a simple font or a composite font that defines code 32 as
                // a single-byte code.
                float wordSpacing = 0;
                if (codeLength == 1 && code == 32)
                {
                    wordSpacing += textState.getWordSpacing();
                }

                // text rendering matrix (text space -> device space)
                Matrix ctm = state.getCurrentTransformationMatrix();
                Matrix textRenderingMatrix = parameters.multiply(textMatrix).multiply(ctm);

                // get glyph's position vector if this is vertical text
                // changes to vertical text should be tested with PDFBOX-2294 and PDFBOX-1422
                if (font.isVertical())
                {
                    // position vector, in text space
                    Vector v = font.getPositionVector(code);

                    // apply the position vector to the horizontal origin to get the vertical origin
                    textRenderingMatrix.translate(v);
                }

                // get glyph's horizontal and vertical displacements, in text space
                Vector w = font.getDisplacement(code);

                // process the decoded glyph; the text matrices are put back afterwards in case
                // showGlyph replaced them
                saveGraphicsState();
                Matrix textMatrixOld = textMatrix;
                Matrix textLineMatrixOld = textLineMatrix;
                showGlyph(textRenderingMatrix, font, code, unicode, w);
                textMatrix = textMatrixOld;
                textLineMatrix = textLineMatrixOld;
                // record text matrix entries (2,0) and (2,1) before the glyph's advance is applied
                pstnchar.add((double) textMatrix.getValue(2, 0));
                pstnchar.add((double) textMatrix.getValue(2, 1));
                charmatrixs.add(textRenderingMatrix);
                restoreGraphicsState();

                // calculate the combined displacements
                float tx, ty;
                if (font.isVertical())
                {
                    tx = 0;
                    ty = w.getY() * fontSize + charSpacing + wordSpacing;
                }
                else
                {
                    tx = (w.getX() * fontSize + charSpacing + wordSpacing) * horizontalScaling;
                    ty = 0;
                }

                // update the text matrix (concatenate is called for its effect on textMatrix itself)
                textMatrix.concatenate(Matrix.getTranslateInstance(tx, ty));
                // record the same two entries again, now after the advance
                pstnchar.add((double) textMatrix.getValue(2, 0));
                pstnchar.add((double) textMatrix.getValue(2, 1));
                tjchars.add(unicode);
                chars.add(pstnchar);
            }
        }
import java.awt.geom.GeneralPath;
导入java.io.ByteArrayInputStream;
导入java.io.IOException;
导入java.io.InputStream;
导入java.util.ArrayList;
导入java.util.HashMap;
导入java.util.List;
导入java.util.Map;
导入java.util.Stack;
导入org.apache.pdfbox.contentstream.PDContentStream;
导入org.apache.pdfbox.contentstream.PDFStreamEngine;
导入org.apache.pdfbox.contentstream.operator.operator;
导入org.apache.pdfbox.contentstream.operator.operator处理器;
导入org.apache.pdfbox.cos.COSArray;
导入org.apache.pdfbox.cos.COSBase;
导入org.apache.pdfbox.cos.COSNumber;
导入org.apache.pdfbox.cos.COSObject;
导入org.apache.pdfbox.cos.coString;
导入org.apache.pdfbox.pdfparser.PDFStreamParser;
导入org.apache.pdfbox.pdmodel.PDPage;
导入org.apache.pdfbox.pdmodel.PDResources;
导入org.apache.pdfbox.pdmodel.common.PDRectangle;
导入org.apache.pdfbox.pdmodel.font.PDFont;
导入org.apache.pdfbox.pdmodel.font.PDFontFactory;
导入org.apache.pdfbox.pdmodel.graphics.blend.BlendMode;
导入org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
导入org.apache.pdfbox.pdmodel.graphics.form.PDTransparencyGroup;
导入org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
导入org.apache.pdfbox.pdmodel.graphics.state.PDTextState;
导入org.apache.pdfbox.util.Matrix;
导入org.apache.pdfbox.util.Vector;
公共类PDStreamengine扩展了PDFStreamEngine{
公共静态映射操作符=新HashMap(80);
私有矩阵;
私有矩阵textLineMatrix;
私有堆栈graphicsStack=新堆栈();
私人资源;
私人PDPage当前页面;
私有矩阵;
公共静态数组列表字符;
公共静态数组列表charmatrix;
公共静态数组列表tjchars;
@凌驾
public void processPage(PDPage页)引发IOException
{
初始页面(第页);
if(page.hasconts())
{
processStream(第页);
}
}
私有void initPage(PDPage)
{
如果(第==null页)
{
抛出新的IllegalArgumentException(“页面不能为空”);
}
当前页面=第页;
graphicsStack.clear();
graphicsStack.push(新的PDGraphicsState(page.getCropBox());
textMatrix=null;
textLineMatrix=null;
资源=空;
initialMatrix=page.getMatrix();
}
public void processStream(PDContentStream contentStream)引发IOException
{
PDResources parent=pushResources(contentStream);
Stack savedStack=saveGraphicsStacks();
矩阵父矩阵=初始矩阵;
//使用流的矩阵变换CTM
getGraphicsState().getCurrentTransformationMatrix().concatenate(contentStream.getMatrix());
//流的初始矩阵包括父CTM,例如,这允许缩放形式
initialMatrix=getGraphicsState().getCurrentTransformationMatrix().clone();
//剪辑到边界框
PDRectangle bbox=contentStream.getBBox();
clipToRect(bbox);
processStreamOperators(contentStream);
初始矩阵=父矩阵;
恢复堆栈(savedStack);
公共资源(家长);
}
专用PDResources pushResources(pContentStream contentStream)
{
//资源查找:首先查找流资源,然后返回到当前页面
PDResources parentResources=资源;
PDResources-streamResources=contentStream.getResources();
if(streamResources!=null)
{
资源=流动资源;
}
else if(资源!=null)
{
//直接从父流继承,这不在PDF规范中,而是来自的文件
//PDFBOX-1359可以做到这一点,并在Acrobat中工作
}
其他的
{
resources=currentPage.getResources();
}
//所需资源为PDF格式
if(资源==null)
{
资源=新的PDResources();
}
返回父资源;
}
私有void clipToRect(PDRectangle)
{
if(矩形!=null)
{
GeneralPath clip=rectangle.transform(getGraphicsState().getCurrentTransformationMatrix());
getGraphicsState().intersectClippingPath(剪辑);
}
}
私有void processStreamOperators(PDContentStream contentStream)引发IOException
{
列表参数=新的ArrayList();
PDFStreamParser=新的PDFStreamParser(contentStream);
新的ProcessClasses();
Object token=parser.parseNextToken();
while(令牌!=null)
{
if(COSObject的令牌实例)
{
参数.add(((COSObject)标记).getObject());
}
else if(令牌instanceof运算符)
{
processOperator((运算符)标记、参数);
参数=新的ArrayList();
}
其他的
{
参数。添加((COSBase)标记);
}
token=parser.parseNextToken();
}
}
私有资源(PDResources parentResources)
{
资源=父资源;
}
受保护的void processOperator(运算符运算符、列表操作数)引发IOException
{
字符串名称=运算符.getName();
OperatorProcessor处理器=operators.get(名称);
if(处理器!=null)