Java：如何使用 PDFBox 获取 PDF 中每个 Tj/TJ 操作符及其中每个字符的位置？
我需要控制打印文本的位置，并按顺序打印每个 Tj/TJ。我已经拿到了 Tj 或 TJ 操作符的 COSString（原文误译为“陪串”）对象。如何获取 PDF 中每个字符的 x、y 位置？内容流如下所示（示例内容在此处未保留）：字符 C 的位置为 (72, 633.8289)，h 的位置为 (88.7903125, 633.8289)，a 的位置为 (101.7059375, 633.8289)。如何使用 PDFBox 的类获得这些位置？我尝试过一些方法，例如 writeString(String, List<TextPosition>) 或 processTextPosition(TextPosition)。标签：java, pdf, pdfbox
writeString(String, List<TextPosition>) or processTextPosition(TextPosition)
writeString(String, List<TextPosition>) 或 processTextPosition(TextPosition)
我可以得到文本行，但得不到位置。能否提供获取每个 TJ 操作符中每个字符位置的示例代码？我们需要重写（override）所有与位置相关的类。感谢 @Tilman Hausherr 和 @mkl；如有需要，请更正我的答案。再次感谢。
import java.awt.geom.GeneralPath;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import org.apache.pdfbox.contentstream.PDContentStream;
import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.OperatorProcessor;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDFontFactory;
import org.apache.pdfbox.pdmodel.graphics.blend.BlendMode;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDTransparencyGroup;
import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
import org.apache.pdfbox.pdmodel.graphics.state.PDTextState;
import org.apache.pdfbox.util.Matrix;
import org.apache.pdfbox.util.Vector;
public class PDStreamengine extends PDFStreamEngine {
// Operator processors keyed by operator name (filled by addOperators, read by processOperator).
// The map is static, so it is shared by every instance of this class.
public static Map<String, OperatorProcessor> operators = new HashMap<String, OperatorProcessor>(80);
// Current text matrix; reset to null by initPage and advanced as glyphs are shown.
private Matrix textMatrix;
// Current text line matrix; reset to null by initPage.
private Matrix textLineMatrix;
// This class keeps its own graphics state stack instead of using the one in the superclass.
private Stack<PDGraphicsState> graphicsStack = new Stack<PDGraphicsState>();
// Resources of the stream currently being processed (see pushResources / popResources).
private PDResources resources;
// Page currently being processed; set by initPage.
private PDPage currentPage;
// Initial matrix of the stream currently being processed.
private Matrix initialMatrix;
// One entry per glyph: [x, y] of the text matrix before the glyph, then [x, y] after its advance.
// Re-created on every showTextStrings call.
public static ArrayList<ArrayList<Double>> chars;
// One text rendering matrix per glyph, parallel to chars.
public static ArrayList<Matrix> charmatrixs ;
// Unicode text of each glyph, parallel to chars.
public static ArrayList<String> tjchars;
/**
 * Processes a single page: resets the per-page state and, when the page has
 * contents, runs its content stream through this engine.
 *
 * @param page the page to process
 * @throws IOException if the content stream cannot be processed
 */
@Override
public void processPage(PDPage page) throws IOException
{
    initPage(page);
    if (!page.hasContents())
    {
        // nothing to interpret on an empty page
        return;
    }
    processStream(page);
}
/**
 * Resets the engine state for a new page.
 *
 * @param page the page about to be processed
 * @throws IllegalArgumentException if {@code page} is null
 */
private void initPage(PDPage page)
{
    if (null == page)
    {
        throw new IllegalArgumentException("Page cannot be null");
    }
    this.currentPage = page;

    // start from a single graphics state covering the page's crop box
    final PDRectangle cropBox = page.getCropBox();
    this.graphicsStack.clear();
    this.graphicsStack.push(new PDGraphicsState(cropBox));

    // no text object is open and no stream resources are active yet
    this.textMatrix = null;
    this.textLineMatrix = null;
    this.resources = null;

    this.initialMatrix = page.getMatrix();
}
/**
 * Processes a content stream (a page or a form XObject) with its own resources,
 * graphics stack and initial matrix, then restores the caller's state.
 *
 * <p>The state is restored in a {@code finally} block so that an exception thrown
 * while interpreting the stream does not leave the engine with the nested stream's
 * graphics stack, resources or initial matrix.
 *
 * @param contentStream the stream to process
 * @throws IOException if the stream cannot be parsed or an operator fails
 */
public void processStream(PDContentStream contentStream) throws IOException
{
    PDResources parent = pushResources(contentStream);
    Stack<PDGraphicsState> savedStack = saveGraphicsStacks();
    Matrix parentMatrix = initialMatrix;
    try
    {
        // transform the CTM using the stream's matrix
        getGraphicsState().getCurrentTransformationMatrix().concatenate(contentStream.getMatrix());

        // the stream's initial matrix includes the parent CTM, e.g. this allows a scaled form
        initialMatrix = getGraphicsState().getCurrentTransformationMatrix().clone();

        // clip to bounding box
        PDRectangle bbox = contentStream.getBBox();
        clipToRect(bbox);

        processStreamOperators(contentStream);
    }
    finally
    {
        // undo everything set up above, in reverse order, even on failure
        initialMatrix = parentMatrix;
        restoreGraphicsStacks(savedStack);
        popResources(parent);
    }
}
/**
 * Makes the resources of the given stream current and returns the previously
 * active resources so the caller can hand them back to popResources.
 *
 * @param contentStream the stream about to be processed
 * @return the resources that were active before this call (may be null)
 */
private PDResources pushResources(PDContentStream contentStream)
{
    final PDResources inherited = resources;
    final PDResources own = contentStream.getResources();

    if (own != null)
    {
        // the stream brings its own resources
        resources = own;
    }
    else if (inherited == null)
    {
        // nothing to inherit from a parent stream: fall back to the current page
        resources = currentPage.getResources();
    }
    // otherwise keep the parent stream's resources; this is not in the PDF spec, but the
    // file from PDFBOX-1359 does this and works in Acrobat

    // resources are required in PDF, so never leave them null
    if (resources == null)
    {
        resources = new PDResources();
    }
    return inherited;
}
/**
 * Intersects the current clipping path with the given rectangle, transformed by
 * the current transformation matrix. Does nothing for a null rectangle.
 *
 * @param rectangle the rectangle to clip to, or null
 */
private void clipToRect(PDRectangle rectangle)
{
    if (rectangle == null)
    {
        return;
    }
    final PDGraphicsState state = getGraphicsState();
    final GeneralPath transformed = rectangle.transform(state.getCurrentTransformationMatrix());
    getGraphicsState().intersectClippingPath(transformed);
}
/**
 * Parses the content stream token by token, collecting operands until an operator
 * is met and then dispatching that operator with the collected operands.
 *
 * @param contentStream the stream to parse
 * @throws IOException if parsing fails or an operator cannot be processed
 */
private void processStreamOperators(PDContentStream contentStream) throws IOException
{
    PDFStreamParser parser = new PDFStreamParser(contentStream);
    // NOTE(review): the instance is discarded; presumably the constructor registers the
    // operator processors as a side effect - confirm against ProcessClasses.
    new ProcessClasses();

    List<COSBase> operands = new ArrayList<COSBase>();
    for (Object token = parser.parseNextToken(); token != null; token = parser.parseNextToken())
    {
        if (token instanceof Operator)
        {
            processOperator((Operator) token, operands);
            // each operator gets a fresh operand list
            operands = new ArrayList<COSBase>();
        }
        else if (token instanceof COSObject)
        {
            // unwrap the object so the operator sees the referenced value
            operands.add(((COSObject) token).getObject());
        }
        else
        {
            operands.add((COSBase) token);
        }
    }
}
/**
 * Reinstates the resources that were active before the matching pushResources call.
 *
 * @param parentResources the value returned by pushResources
 */
private void popResources(PDResources parentResources)
{
    this.resources = parentResources;
}
/**
 * Dispatches one operator to the processor registered under its name, printing the
 * operator and its operands to standard output first. Operators without a registered
 * processor are reported through unsupportedOperator.
 *
 * @param operator the operator to process
 * @param operands the operands collected for the operator
 * @throws IOException if error handling decides to propagate a processing failure
 */
protected void processOperator(Operator operator, List<COSBase> operands) throws IOException
{
    final OperatorProcessor handler = operators.get(operator.getName());
    if (handler == null)
    {
        unsupportedOperator(operator, operands);
        return;
    }

    // the processor map is static, so bind the processor to this engine before use
    handler.setContext(this);
    try
    {
        System.out.println(operator);
        System.out.println(operands);
        handler.process(operator, operands);
    }
    catch (IOException failure)
    {
        operatorException(operator, operands, failure);
    }
}
/**
 * Replaces the graphics stack with a new one holding a copy of the current top
 * state, and returns the previous stack for a later restoreGraphicsStacks call.
 *
 * @return the stack that was active before this call
 */
protected final Stack<PDGraphicsState> saveGraphicsStacks()
{
    final Stack<PDGraphicsState> previous = graphicsStack;
    final Stack<PDGraphicsState> fresh = new Stack<PDGraphicsState>();
    graphicsStack = fresh;
    // the nested stream starts from a copy of the caller's current state
    fresh.add(previous.peek().clone());
    return previous;
}
/**
 * @return the graphics state on top of this engine's own graphics stack
 */
@Override
public PDGraphicsState getGraphicsState()
{
    final PDGraphicsState top = graphicsStack.peek();
    return top;
}
/**
 * Binds an operator processor to this engine and registers it in the shared
 * processor map under the processor's own name.
 *
 * @param op the processor to register
 */
public void addOperators(OperatorProcessor op)
{
    op.setContext(this);
    final String key = op.getName();
    operators.put(key, op);
}
/**
 * Makes the given stack the active graphics stack again.
 *
 * @param snapshot the stack previously returned by saveGraphicsStacks
 */
protected final void restoreGraphicsStacks(Stack<PDGraphicsState> snapshot)
{
    this.graphicsStack = snapshot;
}
/**
 * Reports how many graphics states are currently on this engine's stack.
 *
 * @return the number of entries in the graphics stack
 */
public int getGraphicsStackSize()
{
    final int depth = this.graphicsStack.size();
    return depth;
}
/**
 * Gives access to the current text line matrix.
 *
 * @return the text line matrix, or null when none has been set
 */
public Matrix getTextLineMatrix()
{
    return this.textLineMatrix;
}
/**
 * Replaces the current text line matrix.
 *
 * @param value the new text line matrix
 */
public void setTextLineMatrix(Matrix value)
{
    this.textLineMatrix = value;
}
/**
 * Gives access to the current text matrix.
 *
 * @return the text matrix, or null when none has been set
 */
public Matrix getTextMatrix()
{
    return this.textMatrix;
}
/**
 * Replaces the current text matrix.
 *
 * @param value the new text matrix
 */
public void setTextMatrix(Matrix value)
{
    this.textMatrix = value;
}
/**
 * @return the resources of the stream currently being processed (may be null
 *         when no stream is being processed)
 */
public PDResources getResources()
{
    return this.resources;
}
/**
 * Duplicates the current graphics state and pushes the copy, so later changes
 * can be undone with restoreGraphicsState.
 */
public void saveGraphicsState()
{
    final PDGraphicsState copy = graphicsStack.peek().clone();
    graphicsStack.push(copy);
}
/**
 * Discards the current graphics state, making the previously saved one current.
 */
public void restoreGraphicsState()
{
    this.graphicsStack.pop();
}
/**
 * Moves the text matrix by the given displacement.
 *
 * @param tx horizontal displacement
 * @param ty vertical displacement
 * @throws IOException declared for overriders; not thrown by this implementation
 */
protected void applyTextAdjustment(float tx, float ty) throws IOException
{
    final Matrix shift = Matrix.getTranslateInstance(tx, ty);
    textMatrix.concatenate(shift);
}
/**
 * Processes the content stream of a form XObject, provided a page is being
 * processed and the form's stream is not empty.
 *
 * @param form the form XObject to show
 * @throws IOException if the form's stream cannot be processed
 * @throws IllegalStateException if no page is currently being processed
 */
public void showForm(PDFormXObject form) throws IOException
{
    if (currentPage == null)
    {
        throw new IllegalStateException("No current page, call " +
                "#processChildStream(PDContentStream, PDPage) instead");
    }
    if (form.getCOSObject().getLength() <= 0)
    {
        // empty stream: nothing to interpret
        return;
    }
    processStream(form);
}
/**
 * Entry point for showing a single encoded string; hands the bytes straight to
 * showText.
 *
 * NOTE(review): unlike showTextStrings, this path does not reset the static result
 * lists or store them afterwards - confirm that this is intended.
 *
 * @param string the encoded text
 * @throws IOException if there was an error showing the text
 */
public void showTextString(byte[] string) throws IOException
{
    this.showText(string);
}
/**
 * Shows a transparency group by processing it with processTransparencyGroup.
 *
 * @param form the transparency group to show
 * @throws IOException if the group cannot be processed
 */
@Override
public void showTransparencyGroup(PDTransparencyGroup form) throws IOException
{
    this.processTransparencyGroup(form);
}
/**
 * Processes the content stream of a transparency group with its own resources,
 * graphics stack and initial matrix, then restores the caller's state.
 *
 * <p>The state is restored through this class's own {@code restoreGraphicsStacks}:
 * the stack was saved with {@code saveGraphicsStacks}, which swaps this class's
 * {@code graphicsStack} field, so the inherited {@code restoreGraphicsStack} would
 * leave that field pointing at the group's stack. Restoration runs in a
 * {@code finally} block so a failing operator cannot leave the group's state active.
 *
 * @param group the transparency group to process
 * @throws IOException if the group's stream cannot be parsed or an operator fails
 * @throws IllegalStateException if no page is currently being processed
 */
@Override
protected void processTransparencyGroup(PDTransparencyGroup group) throws IOException
{
    if (currentPage == null)
    {
        throw new IllegalStateException("No current page, call " +
                "#processChildStream(PDContentStream, PDPage) instead");
    }

    PDResources parent = pushResources(group);
    Stack<PDGraphicsState> savedStack = saveGraphicsStacks();
    Matrix parentMatrix = initialMatrix;
    try
    {
        // the stream's initial matrix includes the parent CTM, e.g. this allows a scaled form
        initialMatrix = getGraphicsState().getCurrentTransformationMatrix().clone();

        // transform the CTM using the stream's matrix
        getGraphicsState().getCurrentTransformationMatrix().concatenate(group.getMatrix());

        // Before execution of the transparency group XObject's content stream,
        // the current blend mode in the graphics state shall be initialized to Normal,
        // the current stroking and nonstroking alpha constants to 1.0, and the current
        // soft mask to None.
        getGraphicsState().setBlendMode(BlendMode.NORMAL);
        getGraphicsState().setAlphaConstant(1);
        getGraphicsState().setNonStrokeAlphaConstant(1);
        getGraphicsState().setSoftMask(null);

        // clip to bounding box
        clipToRect(group.getBBox());

        processStreamOperators(group);
    }
    finally
    {
        // undo everything set up above, in reverse order, even on failure
        initialMatrix = parentMatrix;
        restoreGraphicsStacks(savedStack);
        popResources(parent);
    }
}
/**
 * Handles a TJ array: numbers move the text matrix, strings are shown glyph by
 * glyph. The static result lists are re-created for this call and, when at least
 * one glyph was recorded, stored in the Horizontalparsing maps under the current
 * tj_ycount, which is then incremented.
 *
 * @param array the TJ operand array (numbers and strings)
 * @throws IOException if the array holds an element of another type or a string
 *         cannot be shown
 */
@Override
public void showTextStrings(COSArray array) throws IOException
{
    final PDTextState text = getGraphicsState().getTextState();
    final float size = text.getFontSize();
    final float hScale = text.getHorizontalScaling() / 100f;
    final PDFont currentFont = text.getFont();

    // fresh result lists for this TJ operation
    chars = new ArrayList<ArrayList<Double>>();
    charmatrixs = new ArrayList<Matrix>();
    tjchars = new ArrayList<String>();

    final boolean vertical = currentFont != null && currentFont.isVertical();

    for (COSBase element : array)
    {
        if (element instanceof COSString)
        {
            showText(((COSString) element).getBytes());
            continue;
        }
        if (!(element instanceof COSNumber))
        {
            throw new IOException("Unknown type in array for TJ operation:" + element);
        }

        // a number is an adjustment in thousandths of a text space unit, applied
        // along the writing direction
        final float adjustment = ((COSNumber) element).floatValue();
        final float dx = vertical ? 0f : -adjustment / 1000 * size * hScale;
        final float dy = vertical ? -adjustment / 1000 * size : 0f;
        applyTextAdjustment(dx, dy);
    }

    if (chars.isEmpty() || charmatrixs.isEmpty())
    {
        // nothing was recorded for this TJ
        return;
    }
    Horizontalparsing.poscharobj.put(Horizontalparsing.tj_ycount, chars);
    Horizontalparsing.txtposmatrix.put(Horizontalparsing.tj_ycount, charmatrixs);
    Horizontalparsing.wordobj.put(Horizontalparsing.tj_ycount, tjchars);
    Horizontalparsing.tj_ycount += 1;
}
/**
 * Shows a string of encoded text glyph by glyph and records, for every glyph, its
 * Unicode text, its text rendering matrix, and the text matrix position before and
 * after the glyph's advance.
 *
 * <p>Each entry added to {@code chars} holds four values: values (2,0) and (2,1) of
 * the text matrix before the glyph, followed by the same two values after the text
 * matrix has been advanced past it.
 *
 * <p>The static result lists are otherwise only created by {@code showTextStrings};
 * they are created here on demand so that a string shown before any TJ array does
 * not fail with a NullPointerException.
 *
 * @param string the encoded text
 * @throws IOException if a character code cannot be read or a glyph cannot be shown
 */
@Override
protected void showText(byte[] string) throws IOException
{
    // lazily create the result lists when this is reached without a preceding TJ
    if (chars == null)
    {
        chars = new ArrayList<ArrayList<Double>>();
    }
    if (charmatrixs == null)
    {
        charmatrixs = new ArrayList<Matrix>();
    }
    if (tjchars == null)
    {
        tjchars = new ArrayList<String>();
    }

    PDGraphicsState state = getGraphicsState();
    PDTextState textState = state.getTextState();

    // get the current font, falling back to the default font when none is set
    PDFont font = textState.getFont();
    if (font == null)
    {
        font = PDFontFactory.createDefaultFont();
    }

    float fontSize = textState.getFontSize();
    float horizontalScaling = textState.getHorizontalScaling() / 100f;
    float charSpacing = textState.getCharacterSpacing();

    // put the text state parameters into matrix form
    Matrix parameters = new Matrix(
            fontSize * horizontalScaling, 0, // 0
            0, fontSize,                     // 0
            0, textState.getRise());         // 1

    // read the stream until it is empty
    InputStream in = new ByteArrayInputStream(string);
    while (in.available() > 0)
    {
        // decode a character; the number of bytes consumed is the code length
        int before = in.available();
        int code = font.readCode(in);
        int codeLength = before - in.available();
        String unicode = font.toUnicode(code);

        // start and end position of this glyph
        ArrayList<Double> pstnchar = new ArrayList<Double>();

        // Word spacing shall be applied to every occurrence of the single-byte character code
        // 32 in a string when using a simple font or a composite font that defines code 32 as
        // a single-byte code.
        float wordSpacing = 0;
        if (codeLength == 1 && code == 32)
        {
            wordSpacing += textState.getWordSpacing();
        }

        // text rendering matrix (text space -> device space)
        Matrix ctm = state.getCurrentTransformationMatrix();
        Matrix textRenderingMatrix = parameters.multiply(textMatrix).multiply(ctm);

        // get glyph's position vector if this is vertical text
        // changes to vertical text should be tested with PDFBOX-2294 and PDFBOX-1422
        if (font.isVertical())
        {
            // position vector, in text space
            Vector v = font.getPositionVector(code);

            // apply the position vector to the horizontal origin to get the vertical origin
            textRenderingMatrix.translate(v);
        }

        // get glyph's horizontal and vertical displacements, in text space
        Vector w = font.getDisplacement(code);

        // process the decoded glyph; the text matrices are put back afterwards in case
        // showGlyph replaced them
        saveGraphicsState();
        Matrix textMatrixOld = textMatrix;
        Matrix textLineMatrixOld = textLineMatrix;
        showGlyph(textRenderingMatrix, font, code, unicode, w);
        textMatrix = textMatrixOld;
        textLineMatrix = textLineMatrixOld;

        // position before the glyph's advance
        pstnchar.add((double) textMatrix.getValue(2, 0));
        pstnchar.add((double) textMatrix.getValue(2, 1));
        charmatrixs.add(textRenderingMatrix);
        restoreGraphicsState();

        // calculate the combined displacements
        float tx, ty;
        if (font.isVertical())
        {
            tx = 0;
            ty = w.getY() * fontSize + charSpacing + wordSpacing;
        }
        else
        {
            tx = (w.getX() * fontSize + charSpacing + wordSpacing) * horizontalScaling;
            ty = 0;
        }

        // update the text matrix
        textMatrix.concatenate(Matrix.getTranslateInstance(tx, ty));

        // position after the glyph's advance
        pstnchar.add((double) textMatrix.getValue(2, 0));
        pstnchar.add((double) textMatrix.getValue(2, 1));
        tjchars.add(unicode);
        chars.add(pstnchar);
    }
}
import java.awt.geom.GeneralPath;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import org.apache.pdfbox.contentstream.PDContentStream;
import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.OperatorProcessor;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDFontFactory;
import org.apache.pdfbox.pdmodel.graphics.blend.BlendMode;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDTransparencyGroup;
import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
import org.apache.pdfbox.pdmodel.graphics.state.PDTextState;
import org.apache.pdfbox.util.Matrix;
import org.apache.pdfbox.util.Vector;
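public class PDStreamengine extends PDFStreamEngine {
public static Map<String, OperatorProcessor> operators = new HashMap<String, OperatorProcessor>(80);
private Matrix textMatrix;
private Matrix textLineMatrix;
private Stack<PDGraphicsState> graphicsStack = new Stack<PDGraphicsState>();
private PDResources resources;
private PDPage currentPage;
private Matrix initialMatrix;
public static ArrayList<ArrayList<Double>> chars;
public static ArrayList<Matrix> charmatrixs ;
public static ArrayList<String> tjchars;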
@Override
public void processPage(PDPage page) throws IOException
{
initPage(page);
if (page.hasContents())
{
processStream(page);
}
}
private void initPage(PDPage page)
{
if (page == null)
{
throw new IllegalArgumentException("Page cannot be null");
}
currentPage = page;
graphicsStack.clear();
graphicsStack.push(new PDGraphicsState(page.getCropBox()));
textMatrix = null;
textLineMatrix = null;
resources = null;
initialMatrix = page.getMatrix();
}
public void processStream(PDContentStream contentStream) throws IOException
{
PDResources parent = pushResources(contentStream);
Stack<PDGraphicsState> savedStack = saveGraphicsStacks();
Matrix parentMatrix = initialMatrix;
// transform the CTM using the stream's matrix
getGraphicsState().getCurrentTransformationMatrix().concatenate(contentStream.getMatrix());
// the stream's initial matrix includes the parent CTM, e.g. this allows a scaled form
initialMatrix = getGraphicsState().getCurrentTransformationMatrix().clone();
// clip to bounding box
PDRectangle bbox = contentStream.getBBox();
clipToRect(bbox);
processStreamOperators(contentStream);
initialMatrix = parentMatrix;
restoreGraphicsStacks(savedStack);
popResources(parent);
}
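private PDResources pushResources(PDContentStream contentStream)
{
// resource lookup: first look for stream resources, then fallback to the current page
PDResources parentResources = resources;
PDResources streamResources = contentStream.getResources();
if (streamResources != null)
{
resources = streamResources;
}
else if (resources != null)
{
// inherit directly from parent stream, this is not in the PDF spec, but the file from
// PDFBOX-1359 does this and works in Acrobat
}
else
{
resources = currentPage.getResources();
}
// resources are required in PDF
if (resources == null)
{
resources = new PDResources();
}
return parentResources;
}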
private void clipToRect(PDRectangle rectangle)
{
if (rectangle != null)
{
GeneralPath clip = rectangle.transform(getGraphicsState().getCurrentTransformationMatrix());
getGraphicsState().intersectClippingPath(clip);
}
}
private void processStreamOperators(PDContentStream contentStream) throws IOException
{
List<COSBase> arguments = new ArrayList<COSBase>();
PDFStreamParser parser = new PDFStreamParser(contentStream);
new ProcessClasses();
Object token = parser.parseNextToken();
while (token != null)
{
if (token instanceof COSObject)
{
arguments.add(((COSObject) token).getObject());
}
else if (token instanceof Operator)
{
processOperator((Operator) token, arguments);
arguments = new ArrayList<COSBase>();
}
else
{
arguments.add((COSBase) token);
}
token = parser.parseNextToken();
}
}
private void popResources(PDResources parentResources)
{
resources = parentResources;
}
protected void processOperator(Operator operator, List<COSBase> operands) throws IOException
{
String name = operator.getName();
OperatorProcessor processor = operators.get(name);
if (processor != null)