Java 突出显示PDF中的单词
我有一个PDF和一些关键字。我需要在PDF中搜索这些关键字,把它们突出显示,然后保存该文件。之后,我还需要在Google Docs中查看这个PDF,并且其中的文字应当保持突出显示。这一切必须用Java来完成。(标签:java, pdf, word, highlight)我的代码是:
package com.hiringsteps.ats.util.pdfclownUtil;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.pdfclown.documents.Page;
import org.pdfclown.documents.contents.ITextString;
import org.pdfclown.documents.contents.TextChar;
import org.pdfclown.documents.interaction.annotations.TextMarkup;
import org.pdfclown.documents.interaction.annotations.TextMarkup.MarkupTypeEnum;
import org.pdfclown.files.File;
import org.pdfclown.files.SerializationModeEnum;
import org.pdfclown.util.math.Interval;
import org.pdfclown.util.math.geom.Quad;
import org.pdfclown.tools.TextExtractor;
import com.hiringsteps.ats.applicant.domain.ApplicantKeyWord;
import com.hiringsteps.ats.job.domain.CustomerJobKeyword;
/**
 * Highlights keyword matches in a PDF file with PDF Clown text-markup
 * annotations and reports how many matches each keyword produced.
 */
public class TextHighlightUtil
{
    /** Extra height added to the last highlight box of every match. */
    private static final double HIGHLIGHT_HEIGHT_PADDING = 5;

    /**
     * Opens the PDF at {@code inputPath}, highlights every case-insensitive
     * match of each keyword name on every page, and saves the result to
     * {@code outputPath} using incremental serialization.
     *
     * <p>A failure while saving is reported on standard output / standard
     * error and does not abort the call; the collected counts are still
     * returned. The opened file is always closed before returning.
     *
     * @param inputPath path of the PDF to read
     * @param outputPath path the highlighted PDF is written to
     * @param customerJobKeywordList keywords to look for; each keyword's
     *        {@code getName()} is compiled as a regular expression
     * @return one {@code ApplicantKeyWord} per keyword, in iteration order,
     *         carrying the keyword and its total match count over all pages
     * @throws RuntimeException if the input file cannot be opened
     */
    public Collection<ApplicantKeyWord> highlight(String inputPath, String outputPath, Collection<CustomerJobKeyword> customerJobKeywordList )
    {
        Collection<ApplicantKeyWord> applicantKeywordList = new ArrayList<ApplicantKeyWord>();

        // 1. Open the PDF file.
        File file;
        try
        {
            file = new File(inputPath);
        }
        catch(Exception e)
        {
            throw new RuntimeException(inputPath + " file access error.",e);
        }

        try
        {
            // One extractor is enough for the whole run.
            TextExtractor textExtractor = new TextExtractor(true, true);

            for(CustomerJobKeyword key : customerJobKeywordList)
            {
                ApplicantKeyWord applicantKeyword = new ApplicantKeyWord();
                applicantKeyword.setKey(key);

                // NOTE(review): the keyword name is used as a regular expression, so
                // names containing metacharacters (e.g. "C++") are not matched literally.
                // Switch to Pattern.quote(...) if literal matching is intended.
                Pattern pattern = Pattern.compile(key.getName(), Pattern.CASE_INSENSITIVE);

                // Local counter (instead of an instance field) so that concurrent
                // calls on the same instance cannot corrupt each other's counts.
                final int[] matchCount = {0};

                // 2. Iterate through the document pages.
                for(final Page page : file.getDocument().getPages())
                {
                    // 2.1. Extract the page text.
                    Map<Rectangle2D,List<ITextString>> textStrings = textExtractor.extract(page);

                    // 2.2. Find the text pattern matches.
                    final Matcher matcher = pattern.matcher(TextExtractor.toString(textStrings));

                    // 2.3. Highlight the text pattern matches.
                    textExtractor.filter(textStrings,
                        new TextExtractor.IIntervalFilter()
                        {
                            public boolean hasNext()
                            {
                                if(matcher.find())
                                {
                                    matchCount[0]++;
                                    return true;
                                }
                                return false;
                            }

                            public Interval<Integer> next()
                            {
                                return new Interval<Integer>(matcher.start(), matcher.end());
                            }

                            public void process(Interval<Integer> interval, ITextString match)
                            {
                                List<Quad> highlightQuads = buildHighlightQuads(match);
                                // A match without any character boxes has nothing to mark.
                                if(highlightQuads.isEmpty())
                                {
                                    return;
                                }
                                // Creating the annotation attaches the highlight to the page.
                                new TextMarkup(page, MarkupTypeEnum.Highlight, highlightQuads);
                            }

                            public void remove()
                            {
                                throw new UnsupportedOperationException();
                            }
                        }
                    );
                }

                applicantKeyword.setCount(matchCount[0]);
                applicantKeywordList.add(applicantKeyword);
            }

            // 3. Save the highlighted document (best effort, as before).
            try
            {
                file.save(new java.io.File(outputPath), SerializationModeEnum.Incremental);
            }
            catch(Exception e)
            {
                System.out.println("File writing failed: " + e.getMessage());
                e.printStackTrace();
            }
        }
        finally
        {
            // Always release the file, including when extraction or saving failed.
            try
            {
                file.close();
            }
            catch(Exception e)
            {
                System.out.println("File closing failed: " + e.getMessage());
                e.printStackTrace();
            }
        }
        return applicantKeywordList;
    }

    /**
     * Builds the highlight boxes for one match: character boxes are merged
     * into one rectangle, and a new rectangle is started whenever a character
     * box begins below the bottom edge of the rectangle built so far.
     *
     * @param match the matched text whose character boxes are merged
     * @return the highlight quads; empty when the match has no characters
     */
    private static List<Quad> buildHighlightQuads(ITextString match)
    {
        List<Quad> highlightQuads = new ArrayList<Quad>();
        Rectangle2D textBox = null;
        for(TextChar textChar : match.getTextChars())
        {
            Rectangle2D textCharBox = textChar.getBox();
            if(textBox == null)
            {
                textBox = (Rectangle2D)textCharBox.clone();
            }
            else if(textCharBox.getY() > textBox.getMaxY())
            {
                // The character starts below the current box: flush it and start a new one.
                highlightQuads.add(Quad.get(textBox));
                textBox = (Rectangle2D)textCharBox.clone();
            }
            else
            {
                textBox.add(textCharBox);
            }
        }
        if(textBox != null)
        {
            // Only the final box is padded, matching the previous behaviour.
            textBox.setRect(textBox.getX(), textBox.getY(), textBox.getWidth(), textBox.getHeight() + HIGHLIGHT_HEIGHT_PADDING);
            highlightQuads.add(Quad.get(textBox));
        }
        return highlightQuads;
    }
}
package com.hiringsteps.ats.util.pdfclownUtil;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.pdfclown.documents.Page;
import org.pdfclown.documents.contents.ITextString;
import org.pdfclown.documents.contents.TextChar;
import org.pdfclown.documents.interaction.annotations.TextMarkup;
import org.pdfclown.documents.interaction.annotations.TextMarkup.MarkupTypeEnum;
import org.pdfclown.files.File;
import org.pdfclown.files.SerializationModeEnum;
import org.pdfclown.util.math.Interval;
import org.pdfclown.util.math.geom.Quad;
import org.pdfclown.tools.TextExtractor;
import com.hiringsteps.ats.applicant.domain.ApplicantKeyWord;
import com.hiringsteps.ats.job.domain.CustomerJobKeyword;
public class TextHighlightUtil
{
private int count;
public Collection…(代码片段在此处被截断)

PDF Clown的作者指出,该问题是由于标记注释(markup annotation)缺少与之关联的显式外观流(appearance stream)造成的。此问题已在提交的修订中解决。

如果您在Adobe中手动高亮显示某些内容,然后在Google Docs/Drive中打开该文件,高亮是否仍然显示?(也就是说,这是文档本身的问题,还是您的代码的问题?)