Java 突出显示PDF中的单词_Java_Pdf_Word_Highlight

Java 突出显示PDF中的单词

java pdf

Java 突出显示PDF中的单词,java,pdf,word,highlight,Java,Pdf,Word,Highlight,我有一个PDF和一些关键字。我需要的是在PDF中搜索这些关键字，在PDF中突出显示它们，然后保存它。在此之后，我必须在谷歌文档中查看此PDF文件，并在其中突出显示文字。我必须用Java来做这件事我的代码是 package com.hiringsteps.ats.util.pdfclownUtil; import java.awt.geom.Rectangle2D; import java.util.ArrayList; import java.util.Col

我有一个 PDF 文件和一组关键字。我需要在 PDF 中搜索这些关键字，将它们高亮显示，然后保存该文件。之后，我需要在 Google Docs 中查看这个 PDF 文件，并且其中的关键字应保持高亮显示。这一切必须用 Java 实现。

我的代码是

    package com.hiringsteps.ats.util.pdfclownUtil;

    import java.awt.geom.Rectangle2D;
    import java.util.ArrayList;
    import java.util.Collection;
    import java.util.List;
    import java.util.Map;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    import org.pdfclown.documents.Page;
    import org.pdfclown.documents.contents.ITextString;
    import org.pdfclown.documents.contents.TextChar;
    import org.pdfclown.documents.interaction.annotations.TextMarkup;
    import org.pdfclown.documents.interaction.annotations.TextMarkup.MarkupTypeEnum;
    import org.pdfclown.files.File;
    import org.pdfclown.files.SerializationModeEnum;
    import org.pdfclown.util.math.Interval;
    import org.pdfclown.util.math.geom.Quad;
    import org.pdfclown.tools.TextExtractor;

    import com.hiringsteps.ats.applicant.domain.ApplicantKeyWord;
    import com.hiringsteps.ats.job.domain.CustomerJobKeyword;

    /**
     * Highlights keyword occurrences in a PDF document (via PDF Clown) and reports
     * how many times each keyword was matched.
     */
    public class TextHighlightUtil 
    {
        /**
         * Opens the PDF at {@code inputPath}, adds a highlight text-markup annotation
         * over every case-insensitive match of each keyword name on every page, and
         * saves the result to {@code outputPath} using incremental serialization.
         *
         * <p>A failure while saving or closing is reported on standard output and
         * does not propagate; the collected counts are still returned.
         *
         * @param inputPath path of the PDF file to read
         * @param outputPath path the annotated PDF is written to
         * @param customerJobKeywordList keywords to search for; each keyword's
         *        {@code getName()} is compiled as a regular expression
         * @return one {@code ApplicantKeyWord} per input keyword, carrying the keyword
         *         and the number of matches found across all pages
         * @throws RuntimeException if the input file cannot be opened
         */
        public Collection<ApplicantKeyWord> highlight(String inputPath, String outputPath, Collection<CustomerJobKeyword> customerJobKeywordList )
        {
            Collection<ApplicantKeyWord> applicantKeywordList = new ArrayList<ApplicantKeyWord>();

            // 1. Open the PDF file!
            File file;
            try
            {
                file = new File(inputPath);
            }
            catch(Exception e)
            {
                throw new RuntimeException(inputPath + " file access error.",e);
            }

            try
            {
                for(CustomerJobKeyword key : customerJobKeywordList)
                {
                    ApplicantKeyWord applicantKeyword = new ApplicantKeyWord();
                    applicantKeyword.setKey(key);

                    // Per-keyword match counter. Kept in a local holder (rather than an
                    // instance field) so the anonymous filter below can update it
                    // without the utility carrying state between calls.
                    final int[] matchCount = new int[1];

                    // NOTE(review): the keyword name is compiled as a raw regex, so names
                    // containing metacharacters (e.g. "C++") are not matched literally and
                    // may throw PatternSyntaxException — confirm whether Pattern.quote
                    // is wanted here.
                    Pattern pattern = Pattern.compile(key.getName(), Pattern.CASE_INSENSITIVE);

                    // 2. Iterating through the document pages...
                    TextExtractor textExtractor = new TextExtractor(true, true);
                    for(final Page page : file.getDocument().getPages())
                    {
                        // 2.1. Extract the page text!
                        Map<Rectangle2D,List<ITextString>> textStrings = textExtractor.extract(page);
                        // 2.2. Find the text pattern matches!
                        final Matcher matcher = pattern.matcher(TextExtractor.toString(textStrings));
                        // 2.3. Highlight the text pattern matches!
                        textExtractor.filter(textStrings,
                            new TextExtractor.IIntervalFilter()
                            {
                                public boolean hasNext()
                                {
                                    // Advances the matcher; every successful find is one occurrence.
                                    if(matcher.find())
                                    {
                                        matchCount[0]++;
                                        return true;
                                    }
                                    return false;
                                }

                                public Interval<Integer> next()
                                {
                                    return new Interval<Integer>(matcher.start(), matcher.end());
                                }

                                public void process(Interval<Integer> interval, ITextString match)
                                {
                                    // Defining the highlight boxes of the text pattern match:
                                    // one box per run of characters, starting a new box whenever
                                    // a character lies below the box accumulated so far.
                                    List<Quad> highlightQuads = new ArrayList<Quad>();
                                    Rectangle2D textBox = null;
                                    for(TextChar textChar : match.getTextChars())
                                    {
                                        Rectangle2D textCharBox = textChar.getBox();
                                        if(textBox == null)
                                        {
                                            textBox = (Rectangle2D)textCharBox.clone();
                                        }
                                        else if(textCharBox.getY() > textBox.getMaxY())
                                        {
                                            highlightQuads.add(Quad.get(textBox));
                                            textBox = (Rectangle2D)textCharBox.clone();
                                        }
                                        else
                                        {
                                            textBox.add(textCharBox);
                                        }
                                    }
                                    if(textBox == null)
                                    {
                                        // The match carried no characters: nothing to highlight.
                                        return;
                                    }
                                    // Extend the final box's height by 5 units before adding it.
                                    textBox.setRect(textBox.getX(), textBox.getY(), textBox.getWidth(), textBox.getHeight()+5);
                                    highlightQuads.add(Quad.get(textBox));

                                    // Highlight the text pattern match and flag the annotation visible.
                                    TextMarkup markup = new TextMarkup(page, MarkupTypeEnum.Highlight, highlightQuads);
                                    markup.setVisible(true);
                                }

                                public void remove()
                                {throw new UnsupportedOperationException();}
                            }
                        );
                    }
                    applicantKeyword.setCount(matchCount[0]);
                    applicantKeywordList.add(applicantKeyword);
                }

                // 3. Save the annotated document (best effort: failures are reported, not thrown).
                try
                {
                    file.save(new java.io.File(outputPath), SerializationModeEnum.Incremental);
                }
                catch(Exception e)
                {
                    System.out.println("File writing failed: " + e.getMessage());
                    e.printStackTrace();
                }
            }
            finally
            {
                // Always release the underlying file, even when processing or saving failed.
                try
                {
                    file.close();
                }
                catch(Exception e)
                {
                    System.out.println("File writing failed: " + e.getMessage());
                    e.printStackTrace();
                }
            }

            return applicantKeywordList;
        }

    }

package com.hiringsteps.ats.util.pdfclownUtil;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.pdfclown.documents.Page;
import org.pdfclown.documents.contents.ITextString;
import org.pdfclown.documents.contents.TextChar;
import org.pdfclown.documents.interaction.annotations.TextMarkup;
import org.pdfclown.documents.interaction.annotations.TextMarkup.MarkupTypeEnum;
import org.pdfclown.files.File;
import org.pdfclown.files.SerializationModeEnum;
import org.pdfclown.util.math.Interval;
import org.pdfclown.util.math.geom.Quad;
import org.pdfclown.tools.TextExtractor;
import com.hiringsteps.ats.applicant.domain.ApplicantKeyWord;
import com.hiringsteps.ats.job.domain.CustomerJobKeyword;
public class TextHighlightUtil
{
private int count;
public Collection
PDF Clown 的作者表示，该问题是由于标记注释（markup annotation）缺少与之关联的显式外观流（appearance stream）造成的。此问题已在后续提交的修订中解决（原文此处的链接已丢失）。
如果您在 Adobe 中手动高亮某些内容，然后在 Google Docs/Drive 中打开该文件，高亮是否会显示？（也就是说，这是文档本身的问题，还是您的代码的问题？）