使用 ClearNLP 库在 Java 中实现语义角色标注

使用ClearNLP库实现java中的语义角色标签,java,nlp,Java,Nlp,我想使用clearNLP库进行语义角色标记。。。我已经将modelType的输入设置为“general en”,并将输入设置为文件,我猜输出将写入文件中。。请帮帮我,我得到了NullPointerException。 代码如下: public class DemoNLPDecode{ final String language = AbstractReader.LANG_EN; public DemoNLPDecode(String modelType, String inputFil

我想使用 ClearNLP 库进行语义角色标注。我已将 modelType 设置为 "general-en"，并把输入设置为一个文件，我猜输出将写入文件中。但运行时出现了 NullPointerException，请帮帮我。代码如下：

/**
 * Demo that runs a chain of ClearNLP components (POS, DEP, PRED, ROLE, SRL modes)
 * over one hard-coded sentence and over every sentence of an input file, printing
 * each resulting tree via {@code DEPTree.toStringSRL()}.
 *
 * <p>NOTE(review): a {@code NullPointerException} raised from inside
 * {@code NLPGetter.getTokenizer} or {@code NLPGetter.getComponent} (as opposed to
 * from this class) presumably means the dictionary/model resources could not be
 * found on the classpath -- verify that the ClearNLP dictionary and model JARs
 * matching {@code modelType} are present before debugging this code.
 */
public class DemoNLPDecode{
  final String language = AbstractReader.LANG_EN;

  /**
   * Loads the tokenizer and the five pipeline components, processes a sample
   * sentence to standard output, then processes {@code inputFile} into
   * {@code outputFile}.
   *
   * @param modelType  model name handed to {@code NLPGetter.getComponent}
   *                   (the caller in this file uses {@code "general-en"})
   * @param inputFile  path of the text file to read
   * @param outputFile path of the file the SRL output is written to
   * @throws Exception if loading a component or reading/writing a file fails
   */
  public DemoNLPDecode(String modelType, String inputFile, String outputFile) throws Exception{
    AbstractTokenizer tokenizer = NLPGetter.getTokenizer(language);
    AbstractComponent tagger = NLPGetter.getComponent(modelType, language, NLPMode.MODE_POS);
    AbstractComponent parser = NLPGetter.getComponent(modelType, language, NLPMode.MODE_DEP);
    AbstractComponent identifier = NLPGetter.getComponent(modelType, language, NLPMode.MODE_PRED);
    AbstractComponent classifier = NLPGetter.getComponent(modelType, language, NLPMode.MODE_ROLE);
    AbstractComponent labeler = NLPGetter.getComponent(modelType, language, NLPMode.MODE_SRL);

    // Components are applied to each tree in exactly this order.
    AbstractComponent [] components = {tagger, parser, identifier, classifier, labeler};

    String sentence = "I'd like to meet Dr. Choi.";
    process(tokenizer, components, sentence);

    BufferedReader reader = UTInput.createBufferedFileReader(inputFile);
    try{
      // The stream-based process(...) closes the output stream itself.
      process(tokenizer, components, reader, UTOutput.createPrintBufferedFileStream(outputFile));
    }
    finally{
      // The reader was previously never closed; release it even when processing fails.
      reader.close();
    }
  }

  /**
   * Tokenizes a single sentence, applies every component in order and prints
   * the SRL representation of the tree to standard output.
   *
   * @param tokenizer  tokenizer used to split {@code sentence} into tokens
   * @param components components applied to the tree, in array order
   * @param sentence   raw sentence text
   */
  public void process(AbstractTokenizer tokenizer, AbstractComponent[] components, String sentence){
    DEPTree tree = NLPGetter.toDEPTree(tokenizer.getTokens(sentence));
    for (AbstractComponent component : components){
      component.process(tree);
    }
    System.out.println(tree.toStringSRL()+"\n");
  }

  /**
   * Segments the reader's content into sentences, applies every component to
   * each sentence and prints the SRL representation of each tree to {@code fout}.
   * {@code fout} is always closed before this method returns, including when
   * segmentation or a component throws.
   *
   * @param tokenizer  tokenizer handed to the segmenter
   * @param components components applied to each tree, in array order
   * @param reader     source text; not closed by this method
   * @param fout       destination stream; closed by this method
   */
  public void process(AbstractTokenizer tokenizer, AbstractComponent[] components, BufferedReader reader, PrintStream fout){
    try{
      AbstractSegmenter segmenter = NLPGetter.getSegmenter(language, tokenizer);

      for (List<String> tokens : segmenter.getSentences(reader)){
        DEPTree tree = NLPGetter.toDEPTree(tokens);
        for (AbstractComponent component : components){
          component.process(tree);
        }
        fout.println(tree.toStringSRL()+"\n");
      }
    }
    finally{
      // Close in finally so the stream is released (and buffered output flushed) on failure too.
      fout.close();
    }
  }

  /**
   * Runs the demo with hard-coded model type and file paths; any failure is
   * reported by printing its stack trace.
   *
   * @param args unused
   */
  public static void main(String[] args){
    String modelType  = "general-en";   // "general-en" or "medical-en"
    String inputFile  = "E:/References/Test Files/46.txt";
    String outputFile = "E:/References/Test Files/46_1.txt";

    try{
      new DemoNLPDecode(modelType, inputFile, outputFile);
    }
    catch (Exception e) {e.printStackTrace();}
  }
}
公共类DemoNLPDecode{
最终字符串语言=AbstractReader.LANG\u EN;
公共DemoNLPDecode(字符串模型类型、字符串输入文件、字符串输出文件)引发异常{
AbstractTokenizer-tokenizer=NLPGetter.getTokenizer(语言);
AbstractComponent tagger=NLPGetter.getComponent(模型类型、语言、NLPMode.MODE_POS);
AbstractComponent parser=NLPGetter.getComponent(模型类型、语言、NLPMode.MODE\u DEP);
AbstractComponent标识符=NLPGetter.getComponent(模型类型、语言、NLPMode.MODE_PRED);
AbstractComponent classifier=NLPGetter.getComponent(模型类型、语言、NLPMode.MODE_角色);
AbstractComponent labeler=NLPGetter.getComponent(模型类型、语言、NLPMode.MODE_SRL);
AbstractComponent[]components={tagger、解析器、标识符、分类器、标签器};
String语句=“我想见见崔博士。”;
过程(标记器、组件、句子);
进程(标记器、组件、UTInput.createBufferedFileReader(inputFile)、UTOutput.createPrintBufferedFileStream(outputFile));
}
公共无效进程(AbstractTokenizer标记器、AbstractComponent[]组件、字符串语句){
DEPTree-tree=NLPGetter.toDEPTree(tokenizer.getTokens(句子));
for(抽象组件:组件)
组件、过程(树);
System.out.println(tree.toStringSRL()+“\n”);
}
公共无效进程(AbstractTokenizer标记器、AbstractComponent[]组件、BufferedReader读取器、PrintStream fout){
AbstractSegmenter-segmenter=NLPGetter.getSegmenter(语言,标记器);
树木;
for(列表标记:segmenter.getSequences(reader)){
tree=NLPGetter.toDEPTree(令牌);
for(抽象组件:组件)
组件、过程(树);
println(tree.toStringSRL()+“\n”);
}
fout.close();
}
公共静态void main(字符串[]args){
字符串modelType=“general en”;/“general en”或“medical en”
String inputFile=“E:/References/Test Files/46.txt”;
String outputFile=“E:/References/Test Files/46_1.txt”;
试一试{
新的DemoNLPDecode(modelType、inputFile、outputFile);
}
catch(异常e){e.printStackTrace();}
}
}
我得到的错误是

java.lang.NullPointerException
at java.io.Reader.<init>(Reader.java:78)
at java.io.InputStreamReader.<init>(InputStreamReader.java:72)
at com.clearnlp.util.UTInput.getStringSet(UTInput.java:101)
at com.clearnlp.tokenization.EnglishTokenizer.initDictionaries(EnglishTokenizer.java:305)
at com.clearnlp.tokenization.EnglishTokenizer.<init>(EnglishTokenizer.java:130)
at com.clearnlp.nlp.NLPGetter.getTokenizer(NLPGetter.java:106)
at DemoNLPDecode.<init>(DemoNLPDecode.java:25)
at DemoNLPDecode.main(DemoNLPDecode.java:75)

java.lang.NullPointerException
at java.util.zip.InflaterInputStream.<init>(InflaterInputStream.java:83)
at java.util.zip.GZIPInputStream.<init>(GZIPInputStream.java:77)
at java.util.zip.GZIPInputStream.<init>(GZIPInputStream.java:91)
at com.clearnlp.nlp.NLPGetter.getObjectInputStream(NLPGetter.java:176)
at com.clearnlp.nlp.NLPGetter.getComponent(NLPGetter.java:147)
at DemoNLPDecode.<init>(DemoNLPDecode.java:26)
at DemoNLPDecode.main(DemoNLPDecode.java:75)
java.lang.NullPointerException
在java.io.Reader上。(Reader.java:78)
位于java.io.InputStreamReader。(InputStreamReader.java:72)
位于com.clearnlp.util.UTInput.getStringSet(UTInput.java:101)
位于com.clearnlp.tokenization.EnglishTokenizer.initDictionaries(EnglishTokenizer.java:305)
在com.clearnlp.tokenization.EnglishTokenizer.(EnglishTokenizer.java:130)
位于com.clearnlp.nlp.NLPGetter.getTokenizer(NLPGetter.java:106)
在DemoNLPDecode(DemoNLPDecode.java:25)
位于DemoNLPDecode.main(DemoNLPDecode.java:75)
java.lang.NullPointerException
位于java.util.zip.InflateInputStream。(InflateInputStream.java:83)
位于java.util.zip.GZIPInputStream。(GZIPInputStream.java:77)
位于java.util.zip.GZIPInputStream。(GZIPInputStream.java:91)
位于com.clearnlp.nlp.NLPGetter.getObjectInputStream(NLPGetter.java:176)
位于com.clearnlp.nlp.NLPGetter.getComponent(NLPGetter.java:147)
在DemoNLPDecode(DemoNLPDecode.java:26)
位于DemoNLPDecode.main(DemoNLPDecode.java:75)

您好，您是否初始化了词典？在我看来，您似乎缺少一个库。您是使用 Maven 进行依赖管理，还是只是手动导入 JAR？能否提供更多信息？如果您使用的是 Maven，请同时贴出 pom.xml。——我没有使用 Maven，我是手动加载 JAR 的。我不知道在哪里初始化词典。您能给我提供一个简单介绍如何加载词典的链接吗？——我找到了一个示例（原链接已缺失），但它使用了 Maven。不过，您可以检查类路径中是否包含所有必需的 JAR。我想，您需要 clearnlp 词典（dictionary）依赖项：堆栈跟踪中 EnglishTokenizer.initDictionaries 处的 NullPointerException 表明词典资源很可能没有在类路径中找到（请参阅该示例的 pom.xml，它可以指导您找到所需的依赖项）。