使用ClearNLP库在Java中实现语义角色标注
我想使用ClearNLP库进行语义角色标注。我已经将modelType设置为“general-en”,并将输入设置为一个文件,我猜输出会写入另一个文件中。请帮帮我,我得到了NullPointerException。代码如下:使用ClearNLP库在Java中实现语义角色标注,java,nlp,Java,Nlp,我想使用ClearNLP库进行语义角色标注。我已经将modelType设置为“general-en”,并将输入设置为一个文件,我猜输出会写入另一个文件中。请帮帮我,我得到了NullPointerException。代码如下: public class DemoNLPDecode{ final String language = AbstractReader.LANG_EN; public DemoNLPDecode(String modelType, String inputFil
/**
 * Demo that runs a ClearNLP pipeline (POS tagging, dependency parsing,
 * predicate identification, role classification and semantic role labeling)
 * over one hard-coded sentence and over every sentence of an input file.
 *
 * <p>NOTE(review): the tokenizer and components are obtained through
 * {@code NLPGetter}; a {@code NullPointerException} thrown from those calls
 * presumably means the ClearNLP dictionary/model JARs are missing from the
 * classpath -- confirm against the ClearNLP setup instructions.
 */
public class DemoNLPDecode{
    final String language = AbstractReader.LANG_EN;

    /**
     * Builds the pipeline, labels a sample sentence to standard output, then
     * labels the contents of {@code inputFile} into {@code outputFile}.
     *
     * @param modelType  model identifier passed to {@code NLPGetter.getComponent}
     * @param inputFile  path of the text file to read
     * @param outputFile path of the file the labeled trees are written to
     * @throws Exception if a component cannot be loaded or a file cannot be opened or closed
     */
    public DemoNLPDecode(String modelType, String inputFile, String outputFile) throws Exception{
        AbstractTokenizer tokenizer = NLPGetter.getTokenizer(language);
        AbstractComponent tagger = NLPGetter.getComponent(modelType, language, NLPMode.MODE_POS);
        AbstractComponent parser = NLPGetter.getComponent(modelType, language, NLPMode.MODE_DEP);
        AbstractComponent identifier = NLPGetter.getComponent(modelType, language, NLPMode.MODE_PRED);
        AbstractComponent classifier = NLPGetter.getComponent(modelType, language, NLPMode.MODE_ROLE);
        AbstractComponent labeler = NLPGetter.getComponent(modelType, language, NLPMode.MODE_SRL);

        // Components are applied to each tree in exactly this order.
        AbstractComponent [] components = {tagger, parser, identifier, classifier, labeler};

        String sentence = "I'd like to meet Dr. Choi.";
        process(tokenizer, components, sentence);

        // The reader is opened here, so it is closed here; the output stream
        // is closed by process(...) itself.
        BufferedReader reader = UTInput.createBufferedFileReader(inputFile);
        try{
            process(tokenizer, components, reader, UTOutput.createPrintBufferedFileStream(outputFile));
        }
        finally{
            reader.close();
        }
    }

    /**
     * Tokenizes one sentence, runs every component over the resulting tree and
     * prints {@code tree.toStringSRL()} followed by a blank line to standard output.
     *
     * @param tokenizer  tokenizer used to split {@code sentence} into tokens
     * @param components components applied to the tree, in array order
     * @param sentence   raw sentence text
     */
    public void process(AbstractTokenizer tokenizer, AbstractComponent[] components, String sentence){
        DEPTree tree = NLPGetter.toDEPTree(tokenizer.getTokens(sentence));

        for (AbstractComponent component : components){
            component.process(tree);
        }

        System.out.println(tree.toStringSRL()+"\n");
    }

    /**
     * Segments the reader's content into sentences, runs every component over
     * each sentence tree and writes {@code tree.toStringSRL()} followed by a
     * blank line to {@code fout}. {@code fout} is always closed before this
     * method returns, even when processing fails; {@code reader} is left open
     * for the caller to close.
     *
     * @param tokenizer  tokenizer handed to the sentence segmenter
     * @param components components applied to each tree, in array order
     * @param reader     source of the text to label
     * @param fout       destination stream; closed by this method
     */
    public void process(AbstractTokenizer tokenizer, AbstractComponent[] components, BufferedReader reader, PrintStream fout){
        try{
            AbstractSegmenter segmenter = NLPGetter.getSegmenter(language, tokenizer);

            for (List<String> tokens : segmenter.getSentences(reader)){
                DEPTree tree = NLPGetter.toDEPTree(tokens);

                for (AbstractComponent component : components){
                    component.process(tree);
                }

                fout.println(tree.toStringSRL()+"\n");
            }
        }
        finally{
            // Close on every path so a failing component does not leak the file handle.
            fout.close();
        }
    }

    /**
     * Runs the demo with hard-coded model type and file paths; any failure is
     * reported by printing its stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args){
        String modelType = "general-en"; // "general-en" or "medical-en"
        String inputFile = "E:/References/Test Files/46.txt";
        String outputFile = "E:/References/Test Files/46_1.txt";

        try{
            new DemoNLPDecode(modelType, inputFile, outputFile);
        }
        catch (Exception e) {e.printStackTrace();}
    }
}
公共类DemoNLPDecode{
最终字符串语言=AbstractReader.LANG\u EN;
公共DemoNLPDecode(字符串模型类型、字符串输入文件、字符串输出文件)引发异常{
AbstractTokenizer-tokenizer=NLPGetter.getTokenizer(语言);
AbstractComponent tagger=NLPGetter.getComponent(模型类型、语言、NLPMode.MODE_POS);
AbstractComponent parser=NLPGetter.getComponent(模型类型、语言、NLPMode.MODE\u DEP);
AbstractComponent标识符=NLPGetter.getComponent(模型类型、语言、NLPMode.MODE_PRED);
AbstractComponent classifier=NLPGetter.getComponent(模型类型、语言、NLPMode.MODE_角色);
AbstractComponent labeler=NLPGetter.getComponent(模型类型、语言、NLPMode.MODE_SRL);
AbstractComponent[]components={tagger、解析器、标识符、分类器、标签器};
String语句=“我想见见崔博士。”;
过程(标记器、组件、句子);
进程(标记器、组件、UTInput.createBufferedFileReader(inputFile)、UTOutput.createPrintBufferedFileStream(outputFile));
}
公共无效进程(AbstractTokenizer标记器、AbstractComponent[]组件、字符串语句){
DEPTree-tree=NLPGetter.toDEPTree(tokenizer.getTokens(句子));
for(抽象组件:组件)
组件、过程(树);
System.out.println(tree.toStringSRL()+“\n”);
}
公共无效进程(AbstractTokenizer标记器、AbstractComponent[]组件、BufferedReader读取器、PrintStream fout){
AbstractSegmenter-segmenter=NLPGetter.getSegmenter(语言,标记器);
树木;
for(列表标记:segmenter.getSequences(reader)){
tree=NLPGetter.toDEPTree(令牌);
for(抽象组件:组件)
组件、过程(树);
println(tree.toStringSRL()+“\n”);
}
fout.close();
}
公共静态void main(字符串[]args){
字符串modelType=“general en”;/“general en”或“medical en”
String inputFile=“E:/References/Test Files/46.txt”;
String outputFile=“E:/References/Test Files/46_1.txt”;
试一试{
新的DemoNLPDecode(modelType、inputFile、outputFile);
}
catch(异常e){e.printStackTrace();}
}
}
我得到的错误是
java.lang.NullPointerException
at java.io.Reader.<init>(Reader.java:78)
at java.io.InputStreamReader.<init>(InputStreamReader.java:72)
at com.clearnlp.util.UTInput.getStringSet(UTInput.java:101)
at com.clearnlp.tokenization.EnglishTokenizer.initDictionaries(EnglishTokenizer.java:305)
at com.clearnlp.tokenization.EnglishTokenizer.<init>(EnglishTokenizer.java:130)
at com.clearnlp.nlp.NLPGetter.getTokenizer(NLPGetter.java:106)
at DemoNLPDecode.<init>(DemoNLPDecode.java:25)
at DemoNLPDecode.main(DemoNLPDecode.java:75)
java.lang.NullPointerException
at java.util.zip.InflaterInputStream.<init>(InflaterInputStream.java:83)
at java.util.zip.GZIPInputStream.<init>(GZIPInputStream.java:77)
at java.util.zip.GZIPInputStream.<init>(GZIPInputStream.java:91)
at com.clearnlp.nlp.NLPGetter.getObjectInputStream(NLPGetter.java:176)
at com.clearnlp.nlp.NLPGetter.getComponent(NLPGetter.java:147)
at DemoNLPDecode.<init>(DemoNLPDecode.java:26)
at DemoNLPDecode.main(DemoNLPDecode.java:75)
java.lang.NullPointerException
at java.io.Reader.<init>(Reader.java:78)
at java.io.InputStreamReader.<init>(InputStreamReader.java:72)
at com.clearnlp.util.UTInput.getStringSet(UTInput.java:101)
at com.clearnlp.tokenization.EnglishTokenizer.initDictionaries(EnglishTokenizer.java:305)
at com.clearnlp.tokenization.EnglishTokenizer.<init>(EnglishTokenizer.java:130)
at com.clearnlp.nlp.NLPGetter.getTokenizer(NLPGetter.java:106)
at DemoNLPDecode.<init>(DemoNLPDecode.java:25)
at DemoNLPDecode.main(DemoNLPDecode.java:75)
java.lang.NullPointerException
at java.util.zip.InflaterInputStream.<init>(InflaterInputStream.java:83)
at java.util.zip.GZIPInputStream.<init>(GZIPInputStream.java:77)
at java.util.zip.GZIPInputStream.<init>(GZIPInputStream.java:91)
at com.clearnlp.nlp.NLPGetter.getObjectInputStream(NLPGetter.java:176)
at com.clearnlp.nlp.NLPGetter.getComponent(NLPGetter.java:147)
at DemoNLPDecode.<init>(DemoNLPDecode.java:26)
at DemoNLPDecode.main(DemoNLPDecode.java:75)
您好,您是否初始化了词典?在我看来,您似乎缺少一个库。您是使用maven进行依赖管理,还是只是手动导入JAR?能否提供更多信息?如果您正在使用maven,请同时贴出pom.xml。我没有使用maven,我是手动加载JAR的。我不知道应该在哪里初始化词典。您能给我一个简单介绍如何加载词典的链接吗?我找到了一个示例,但它使用了maven。不过,您可以检查类路径中是否包含所有必需的JAR。我想,您需要clearnlp-dictionary依赖项(请参阅pom.xml,它可以帮助您找到所需的依赖项)。