Java: How to get the overall sentiment of a review using Stanford CoreNLP


I am using Stanford CoreNLP to run sentiment analysis over 25,000 movie reviews. So far I get a sentiment for every individual sentence in every review, but I would like to know how to get the overall sentiment of a whole review rather than of each sentence within it (one possible aggregation approach is sketched after the code below).

The code I am using is:

import java.io.*;
import java.util.*;

import edu.stanford.nlp.coref.CorefCoreAnnotations;

import edu.stanford.nlp.coref.data.CorefChain;
import edu.stanford.nlp.io.*;
import edu.stanford.nlp.ling.*;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.util.*;

/** This class demonstrates building and using a Stanford CoreNLP pipeline. */
public class SentimentMain {

  /** Usage: java -cp "*" SentimentMain [inputFile [outputTextFile [outputXmlFile]]] */
  public static void main(String[] args) throws IOException {
    // set up optional output files
    PrintWriter out;
    if (args.length > 1) {
      out = new PrintWriter(args[1]);
    } else {
      out = new PrintWriter(System.out);
    }
    PrintWriter xmlOut = null;
    if (args.length > 2) {
      xmlOut = new PrintWriter(args[2]);
    }

    // Create a CoreNLP pipeline. To build the default pipeline, you can just use:
    //   StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    // Here's a more complex setup example:
    //   Properties props = new Properties();
    //   props.put("annotators", "tokenize, ssplit, pos, lemma, ner, depparse");
    //   props.put("ner.model", "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz");
    //   props.put("ner.applyNumericClassifiers", "false");
    //   StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // Add in sentiment
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref, sentiment");

    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    File[] files = new File("C:/stanford-corenlp-full-2016-10-31/dataset").listFiles();

    String line = null;

    try{
        for (File file : files) {
            if (file.exists()) {
                BufferedReader in = new BufferedReader(new FileReader(file));
                while((line = in.readLine()) != null)
                {
                    Annotation document = new Annotation(line);

                    // run all the selected Annotators on this text
                    pipeline.annotate(document);

                    // this prints out the results of sentence analysis to file(s) in good formats
                    pipeline.prettyPrint(document, out);
                    if (xmlOut != null) {
                      pipeline.xmlPrint(document, xmlOut);
                    }

                    // An Annotation is a Map with Class keys for the linguistic analysis types.
                    // You can get and use the various analyses individually.
                    // For instance, this gets the parse tree of the first sentence in the text.
                    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
                    if (sentences != null && ! sentences.isEmpty()) {
                      CoreMap sentence = sentences.get(0);
                      /*out.println("The keys of the first sentence's CoreMap are:");
                      out.println(sentence.keySet());
                      out.println();
                      out.println("The first sentence is:");
                      out.println(sentence.toShorterString());
                      out.println();
                      out.println("The first sentence tokens are:");*/
                      /*for (CoreMap token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                        out.println(token.toShorterString());
                      }*/
                      Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
                      //out.println();
                      //out.println("The first sentence parse tree is:");
                      tree.pennPrint(out);
                      //out.println();
                      //out.println("The first sentence basic dependencies are:");
                      //out.println(sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class).toString(SemanticGraph.OutputFormat.LIST));
                      //out.println("The first sentence collapsed, CC-processed dependencies are:");
                      SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
                      //out.println(graph.toString(SemanticGraph.OutputFormat.LIST));

                      // Access coreference. In the coreference link graph,
                      // each chain stores a set of mentions that co-refer with each other,
                      // along with a method for getting the most representative mention.
                      // Both sentence and token offsets start at 1!
                      //out.println("Coreference information");
                      Map<Integer, CorefChain> corefChains =
                          document.get(CorefCoreAnnotations.CorefChainAnnotation.class);
                      // Guard with a null check instead of returning, which would abort the entire run
                      if (corefChains != null) {
                        for (Map.Entry<Integer, CorefChain> entry : corefChains.entrySet()) {
                          //out.println("Chain " + entry.getKey());
                          for (CorefChain.CorefMention m : entry.getValue().getMentionsInTextualOrder()) {
                            // We need to subtract one since the indices count from 1 but the Lists start from 0
                            List<CoreLabel> tokens = sentences.get(m.sentNum - 1).get(CoreAnnotations.TokensAnnotation.class);
                            // We subtract two for end: one for 0-based indexing, and one because we want the last token of the mention, not the one following it.
                            /*out.println("  " + m + ", i.e., 0-based character offsets [" + tokens.get(m.startIndex - 1).beginPosition() +
                                    ", " + tokens.get(m.endIndex - 2).endPosition() + ")");*/
                          }
                        }
                      }
                      //out.println();
                      out.println("The first sentence overall sentiment rating is " + sentence.get(SentimentCoreAnnotations.SentimentClass.class));
                    }
                }
                in.close();
                //showFiles(file.listFiles()); // Calls same method again.
            } else {
                System.out.println("File: " + file.getName() + file.toString());
            }
        }
    }catch(NullPointerException e){
        e.printStackTrace();
    }
    IOUtils.closeIgnoringExceptions(out);
    IOUtils.closeIgnoringExceptions(xmlOut);
  }

}
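
The sentiment annotator assigns a class per sentence only, so a common workaround is to annotate each whole review as a single Annotation (e.g. accumulate the lines of a file into one string with a StringBuilder instead of annotating line by line) and then aggregate the per-sentence classes yourself. Below is a minimal sketch of that idea, assuming the same sentiment pipeline as above; the class name ReviewSentiment, the helper overallSentiment, and the token-count weighting are illustrative choices, not CoreNLP API.

import java.util.List;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.CoreMap;

public class ReviewSentiment {

  /**
   * Collapses the per-sentence sentiment classes (0 = very negative ... 4 = very positive)
   * into one class for the whole review, weighting each sentence by its token count so
   * that longer sentences contribute more. Returns -1 for an empty review.
   */
  public static int overallSentiment(StanfordCoreNLP pipeline, String reviewText) {
    Annotation review = new Annotation(reviewText);
    pipeline.annotate(review);

    List<CoreMap> sentences = review.get(CoreAnnotations.SentencesAnnotation.class);
    if (sentences == null || sentences.isEmpty()) {
      return -1;
    }

    long weightedSum = 0;
    long totalTokens = 0;
    for (CoreMap sentence : sentences) {
      // The sentiment annotator stores a binarized tree per sentence;
      // getPredictedClass reads the class predicted at its root.
      Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
      int sentimentClass = RNNCoreAnnotations.getPredictedClass(tree);
      int length = sentence.get(CoreAnnotations.TokensAnnotation.class).size();
      weightedSum += (long) sentimentClass * length;
      totalTokens += length;
    }
    // Round the weighted mean back to the nearest discrete class
    return Math.round((float) weightedSum / totalTokens);
  }
}

A plain average or a majority vote over the sentence classes works the same way; weighting by token count is just one heuristic so that long sentences influence the overall rating more than short ones.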