Java: How to get the overall sentiment of a review using Stanford CoreNLP


I am using Stanford CoreNLP to run sentiment analysis over 25,000 movie reviews. So far I get a sentiment for every individual sentence in every review, but I would like to know how to get the overall sentiment of a whole review rather than of each sentence within it (one possible aggregation approach is sketched after the code below).

The code I am using is:

import java.io.*;
import java.util.*;

import edu.stanford.nlp.coref.CorefCoreAnnotations;

import edu.stanford.nlp.coref.data.CorefChain;
import edu.stanford.nlp.io.*;
import edu.stanford.nlp.ling.*;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.util.*;

/** This class demonstrates building and using a Stanford CoreNLP pipeline. */
public class SentimentMain {

  /** Usage: java -cp "*" SentimentMain [inputFile [outputTextFile [outputXmlFile]]] */
  public static void main(String[] args) throws IOException {
    // set up optional output files
    PrintWriter out;
    if (args.length > 1) {
      out = new PrintWriter(args[1]);
    } else {
      out = new PrintWriter(System.out);
    }
    PrintWriter xmlOut = null;
    if (args.length > 2) {
      xmlOut = new PrintWriter(args[2]);
    }

    // Create a CoreNLP pipeline. To build the default pipeline, you can just use:
    //   StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    // Here's a more complex setup example:
    //   Properties props = new Properties();
    //   props.put("annotators", "tokenize, ssplit, pos, lemma, ner, depparse");
    //   props.put("ner.model", "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz");
    //   props.put("ner.applyNumericClassifiers", "false");
    //   StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // Add in sentiment
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref, sentiment");

    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    File[] files = new File("C:/stanford-corenlp-full-2016-10-31/dataset").listFiles();

    String line = null;

    try{
        for (File file : files) {
            if (file.exists()) {
                BufferedReader in = new BufferedReader(new FileReader(file));
                while((line = in.readLine()) != null)
                {
                    Annotation document = new Annotation(line);

                    // run all the selected Annotators on this text
                    pipeline.annotate(document);

                    // this prints out the results of sentence analysis to file(s) in good formats
                    pipeline.prettyPrint(document, out);
                    if (xmlOut != null) {
                      pipeline.xmlPrint(document, xmlOut);
                    }

                    // An Annotation is a Map with Class keys for the linguistic analysis types.
                    // You can get and use the various analyses individually.
                    // For instance, this gets the parse tree of the first sentence in the text.
                    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
                    if (sentences != null && ! sentences.isEmpty()) {
                      CoreMap sentence = sentences.get(0);
                      /*out.println("The keys of the first sentence's CoreMap are:");
                      out.println(sentence.keySet());
                      out.println();
                      out.println("The first sentence is:");
                      out.println(sentence.toShorterString());
                      out.println();
                      out.println("The first sentence tokens are:");*/
                      /*for (CoreMap token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                        out.println(token.toShorterString());
                      }*/
                      Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
                      //out.println();
                      //out.println("The first sentence parse tree is:");
                      tree.pennPrint(out);
                      //out.println();
                      //out.println("The first sentence basic dependencies are:");
                      //out.println(sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class).toString(SemanticGraph.OutputFormat.LIST));
                      //out.println("The first sentence collapsed, CC-processed dependencies are:");
                      SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
                      //out.println(graph.toString(SemanticGraph.OutputFormat.LIST));

                      // Access coreference. In the coreference link graph,
                      // each chain stores a set of mentions that co-refer with each other,
                      // along with a method for getting the most representative mention.
                      // Both sentence and token offsets start at 1!
                      //out.println("Coreference information");
                      Map<Integer, CorefChain> corefChains =
                          document.get(CorefCoreAnnotations.CorefChainAnnotation.class);
                      // Guard with a null check instead of returning, which would abort the entire run
                      if (corefChains != null) {
                        for (Map.Entry<Integer, CorefChain> entry : corefChains.entrySet()) {
                          //out.println("Chain " + entry.getKey());
                          for (CorefChain.CorefMention m : entry.getValue().getMentionsInTextualOrder()) {
                            // We need to subtract one since the indices count from 1 but the Lists start from 0
                            List<CoreLabel> tokens = sentences.get(m.sentNum - 1).get(CoreAnnotations.TokensAnnotation.class);
                            // We subtract two for end: one for 0-based indexing, and one because we want the last token of the mention, not the one following it.
                            /*out.println("  " + m + ", i.e., 0-based character offsets [" + tokens.get(m.startIndex - 1).beginPosition() +
                                    ", " + tokens.get(m.endIndex - 2).endPosition() + ")");*/
                          }
                        }
                      }
                      //out.println();
                      out.println("The first sentence overall sentiment rating is " + sentence.get(SentimentCoreAnnotations.SentimentClass.class));
                    }
                }
                in.close();
                //showFiles(file.listFiles()); // Calls same method again.
            } else {
                System.out.println("File: " + file.getName() + file.toString());
            }
        }
    }catch(NullPointerException e){
        e.printStackTrace();
    }
    IOUtils.closeIgnoringExceptions(out);
    IOUtils.closeIgnoringExceptions(xmlOut);
  }

}
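
The sentiment annotator assigns a class per sentence only, so a common workaround is to annotate each whole review as a single Annotation (e.g. accumulate the lines of a file into one string with a StringBuilder instead of annotating line by line) and then aggregate the per-sentence classes yourself. Below is a minimal sketch of that idea, assuming the same sentiment pipeline as above; the class name ReviewSentiment, the helper overallSentiment, and the token-count weighting are illustrative choices, not CoreNLP API.

import java.util.List;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.CoreMap;

public class ReviewSentiment {

  /**
   * Collapses the per-sentence sentiment classes (0 = very negative ... 4 = very positive)
   * into one class for the whole review, weighting each sentence by its token count so
   * that longer sentences contribute more. Returns -1 for an empty review.
   */
  public static int overallSentiment(StanfordCoreNLP pipeline, String reviewText) {
    Annotation review = new Annotation(reviewText);
    pipeline.annotate(review);

    List<CoreMap> sentences = review.get(CoreAnnotations.SentencesAnnotation.class);
    if (sentences == null || sentences.isEmpty()) {
      return -1;
    }

    long weightedSum = 0;
    long totalTokens = 0;
    for (CoreMap sentence : sentences) {
      // The sentiment annotator stores a binarized tree per sentence;
      // getPredictedClass reads the class predicted at its root.
      Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
      int sentimentClass = RNNCoreAnnotations.getPredictedClass(tree);
      int length = sentence.get(CoreAnnotations.TokensAnnotation.class).size();
      weightedSum += (long) sentimentClass * length;
      totalTokens += length;
    }
    // Round the weighted mean back to the nearest discrete class
    return Math.round((float) weightedSum / totalTokens);
  }
}

A plain average or a majority vote over the sentence classes works the same way; weighting by token count is just one heuristic so that long sentences influence the overall rating more than short ones.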