Java 使用Stanford CoreNLP情绪分析时输出不正确
当我输入句子时: “能回来真是太激动了!我们在这里重新联系并认识新朋友 ghc16的创新者” 然后返回的情绪是负面的。我不明白为什么会这样。这句话明明是正面的,却仍然返回负面结果。 Java 使用Stanford CoreNLP情绪分析时输出不正确,java,stanford-nlp,sentiment-analysis,Java,Stanford Nlp,Sentiment Analysis,当我输入句子时: “能回来真是太激动了!我们在这里重新联系并认识新朋友 ghc16的创新者” 然后返回的情绪是负面的。我不明白为什么会这样。这句话明明是正面的,却仍然返回负面结果。 class SentimentAnalyzer { public TweetWithSentiment findSentiment(String line) { if(line == null || line.isEmpty()) { throw new Illega
/**
 * Classifies the sentiment of a line of text using a Stanford CoreNLP pipeline.
 *
 * <p>The pipeline is built lazily on first use and then reused for every
 * subsequent call on this instance, instead of being rebuilt per call.
 * The lazy initialisation is not synchronised; callers sharing one instance
 * across threads should confirm that this is acceptable for their use.
 */
class SentimentAnalyzer {

    /** Lowest predicted class that {@link #findSentiment(String)} accepts. */
    private static final int MIN_SENTIMENT_CLASS = 0;

    /** Highest predicted class that {@link #findSentiment(String)} accepts. */
    private static final int MAX_SENTIMENT_CLASS = 4;

    /** Annotators requested from CoreNLP, in pipeline order. */
    private static final String ANNOTATORS =
            "tokenize, ssplit, pos, lemma, ner, parse, sentiment";

    /** Cached pipeline; {@code null} until the first line is processed. */
    private StanfordCoreNLP pipeline;

    /**
     * Determines the sentiment of the given line.
     *
     * @param line the text to analyse; must be neither {@code null} nor empty
     * @return the line paired with its sentiment label, or {@code null} when the
     *         predicted class falls outside
     *         [{@value #MIN_SENTIMENT_CLASS}, {@value #MAX_SENTIMENT_CLASS}]
     *         (for example when the annotation contains no sentences)
     * @throws IllegalArgumentException if {@code line} is {@code null} or empty
     */
    public TweetWithSentiment findSentiment(String line) {
        if (line == null || line.isEmpty()) {
            throw new IllegalArgumentException("The line must not be null or empty.");
        }
        Annotation annotation = processLine(line);
        int mainSentiment = findMainSentiment(annotation);
        if (mainSentiment < MIN_SENTIMENT_CLASS || mainSentiment > MAX_SENTIMENT_CLASS) {
            // Kept as a null return so existing callers that check for null keep working.
            return null;
        }
        return new TweetWithSentiment(line, toCss(mainSentiment));
    }

    /**
     * Maps a predicted sentiment class to its textual label.
     *
     * @param sentiment the predicted class
     * @return the label for classes 0 to 4, or {@code "default"} for any other value
     */
    private String toCss(int sentiment) {
        switch (sentiment) {
            case 0:
                return "very negative";
            case 1:
                return "negative";
            case 2:
                return "neutral";
            case 3:
                return "positive";
            case 4:
                return "very positive";
            default:
                return "default";
        }
    }

    /**
     * Returns the predicted class of the longest sentence in the annotation.
     *
     * <p>Only one sentence decides the result: the one whose string form is
     * longest (the first such sentence on ties). Shorter sentences do not
     * influence the returned value.
     *
     * @param annotation the annotated text
     * @return the predicted class of the longest sentence, or
     *         {@link Integer#MIN_VALUE} when there are no sentences
     */
    private int findMainSentiment(Annotation annotation) {
        int mainSentiment = Integer.MIN_VALUE;
        int longest = Integer.MIN_VALUE;
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            printTokenDetails(sentence);
            int sentenceLength = String.valueOf(sentence).length();
            if (sentenceLength > longest) {
                Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
                mainSentiment = RNNCoreAnnotations.getPredictedClass(tree);
                longest = sentenceLength;
            }
        }
        return mainSentiment;
    }

    /** Prints the text, POS tag, named-entity tag and lemma of every token in the sentence. */
    private void printTokenDetails(CoreMap sentence) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            String word = token.get(CoreAnnotations.TextAnnotation.class);
            String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
            String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
            String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
            System.out.println("word: " + word);
            System.out.println("pos: " + pos);
            System.out.println("ne: " + ne);
            System.out.println("Lemmas: " + lemma);
        }
    }

    /**
     * Runs the pipeline over the given line, creating the pipeline on first use.
     *
     * @param line the text to annotate
     * @return the annotation produced by the pipeline
     */
    private Annotation processLine(String line) {
        if (pipeline == null) {
            // Build once and reuse: the original constructed a new pipeline for every line.
            pipeline = createPieline();
        }
        return pipeline.process(line);
    }

    /** Creates a new pipeline configured by {@link #createPipelineProperties()}. */
    private StanfordCoreNLP createPieline() {
        return new StanfordCoreNLP(createPipelineProperties());
    }

    /** Builds the properties that select the annotators the pipeline runs. */
    private Properties createPipelineProperties() {
        Properties props = new Properties();
        props.setProperty("annotators", ANNOTATORS);
        return props;
    }
}
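class SentimentAnalyzer {
public TweetWithSentiment findSentiment(String line) {
if(line == null || line.isEmpty()) {
throw new IllegalArgumentException("The line must not be null or empty.");
}
Annotation annotation = processLine(line);
int mainSentiment = findMainSentiment(annotation);
if(mainSentiment < 0 || mainSentiment > 4) { //You should avoid magic numbers like 2 or 4 try to create a constant that will provide a description why 2
return null; //You should avoid null returns
}
TweetWithSentiment tweetWithSentiment = new TweetWithSentiment(line, toCss(mainSentiment));
return tweetWithSentiment;
}
private String toCss(int sentiment) {
switch (sentiment) {
case 0:
return "very negative";
case 1:
return "negative";
case 2:
return "neutral";
case 3:
return "positive";
case 4:
return "very positive";
default:
return "default";
}
}
private int findMainSentiment(Annotation annotation) {
int mainSentiment = Integer.MIN_VALUE;
int longest = Integer.MIN_VALUE;
for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
String word = token.get(CoreAnnotations.TextAnnotation.class);
String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
System.out.println("word: " + word);
System.out.println("pos: " + pos);
System.out.println("ne: " + ne);
System.out.println("Lemmas: " + lemma);
}
int sentenceLength = String.valueOf(sentence).length();
if(sentenceLength > longest) {
Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
mainSentiment = RNNCoreAnnotations.getPredictedClass(tree);
longest = sentenceLength ;
}
}
return mainSentiment;
}
private Annotation processLine(String line) {
StanfordCoreNLP pipeline = createPieline();
return pipeline.process(line);
}
private StanfordCoreNLP createPieline() {
Properties props = createPipelineProperties();
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
return pipeline;
}
private Properties createPipelineProperties() {
Properties props = new Properties();
props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, sentiment");
return props;
}
}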
这是该技术存在局限性的又一个例子,主要体现在以下几个具体片段上:
“很高兴能回来” -> 正面
“我们在这里重新联系” -> 中立
“在 ghc16 会见新的创新者” -> 中立
建议:
如果这个工具不适合你的需求,可以考虑换用其他工具。
这是否意味着“&”被解释为否定?其他符号是否也有类似的问题?——并不是被解释为否定,而是这些符号的存在会造成混淆,软件可能无法理解它们的含义。这种混淆使 NLP 倾向于给出负面结果(没有你,这会使它更加混乱)。要点是:如果可以,请替换掉特殊字符。:)