Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/EmptyTag/155.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Java 删除停止字不能正确标记_Java_String - Fatal编程技术网

Java 删除停止字不能正确标记

Java 删除停止字不能正确标记,java,string,Java,String,我使用java程序删除word文件中的停止字。但我的停止字删除并没有删除特殊字符。我想从文件中删除所有停止字和其他不必要的字、特殊字符和数字 如何改进此代码 { String[] stopwords ={"a", "about", "above", "above", "across", "after", "afterwards", "again", "against", "all"

我使用 Java 程序删除 word 文件中的停用词(stop words),但我的程序在删除停用词时并没有去掉特殊字符。我想从文件中删除所有停用词,以及其他不必要的词、特殊字符和数字。

如何改进此代码?

             {                                         
    String[] stopwords ={"a", "about", "above", "above", "across", "after", "afterwards", "again", "against", "all", "almost", 
            "alone", "along", "already", "also","although","always","am","among", "amongst", "amoungst", "amount",  "an", "and", 
            "another", "any","anyhow","anyone","anything","anyway", "anywhere", "are", "around", "as",  "at", "back","be","became", 
            "because","become","becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", 
            "between", "beyond", "bill", "both", "bottom","but", "by", "call", "can", "cannot", "cant", "co", "con", "could", "couldnt",
            "cry", "de", "describe", "detail", "do", "done", "down", "due", "during", "each", "eg", "eight", "either", "eleven","else",
            "elsewhere", "empty", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few", 
            "fifteen", "fify", "fill", "find", "fire", "first", "five", "for", "former", "formerly", "forty", "found", "four", "from", 
            "front", "full", "further", "get", "give", "go", "had", "has", "hasnt",
            "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", 
            "him", "himself", "his", "how", "however", "hundred", "ie", "if", "in", "inc", "indeed", "interest", "into", 
            "is", "it", "its", "itself", "keep", "last", "latter", "latterly", "least", "less", "ltd", "made", "many", 
            "may", "me", "meanwhile", "might", "mill", "mine", "more", "moreover", "most", "mostly", "move", "much", "must", 
            "my", "myself", "name", "namely", "neither", "never", "nevertheless", "next", "nine", "no", "nobody", "none", 
            "noone", "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once", "one", "only", "onto", 
            "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own","part", "per", "perhaps",
            "please", "put", "rather", "re", "same", "see", "seem", "seemed", "seeming", "seems", "serious", "several", "she",
            "should", "show", "side", "since", "sincere", "six", "sixty", "so", "some", "somehow", "someone", "something", 
            "sometime", "sometimes", "somewhere", "still", "such", "system", "take", "ten", "than", "that", "the", "their", 
            "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein", "thereupon", 
            "these", "they", "thickv", "thin", "third", "this", "those", "though", "three", "through", "throughout", "thru", 
            "thus", "to", "together", "too", "top", "toward", "towards", "twelve", "twenty", "two", "un", "under", "until", 
            "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what", "whatever", "when", "whence", "whenever",
            "where", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", 
            "whither", "who", "whoever", "whole", "whom", "whose", "why", "will", "with", "within", "without", "would", "yet",
            "you", "your", "yours", "yourself", "yourselves","1","2","3","4","5","6","7","8","9","10","1.","2.","3.","4.","5.","6.","11",
            "7.","8.","9.","12","13","14","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z",
            "terms","CONDITIONS","conditions","values","interested.","care","sure",".","!","@","#","$","%","^","&","*","(",")","{","}","[","]",":",";",",","<",".",">","/","?","_","-","+","=",
            "a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z",
            "contact","grounds","buyers","tried","said,","plan","value","principle.","forces","sent:","is,","was","like",
            "discussion","tmus","diffrent.","layout","area.","thanks","thankyou","hello","bye","rise","fell","fall","psqft.","http://","km","miles"};

    try
    {
        Scanner fip1=new Scanner(new File("c:/removewords.txt"));
        FileOutputStream out=new FileOutputStream("c:/StopWords.txt");

      while(fip1.hasNext())
      {
          int flag=1;
          String s1=fip1.next();
          s1=s1.toLowerCase();
          for(int i=0;i<stopwords.length;i++){
              if(s1.equals(stopwords[i]))
                  {
                  flag=0;
                  }
          }
          if(flag!=0)
          {
              System.out.println(s1);


              PrintStream p=new PrintStream(out);
              p.println(s1);   
          }



      }
      JOptionPane.showMessageDialog(null,"STOP WORD REMOVAL IS DONE");        
      }
      catch(Exception e){
          System.err.println("cannot read file");
      }
}                                        
{
String[]stopwords={“a”,“about”,“over”,“over”,“over”,“over”,“after”,“after”,“reach”,“against”,“all”,“small”,
“单独”、“沿着”、“已经”、“也”、“虽然”、“始终”、“am”、“中间”、“中间”、“最大”、“金额”、“安”、“和”,
“另一个”、“任何”、“无论如何”、“任何人”、“任何事”、“无论如何”、“任何地方”、“都在”、“周围”、“作为”、“在”、“在”、“回来”、“在”、“成为”,
“因为”、“成为”、“成为”、“成为”、“曾经”、“之前”、“之前”、“之后”、“存在”、“下面”、“旁边”、“除了”,
“中间”、“超出”、“账单”、“两者”、“底部”、“但是”、“通过”、“呼叫”、“可以”、“不能”、“不能”、“合作”、“合作”、“可以”、“不能”,
“哭”、“德”、“描述”、“细节”、“做”、“完成”、“结束”、“到期”、“期间”、“每个”、“例如”、“八个”、“任一个”、“十一个”、“其他”,
“别处”、“空”、“足够”、“等”、“甚至”、“曾经”、“每一个”、“每个人”、“一切”、“到处”、“除了”、“很少”,
“十五”,“五”,“补”,“找”,“火”,“第一”,“五”,“为”,“以前”,“以前”,“四十”,“找到”,“四”,“从”,
“前”、“满”、“进”、“得”、“给”、“去”、“有”、“有”、“没有”,
“有”、“他”、“因此”、“她”、“这里”、“以后”、“在此”、“在此”、“在此”、“在此”、“在此”、“她的”、“她自己”,
“他”、“他”、“他的”、“如何”、“然而”、“百”、“ie”、“如果”、“在”、“公司”、“确实”、“利益”、“进入”,
“是”、“它”、“它”、“自身”、“保持”、“最后”、“后”、“最近”、“最少”、“较少”、“有限”、“制造”、“许多”,
“可能”、“我”、“同时”、“可能”、“磨坊”、“我的”、“更多”、“此外”、“大多数”、“大部分”、“移动”、“很多”、“必须”,
“我的”、“我自己”、“名字”、“即”、“既没有”、“从来没有”、“不过”、“下一个”、“九个”、“没有”、“没有人”、“没有”,
“没有”、“没有”、“没有”、“没有”、“现在”、“无处”、“的”、“关闭”、“经常”、“打开”、“一次”、“一次”、“唯一”、“打开”,
“或”、“其他”、“其他”、“其他”、“我们的”、“我们的”、“我们自己”、“退出”、“结束”、“拥有”、“部分”、“个人”、“可能”,
“请”、“放”、“相当”、“再”、“相同”、“看”、“似乎”、“似乎”、“似乎”、“严重”、“几个”、“她”,
“应该”、“秀”、“侧”、“自”、“诚”、“六”、“六十”、“所以”、“有些”、“不知何故”、“某人”、“某物”,
“有时”、“有时”、“某地”、“静止”、“这样”、“系统”、“取”、“十”、“比”、“那”、“那”、“他们的”,
“他们”、“他们自己”、“然后”、“从那里”、“那里”、“之后”、“因此”、“因此”、“其中”、“因此”,
“这些”、“他们”、“厚”、“薄”、“三”、“这个”、“那些”、“虽然”、“三”、“通过”、“贯穿”、“通过”,
“因此”、“到”、“一起”、“太”、“顶”、“朝”、“朝”、“十二”、“二十”、“二”、“联合国”、“下”、“直到”,
“向上”,“向上”,“我们”,“非常”,“通过”,“是”,“我们”,“好”,“是”,“什么”,“无论什么”,“何时”,“何地”,“何时”,
“where”、“where”、“where”、“where”、“where”、“where”、“where”、“where”、“which”、“while”,
“何人”、“何人”、“整体”、“何人”、“何人”、“何人”、“为什么”、“将”、“有”、“在”、“在”、“没有”、“会”、“还没有”,
“你”、“你的”、“你的”、“你自己”、“你自己”、“1”、“2”、“3”、“4”、“5”、“6”、“7”、“8”、“9”、“10”、“1”、“2”、“3”、“4”、“5”、“6”、“11”,
“7”,“8”,“9”,“12”,“13”,“14”,“A”,“B”,“C”,“D”,“E”,“F”,“G”,“H”,“I”,“J”,“K”,“L”,“M”,“N”,“O”,“P”,“Q”,“R”,“S”,“T”,“U”,“V”,“W”,“X”,“Y”,“Z”,
“条款”、“条件”、“条件”、“价值”、“感兴趣的”、“关心”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“肯定”、“,
“a”,“b”,“c”,“d”,“e”,“f”,“g”,“h”,“i”,“j”,“k”,“l”,“m”,“n”,“o”,“p”,“q”,“r”,“s”,“t”,“u”,“v”,“w”,“x”,“y”,“z”,
“联系”、“理由”、“买家”、“试过”、“说过”、“计划”、“价值”、“原则”、“力量”、“发送”、“是”、“是”、“是”、“像”,
“讨论”,“TMU”,“差异”,“布局”,“区域”,“谢谢”,“谢谢”,“你好”,“再见”,“上升”,“下降”,“下降”,“psqft”,“http://”,“公里”,“英里”};
尝试
{
Scanner fip1=新扫描仪(新文件(“c:/removewords.txt”);
FileOutputStream out=新的FileOutputStream(“c:/StopWords.txt”);
while(fip1.hasNext())
{
int标志=1;
字符串s1=fip1.next();
s1=s1.toLowerCase();

对于(int i=0;i也许您可以看看它:

// Input string (English, because the English default stop set is applied below)
String input = "please remove my stop words!";
// Tokenize the input string
TokenStream tokenStream = new ClassicTokenizer(Version.LUCENE_35, new StringReader(input));
// Wrap the tokenizer in a filter that drops stop words
tokenStream = new StopFilter(Version.LUCENE_35, tokenStream, EnglishAnalyzer.getDefaultStopSet());
// Collect the remaining tokens
Set<String> tokens = new HashSet<String>();
CharTermAttribute token = tokenStream.getAttribute(CharTermAttribute.class);
try {
    while (tokenStream.incrementToken()) {
        tokens.add(token.toString());
    }
} catch (IOException e) {
    // log
}
// From here on, tokens holds only the words that are not stop words

请精简这个字符串数组!用一个小数组也可以。

您或许应该使用 Set(集合)而不是字符串数组,这样比较的速度会更合理;另外,标志变量 `flag` 应使用 `boolean` 而不是 `int`。

如何将这些词存储在一个集合中?

请参阅此链接,了解从文件中删除停用词的方法。

这是用于删除停用词,还是用于存储停用词?

是将它们从输入字符串中删除。这正是您需要的,难道我理解错了吗?