Java:统计 2 个文件中的词频

Java 2个文件中的字频计数,java,count,Java,Count,我已经编写了Java代码来计算事件总数。它使用2个.txt文件作为输入,并提供单词和频率作为输出 我还想打印,哪个文件包含给定单词多少次。你知道怎么做吗 public class JavaApplication2 { public static void main(String[] args) throws IOException { Path filePath1 = Paths.get("test.txt"); Path filePa

我已经编写了一段 Java 代码来统计单词出现的总次数。它以 2 个 .txt 文件作为输入,并输出每个单词及其出现频率。

我还想打印出给定单词在每个文件中各出现了多少次。你知道该怎么做吗?

/**
 * Prints how often each word occurs in the first line of two text files.
 *
 * <p>Only the FIRST line of {@code test.txt} and of {@code test2.txt} is read.
 * The two lines are joined, echoed to standard output, split into words on
 * sentence punctuation and whitespace, and then one
 * {@code [word] frequency : n} line is printed per distinct word, in order of
 * first appearance.
 */
public class JavaApplication2
{
    /** Regex matching a single word separator: {@code ! . ? : ;} or any whitespace character. */
    private static final String WORD_DELIMITERS = "[!.?:;\\s]";

    /**
     * Reads the two input files and prints the combined word frequencies.
     *
     * @param args unused
     * @throws IOException if either input file cannot be opened
     */
    public static void main(String[] args) throws IOException
    {
        Path filePath1 = Paths.get("test.txt");
        Path filePath2 = Paths.get("test2.txt");

        // Join with a space: plain concatenation would fuse the last word of
        // the first file with the first word of the second one.
        String text = readFirstLine(filePath1) + " " + readFirstLine(filePath2);
        System.out.println(text);

        String[] keys = text.split(WORD_DELIMITERS);
        for (String key : getUniqueKeys(keys))
        {
            System.out.println("["+key+"] frequency : "+countOccurrences(keys, key));
        }
    }

    /**
     * Returns the first line of the given file, or an empty string when the
     * file has no content. The scanner is always closed.
     *
     * @param path file to read
     * @return the first line without its line terminator, never {@code null}
     * @throws IOException if the file cannot be opened
     */
    private static String readFirstLine(Path path) throws IOException
    {
        try (Scanner reader = new Scanner(path))
        {
            return reader.hasNextLine() ? reader.nextLine() : "";
        }
    }

    /**
     * Counts how many elements of {@code keys} are equal to {@code key}.
     *
     * @param keys tokens to scan
     * @param key  word to look for
     * @return number of exact (case-sensitive) matches
     */
    private static int countOccurrences(String[] keys, String key)
    {
        int count = 0;
        for (String candidate : keys)
        {
            if (key.equals(candidate))
            {
                count++;
            }
        }
        return count;
    }

    /**
     * Returns the distinct, non-empty tokens of {@code keys} in order of first
     * appearance.
     *
     * <p>Empty tokens (produced by {@code split} when two delimiters are
     * adjacent) are not words and are dropped. An empty input yields an empty
     * result; the returned array contains no {@code null} padding.
     *
     * @param keys tokens to de-duplicate, may be empty
     * @return the distinct non-empty tokens, never {@code null}
     */
    private static String[] getUniqueKeys(String[] keys)
    {
        // Fully qualified because this snippet does not show its import list.
        java.util.LinkedHashSet<String> uniqueKeys = new java.util.LinkedHashSet<String>();
        for (String key : keys)
        {
            if (!key.isEmpty())
            {
                uniqueKeys.add(key);
            }
        }
        return uniqueKeys.toArray(new String[0]);
    }
}
公共类JavaApplication2
{
公共静态void main(字符串[]args)引发IOException
{     
Path filePath1=Path.get(“test.txt”);
Path filePath2=Path.get(“test2.txt”);
Scanner readerL=新扫描仪(filePath1);
Scanner readerR=新扫描仪(filePath2);
字符串line1=readerL.nextLine();
字符串line2=readerR.nextLine();
字符串文本=新字符串();
text=text.concat(第1行)。concat(第2行);
String[]keys=text.split(“[!.?:;\\s]”);
字符串[]唯一键;
整数计数=0;
System.out.println(文本);
uniquekey=getuniquekey(键);
for(字符串键:唯一键)
{
if(null==键)
{
打破
}           
用于(字符串s:键)
{
如果(键等于)
{
计数++;
}               
}
系统输出打印项次(“[”+键+“]频率:“+计数);
计数=0;
}
}
私有静态字符串[]getUniqueKeys(字符串[]键)
{
String[]uniqueKeys=新字符串[keys.length];
唯一键[0]=键[0];
int uniqueKeyIndex=1;
布尔值keyAlreadyExists=false;

对于(inti=1;i
首先,不要用数组来存放唯一键,而应使用 HashMap,这样效率更高。

最好的选择是分别对每行/文件运行处理,并分别存储这些计数。然后合并这两个计数以获得总体频率

更多详细信息:

// Split the text into tokens on sentence punctuation and whitespace.
String[] keys = text.split("[!.?:;\\s]");
// Maps each distinct token to the number of times it occurs in the text.
HashMap<String,Integer> uniqueKeys = new HashMap<>();

for(String key : keys){
    if(uniqueKeys.containsKey(key)){
        // Token already seen: increment its count. The lookup must use the
        // loop variable `key`; there is no variable named `map` in scope.
        uniqueKeys.put(key, uniqueKeys.get(key) + 1);
    }else{
        // First occurrence of this token.
        uniqueKeys.put(key, 1);
    }
}

// You now have the count of all unique keys in a given text
// To print them to console

// Print every token with its count. HashMap exposes its entries through
// entrySet(); it has no getEntrySet() method.
for(Entry<String, Integer> keyCount : uniqueKeys.entrySet()){
    System.out.println(keyCount.getKey() + ": " + keyCount.getValue());
}

// To merge, if you're using Java 8

// Fold the counts of uniqueKeys1 into uniqueKeys2: a key missing from
// uniqueKeys2 is inserted, an existing key has the two counts added.
// Integer::sum is the adding function (Integer has no `add` method).
for(Entry<String, Integer> keyEntry : uniqueKeys1.entrySet()){
    uniqueKeys2.merge(keyEntry.getKey(), keyEntry.getValue(), Integer::sum);
}

// To merge, otherwise

// Fold the counts of uniqueKeys1 into uniqueKeys2 without Map.merge.
for(Entry<String, Integer> keyEntry : uniqueKeys1.entrySet()){
    // containsKey needs the key being merged as its argument.
    if(uniqueKeys2.containsKey(keyEntry.getKey())){
        // Key present in both maps: add the two counts.
        uniqueKeys2.put(keyEntry.getKey(),
            uniqueKeys2.get(keyEntry.getKey()) + keyEntry.getValue());
    }else{
        // Key only in uniqueKeys1: copy its count over.
        uniqueKeys2.put(keyEntry.getKey(), keyEntry.getValue());
    }
}
String[]keys=text.split(“[!.?:;\\s]”);
HashMap uniqueKeys=新HashMap();
用于(字符串键:键){
if(唯一键。容器键(键)){
//如果您的关键点已经在地图中,请增加它的计数
uniqueKeys.put(key,uniqueKeys.get(map)+1);
}否则{
//如果它不在其中,请添加它
唯一键。输入(键,1);
}
}
//您现在拥有给定文本中所有唯一键的计数
//将它们打印到控制台
for(Entry keyCount:uniqueKeys.getEntrySet()){
System.out.println(keyCount.getKey()+”:“+keyCount.getValue());
}
//要合并,如果您使用的是Java8
for(Entry-keyEntry:uniqueKeys1.getEntrySet()){
uniqueKeys2.merge(keyEntry.getKey(),keyEntry.getValue(),Integer::add);
}
//合并,否则
for(Entry-keyEntry:uniqueKeys1.getEntrySet()){
if(uniqueKeys2.containsKey()){
uniqueKeys2.put(keyEntry.getKey(),
uniqueKeys2.get(keyEntry.getKey())+keyEntry.getValue());
}否则{
uniqueKeys2.put(keyEntry.getKey(),keyEntry.getValue());
}
}

更新:统计单词出现次数的代码(感谢 @George)

此示例适用于一个文件,您可以将其用于多个文件:

/**
 * Counts how often each word from a fixed list of tracked words occurs.
 *
 * <p>Only words supplied to the constructor are tracked; any other word passed
 * to {@link #updateMap(String[])} or {@link #updateWordCount(String)} is
 * ignored.
 */
public class MyTest {

    /** Tracked word mapped to the number of occurrences counted so far. */
    Map<String,Integer> mapTable;

    /**
     * Creates a counter that tracks exactly the given words, each starting at 0.
     *
     * @param wordList words to track
     */
    public MyTest(List<String> wordList){
        //initialize map
        makeMap(wordList);
    }

    /**
     * Replaces the count table with a fresh one holding every word of
     * {@code wordList} at count 0.
     *
     * @param wordList words to track
     */
    public void makeMap(List<String> wordList){
        mapTable = new HashMap<>();

        for (String word : wordList) {
            mapTable.put(word, 0);
        }
    }

    /**
     * Registers one occurrence for each element of {@code _words} that is a
     * tracked word.
     *
     * @param _words words to register
     */
    public void updateMap(String [] _words){
        for (String word : _words) {
            updateWordCount(word);
        }
    }

    /**
     * Increments the count of {@code _word} if it is tracked; otherwise does
     * nothing.
     *
     * @param _word word to register
     */
    public void updateWordCount(String _word){
        Integer current = mapTable.get(_word);
        // Untracked words have no entry and are deliberately skipped.
        if (current != null) {
            mapTable.put(_word, current + 1);
        }
    }

    /** Prints every tracked word with its current count to standard output. */
    public void DisplayCounts(){
        for (Map.Entry<String,Integer> entry : mapTable.entrySet()) {
            System.out.println("Word : "+entry.getKey()+"\t Occurrence(s) :"+entry.getValue());
        }
    }

    /**
     * Reads the hard-coded input file line by line, echoes each line to
     * standard output, splits it on single spaces and registers the resulting
     * words. I/O failures are reported on standard output instead of being
     * propagated.
     */
    public void getWordCount(){
        String filePath = "C:\\Users\\Jyo\\Desktop\\help.txt";

        // FileReader reads text files in the default encoding.
        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader bufferedReader = new BufferedReader(new FileReader(filePath))) {
            String line;
            while((line = bufferedReader.readLine()) != null) {
                System.out.println(line);

                updateMap(line.split(" "));
            }
        } catch (IOException e) {
            System.out.println("Error :"+e.getMessage());
        }
    }

    /**
     * Counts the words "data" and "select" in the hard-coded input file and
     * prints the result.
     *
     * @param args the command line arguments (unused)
     */
    public static void main(String[] args) {
        List<String> wordList = new ArrayList<>();
        wordList.add("data");
        wordList.add("select");
        MyTest mt = new MyTest(wordList);
        mt.getWordCount();
        mt.DisplayCounts();
    }

}
公共类MyTest{
地图映射表;
公共MyTest(列表字列表){
//初始化映射
makeMap(单词列表);
}
公共void makeMap(列表单词列表){
mapTable=新的HashMap();
对于(int i=0;i
import java.io.*; import java.util.*;

公共类文件1{
公共静态void main(字符串[]args)引发异常{
HashMap words_fre=新HashMap();
HashSet words=新的HashSet();
试试{
File folder=新文件(“/home/jsrathore/Dropbox/sement 6th/IR_Lab/Lab_01/one”);
File[]listOfFiles=folder.listFiles();
BufferedReader BufferedReader=null;
FileInputStream inputfilename=null;
BufferedWriter out=新的BufferedWriter(新的OutputStreamWriter(新的FileOutputStream(“outfilename.txt”,false),“UTF-8”);
对于(文件:listOfFiles){
inputfilename=新文件InputStream(文件);
/*System.out.println(文件);*/
浅黄色
/**
 * Counts, over every file of a hard-coded folder, in how many lines each word
 * occurs, and writes the sorted table to {@code outfilename.txt}.
 *
 * <p>A word is counted at most once per line: the distinct tokens of a line
 * are collected in a set and each one increments its counter by one.
 */
public class file1{
 /**
  * Runs the count over the hard-coded input folder.
  *
  * @param args unused
  * @throws Exception declared by the original signature; I/O failures are
  *         caught and reported inside the method
  */
 public static void main(String[] args) throws Exception{
    HashMap<String,Integer> words_fre = new HashMap<String,Integer>();
    HashSet<String> words = new HashSet<String>();

    File folder = new File("/home/jsrathore/Dropbox/Semester 6th/IR_Lab/lab_01/one");
    File[] listOfFiles = folder.listFiles();
    if(listOfFiles == null){
        // listFiles() returns null when the path is not a directory or cannot
        // be read; bail out before touching the output file.
        System.out.println("Not a readable directory: " + folder);
        return;
    }

    // try-with-resources closes the writer and every per-file reader, also
    // when an exception interrupts the processing.
    try(BufferedWriter out = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream("outfilename.txt",false), "UTF-8"))){

        for(File file : listOfFiles){
            try(BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file), "UTF-8"))){

                String s;
                while((s = bufferedReader.readLine()) != null){
                    // Replace every <...> span with a space.
                    s = s.replaceAll("\\<.*?>"," ");
                    // Turn the punctuation marks into separators; replace()
                    // is a no-op when the mark is absent.
                    s = s.replace("॥"," ");
                    s = s.replace(":"," ");
                    s = s.replace("।"," ");
                    s = s.replace(","," ");
                    s = s.replace("!"," ");
                    s = s.replace("?"," ");

                    // Collect the distinct tokens of this line.
                    StringTokenizer st = new StringTokenizer(s," ");
                    while(st.hasMoreTokens()){
                        words.add(st.nextToken());
                    }
                    // Each distinct token of the line counts once.
                    for(String str : words){
                        if(words_fre.containsKey(str)){
                            int a = words_fre.get(str);
                            words_fre.put(str,a+1);
                        }else{
                            words_fre.put(str,1);
                        }
                    }
                    words.clear();
                }
            }

            // NOTE(review): the table is cumulative across files yet is written
            // once per processed file, so with several input files earlier
            // entries are repeated in the output. Kept as in the original;
            // confirm whether a single dump after the loop was intended.
            String[] key = words_fre.keySet().toArray(new String[0]);
            Arrays.sort(key);
            for(int i = 0; i < key.length; i++){
                out.write(key[i]+" : "+words_fre.get(key[i]) +"\n");
            }
        }

    }catch(FileNotFoundException ex){
        System.out.println("Error in reading line");
    }catch(IOException ex){
        ex.printStackTrace();
    }
 }
}
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

/**
 * Task that counts the whitespace-separated words of one {@link Scanner} into
 * a counter map supplied by the caller.
 *
 * <p>Words are lower-cased before counting. Several tasks may be given the
 * same map; {@link #main(String[])} passes a {@link ConcurrentHashMap} for
 * that purpose.
 */
public class WordCounter implements Runnable {
    private final Scanner scanner;
    private final Map<String, AtomicLong> sharedCounter;

    /**
     * @param scanner       source of words; may be {@code null}, in which case
     *                      {@link #run()} does nothing. The scanner is closed
     *                      once {@link #run()} has consumed it.
     * @param sharedCounter map receiving the counts, keyed by lower-cased word
     */
    public WordCounter(Scanner scanner, Map<String, AtomicLong> sharedCounter) {
        this.scanner = scanner;
        this.sharedCounter = sharedCounter;
    }

    /** Reads every remaining word from the scanner, counts it, then closes the scanner. */
    @Override
    public void run() {
        if (scanner == null) {
            return;
        }

        // The task reads the scanner to its end, so it also releases it.
        try (Scanner in = scanner) {
            while (in.hasNext()) {
                // Locale.ROOT keeps the casing independent of the platform
                // locale (e.g. Turkish would map "I" to a dotless "ı").
                String word = in.next().toLowerCase(Locale.ROOT);
                sharedCounter.computeIfAbsent(word, key -> new AtomicLong()).incrementAndGet();
            }
        }
    }

    /**
     * Counts the words of {@code test1.txt} and {@code test2.txt} in parallel
     * and prints the combined counter map.
     *
     * @param args unused
     * @throws IOException if one of the input files cannot be opened
     */
    public static void main(String[] args) throws IOException {
        // Number of parallel thread to run
        int THREAD_COUNT = 10;

        List<Path> paths = new ArrayList<>();
        // Add path
        paths.add(Paths.get("test1.txt"));
        paths.add(Paths.get("test2.txt"));

        // Shared word counter
        Map<String, AtomicLong> sharedCounter = new ConcurrentHashMap<>();

        ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT);
        try {
            for (Path path : paths) {
                executor.execute(new WordCounter(new Scanner(path), sharedCounter));
            }
        } finally {
            // Also reached when opening a file fails, so the pool threads
            // never keep the JVM alive.
            executor.shutdown();
        }

        // Block until all tasks are finished instead of spinning on isTerminated().
        try {
            while (!executor.awaitTermination(1, TimeUnit.SECONDS)) {
                // still running; keep waiting
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            executor.shutdownNow();
            return;
        }
        System.out.println(sharedCounter);
    }
}