Java 使用producer读取大量文件会导致CPU使用率达到100%

Java 使用producer读取大量文件会导致CPU使用率达到100%,java,multithreading,file,cpu-usage,producer-consumer,Java,Multithreading,File,Cpu Usage,Producer Consumer,我编写了一个简单的消费者-生产者模式来帮助我完成以下任务: 从包含约500000个TSV(制表符分隔)文件的目录中读取文件 将每个文件操纵到数据结构中,并将其放入阻塞队列中 使用使用者和查询数据库使用队列 比较两个哈希映射,如果有差异,将差异打印到文件中 当我运行这个程序时,即使有5个线程,我的CPU消耗也会飙升到100%。这可能是因为我使用一个制作人来读取文件吗 文件示例(制表符分隔) 制作人 public class Producer implements Runnable{ private

我编写了一个简单的消费者-生产者模式来帮助我完成以下任务:

  • 从包含约500000个TSV(制表符分隔)文件的目录中读取文件
  • 将每个文件操纵到数据结构中,并将其放入阻塞队列中
  • 使用使用者和查询数据库使用队列
  • 比较两个哈希映射,如果有差异,将差异打印到文件中
  • 当我运行这个程序时,即使有5个线程,我的CPU消耗也会飙升到100%。这可能是因为我使用一个制作人来读取文件吗

    文件示例(制表符分隔)

    制作人

    public class Producer implements Runnable{
    private BlockingQueue<Map<String, Map<String, String>>> m_Queue;
    private String m_Directory;
    
    public Producer(BlockingQueue<Map<String, Map<String, String>>> i_Queue, String i_Directory)
    {
        m_Queue = i_Queue;
        m_Directory = i_Directory;
    }
    
    @Override
    public void run()
    {
        if (Files.exists(Paths.get(m_Directory)))
        {
            File[] files = new File(m_Directory).listFiles();
    
            if (files != null)
            {
                for (File file : files)
                {
                    Map<String, String> map = new HashMap<>();
                    try (BufferedReader reader = new BufferedReader(new FileReader(file)))
                    {
                        String line, lastcolumn3 = "", column1 = "", column2 = "", column3 = "";
                        while ((line = reader.readLine()) != null)
                        {
                            //Skip column header
                            if (!Character.isLetter(line.charAt(0)))
                            {
                                String[] splitLine = line.split("\t");
    
                                column1 = splitLine[0].replace("\"", "");
                                column2 = splitLine[1].replace("\"", "");
                                column3 = splitLine[2].replace("\"", "");
    
                                if (!lastcolumn3.equals(column3))
                                {
                                    map.put(column3, column1);
                                    lastcolumn3 = column3;
                                }
                            }
                        }
    
                        map.put(column3, column1);
    
                        //Column 1 is always the same per file, it'll be the key. Column2 and Column3 are stored as the value (as a key-value pair)
                        Map<String, Map<String, String>> mapPerFile = new HashMap<>();
                        mapPerFile.put(column2, map);
    
                        m_Queue.put(mapPerFile);
                    }
                    catch (IOException | InterruptedException e)
                    {
                        System.out.println(file);
                        e.printStackTrace();
                    }
                }
            }
        }
    }}
    
    public class Consumer implements Runnable{
    private HashMap<String, String> m_DBResults;
    private BlockingQueue<Map<String, Map<String, String>>> m_Queue;
    private Map<String, Map<String, String>> m_DBResultsPerFile;
    private String m_Column1;
    private int m_ThreadID;
    
    public Consumer(BlockingQueue<Map<String, Map<String, String>>> i_Queue, int i_ThreadID)
    {
        m_Queue = i_Queue;
        m_ThreadID = i_ThreadID;
    }
    
    @Override
    public void run()
    {
        try
        {
            while ((m_DBResultsPerFile = m_Queue.poll()) != null)
            {
                //Column1 is always the same, only need the first entry.
                m_Column1 = m_DBResultsPerFile.keySet().toArray()[0].toString();
    
                //Queries DB and puts returned data into m_DBResults
                queryDB(m_Column1);
    
                //Write the difference, if any, per thread into a file.
                writeDifference();
            }
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }
    
    private void writeDifference()
    {
        MapDifference<String, String> difference = Maps.difference(m_DBResultsPerFile.get(m_Column1), m_DBResults);
    
        if (difference.entriesOnlyOnLeft().size() > 0 || difference.entriesOnlyOnRight().size() > 0)
        {
            try (BufferedWriter writer = new BufferedWriter(new FileWriter(String.format("thread_%d.tsv", m_ThreadID), true)))
            {
                if (difference.entriesOnlyOnLeft().size() > 0)
                {
                    writer.write(String.format("%s\t%s\t", "Missing", m_Column1));
                    for (Map.Entry<String, String> entry : difference.entriesOnlyOnLeft().entrySet())
                    {
                        writer.write(String.format("[%s,%s]; ", entry.getKey(), entry.getValue()));
                    }
    
                    writer.write("\n");
                }
                if (difference.entriesOnlyOnRight().size() > 0)
                {
                    writer.write(String.format("%s\t%s\t", "Extra", m_Column1));
                    for (Map.Entry<String, String> entry : difference.entriesOnlyOnRight().entrySet())
                    {
                        writer.write(String.format("[%s,%s]; ", entry.getKey(), entry.getValue()));
                    }
    
                    writer.write("\n");
                }
            }
            catch (IOException e)
            {
                e.printStackTrace();
            }
        }
    }}
    
    公共类生成器实现可运行{
    私有阻塞队列;
    私有字符串m_目录;
    公共生产者(阻塞队列i_队列、字符串i_目录)
    {
    m_队列=i_队列;
    m_目录=i_目录;
    }
    @凌驾
    公开募捐
    {
    if(Files.exists(path.get(m_目录)))
    {
    File[]files=新文件(m_目录).listFiles();
    如果(文件!=null)
    {
    用于(文件:文件)
    {
    Map Map=newhashmap();
    try(BufferedReader=new BufferedReader(new FileReader(file)))
    {
    字符串行,lastcolumn3=“”,column1=“”,column2=“”,column3=“”;
    而((line=reader.readLine())!=null)
    {
    //跳过列标题
    if(!Character.isleter(line.charAt(0)))
    {
    String[]splitLine=line.split(“\t”);
    column1=分割线[0]。替换(“\”,”);
    column2=分割线[1]。替换(“\”,”);
    column3=分割线[2]。替换(“\”,”);
    如果(!lastcolumn3.等于(column3))
    {
    map.put(第3列,第1列);
    lastcolumn3=column3;
    }
    }
    }
    map.put(第3列,第1列);
    //每个文件的列1总是相同的,它将是键。列2和列3存储为值(作为键值对)
    Map mapPerFile=新建HashMap();
    mapPerFile.put(第2列,map);
    m_Queue.put(mapPerFile);
    }
    捕获(IOException | InterruptedException e)
    {
    System.out.println(文件);
    e、 printStackTrace();
    }
    }
    }
    }
    }}
    
    消费者

    public class Producer implements Runnable{
    private BlockingQueue<Map<String, Map<String, String>>> m_Queue;
    private String m_Directory;
    
    public Producer(BlockingQueue<Map<String, Map<String, String>>> i_Queue, String i_Directory)
    {
        m_Queue = i_Queue;
        m_Directory = i_Directory;
    }
    
    @Override
    public void run()
    {
        if (Files.exists(Paths.get(m_Directory)))
        {
            File[] files = new File(m_Directory).listFiles();
    
            if (files != null)
            {
                for (File file : files)
                {
                    Map<String, String> map = new HashMap<>();
                    try (BufferedReader reader = new BufferedReader(new FileReader(file)))
                    {
                        String line, lastcolumn3 = "", column1 = "", column2 = "", column3 = "";
                        while ((line = reader.readLine()) != null)
                        {
                            //Skip column header
                            if (!Character.isLetter(line.charAt(0)))
                            {
                                String[] splitLine = line.split("\t");
    
                                column1 = splitLine[0].replace("\"", "");
                                column2 = splitLine[1].replace("\"", "");
                                column3 = splitLine[2].replace("\"", "");
    
                                if (!lastcolumn3.equals(column3))
                                {
                                    map.put(column3, column1);
                                    lastcolumn3 = column3;
                                }
                            }
                        }
    
                        map.put(column3, column1);
    
                        //Column 1 is always the same per file, it'll be the key. Column2 and Column3 are stored as the value (as a key-value pair)
                        Map<String, Map<String, String>> mapPerFile = new HashMap<>();
                        mapPerFile.put(column2, map);
    
                        m_Queue.put(mapPerFile);
                    }
                    catch (IOException | InterruptedException e)
                    {
                        System.out.println(file);
                        e.printStackTrace();
                    }
                }
            }
        }
    }}
    
    public class Consumer implements Runnable{
    private HashMap<String, String> m_DBResults;
    private BlockingQueue<Map<String, Map<String, String>>> m_Queue;
    private Map<String, Map<String, String>> m_DBResultsPerFile;
    private String m_Column1;
    private int m_ThreadID;
    
    public Consumer(BlockingQueue<Map<String, Map<String, String>>> i_Queue, int i_ThreadID)
    {
        m_Queue = i_Queue;
        m_ThreadID = i_ThreadID;
    }
    
    @Override
    public void run()
    {
        try
        {
            while ((m_DBResultsPerFile = m_Queue.poll()) != null)
            {
                //Column1 is always the same, only need the first entry.
                m_Column1 = m_DBResultsPerFile.keySet().toArray()[0].toString();
    
                //Queries DB and puts returned data into m_DBResults
                queryDB(m_Column1);
    
                //Write the difference, if any, per thread into a file.
                writeDifference();
            }
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }
    
    private void writeDifference()
    {
        MapDifference<String, String> difference = Maps.difference(m_DBResultsPerFile.get(m_Column1), m_DBResults);
    
        if (difference.entriesOnlyOnLeft().size() > 0 || difference.entriesOnlyOnRight().size() > 0)
        {
            try (BufferedWriter writer = new BufferedWriter(new FileWriter(String.format("thread_%d.tsv", m_ThreadID), true)))
            {
                if (difference.entriesOnlyOnLeft().size() > 0)
                {
                    writer.write(String.format("%s\t%s\t", "Missing", m_Column1));
                    for (Map.Entry<String, String> entry : difference.entriesOnlyOnLeft().entrySet())
                    {
                        writer.write(String.format("[%s,%s]; ", entry.getKey(), entry.getValue()));
                    }
    
                    writer.write("\n");
                }
                if (difference.entriesOnlyOnRight().size() > 0)
                {
                    writer.write(String.format("%s\t%s\t", "Extra", m_Column1));
                    for (Map.Entry<String, String> entry : difference.entriesOnlyOnRight().entrySet())
                    {
                        writer.write(String.format("[%s,%s]; ", entry.getKey(), entry.getValue()));
                    }
    
                    writer.write("\n");
                }
            }
            catch (IOException e)
            {
                e.printStackTrace();
            }
        }
    }}
    
    公共类使用者实现可运行{
    私有HashMap m_DBResults;
    私有阻塞队列;
    私有映射m_DBResultsPerFile;
    私有字符串m_Column1;
    私有int m_ThreadID;
    公共消费者(阻止队列i\u队列,int i\u线程ID)
    {
    m_队列=i_队列;
    m_ThreadID=i_ThreadID;
    }
    @凌驾
    公开募捐
    {
    尝试
    {
    而((m_DBResultsPerFile=m_Queue.poll())!=null)
    {
    //Column1总是相同的,只需要第一个条目。
    m_Column1=m_DBResultsPerFile.keySet().toArray()[0].toString();
    //查询数据库并将返回的数据放入m_DBResults
    queryDB(m_第1列);
    //将每个线程的差异(如果有)写入文件。
    写差异();
    }
    }
    捕获(例外e)
    {
    e、 printStackTrace();
    }
    }
    私有无效写入差异()
    {
    MapDifference difference=Maps.difference(m_DBResultsPerFile.get(m_Column1),m_DBResults);
    if(difference.entriesOnlyOnLeft().size()>0 | | difference.entriesOnlyOnRight().size()>0)
    {
    try(BufferedWriter writer=new BufferedWriter(new FileWriter(String.format(“thread_%d.tsv”,m_ThreadID),true)))
    {
    if(差分.entriesOnlyOnLeft().size()>0)
    {
    writer.write(String.format(“%s\t%s\t”,“缺失”,m_Column1));
    对于(Map.Entry:difference.entriesOnlyOnLeft().entrySet())
    {
    write(String.format(“[%s,%s];”,entry.getKey(),entry.getValue());
    }
    writer.write(“\n”);
    }
    如果(差异.entriesOnlyOnRight().size()>0)
    {
    writer.write(String.format(“%s\t%s\t”,“Extra”,m_Column1));
    对于(Map.Entry:difference.entriesOnlyOnRight().entrySet())
    {
    write(String.format(“[%s,%s];”,entry.getKey(),entry.getValue());
    }
    writer.write(“\n”);
    }
    }
    捕获(IOE异常)
    {
    e、 printStackTrace();
    }
    }
    }}
    
    Main

    public static void main(String[]args) {
    BlockingQueue<Map<String, Map<String,String>>> queue = new LinkedBlockingQueue <> ();
    
    //Start the reader thread.
    threadPool.execute(new Producer(queue, args[0]));
    
    //Create configurable threads.
    for (int i = 0; i < 10; i++) {
        threadPool.execute(new Consumer(queue, i + 1));
    }
    
    threadPool.shutdown();
    System.out.println("INFO: Shutting down threads.");
    
    try {
        threadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        System.out.println("INFO: Threadpool terminated successfully.");
    } catch (InterruptedException e) {
        e.printStackTrace();
    }}
    
    publicstaticvoidmain(字符串[]args){
    BlockingQueue=newLinkedBlockingQueue();
    //启动读卡器线程。
    execute(新生产者(队列,参数[0]);
    //创建可配置的线程。
    对于(int i=0;i<10;i++){
    执行(新使用者(队列,i+1));
    }
    threadPool.shutdown();
    System.out.println(“信息:关闭线程”);
    试一试{
    线程池终止(Long.MAX_值,时间单位纳秒);
    System.out.println(“信息:线程池已成功终止”);
    }捕捉(中断异常e){
    e、 printStackTrace();
    }}
    
    您的CPU使用率很可能是由以下原因造成的:

    while ((m_DBResultsPerFile = m_Queue.poll()) != null)
    
    poll
    方法不会阻塞。它会立即返回。因此每秒执行该循环数百万次

    您应该使用
    take()
    ,它实际上会等待元素可用:

    while ((m_DBResultsPerFile = m_Queue.take()) != null)
    

    这篇文章很好地总结了这一切,在某种程度上(在我看来)消除了任何混淆。

    您的CPU使用率很可能是由于以下原因:

    while ((m_DBResultsPerFile = m_Queue.poll()) != null)