Java 使用producer读取大量文件会导致CPU使用率达到100%
我编写了一个简单的消费者-生产者模式来帮助我完成以下任务:Java 使用producer读取大量文件会导致CPU使用率达到100%,java,multithreading,file,cpu-usage,producer-consumer,Java,Multithreading,File,Cpu Usage,Producer Consumer,我编写了一个简单的消费者-生产者模式来帮助我完成以下任务: 从包含约500000个TSV(制表符分隔)文件的目录中读取文件 将每个文件操纵到数据结构中,并将其放入阻塞队列中 使用使用者和查询数据库使用队列 比较两个哈希映射,如果有差异,将差异打印到文件中 当我运行这个程序时,即使有5个线程,我的CPU消耗也会飙升到100%。这可能是因为我使用一个制作人来读取文件吗 文件示例(制表符分隔) 制作人 public class Producer implements Runnable{ private
public class Producer implements Runnable{
private BlockingQueue<Map<String, Map<String, String>>> m_Queue;
private String m_Directory;
public Producer(BlockingQueue<Map<String, Map<String, String>>> i_Queue, String i_Directory)
{
m_Queue = i_Queue;
m_Directory = i_Directory;
}
@Override
public void run()
{
if (Files.exists(Paths.get(m_Directory)))
{
File[] files = new File(m_Directory).listFiles();
if (files != null)
{
for (File file : files)
{
Map<String, String> map = new HashMap<>();
try (BufferedReader reader = new BufferedReader(new FileReader(file)))
{
String line, lastcolumn3 = "", column1 = "", column2 = "", column3 = "";
while ((line = reader.readLine()) != null)
{
//Skip column header
if (!Character.isLetter(line.charAt(0)))
{
String[] splitLine = line.split("\t");
column1 = splitLine[0].replace("\"", "");
column2 = splitLine[1].replace("\"", "");
column3 = splitLine[2].replace("\"", "");
if (!lastcolumn3.equals(column3))
{
map.put(column3, column1);
lastcolumn3 = column3;
}
}
}
map.put(column3, column1);
//Column 1 is always the same per file, it'll be the key. Column2 and Column3 are stored as the value (as a key-value pair)
Map<String, Map<String, String>> mapPerFile = new HashMap<>();
mapPerFile.put(column2, map);
m_Queue.put(mapPerFile);
}
catch (IOException | InterruptedException e)
{
System.out.println(file);
e.printStackTrace();
}
}
}
}
}}
public class Consumer implements Runnable{
private HashMap<String, String> m_DBResults;
private BlockingQueue<Map<String, Map<String, String>>> m_Queue;
private Map<String, Map<String, String>> m_DBResultsPerFile;
private String m_Column1;
private int m_ThreadID;
public Consumer(BlockingQueue<Map<String, Map<String, String>>> i_Queue, int i_ThreadID)
{
m_Queue = i_Queue;
m_ThreadID = i_ThreadID;
}
@Override
public void run()
{
try
{
while ((m_DBResultsPerFile = m_Queue.poll()) != null)
{
//Column1 is always the same, only need the first entry.
m_Column1 = m_DBResultsPerFile.keySet().toArray()[0].toString();
//Queries DB and puts returned data into m_DBResults
queryDB(m_Column1);
//Write the difference, if any, per thread into a file.
writeDifference();
}
}
catch (Exception e)
{
e.printStackTrace();
}
}
private void writeDifference()
{
MapDifference<String, String> difference = Maps.difference(m_DBResultsPerFile.get(m_Column1), m_DBResults);
if (difference.entriesOnlyOnLeft().size() > 0 || difference.entriesOnlyOnRight().size() > 0)
{
try (BufferedWriter writer = new BufferedWriter(new FileWriter(String.format("thread_%d.tsv", m_ThreadID), true)))
{
if (difference.entriesOnlyOnLeft().size() > 0)
{
writer.write(String.format("%s\t%s\t", "Missing", m_Column1));
for (Map.Entry<String, String> entry : difference.entriesOnlyOnLeft().entrySet())
{
writer.write(String.format("[%s,%s]; ", entry.getKey(), entry.getValue()));
}
writer.write("\n");
}
if (difference.entriesOnlyOnRight().size() > 0)
{
writer.write(String.format("%s\t%s\t", "Extra", m_Column1));
for (Map.Entry<String, String> entry : difference.entriesOnlyOnRight().entrySet())
{
writer.write(String.format("[%s,%s]; ", entry.getKey(), entry.getValue()));
}
writer.write("\n");
}
}
catch (IOException e)
{
e.printStackTrace();
}
}
}}
公共类生成器实现可运行{
私有阻塞队列;
私有字符串m_目录;
公共生产者(阻塞队列i_队列、字符串i_目录)
{
m_队列=i_队列;
m_目录=i_目录;
}
@凌驾
公开募捐
{
if(Files.exists(path.get(m_目录)))
{
File[]files=新文件(m_目录).listFiles();
如果(文件!=null)
{
用于(文件:文件)
{
Map Map=newhashmap();
try(BufferedReader=new BufferedReader(new FileReader(file)))
{
字符串行,lastcolumn3=“”,column1=“”,column2=“”,column3=“”;
而((line=reader.readLine())!=null)
{
//跳过列标题
if(!Character.isleter(line.charAt(0)))
{
String[]splitLine=line.split(“\t”);
column1=分割线[0]。替换(“\”,”);
column2=分割线[1]。替换(“\”,”);
column3=分割线[2]。替换(“\”,”);
如果(!lastcolumn3.等于(column3))
{
map.put(第3列,第1列);
lastcolumn3=column3;
}
}
}
map.put(第3列,第1列);
//每个文件的列1总是相同的,它将是键。列2和列3存储为值(作为键值对)
Map mapPerFile=新建HashMap();
mapPerFile.put(第2列,map);
m_Queue.put(mapPerFile);
}
捕获(IOException | InterruptedException e)
{
System.out.println(文件);
e、 printStackTrace();
}
}
}
}
}}
消费者
public class Producer implements Runnable{
private BlockingQueue<Map<String, Map<String, String>>> m_Queue;
private String m_Directory;
public Producer(BlockingQueue<Map<String, Map<String, String>>> i_Queue, String i_Directory)
{
m_Queue = i_Queue;
m_Directory = i_Directory;
}
@Override
public void run()
{
if (Files.exists(Paths.get(m_Directory)))
{
File[] files = new File(m_Directory).listFiles();
if (files != null)
{
for (File file : files)
{
Map<String, String> map = new HashMap<>();
try (BufferedReader reader = new BufferedReader(new FileReader(file)))
{
String line, lastcolumn3 = "", column1 = "", column2 = "", column3 = "";
while ((line = reader.readLine()) != null)
{
//Skip column header
if (!Character.isLetter(line.charAt(0)))
{
String[] splitLine = line.split("\t");
column1 = splitLine[0].replace("\"", "");
column2 = splitLine[1].replace("\"", "");
column3 = splitLine[2].replace("\"", "");
if (!lastcolumn3.equals(column3))
{
map.put(column3, column1);
lastcolumn3 = column3;
}
}
}
map.put(column3, column1);
//Column 1 is always the same per file, it'll be the key. Column2 and Column3 are stored as the value (as a key-value pair)
Map<String, Map<String, String>> mapPerFile = new HashMap<>();
mapPerFile.put(column2, map);
m_Queue.put(mapPerFile);
}
catch (IOException | InterruptedException e)
{
System.out.println(file);
e.printStackTrace();
}
}
}
}
}}
public class Consumer implements Runnable{
private HashMap<String, String> m_DBResults;
private BlockingQueue<Map<String, Map<String, String>>> m_Queue;
private Map<String, Map<String, String>> m_DBResultsPerFile;
private String m_Column1;
private int m_ThreadID;
public Consumer(BlockingQueue<Map<String, Map<String, String>>> i_Queue, int i_ThreadID)
{
m_Queue = i_Queue;
m_ThreadID = i_ThreadID;
}
@Override
public void run()
{
try
{
while ((m_DBResultsPerFile = m_Queue.poll()) != null)
{
//Column1 is always the same, only need the first entry.
m_Column1 = m_DBResultsPerFile.keySet().toArray()[0].toString();
//Queries DB and puts returned data into m_DBResults
queryDB(m_Column1);
//Write the difference, if any, per thread into a file.
writeDifference();
}
}
catch (Exception e)
{
e.printStackTrace();
}
}
private void writeDifference()
{
MapDifference<String, String> difference = Maps.difference(m_DBResultsPerFile.get(m_Column1), m_DBResults);
if (difference.entriesOnlyOnLeft().size() > 0 || difference.entriesOnlyOnRight().size() > 0)
{
try (BufferedWriter writer = new BufferedWriter(new FileWriter(String.format("thread_%d.tsv", m_ThreadID), true)))
{
if (difference.entriesOnlyOnLeft().size() > 0)
{
writer.write(String.format("%s\t%s\t", "Missing", m_Column1));
for (Map.Entry<String, String> entry : difference.entriesOnlyOnLeft().entrySet())
{
writer.write(String.format("[%s,%s]; ", entry.getKey(), entry.getValue()));
}
writer.write("\n");
}
if (difference.entriesOnlyOnRight().size() > 0)
{
writer.write(String.format("%s\t%s\t", "Extra", m_Column1));
for (Map.Entry<String, String> entry : difference.entriesOnlyOnRight().entrySet())
{
writer.write(String.format("[%s,%s]; ", entry.getKey(), entry.getValue()));
}
writer.write("\n");
}
}
catch (IOException e)
{
e.printStackTrace();
}
}
}}
公共类使用者实现可运行{
私有HashMap m_DBResults;
私有阻塞队列;
私有映射m_DBResultsPerFile;
私有字符串m_Column1;
私有int m_ThreadID;
公共消费者(阻止队列i\u队列,int i\u线程ID)
{
m_队列=i_队列;
m_ThreadID=i_ThreadID;
}
@凌驾
公开募捐
{
尝试
{
而((m_DBResultsPerFile=m_Queue.poll())!=null)
{
//Column1总是相同的,只需要第一个条目。
m_Column1=m_DBResultsPerFile.keySet().toArray()[0].toString();
//查询数据库并将返回的数据放入m_DBResults
queryDB(m_第1列);
//将每个线程的差异(如果有)写入文件。
写差异();
}
}
捕获(例外e)
{
e、 printStackTrace();
}
}
私有无效写入差异()
{
MapDifference difference=Maps.difference(m_DBResultsPerFile.get(m_Column1),m_DBResults);
if(difference.entriesOnlyOnLeft().size()>0 | | difference.entriesOnlyOnRight().size()>0)
{
try(BufferedWriter writer=new BufferedWriter(new FileWriter(String.format(“thread_%d.tsv”,m_ThreadID),true)))
{
if(差分.entriesOnlyOnLeft().size()>0)
{
writer.write(String.format(“%s\t%s\t”,“缺失”,m_Column1));
对于(Map.Entry:difference.entriesOnlyOnLeft().entrySet())
{
write(String.format(“[%s,%s];”,entry.getKey(),entry.getValue());
}
writer.write(“\n”);
}
如果(差异.entriesOnlyOnRight().size()>0)
{
writer.write(String.format(“%s\t%s\t”,“Extra”,m_Column1));
对于(Map.Entry:difference.entriesOnlyOnRight().entrySet())
{
write(String.format(“[%s,%s];”,entry.getKey(),entry.getValue());
}
writer.write(“\n”);
}
}
捕获(IOE异常)
{
e、 printStackTrace();
}
}
}}
Main
public static void main(String[]args) {
BlockingQueue<Map<String, Map<String,String>>> queue = new LinkedBlockingQueue <> ();
//Start the reader thread.
threadPool.execute(new Producer(queue, args[0]));
//Create configurable threads.
for (int i = 0; i < 10; i++) {
threadPool.execute(new Consumer(queue, i + 1));
}
threadPool.shutdown();
System.out.println("INFO: Shutting down threads.");
try {
threadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
System.out.println("INFO: Threadpool terminated successfully.");
} catch (InterruptedException e) {
e.printStackTrace();
}}
publicstaticvoidmain(字符串[]args){
BlockingQueue=newLinkedBlockingQueue();
//启动读卡器线程。
execute(新生产者(队列,参数[0]);
//创建可配置的线程。
对于(int i=0;i<10;i++){
执行(新使用者(队列,i+1));
}
threadPool.shutdown();
System.out.println(“信息:关闭线程”);
试一试{
线程池终止(Long.MAX_值,时间单位纳秒);
System.out.println(“信息:线程池已成功终止”);
}捕捉(中断异常e){
e、 printStackTrace();
}}
您的CPU使用率很可能是由以下原因造成的:
while ((m_DBResultsPerFile = m_Queue.poll()) != null)
poll
方法不会阻塞。它会立即返回。因此每秒执行该循环数百万次
您应该使用take()
,它实际上会等待元素可用:
while ((m_DBResultsPerFile = m_Queue.take()) != null)
这篇文章很好地总结了这一切,在某种程度上(在我看来)消除了任何混淆。您的CPU使用率很可能是由于以下原因:
while ((m_DBResultsPerFile = m_Queue.poll()) != null)