多线程模式下的Java集合算法
我的任务是在并行线程中执行一些与集合相关的逻辑,并将其与单线程模式进行比较。从这个问题中,我注意到文件读取不是多线程的任务,所以我决定将重点放在进一步的逻辑上。逻辑如下:多线程模式下的Java集合算法,java,multithreading,collections,concurrency,fork-join,Java,Multithreading,Collections,Concurrency,Fork Join,我的任务是在并行线程中执行一些与集合相关的逻辑,并将其与单线程模式进行比较。从这个问题中,我注意到文件读取不是多线程的任务,所以我决定将重点放在进一步的逻辑上。逻辑如下: public List<?> taskExecution(File file, boolean parallel) { List<Entry<String, Integer>> entryList = new ArrayList<>(); try {
public List<?> taskExecution(File file, boolean parallel) {
List<Entry<String, Integer>> entryList = new ArrayList<>();
try {
if (parallel) {
entryList = taskExecutionInParallel(file);
} else {
// put in the map the words and their occurrence
Map<String, Integer> wordsFrequency = new HashMap<>();
for(String word : this.readWordsFromText(file, parallel)) {
if (wordsFrequency.containsKey(word)) {
wordsFrequency.put(word, wordsFrequency.get(word).intValue() + 1);
} else {
wordsFrequency.put(word, 1);
}
}
// create the list of Map.Entry objects
entryList.addAll(wordsFrequency.entrySet());
// sort the entries by the value descending
Collections.sort(entryList, new Comparator<Entry<String, Integer>>(){
@Override
public int compare(Entry<String, Integer> o1,
Entry<String, Integer> o2) {
return o2.getValue().compareTo(o1.getValue());
}
});
// identify the top index
int topIndex = entryList.size() > 1 ? 2 : entryList.size() > 0 ? 1 : 0;
// truncate the list
entryList = entryList.subList(0, topIndex);
// sort the result list by the words descending
Collections.sort(entryList, new Comparator<Entry<String, Integer>>(){
@Override
public int compare(Entry<String, Integer> o1,
Entry<String, Integer> o2) {
return o2.getKey().compareTo(o1.getKey());
}
});
}
} catch (IOException e) {
e.printStackTrace();
}
return entryList;
}
公共列表任务执行(文件文件,布尔并行){
List entryList=new ArrayList();
试一试{
if(并行){
entryList=taskExecutionInParallel(文件);
}否则{
//把单词和它们的出现放在地图上
Map wordsFrequency=新HashMap();
for(字符串字:this.readWordsFromText(文件,并行)){
if(字频率包含字){
wordsFrequency.put(word,wordsFrequency.get(word.intValue()+1);
}否则{
字频率输入(字,1);
}
}
//创建Map.Entry对象的列表
entryList.addAll(wordsffrequency.entrySet());
//按降序值对条目进行排序
Collections.sort(entryList,newcomparator(){
@凌驾
公共整数比较(条目o1,
入口(氧气){
返回o2.getValue().compareTo(o1.getValue());
}
});
//确定顶级索引
inttopIndex=entryList.size()>1?2:entryList.size()>0?1:0;
//截断列表
entryList=entryList.subList(0,topIndex);
//按单词降序排列结果列表
Collections.sort(entryList,newcomparator(){
@凌驾
公共整数比较(条目o1,
入口(氧气){
返回o2.getKey().compareTo(o1.getKey());
}
});
}
}捕获(IOE异常){
e、 printStackTrace();
}
返回入口列表;
}
我正在尝试使用Fork/Join框架执行从初始单词列表到单词频率映射的转换:
class ForkJoinFrequencyReader extends RecursiveAction {
static final int SEQUENTIAL_THRESHOLD = 1000;
private static final long serialVersionUID = -7784403215745552735L;
private Map<String, Integer> wordsFrequency;
private final int start;
private final int end;
private final List<String> words;
public ForkJoinFrequencyReader(List<String> words, Map<String, Integer> wordsFrequency) {
this(words, 0, words.size(), wordsFrequency);
}
private ForkJoinFrequencyReader(List<String> words, int start, int end, Map<String, Integer> wordsFrequency) {
this.words = words;
this.start = start;
this.end = end;
this.wordsFrequency = wordsFrequency;
}
private synchronized void putInMap() {
for(int i = start; i < end; i++) {
String word = words.get(i);
if (wordsFrequency.containsKey(word)) {
wordsFrequency.put(word, wordsFrequency.get(word).intValue() + 1);
} else {
wordsFrequency.put(word, 1);
}
}
}
@Override
protected void compute() {
if (end - start < SEQUENTIAL_THRESHOLD) {
putInMap();
} else {
int mid = (start + end) >>> 1;
ForkJoinFrequencyReader left = new ForkJoinFrequencyReader(words, start, mid, wordsFrequency);
ForkJoinFrequencyReader right = new ForkJoinFrequencyReader(words, mid, end, wordsFrequency);
left.fork();
right.fork();
left.join();
right.join();
}
}
}
private List<Entry<String, Integer>> taskExecutionInParallel(File file) throws IOException {
List<Entry<String, Integer>> entryList = new CopyOnWriteArrayList<>();
ForkJoinPool pool = new ForkJoinPool();
Map<String, Integer> wordsFrequency = new ConcurrentHashMap<>();
pool.invoke(new ForkJoinFrequencyReader(Collections.synchronizedList(this.readWordsFromText(file, true)), wordsFrequency));
//****** .... the same single-thread code yet
}
类ForkJoinFrequencyReader扩展递归操作{
静态最终整数顺序_阈值=1000;
私有静态最终长serialVersionUID=-778440321574552735L;
私有映射词频率;
私人最终启动;
私人终端;
私人最终列表词;
公共ForkJoinFrequencyReader(列出单词、映射单词频率){
这(words,0,words.size(),words频率);
}
专用ForkJoinFrequencyReader(列出单词、int开始、int结束、映射单词频率){
这个单词=单词;
this.start=start;
this.end=end;
this.wordsFrequency=wordsFrequency;
}
私有同步void putInMap(){
for(int i=start;i>>1;
ForkJoinFrequencyReader left=新的ForkJoinFrequencyReader(字、开始、中间、字频率);
ForkJoinFrequencyReader right=新的ForkJoinFrequencyReader(单词、中间、结尾、单词频率);
左。fork();
对。fork();
左。join();
对。join();
}
}
}
私有列表taskExecutionInParallel(文件)引发IOException{
List entryList=新建CopyOnWriteArrayList();
ForkJoinPool池=新的ForkJoinPool();
Map wordsFrequency=新的ConcurrentHashMap();
invoke(新的ForkJoinFrequencyReader(Collections.synchronizedList(this.readWordsFromText(file,true)),wordsFrequency));
//******…还是相同的单线程代码吗
}
但是,每次执行后,生成的映射具有不同的值。有人能告诉我瓶颈在哪里,或者提出一些其他解决方案来使用标准JDK将并发性嵌入到版本7吗 您可能应该使用Java 8流的并行执行功能:
Path path = FileSystems.getDefault().getPath(...);
Stream<String> words = Files.lines(path);
Map<String, Long> wordsFrequency = words.parallel()
.collect(Collectors.groupingBy(UnaryOperator.identity(),
Collectors.counting()));
Path Path=FileSystems.getDefault().getPath(…);
流字=文件.行(路径);
映射词频率=words.parallel()
.collect(收集器.groupingBy(UnaryOperator.identity()),
收集器。计数();
您的putInMap在ForkJoinFrequencyReader实例上同步。
同时,在计算方法中创建ForkJoinFrequencyReader的不同实例。
所以同步根本不起作用,因为每个同步都与它自己的实例相关。要检查它,只需更换电脑上的按钮
private void putInMap() {
synchronized (wordsFrequency) {
例如,阅读这篇文章:我还为单词频率块实现了生产者-消费者模式:
private Map<String, Integer> frequencyCounterInParallel(File file) throws InterruptedException {
Map<String, Integer> wordsFrequency = Collections.synchronizedMap(new LinkedHashMap<>());
BlockingQueue<String> queue = new ArrayBlockingQueue<>(1024);
Thread producer = new Thread(new Producer(queue, file));
Thread consumer = new Thread(new Consumer(queue, wordsFrequency));
producer.start();
consumer.start();
producer.join();
consumer.join();
return wordsFrequency;
}
class Producer implements Runnable {
private BlockingQueue<String> queue;
private File file;
public Producer(BlockingQueue<String> queue, File file) {
this.file = file;
this.queue = queue;
}
@Override
public void run() {
try(BufferedReader bufferReader = Files.newBufferedReader(file.toPath())) {
String line = null;
while ((line = bufferReader.readLine()) != null){
String[] lineWords = line.split(CommonConstants.SPLIT_TEXT_REGEX);
for(String word : lineWords) {
if (word.length() > 0) {
queue.put(word.toLowerCase());
}
}
}
queue.put(STOP_THREAD);
} catch (InterruptedException | IOException e) {
e.printStackTrace();
}
}
}
class Consumer implements Runnable {
private BlockingQueue<String> queue;
private Map<String, Integer> wordsFrequency;
public Consumer(BlockingQueue<String> queue, Map<String, Integer> wordsFrequency) {
this.queue = queue;
this.wordsFrequency = wordsFrequency;
}
@Override
public void run() {
try {
String word = null;
while(!((word = queue.take()).equals(STOP_THREAD))) {
if (wordsFrequency.containsKey(word)) {
wordsFrequency.put(word, wordsFrequency.get(word).intValue() + 1);
} else {
wordsFrequency.put(word, 1);
}
}
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
私有映射frequencyCounterInParallel(文件文件)引发InterruptedException{
MapWordsFrequency=Collections.synchronizedMap(新LinkedHashMap());
BlockingQueue=new ArrayBlockingQueue(1024);
线程生产者=新线程(新生产者(队列、文件));
线程使用者=新线程(新使用者(队列、字频率));
producer.start();
consumer.start();
producer.join();
consumer.join();
返回词频率;
}
类生成器实现了Runnable{
私有阻塞队列;
私有文件;
公共生产者(阻止队列、文件){
this.file=文件;
this.queue=队列;
}
@凌驾
public void run(){
try(BufferedReader bufferReader=Files.newBufferedReader(file.toPath())){
字符串行=null;
而(李)