C++ 通过多线程处理确定每个字符在文件中出现的次数
这是我的代码,我已经尽力写了,现在需要你们的帮助。请告诉我哪里出了问题?如果给定一个字符,我该如何统计它出现了多少次?我不知道该怎么做,也查阅了书籍,但还是没有结果。
标签:c++, multithreading
// Reads the file named by `str` into a vector of characters and prints each stored
// character on its own line.
// NOTE(review): despite the name, this function never starts a worker thread (see the
// notes on the `threads` loop below), and it does not count character occurrences.
void TextThread(std::string str)
{
std::ifstream text(str);
if (!text)
std::cout << "No open file" << "\n";
// NOTE(review): there is no early return after the error message, so the code below
// still runs on a stream that failed to open.
// istream_iterator<char> reads with operator>>, which skips whitespace by default, so
// blanks and newlines are not stored in `symvol`.
std::istream_iterator<char> input(text);
std::istream_iterator<char> output; // default-constructed: the end-of-stream iterator
std::vector<char> symvol(input, output);
// NOTE(review): hardware_concurrency() is allowed to return 0; the division and modulo
// below would then divide by zero.
unsigned maxThreadCount = std::thread::hardware_concurrency();
const std::size_t minLength = symvol.size() / maxThreadCount;
const std::size_t modulo = symvol.size() % maxThreadCount;
// `results` holds one chunk length per planned worker: the first `modulo` entries get
// one extra character so that the lengths add up to symvol.size().
std::vector<std::size_t> results;
results.reserve(maxThreadCount);
std::vector<std::thread> threads;
threads.reserve(maxThreadCount - 1);
for (std::size_t i = 0; i < modulo; ++i)
results.emplace_back(minLength + 1);
if (minLength > 0)
{
for (std::size_t i = modulo; i < maxThreadCount; ++i)
results.emplace_back(minLength);
}
// NOTE(review): `threads` is still empty here, so this loop body would never execute.
// The expression `threads[i - 1] + 1` also adds an integer to a std::thread, for which
// no operator+ exists, so this statement does not compile as written.
for (std::size_t i = 1; i < threads.size(); ++i)
threads.emplace_back(threads[i - 1] + 1, threads[i - 1] + threads[i]);
// Joins every thread stored in `threads`.
std::for_each(threads.begin(), threads.end(),
std::mem_fn(&std::thread::join));
// Prints every character that was read, one per line.
for (unsigned int i = 0; i < symvol.size(); ++i)
std::cout << symvol[i] << "\n";
}
// Program entry point: runs TextThread on the hard-coded input file.
int main()
{
    const std::string inputPath = "D:\\text.txt";
    TextThread(inputPath);
    return 0;
}
void TextThread(std::string str)
{
std::ifstream text(str);
if (!text)
std::cout …(此处代码被截断,完整代码见上文)

我来帮你梳理一下,因为你看起来完全迷失了方向。
目标
我想你想要一个直方图,比如:
std::array<size_t, 256> histo;
注意这里有一个非常微妙的地方:我把字符转换成了 unsigned char(也可以写作 uint8_t)。这是因为在某些平台上 char 是有符号的,这会导致在用 histo[ch] 做索引
或比较时出错(例如,ch我将在这里提供帮助,因为你非常迷路
目标
我想你想要一个直方图,比如:
std::array<size_t, 256> histo;
注意这里有一个非常微妙的地方:我把字符转换成了 unsigned char(也可以写作 uint8_t)。这是因为在某些平台上 char 是有符号的,这会导致在用 histo[ch] 做索引
或比较时出错(例如,ch你说“我该怎么做”是什么意思此外,据我所知,您不会启动任何线程。因此,编写bogosort应用于histogramming@igagisthreads.emplaceаu back
@CharStyleаааааааааааааааааааааn更新只打印所有非空白字符(效率很低)。向其中添加线程会使一切效率更低,并且仍然不会执行类似于“通过多线程处理确定文件中每个字符出现多少次”的操作。您说“我如何屏蔽”是什么意思此外,据我所知,您不会启动任何线程。因此,编写bogosort应用于histogramming@igagisthreads.emplaceаu back
@CharStyleаааааааааааааааааааааn更新只打印所有非空白字符(效率很低)。向其中添加线程会使一切效率更低,并且仍然不会执行类似于“通过多线程确定文件中每个字符出现的次数”的操作我很快就回来。家庭需要关注。你主要为什么要做?所以最好先做?第一件事。所以,我正确地猜到了任务的目的(为什么你的代码中没有这个?),现在你突然选择了无序地图
。为什么?还有,名字非常重要。不要(永远)调用函数TextThread
。它与线程无关,Text
不描述它的功能。它的功能是1.读取文件2.创建直方图(同样,如果函数不为符号赋值,则不要调用函数ValueSymbol
。它会生成直方图。因此,创建直方图,直方图,历史,计数频率,甚至可能计数出现次数。但是,永远不要ValueSymbol
。对于str
,也一样。如果是文件名,将其称为 file_specification
或fname
。永远不要调用字符串变量str
,除非这是您所知道的唯一相关内容(std::string trim(std::string const&str);
可以,例如)@是的,如果代码要在未经测试的平台上运行,一定要防止退化的 hardware_concurrency()
implementations!感谢您让我意识到这一点。我很快就会回来。家庭需要关注。您主要在做什么?所以最好是先做第一件事。因此,我正确地猜测了任务的目的(为什么您的代码中没有这一点?),现在你突然选择了 unordered_map
。为什么?还有,名字非常重要。不要(永远)调用你的函数TextThread
。它与线程无关,Text
不描述它的作用。它的作用是1.读取文件2.创建直方图(同样,如果函数不为符号赋值,则不要调用函数ValueSymbol
。它会生成直方图。因此,创建直方图,直方图,历史,计数频率,甚至可能计数出现次数。但是,永远不要ValueSymbol
。对于str
,也一样。如果是文件名,将其称为 file_specification
或fname
。永远不要调用字符串变量str
,除非这是您所知道的唯一相关内容(std::string trim(std::string const&str);
可以,例如)@是的,如果代码要在未经测试的平台上运行,请务必防止退化的 hardware_concurrency()
实现!感谢您让我意识到这一点。
#include "stdafx.h"
#include <iostream>
#include <fstream>
#include <iterator>
#include <algorithm>
#include <string>
#include <vector>
#include <unordered_map>
#include <thread>
#include <cctype>
#include <clocale>
#include <numeric>
// Splits the index range of `text` into contiguous work intervals, one per worker.
//
// Each returned pair is an INCLUSIVE index range [first, second] into `text`. The
// intervals are consecutive, do not overlap and together cover 0 .. text.size() - 1.
// At most std::thread::hardware_concurrency() intervals are produced; when the text
// has fewer characters than that, one single-character interval per character is
// returned instead. The first (text.size() % workers) intervals are one character
// longer than the rest.
//
// Returns an empty vector for empty `text`.
std::vector<std::pair<std::size_t, std::size_t>> GenerateIntervals(const
    std::vector<char>& text)
{
    std::vector<std::pair<std::size_t, std::size_t>> intervals;
    if (text.empty())
        return intervals; // nothing to split; also avoids computing "0 - 1" below

    // hardware_concurrency() may return 0 when the value is not computable;
    // fall back to a single worker instead of dividing by zero.
    std::size_t maxThreadCount = std::thread::hardware_concurrency();
    if (maxThreadCount == 0)
        maxThreadCount = 1;

    const std::size_t minLength = text.size() / maxThreadCount;
    const std::size_t modulo = text.size() % maxThreadCount;

    // With fewer characters than workers (minLength == 0) only `modulo` workers get
    // any work, so no empty intervals are emitted.
    const std::size_t intervalCount = (minLength > 0) ? maxThreadCount : modulo;
    intervals.reserve(intervalCount);

    std::size_t begin = 0;
    for (std::size_t i = 0; i < intervalCount; ++i)
    {
        const std::size_t length = minLength + (i < modulo ? 1 : 0);
        intervals.emplace_back(begin, begin + length - 1); // length >= 1 here
        begin += length;
    }
    return intervals;
}
// Counts the characters of `text` that lie inside `textRange` and adds the counts to
// `histogram`.
//
// textRange: INCLUSIVE index range [first, second], matching what GenerateIntervals
//            produces. The upper bound is clamped to the last valid index of `text`.
// text:      the characters to count.
// histogram: output; existing counts are kept and incremented.
//
// The previous loop used `i < textRange.second`, which skipped the last character of
// every inclusive interval.
void BuildHistogram(const std::pair<std::size_t, std::size_t>& textRange,
    const std::vector<char>& text, std::unordered_map<char, std::size_t>&
    histogram)
{
    if (text.empty() || textRange.first >= text.size())
        return; // nothing in range

    // Clamping keeps `last` below SIZE_MAX, so the `<=` loop always terminates.
    const std::size_t last = std::min(textRange.second, text.size() - 1);
    for (std::size_t i = textRange.first; i <= last; ++i)
        ++histogram[text[i]];
}
// Reads the characters of the file at `pathToFile` into a vector.
//
// Returns an empty vector (after printing "Can not open file") when the file cannot
// be opened.
//
// NOTE(review): std::istream_iterator<char> extracts with operator>>, which skips
// whitespace by default, so spaces, tabs and newlines are NOT part of the result.
// Use std::istreambuf_iterator<char> if whitespace should be counted as well.
//
// The previous version built the vector but never returned it on the success path
// (undefined behaviour for a non-void function).
std::vector<char> ReadDataFromFile(const std::string& pathToFile)
{
    std::ifstream stream(pathToFile);
    if (!stream)
    {
        std::cout << "Can not open file" << "\n";
        return {};
    }
    std::istream_iterator<char> it(stream);
    std::istream_iterator<char> itEnd; // default-constructed: end-of-stream
    return std::vector<char>(it, itEnd);
}
// Builds a per-character histogram of D:\text.txt with one thread per interval and
// prints every character that occurs at least once together with its count.
//
// Fixes over the previous version:
//  - the output loop printed `text[ch]` (raw file content, possibly out of bounds)
//    instead of the merged histogram;
//  - `text` was passed to std::thread by value, copying the whole text per thread;
//  - an empty input is handled before any interval is requested.
int main()
{
    const auto text = ReadDataFromFile("D:\\text.txt");
    if (text.empty())
        return 0; // nothing to count (missing or empty file)

    const auto textIntervals = GenerateIntervals(text);
    const auto usedThreadCount = textIntervals.size();

    // One private histogram per thread: no sharing, so no locking is needed.
    std::vector<std::unordered_map<char, size_t>> histograms(usedThreadCount);
    std::vector<std::thread> threads;
    threads.reserve(usedThreadCount);
    for (std::size_t i = 0; i < usedThreadCount; ++i)
        threads.emplace_back(BuildHistogram, std::cref(textIntervals[i]), std::cref(text), std::ref(histograms[i]));
    for (auto& worker : threads)
        worker.join();

    // Merge the per-thread results.
    std::unordered_map<char, size_t> histogram;
    for (const auto& hist : histograms)
    {
        for (const auto& pair : hist)
            histogram[pair.first] += pair.second;
    }

    // Walk all byte values in order so the output is sorted by character code.
    for (int ch = 0; ch < 256; ++ch)
    {
        const auto found = histogram.find(static_cast<char>(static_cast<unsigned char>(ch)));
        if (found != histogram.end())
        {
            std::cout << "Character '" << char(ch) << "' occurs " << found->second << " times\n";
        }
    }
    return 0;
}
// Histogram storage proposed by the answer: a fixed array of 256 counters, indexed by
// the character converted to unsigned char.
std::array<size_t, 256> histo;
// One counter per possible unsigned char value.
using Histo = std::array<size_t, 256>;

// Counts how often each byte value occurs in `data`.
//
// Returns an array where element k is the number of elements of `data` whose value,
// converted to unsigned char, equals k.
Histo histogram(std::vector<char> const& data) {
    Histo histo {}; // value initialize, makes sure all elements are 0
    // Iterate as unsigned char: plain `char` may be signed, and a negative value would
    // index outside the array.
    for (unsigned char ch : data) {
        histo[ch] += 1;
    }
    return histo;
}
// Loads the whole file `fname` into memory and returns its character histogram.
// Throws std::runtime_error("No open file") when the file cannot be opened.
Histo histogram_file(std::string const& fname) {
    std::ifstream input(fname);
    if (input) {
        std::istreambuf_iterator<char> const first{input};
        std::istreambuf_iterator<char> const last{}; // end-of-stream
        std::vector<char> const bytes(first, last);
        return histogram(bytes);
    }
    throw std::runtime_error("No open file");
}
#include <thread>
#include <vector>
#include <string>
#include <fstream>
#include <iostream>
#include <iterator>
// One counter per possible unsigned char value.
using Histo = std::array<size_t, 256>;

// Returns how many times each byte value appears in `data`; every element is
// converted to unsigned char before it is used as an index.
Histo histogram(std::vector<char> const& data) {
    Histo counts {}; // all 256 counters start at zero
    for (auto pos = data.begin(); pos != data.end(); ++pos) {
        unsigned char const byte = static_cast<unsigned char>(*pos);
        ++counts[byte];
    }
    return counts;
}
// Reads all characters of `fname` and returns the histogram computed from them.
// Throws std::runtime_error("No open file") if the stream could not be opened.
Histo histogram_file(std::string const& fname) {
    std::ifstream source(fname);
    if (source) {
        std::istreambuf_iterator<char> const from{source};
        std::istreambuf_iterator<char> const to{}; // end-of-stream
        std::vector<char> const contents(from, to);
        return histogram(contents);
    }
    throw std::runtime_error("No open file");
}
int main() {
Histo histo = histogram_file("main.cpp");
for (int ch = 0; ch <= 255; ++ch) {
if (histo[ch]) {
if (std::isprint(ch))
std::cout << "Character '" << char(ch) << "' occurs " << histo[ch] << " times\n";
else
std::cout << "Character #" << ch << " occurs " << histo[ch] << " times\n";
}
}
}
Character #10 occurs 41 times
Character ' ' occurs 224 times
Character '!' occurs 1 times
Character '"' occurs 16 times
Character '#' occurs 7 times
...
Character 'x' occurs 3 times
Character 'y' occurs 1 times
Character 'z' occurs 2 times
Character '{' occurs 9 times
Character '}' occurs 9 times
// Counts the characters in the iterator range [begin, end).
//
// Each element is converted to unsigned char before it is used as the histogram
// index: plain `char` may be signed, and a negative value would index outside an
// array-based Histo.
template <typename Iterator> // could just use `char const*` but let's not
Histo histogram(Iterator begin, Iterator end) {
    Histo histo {}; // value initialize, makes sure all elements are 0
    for (auto it = begin; it != end; ++it) {
        histo[static_cast<unsigned char>(*it)] += 1;
    }
    return histo;
}
// Convenience overload: histogram of a whole vector, delegating to the iterator-range
// version.
Histo histogram(std::vector<char> const& data) {
    auto const first = data.begin();
    auto const last = data.end();
    return histogram(first, last);
}
// Excerpt: these statements use `data` and `num_threads`, which are not declared in
// this excerpt, and end in `return histo;`, so they belong inside a function body.
using It = std::vector<char>::const_iterator;
// calculate chunk size
// (rounded up, so that num_threads chunks cover all of data)
// NOTE(review): divides by zero when num_threads is 0.
auto chunk = data.size() / num_threads;
if (chunk*num_threads < data.size()) chunk += 1;
////////////////////////////////////////////
// start chunk threads
struct worker_t {
Histo local; // histogram per thread
std::thread th;
// Thread entry point: stores the histogram of [begin, end) in `local`.
void run(It begin, It end) {
local = histogram(begin, end);
}
};
std::vector<worker_t> workers(num_threads);
int i = 0;
for (auto& w : workers) {
// NOTE(review): because chunk is rounded up, chunk*i can exceed data.size() for the
// last workers; `begin` (and `begin + chunk`) would then lie past data.end().
auto begin = data.begin()+chunk*i,
end = std::min(begin + chunk, data.end());
w.th = std::thread(&worker_t::run, std::ref(w), begin, end);
++i;
}
////////////////////////////////////////////
// wait for completion
for (auto& w : workers) {
if (w.th.joinable())
w.th.join();
};
////////////////////////////////////////////
// merge results
// NOTE(review): `histo` is default-initialized; if Histo were a std::array its counters
// would start with indeterminate values.
Histo histo;
for (auto& w : workers) {
merge_into(histo, w.local);
}
return histo;
// Adds every count stored in `part` to the entry with the same key in `into`.
void merge_into(Histo& into, Histo const& part) {
    for (auto entry = part.begin(); entry != part.end(); ++entry) {
        into[entry->first] += entry->second;
    }
}
#include <thread>
#include <vector>
#include <string>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
// Histogram type: maps a character to the number of times it was seen.
using Histo = std::map<char, size_t>;

// Accumulates `part` into `into`: for every (character, count) entry of `part`, the
// count is added to the entry for that character in `into` (created at 0 if absent).
void merge_into(Histo& into, Histo const& part) {
    for (auto const& [symbol, count] : part) {
        into[symbol] += count;
    }
}
// Builds the histogram of the elements in [begin, end).
// (A plain `char const*` range would work as well; the template keeps it generic.)
template <typename Iterator>
Histo histogram(Iterator begin, Iterator end) {
    Histo counts {}; // starts out empty / zeroed
    while (begin != end) {
        counts[*begin] += 1;
        ++begin;
    }
    return counts;
}
// Whole-vector overload: forwards the vector's range to the iterator-based histogram.
Histo histogram(std::vector<char> const& data) {
    auto const first = data.begin();
    auto const last = data.end();
    return histogram(first, last);
}
// Computes the histogram of `data` by splitting it into chunks that are counted on
// separate threads and merging the per-thread results.
//
// data:        characters to count.
// num_threads: upper bound on the number of worker threads; 0 is treated as 1
//              (std::thread::hardware_concurrency() may return 0).
//
// Fixes over the previous version: no division by zero for num_threads == 0, and no
// iterator arithmetic past data.end() when the rounded-up chunk size makes the last
// chunks empty (e.g. 5 characters on 4 threads). Workers without data start no thread.
Histo parallel_histo(std::vector<char> const& data, size_t num_threads = std::thread::hardware_concurrency()) {
    using It = std::vector<char>::const_iterator;
    using Diff = std::vector<char>::difference_type;

    if (num_threads == 0) num_threads = 1;

    // calculate chunk size (rounded up so num_threads chunks cover all of data)
    auto chunk = data.size() / num_threads;
    if (chunk*num_threads < data.size()) chunk += 1;

    ////////////////////////////////////////////
    // start chunk threads
    struct worker_t {
        Histo local; // histogram per thread
        std::thread th;

        void run(It begin, It end) {
            local = histogram(begin, end);
        }
    };

    // Sized up front and never resized, so the references handed to the threads stay valid.
    std::vector<worker_t> workers(num_threads);
    size_t offset = 0;
    for (auto& w : workers) {
        if (offset >= data.size())
            break; // remaining workers have nothing to do
        auto const length = std::min(chunk, data.size() - offset);
        auto const begin = data.begin() + static_cast<Diff>(offset);
        auto const end = begin + static_cast<Diff>(length);
        w.th = std::thread(&worker_t::run, std::ref(w), begin, end);
        offset += length;
    }

    ////////////////////////////////////////////
    // wait for completion
    for (auto& w : workers) {
        if (w.th.joinable())
            w.th.join();
    }

    ////////////////////////////////////////////
    // merge results
    Histo histo {};
    for (auto& w : workers) {
        merge_into(histo, w.local);
    }
    return histo;
}
// Reads the complete file `fname` and returns its histogram computed by
// parallel_histo with the default thread count.
// Throws std::runtime_error("No open file") when the file cannot be opened.
Histo histogram_file(std::string const& fname) {
    std::ifstream input(fname);
    if (input) {
        std::istreambuf_iterator<char> const first{input};
        std::istreambuf_iterator<char> const last{}; // end-of-stream
        std::vector<char> const bytes(first, last);
        return parallel_histo(bytes);
    }
    throw std::runtime_error("No open file");
}
int main() {
Histo histo = histogram_file("main.cpp");
for (int ch = 0; ch <= 255; ++ch) {
if (histo[ch]) {
if (std::isprint(ch))
std::cout << "Character '" << char(ch) << "' occurs " << histo[ch] << " times\n";
else
std::cout << "Character #" << ch << " occurs " << histo[ch] << " times\n";
}
}
}
// Computes the histogram of `data` with one std::async task per chunk and merges the
// task results.
//
// data:        characters to count.
// num_threads: upper bound on the number of tasks; 0 is treated as 1
//              (std::thread::hardware_concurrency() may return 0).
//
// Fixes over the previous version: the chunk index was never advanced, so every task
// counted the first chunk; num_threads == 0 divided by zero; and chunks past the end
// of `data` produced out-of-range iterators. Only non-empty chunks get a task, so
// every stored future is valid when get() is called.
Histo parallel_histo(std::vector<char> const& data, size_t num_threads = std::thread::hardware_concurrency()) {
    using Diff = std::vector<char>::difference_type;

    if (num_threads == 0) num_threads = 1;

    // calculate chunk size (rounded up so num_threads chunks cover all of data)
    auto chunk = data.size() / num_threads;
    if (chunk*num_threads < data.size()) chunk += 1;

    ////////////////////////////////////////////
    // start tasks
    std::vector<std::future<Histo> > tasks;
    tasks.reserve(num_threads);
    for (size_t offset = 0; offset < data.size(); offset += chunk) {
        auto const length = std::min(chunk, data.size() - offset);
        auto const begin = data.begin() + static_cast<Diff>(offset);
        auto const end = begin + static_cast<Diff>(length);
        // The iterators are captured by value; `data` outlives the tasks because every
        // future is consumed below, before this function returns.
        tasks.push_back(std::async([begin, end] { return histogram(begin, end); }));
    }

    ////////////////////////////////////////////
    // merge results
    Histo histo {};
    for (auto& t : tasks) {
        merge_into(histo, t.get());
    }
    return histo;
}
#include <atomic>
#include <execution>
#include <algorithm>
// Histogram with atomic counters so that several threads may increment concurrently.
using Histo = std::vector<std::atomic_size_t>;

// Counts how often each byte value occurs in `data` using a parallel std::for_each.
//
// Returns a vector of 256 atomic counters, indexed by the character converted to
// unsigned char.
//
// std::execution::par is used instead of par_unseq: the unsequenced policy permits
// interleaving several element invocations on a single thread and therefore forbids
// vectorization-unsafe operations such as synchronizing atomic updates.
Histo histogram(std::vector<char> const& data) {
    Histo histo(256); // value-initialized: every counter starts at 0
    std::for_each(std::execution::par, data.begin(), data.end(), [&histo](unsigned char ch) { ++histo[ch]; });
    return histo;
}
// Reads every character of `fname` and returns the histogram of the file contents.
// Throws std::runtime_error("No open file") when the file cannot be opened.
Histo histogram_file(std::string const& fname) {
    std::ifstream input(fname);
    if (input) {
        std::istreambuf_iterator<char> const first{input};
        std::istreambuf_iterator<char> const last{}; // end-of-stream
        std::vector<char> const bytes(first, last);
        return histogram(bytes);
    }
    throw std::runtime_error("No open file");
}