C++ 如何加快耗时计算的运行时间_C++_Performance_Calculation

C++ 如何加快耗时计算的运行时间

c++ performance

C++ 如何加快耗时计算的运行时间,c++,performance,calculation,C++,Performance,Calculation,我正在尝试为windows控制台编写一个将图像转换为字符和颜色的函数。目前，对于700x700像素的图像，计算大约需要13秒，但这段时间是不可取的，尤其是当我计划使函数更复杂以考虑字符形状时什么是加速C++中重计算和循环的方法？我被推荐使用多线程、SIMD和内联汇编，但我如何用这些方法来改进下面这样的函数呢这是我正在使用的当前代码 unsigned char characterValues[256] = { 0 }; // This operation can be done ahead

我正在尝试为 Windows 控制台编写一个函数，把图像转换为字符和颜色。目前，对于 700x700 像素的图像，计算大约需要 13 秒，这样的耗时是不可接受的，尤其是我还计划让函数更复杂，把字符形状也考虑进去。

在 C++ 中，有哪些方法可以加速繁重的计算和循环？有人建议我使用多线程、SIMD 和内联汇编，但我该如何用这些方法来改进下面这样的函数呢？

这是我正在使用的当前代码

// Number of lit (red channel == 255) font pixels inside each glyph cell,
// indexed by character code. The font sheet is read as 16 glyphs per row,
// each glyph occupying an 8x12 pixel cell, so a cell holds at most 96 pixels
// and the count always fits in an unsigned char.
unsigned char characterValues[256] = { 0 };

// This operation can be done ahead of time when the program is started up
{
    constexpr int kGlyphWidth = 8;     // pixels per glyph cell, horizontally
    constexpr int kGlyphHeight = 12;   // pixels per glyph cell, vertically
    constexpr int kGlyphsPerRow = 16;  // glyph cells per row of the font sheet
    constexpr int kGlyphCount = 256;   // must match the size of characterValues

    ResourceInputStream in = ResourceInputStream();
    // This image is the font for the console. The background color is black while the foreground color is white
    in.open(BMP_FONT, 2); // 2 is for RT_BITMAP, BMP_FONT is a resource
    if (in.isOpen()) {
        auto bmp = readBitmap(&in, true);
        in.close();

        // Only scan the area that maps onto the 256-entry table. Without this
        // clamp a sheet wider than 128 pixels would be counted into the wrong
        // glyph row, and one taller than 192 pixels would write past the end
        // of characterValues.
        const int maxWidth = kGlyphsPerRow * kGlyphWidth;
        const int maxHeight = (kGlyphCount / kGlyphsPerRow) * kGlyphHeight;
        const int width = bmp->size.x < maxWidth ? bmp->size.x : maxWidth;
        const int height = bmp->size.y < maxHeight ? bmp->size.y : maxHeight;

        for (int x = 0; x < width; x++) {
            // The glyph column depends only on x, so compute it once per column
            const int glyphColumn = x / kGlyphWidth;
            for (int y = 0; y < height; y++) {
                // The font is pure white on black, so testing the red channel
                // is enough to tell foreground from background
                if (bmp->pixels[x][y].r == 255)
                    characterValues[glyphColumn + (y / kGlyphHeight) * kGlyphsPerRow]++;
            }
        }
    }
}
// This operation is for asciifying the image.
// The bitmap is split into 8x12 pixel areas, one per console character. Each
// area is averaged to a single color, then compared against every
// character / color-attribute combination; the combination whose blended
// foreground/background color has the smallest squared RGB distance wins.
{
    FileInputStream in = FileInputStream();
    in.open(R"(image-path.bmp)");
    if (in.isOpen()) {
        auto bmp = readBitmap(&in, false);
        in.close();

        auto image = /* make default image here */
        // One output cell per 8x12 block, rounded up so partial edge blocks are kept
        Point2I imageSize = (Point2I)GMath::ceil((Point2F)bmp->size / Point2F(8.0f, 12.0f));
        int totalImageSize = imageSize.x * imageSize.y;
        image->resize(imageSize);
        auto palette = /* get palette of 16 colors here */

        // Iterate through each (character area)
        for (int imgx = 0; imgx < imageSize.x; imgx++) {
            for (int imgy = 0; imgy < imageSize.y; imgy++) {

                // Sum the color channels of every bitmap pixel covered by this cell
                int r = 0, g = 0, b = 0;
                int totalRead = 0;
                for (int px = 0; px < 8; px++) {
                    for (int py = 0; py < 12; py++) {
                        Point2I p = Point2I(imgx * 8 + px, imgy * 12 + py);
                        // Edge cells may hang over the bitmap; skip pixels outside it
                        if (p < bmp->size) {
                            // Look the pixel up once instead of once per channel
                            const auto& source = bmp->pixels[p.x][p.y];
                            r += source.r;
                            g += source.g;
                            b += source.b;
                            totalRead++;
                        }
                    }
                }
                Color imageValue = Color(r / totalRead, g / totalRead, b / totalRead);

                // Target channels converted to int once per cell rather than
                // once per candidate
                const int targetR = static_cast<int>(imageValue.r);
                const int targetG = static_cast<int>(imageValue.g);
                const int targetB = static_cast<int>(imageValue.b);

                // A combo of a character and foreground/background color
                Pixel closestPixel = Pixel();
                // The score is a sum of squared integer differences, so it is kept
                // as an int; this gives the same ordering as the float version
                // without any int<->float conversion in the hot loop.
                // NOTE(review): assumes color channels stay within 0..255 - confirm
                int closestScore = std::numeric_limits<int>::max();
                for (int col = 1; col < 255; col++) {
                    // Named so they no longer shadow the r/g/b accumulators above
                    const unsigned char fgIndex = getFColor(col);
                    const unsigned char bgIndex = getBColor(col);
                    // Palette entries are fixed for the whole character loop
                    const auto& foreground = palette[fgIndex];
                    const auto& background = palette[bgIndex];
                    for (int ch = 1; ch < 255; ch++) {
                        // Blend weights: lit pixels take the foreground color,
                        // the remaining pixels of the cell take the background.
                        // `auto` keeps the exact operand types of the original
                        // expressions.
                        const auto fgWeight = characterValues[ch];
                        const auto bgWeight = TOTAL_CHARACTER_VALUE - characterValues[ch];

                        // Calculate values
                        Color value = Color(
                            (foreground.r * fgWeight + background.r * bgWeight) / TOTAL_CHARACTER_VALUE,
                            (foreground.g * fgWeight + background.g * bgWeight) / TOTAL_CHARACTER_VALUE,
                            (foreground.b * fgWeight + background.b * bgWeight) / TOTAL_CHARACTER_VALUE
                        );

                        // Add up score here
                        const int diffR = static_cast<int>(value.r) - targetR;
                        const int diffG = static_cast<int>(value.g) - targetG;
                        const int diffB = static_cast<int>(value.b) - targetB;
                        const int score = diffR * diffR + diffG * diffG + diffB * diffB;

                        // Strict '<' keeps the first best combination on ties
                        if (score < closestScore) {
                            closestPixel = Pixel((unsigned char)ch, (unsigned char)col);
                            closestScore = score;
                        }
                    }
                }
                // Set the character/color combo here
            }
        }
    }
}

无符号字符值[256]={0}；
//当程序启动时，可以提前完成此操作
{
ResourceInputStream in=ResourceInputStream（）；
//此图像是控制台的字体。背景色为黑色，前景色为白色
in.open（BMP_字体，2）；//2表示RT_位图，BMP_字体表示资源
如果（在.isOpen（）中）{
自动bmp=readBitmap（&in，true）；
in.close（）；
对于（intx=0；xsize.x；x++）{
对于（int y=0；ysize.y；y++）{
int charIndex=（x/8）+（y/12）*16；
如果（bmp->像素[x][y].r==255）
characterValues[charIndex]++；
}
}
}
}
//此操作用于对图像进行ascification
{
FileInputStream in=FileInputStream（）；
in.open（R“（image path.bmp）”；
如果（在.isOpen（）中）{
自动bmp=读取位图（&in，false）；
in.close（）；
自动图像=/*在此处生成默认图像*/
Point2I imageSize=（Point2I）GMath:：ceil（（Point2F）bmp->size/Point2F（8.0f，12.0f））；
int totalImageSize=imageSize.x*imageSize.y；
图像->调整大小（图像大小）；
自动调色板=/*在此处获得16种颜色的调色板*/
//遍历每个（字符区域）
对于（int-imgx=0；imgxsize）{
r+=bmp->像素[p.x][p.y].r；
g+=bmp->像素[p.x][p.y].g；
b+=bmp->像素[p.x][p.y].b；
totalRead++；
}
}
}
彩色图像值=颜色（r/totalRead，g/totalRead，b/totalRead）；
//字符和前/背景色的组合
像素闭合像素=像素（）；
float closestScore=std:：numeric_limits（）.max（）；
for（int col=1；col<255；col++）{
无符号字符f=getFColor（col）；
无符号字符b=getBColor（col）；
for（int ch=1；ch<255；ch++）{
//计算值
颜色值=颜色(
（调色板[f].r*字符值[ch]+调色板[b].r*（总字符值-字符值[ch]））/总字符值，
（调色板[f].g*字符值[ch]+调色板[b].g*（总字符值-字符值[ch]））/总字符值，
（调色板[f].b*字符值[ch]+调色板[b].b*（总字符值-字符值[ch]）/总字符值
);
//在这里把分数加起来
浮点数=
（float）（（int）value.r-（int）imageValue.r）*（float）（（int）value.r-（int）imageValue.r）+
（float）（（int）value.g-（int）imageValue.g）*（float）（（int）value.g-（int）imageValue.g）+
（float）（（int）value.b-（int）imageValue.b）*（float）（（int）value.b-（int）imageValue.b）；
如果（分数<最接近分数）{
closestPixel=像素（（无符号字符）ch，（无符号字符）col）；
最接近分数=分数；
}
}
}
//在此处设置字符/颜色组合
}
}
}
}

您的外层循环遍历 x，内层嵌套循环遍历 y。您确定这与像素在内存中的存储顺序一致吗？也许确实如此，但不妨试试交换循环顺序，看看是否有帮助：

// could be faster, depending on data structure
 for (int y = 0; y < bmp->size.y; y++) {
     for (int x = 0; x < bmp->size.x; x++) {

`bmp->pixels[p.x][p.y]` 被计算了三次。每次计算都包括一次指针解引用、两次成员访问（p.x 和 p.y），然后是二维数组的解引用（最多是一次乘法和一次加法，再加一次指针解引用）。也就是说，每次为了得到对该像素的引用，至少要执行 6 次基本运算。

相反，你可以：

auto current_pixel = bmp->pixels[p.x][p.y];

更进一步：您目前先构造一个 Point2I，再检查它的 x 和 y 是否在 bmp 的尺寸之内。其实根本不需要 Point2I，只需分别计算 x 和 y 坐标，并分别与 bmp 的 x、y 尺寸进行比较即可。

在外层循环中计算 x 坐标，并在那里对 x 做 if 边界检查；如果 x 越界，就可以完全跳过内层循环。再加上避免在内层循环中创建或索引结构体，就得到了下面的代码：

           for (int px = 0; px < 8; px++) {
                int p_x = imgx * 8 + px;
                if(p_x < bmp->size.x) {
                    for (int py = 0; py < 12; py++) {
                        int p_y = imgy * 12 + py;
                        if (p_y < bmp->size.y) {
                            auto pixel = bmp->pixels[p_x][p_y];
                            r += pixel.r;
                            g += pixel.g;
                            b += pixel.b;
                            totalRead++;
                        }
                    }
                }
            }

for（int-px=0；px<8；px++）{
int p_x=imgx*8+px；
如果（p_xsize.x）{
对于（int-py=0；py<12；py++）{
int p_y=imgy*12+py；
如果（p_ysize.y）{
自动像素=bmp->像素[p_x][p_y]；
r+=pixel.r；
 // Count the lit font pixels of each 8x12 glyph cell, walking every column
 // through a pointer instead of re-indexing bmp->pixels[x][y] per pixel.
 // NOTE(review): assumes the pixels of one column (pixels[x][0..size.y-1]) are
 // stored contiguously - confirm against the bitmap type.
 for (int x = 0; x < bmp->size.x; x++) {
      int charIndex_x = (x / 8);
      auto current_pixel = &bmp->pixels[x][0];
      int bmp_size_y = bmp->size.y;
      // Compare against the cached height (the original referenced the
      // undeclared name bmp_size.y)
      for (int y = 0; y < bmp_size_y; y++) {
           int charIndex = charIndex_x + (y / 12) * 16;
           // '->' is required here: '*current_pixel.r' parses as '*(current_pixel.r)'
           if (current_pixel->r == 255)
                 characterValues[charIndex]++;
           ++current_pixel;
      }
 }

bmp->pixels[p.x][p.y]

auto current_pixel = bmp->pixels[p.x][p.y];

           for (int px = 0; px < 8; px++) {
                int p_x = imgx * 8 + px;
                if(p_x < bmp->size.x) {
                    for (int py = 0; py < 12; py++) {
                        int p_y = imgy * 12 + py;
                        if (p_y < bmp->size.y) {
                            auto pixel = bmp->pixels[p_x][p_y];
                            r += pixel.r;
                            g += pixel.g;
                            b += pixel.b;
                            totalRead++;
                        }
                    }
                }
            }

for (int x = 0; x < bmp->size.x; x++) {
    for (int y = 0; y < bmp->size.y; y++) {

int charIndex = (x / 8) + (y / 12) * 16;