CUDA写入常量内存错误值

CUDA写入常量内存错误值,cuda,gpu-constant-memory,Cuda,Gpu Constant Memory,我有以下代码要从主机变量复制到CUDA中的\uuuuu常量\uuuuu变量 int main(int argc, char **argv){ int exit_code; if (argc < 4) { std::cout << "Usage: \n " << argv[0] << " <input> <output> <nColors>" << std::endl;

我有以下代码要从主机变量复制到CUDA中的
\uuuuu常量\uuuuu
变量

int main(int argc, char **argv){

    int exit_code;

    if (argc < 4) {
        std::cout << "Usage: \n " << argv[0] << " <input> <output> <nColors>" << std::endl;
        return 1;
    }

    Color *h_input;
    int h_rows, h_cols;

    timer1.Start();
    exit_code = readText2RGB(argv[1], &h_input, &h_rows, &h_cols);
    timer1.Stop();
    std::cout << "Reading: " << timer1.Elapsed() << std::endl;

    if (exit_code != SUCCESS){
        std::cout << "Error trying to read file." << std::endl;
        return FAILURE;
    }

    CpuTimer timer1;
    GpuTimer timer2;
    float timeStep2 = 0, timeStep3 = 0;

    int h_numColors = atoi(argv[3]);

    int h_change = 0;
    int *h_pixelGroup = new int[h_rows*h_cols];
    Color *h_groupRep = new Color[h_numColors];
    Color *h_output = new Color[h_rows*h_cols];

    Color *d_input;
    int *d_pixelGroup;
    Color *d_groupRep;
    Color *d_output;

    dim3 block(B_WIDTH, B_HEIGHT);
    dim3 grid((h_cols+B_WIDTH-1)/B_WIDTH, (h_rows+B_HEIGHT-1)/B_HEIGHT);

    checkCudaError(cudaMalloc((void**)&d_input, sizeof(Color)*h_rows*h_cols));
    checkCudaError(cudaMalloc((void**)&d_pixelGroup, sizeof(int)*h_rows*h_cols));
    checkCudaError(cudaMalloc((void**)&d_groupRep, sizeof(Color)*h_numColors));
    checkCudaError(cudaMalloc((void**)&d_output, sizeof(Color)*h_rows*h_cols));

    //       STEP 1
    //Evenly distribute all pixels of the image onto the color set
    timer2.Start();
    checkCudaError(cudaMemcpyToSymbol(c_rows, &h_rows, sizeof(int)));
    checkCudaError(cudaMemcpyToSymbol(c_cols, &h_cols, sizeof(int)));
    checkCudaError(cudaMemcpyToSymbol(c_numColors, &h_numColors, sizeof(int)));
    checkCudaError(cudaMemcpy(d_input, h_input, sizeof(Color)*h_rows*h_cols, cudaMemcpyHostToDevice));

    clut_distributePixels<<<grid, block>>>(d_pixelGroup);
    checkCudaError(cudaMemcpy(h_pixelGroup, d_pixelGroup, sizeof(int)*h_rows*h_cols, cudaMemcpyDeviceToHost));
    timer2.Stop();
    std::cout << "Phase 1: " << timer2.Elapsed() << std::endl;

    std::cout << h_pixelGroup[0] << ","
                << h_pixelGroup[3] << ","
                << h_pixelGroup[4] << ","
                << h_pixelGroup[7] << ","
                << h_pixelGroup[8] << std::endl;

    //Do the STEP 2 and STEP 3 as long as there is at least one change of representative in a group
    do {
        //      STEP 2
        //Set the representative value to the average colour of all pixels in the same set
        timer1.Start();
        for (int ng = 0; ng < h_numColors; ng++) {
            int r = 0, g = 0, b = 0;
            int elem = 0;
            for (int i = 0; i < h_rows; i++) {
                for (int j = 0; j < h_cols; j++) {
                    if (h_pixelGroup[i*h_cols+j] == ng) {
                        r += h_input[i*h_cols+j].r;
                        g += h_input[i*h_cols+j].g;
                        b += h_input[i*h_cols+j].b;
                        elem++;
                    }
                }
            }
            if (elem == 0) {
                h_groupRep[ng].r = 255;
                h_groupRep[ng].g = 255;
                h_groupRep[ng].b = 255;
            }else{
                h_groupRep[ng].r = r/elem;
                h_groupRep[ng].g = g/elem;
                h_groupRep[ng].b = b/elem;
            }
        }
        timer1.Stop();
        timeStep2 += timer1.Elapsed();

        //      STEP 3
        //For each pixel in the image, compute Euclidean's distance to each representative
        //and assign it to the set which is closest
        h_change = 0;

        timer2.Start();
        checkCudaError(cudaMemcpyToSymbol(d_change, &h_change, sizeof(int)));
        checkCudaError(cudaMemcpy(d_groupRep, h_groupRep, sizeof(Color)*h_numColors, cudaMemcpyHostToDevice));

        clut_checkDistances<<<grid, block>>>(d_input, d_pixelGroup, d_groupRep);
        checkCudaError(cudaMemcpy(h_pixelGroup, d_pixelGroup, sizeof(int)*h_rows*h_cols, cudaMemcpyDeviceToHost));
        checkCudaError(cudaMemcpyFromSymbol(&h_change, d_change, sizeof(int)));
        timer2.Stop();
        timeStep3 += timer2.Elapsed();

        std::cout << "Chunche" << std::endl;

    } while (h_change == 1);

    std::cout << "Phase 2: " << timeStep2 << std::endl;
    std::cout << "Phase 3: " << timeStep3 << std::endl;

    //      STEP 4
    //Create the new image with the resulting color lookup table
    timer2.Start();
    clut_createImage<<<grid, block>>>(d_output, d_pixelGroup, d_groupRep);
    checkCudaError(cudaMemcpy(h_output, d_output, sizeof(Color)*h_rows*h_cols, cudaMemcpyDeviceToHost));
    timer2.Stop();
    std::cout << "Phase 4: " << timer2.Elapsed() << std::endl;

    checkCudaError(cudaFree(d_input));
    checkCudaError(cudaFree(d_pixelGroup));
    checkCudaError(cudaFree(d_groupRep));
    checkCudaError(cudaFree(d_output));

    timer1.Start();
    exit_code = writeRGB2Text(argv[2], h_input, h_rows, h_cols);
    timer1.Stop();
    std::cout << "Writing: " << timer1.Elapsed() << std::endl;

    delete[] h_pixelGroup;
    delete[] h_groupRep;
    delete[] h_output;

    return SUCCESS;
}
Imageproc.cu

#include "Imageproc.cuh"
int main(){
  int h_change = 0;
  int h_rows = 512;
  cudaMemcpyToSymbol(c_rows, &h_rows, sizeof(int));
  chunche<<<1,1>>>();
  cudaMemcpyFromSymbol(&h_change, d_change, sizeof(int));

  std::cout << "H = " << h_change << std::endl;
  return 0
}
#include "Imageproc.cuh"

__global__
void chunche(){
    d_change = c_rows + 1;
}

__global__
void clut_distributePixels(int *pixelGroup){
    int i = blockDim.y * blockIdx.y + threadIdx.y;
    int j = blockDim.x * blockIdx.x + threadIdx.x;

    while (i < c_rows) {
        while (j < c_cols) {
            pixelGroup[i*c_cols+j] = (i*c_cols+j)/c_numColors;
            j += gridDim.x * blockDim.x;
        }
        j = blockDim.x * blockIdx.x + threadIdx.x;
        i += gridDim.y * blockDim.y;
    }

}

__global__
void clut_checkDistances(Color *input, int *pixelGroup, Color *groupRep){
    int i = blockDim.y * blockIdx.y + threadIdx.y;
    int j = blockDim.x * blockIdx.x + threadIdx.x;
    int newGroup;

    while (i < c_rows) {
        while (j < c_cols) {
            newGroup = 0;
            for (int ng = 1; ng < c_numColors; ng++) {
                if (
                    /*If distance from color to group ng is less than distance from color to group idx
                     then color should belong to ng*/
                    (groupRep[ng].r-input[i*c_cols+j].r)*(groupRep[ng].r-input[i*c_cols+j].r) +
                    (groupRep[ng].g-input[i*c_cols+j].g)*(groupRep[ng].g-input[i*c_cols+j].g) +
                    (groupRep[ng].b-input[i*c_cols+j].b)*(groupRep[ng].b-input[i*c_cols+j].b)
                    <
                    (groupRep[newGroup].r-input[i*c_cols+j].r)*(groupRep[newGroup].r-input[i*c_cols+j].r)+
                    (groupRep[newGroup].g-input[i*c_cols+j].g)*(groupRep[newGroup].g-input[i*c_cols+j].g)+
                    (groupRep[newGroup].b-input[i*c_cols+j].b)*(groupRep[newGroup].b-input[i*c_cols+j].b)
                    )
                {
                    newGroup = ng;
                }
            }

            if (pixelGroup[i*c_cols+j] != newGroup) {
                pixelGroup[i*c_cols+j] = newGroup;
                d_change = 1;
            }

            j += gridDim.x * blockDim.x;
        }
        j = blockDim.x * blockIdx.x + threadIdx.x;
        i += gridDim.y * blockDim.y;
    }

}

__global__
void clut_createImage(Color *clutImage, int *pixelGroup, Color *groupRep){
    int i = blockDim.y * blockIdx.y + threadIdx.y;
    int j = blockDim.x * blockIdx.x + threadIdx.x;

    while (i < c_rows) {
        while (j < c_cols) {
            clutImage[i*c_cols+j].r = groupRep[pixelGroup[i*c_cols+j]].r;
            clutImage[i*c_cols+j].g = groupRep[pixelGroup[i*c_cols+j]].g;
            clutImage[i*c_cols+j].b = groupRep[pixelGroup[i*c_cols+j]].b;
            j += gridDim.x * blockDim.x;
        }
        j = blockDim.x * blockIdx.x + threadIdx.x;
        i += gridDim.y * blockDim.y;
    }
}
#include "Utilities.cuh"

int writeGrayscale2Text(const std::string filename, const Grayscale *image, const int rows, const int cols){    
    std::ofstream fileWriter(filename.c_str());
    if (!fileWriter.is_open()) {
        std::cerr << "** writeGrayscale2Text() ** : Unable to open file." << std::endl;
        return FAILURE;
    }
    fileWriter << rows << "\n";
    fileWriter << cols << "\n";
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            fileWriter << (int)image[i*cols+j] << "\n";
        }
    }
    fileWriter.close();
    return SUCCESS;
}

int readText2Grayscale(const std::string filename, Grayscale **image, int *rows, int *cols){
    std::ifstream fileReader(filename.c_str());
    if (!fileReader.is_open()) {
        std::cerr << "** readText2Grayscale() ** : Unable to open file." << std::endl;
        return FAILURE;
    }
    fileReader >> *rows;
    fileReader >> *cols;
    *image = new Grayscale[(*rows)*(*cols)];
    int value;
    for (int i = 0; i < *rows; i++) {
        for (int j = 0; j < *cols; j++) {
            fileReader >> value;
            (*image)[i*(*cols)+j] = (Grayscale)value;
        }
    }
    fileReader.close();
    return SUCCESS;
}

int writeRGB2Text(const std::string filename, const Color *image, const int rows, const int cols){
    std::ofstream fileWriter(filename.c_str());
    if (!fileWriter.is_open()) {
        std::cerr << "** writeRGB2Text() ** : Unable to open file." << std::endl;
        return FAILURE;
    }
    fileWriter << rows << "\n";
    fileWriter << cols << "\n";
    for (int k = 0; k < 3; k++) {
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                switch (k) {
                    case 0:
                        fileWriter << image[i*cols+j].r << "\n";
                        break;
                    case 1:
                        fileWriter << image[i*cols+j].g << "\n";
                        break;
                    case 2:
                        fileWriter << image[i*cols+j].b << "\n";
                        break;
                }
            }
        }
    }
    fileWriter.close();
    return SUCCESS;
}

int readText2RGB(const std::string filename, Color **image, int *rows, int *cols){
    std::ifstream fileReader(filename.c_str());
    if (!fileReader.is_open()) {
        std::cerr << "** readText2Grayscale() ** : Unable to open file." << std::endl;
        return FAILURE;
    }
    fileReader >> *rows;
    fileReader >> *cols;
    *image = new Color[(*rows)*(*cols)];
    for (int k = 0; k < 3; k++) {
        for (int i = 0; i < *rows; i++) {
            for (int j = 0; j < *cols; j++) {
                switch (k) {
                    case 0:
                        fileReader >> (*image)[i*(*cols)+j].r;
                        break;
                    case 1:
                        fileReader >> (*image)[i*(*cols)+j].g;
                        break;
                    case 2:
                        fileReader >> (*image)[i*(*cols)+j].b;
                        break;
                }
            }
        }
    }
    fileReader.close();
    return SUCCESS;
}
#包括“Imageproc.cuh”
__全球的__
void chunche(){
d_change=c_行+1;
}
__全球的__
void clut_分布像素(int*像素组){
int i=blockDim.y*blockIdx.y+threadIdx.y;
int j=blockDim.x*blockIdx.x+threadIdx.x;
而(i
实用程序。cuh

#ifndef _IMAGEPROC_CUH_
#define _IMAGEPROC_CUH_

#include "Utilities.cuh"

#define B_WIDTH     16
#define B_HEIGHT    16

__constant__ int c_rows;
__constant__ int c_cols;
__constant__ int c_numColors;

__device__ int d_change;

    #ifdef __cplusplus
        extern "C"
        {
    #endif
        __global__
        void chunche();
        __global__
        void clut_distributePixels(int *pixelGroup);
        __global__
        void clut_checkDistances(Color *input, int *pixelGroup, Color *groupRep);
        __global__
        void clut_createImage(Color *clutImage, int *pixelGroup, Color *groupRep);
    #ifdef __cplusplus
        }
    #endif

#endif
#ifndef _UTILITIES_CUH_
#define _UTILITIES_CUH_

#include <iostream>
#include <fstream>
#include <string>

#define SUCCESS     1
#define FAILURE     0

#define checkCudaError(val) check( (val), #val, __FILE__, __LINE__)

typedef struct {
    int r;
    int g;
    int b;
} vec3u;

typedef vec3u Color;
typedef unsigned char uchar;
typedef uchar Grayscale;

struct GpuTimer{
    cudaEvent_t start;
    cudaEvent_t stop;
    GpuTimer(){
        cudaEventCreate(&start);
        cudaEventCreate(&stop);
    }
    ~GpuTimer(){
        cudaEventDestroy(start);
        cudaEventDestroy(stop);
    }
    void Start(){
        cudaEventRecord(start, 0);
    }
    void Stop(){
        cudaEventRecord(stop, 0);
    }
    float Elapsed(){
        float elapsed;
        cudaEventSynchronize(stop);
        cudaEventElapsedTime(&elapsed, start, stop);
        return elapsed;
    }
};

template<typename T>
void check(T err, const char* const func, const char* const file, const int line) {
    if (err != cudaSuccess) {
        std::cerr << "CUDA error at: " << file << ":" << line << std::endl;
        std::cerr << cudaGetErrorString(err) << " " << func << std::endl;
        exit(1);
    }
}

int writeGrayscale2Text(const std::string filename, const Grayscale *image, const int rows, const int cols);
int readText2Grayscale(const std::string filename, Grayscale **image, int *rows, int *cols);

int writeRGB2Text(const std::string filename, const Color *image, const int rows, const int cols);
int readText2RGB(const std::string filename, Color **image, int *rows, int *cols);

struct CpuTimer{
    clock_t start;
    clock_t stop;
    void Start(){
        start = clock();
    }
    void Stop(){
        stop = clock();
    }
    float Elapsed(){
        return ((float)stop-start)/CLOCKS_PER_SEC * 1000.0f;
    }
};

#endif
\ifndef\u实用程序\u CUH_
#定义实用程序_
#包括
#包括
#包括
#定义成功1
#定义故障0
#定义checkCudaError(val)check((val)、#val、_文件、_行)
类型定义结构{
INTR;
int g;
int b;
}vec3u;
typedef-vec3u颜色;
typedef无符号字符;
灰度类型;
结构GpuTimer{
cudaEvent\u t启动;
不要停下来;
GpuTimer(){
cudaEventCreate(&start);
cudaEventCreate(&stop);
}
~GpuTimer(){
cudaEventDestroy(启动);
cudaEventDestroy(停止);
}
void Start(){
cudaEventRecord(开始,0);
}
无效停止(){
cudaEventRecord(停止,0);
}
浮点数(){
浮子过去了;
CUDAEVENTS同步(停止);
CUDAEventReleasedTime(已用、开始、停止(&E));
返回时间已过;
}
};
模板
无效检查(错误,常量字符*常量函数,常量字符*常量文件,常量整型行){
if(err!=cudaSuccess){

std::cerr恒定记忆具有隐式局部范围联系-。 这意味着
cudaMemcpyToSymbol
必须位于要使用它的内核的同一个生成的.obj文件中。 您可以在
Main.cu
中执行memcopy,但使用canstant内存的内核位于
Imageproc.cu
中。因此,内核
chunche
的常量值未知


解决问题的一个选项是,实现一个包装器。只需在
Imagepro.cu
中添加一个函数,在其中执行
cudaMemcpyToSymbol
并在
Main.cu
中调用包装器,并在其中传递常量内存的所需值。

常量内存具有隐式局部范围链接-。 这意味着
cudaMemcpyToSymbol
必须位于要使用它的内核的同一个生成的.obj文件中。 您可以在
Main.cu
中执行memcopy,但使用canstant内存的内核位于
Imageproc.cu
中。因此,内核
chunche
的常量值未知


解决问题的一个方法是,实现一个包装器。只需在
Imagepro.cu
中添加一个函数,在
cudamemcytombol
中执行
cudamemcyptosymbol
并在
Main.cu
中调用包装器,并在其中传递所需的常量内存值。

c_行的位置和方式是
c_cols
c_numCo>LOR
d\u change
声明?它们位于包含在主函数所在位置的头文件中。声明是
\uuuu constant\uuuuuu int c\u rows
\uuuu constant\uuuuu int c\u cols
实际上,问题不是在编译时而是在运行时。似乎找不到任何问题
cudaMemcpyToSymbol
。这些
\uuuu常量\uuuuu
整数是否用于任何其他内核?我似乎也没有发现任何错误。是的,此
\uuu常量\uuuuu
值用于其他内核。检查
readText2RGB
是否在
h\u行
h\u cols
中返回正确的值
#include "Utilities.cuh"

int writeGrayscale2Text(const std::string filename, const Grayscale *image, const int rows, const int cols){    
    std::ofstream fileWriter(filename.c_str());
    if (!fileWriter.is_open()) {
        std::cerr << "** writeGrayscale2Text() ** : Unable to open file." << std::endl;
        return FAILURE;
    }
    fileWriter << rows << "\n";
    fileWriter << cols << "\n";
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            fileWriter << (int)image[i*cols+j] << "\n";
        }
    }
    fileWriter.close();
    return SUCCESS;
}

int readText2Grayscale(const std::string filename, Grayscale **image, int *rows, int *cols){
    std::ifstream fileReader(filename.c_str());
    if (!fileReader.is_open()) {
        std::cerr << "** readText2Grayscale() ** : Unable to open file." << std::endl;
        return FAILURE;
    }
    fileReader >> *rows;
    fileReader >> *cols;
    *image = new Grayscale[(*rows)*(*cols)];
    int value;
    for (int i = 0; i < *rows; i++) {
        for (int j = 0; j < *cols; j++) {
            fileReader >> value;
            (*image)[i*(*cols)+j] = (Grayscale)value;
        }
    }
    fileReader.close();
    return SUCCESS;
}

int writeRGB2Text(const std::string filename, const Color *image, const int rows, const int cols){
    std::ofstream fileWriter(filename.c_str());
    if (!fileWriter.is_open()) {
        std::cerr << "** writeRGB2Text() ** : Unable to open file." << std::endl;
        return FAILURE;
    }
    fileWriter << rows << "\n";
    fileWriter << cols << "\n";
    for (int k = 0; k < 3; k++) {
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                switch (k) {
                    case 0:
                        fileWriter << image[i*cols+j].r << "\n";
                        break;
                    case 1:
                        fileWriter << image[i*cols+j].g << "\n";
                        break;
                    case 2:
                        fileWriter << image[i*cols+j].b << "\n";
                        break;
                }
            }
        }
    }
    fileWriter.close();
    return SUCCESS;
}

int readText2RGB(const std::string filename, Color **image, int *rows, int *cols){
    std::ifstream fileReader(filename.c_str());
    if (!fileReader.is_open()) {
        std::cerr << "** readText2Grayscale() ** : Unable to open file." << std::endl;
        return FAILURE;
    }
    fileReader >> *rows;
    fileReader >> *cols;
    *image = new Color[(*rows)*(*cols)];
    for (int k = 0; k < 3; k++) {
        for (int i = 0; i < *rows; i++) {
            for (int j = 0; j < *cols; j++) {
                switch (k) {
                    case 0:
                        fileReader >> (*image)[i*(*cols)+j].r;
                        break;
                    case 1:
                        fileReader >> (*image)[i*(*cols)+j].g;
                        break;
                    case 2:
                        fileReader >> (*image)[i*(*cols)+j].b;
                        break;
                }
            }
        }
    }
    fileReader.close();
    return SUCCESS;
}