Opengl cudaFree-无效的设备指针错误_Opengl_Cuda

Opengl cudaFree-无效的设备指针错误

opengl cuda

Opengl cudaFree-无效的设备指针错误,opengl,cuda,Opengl,Cuda,我正在尝试释放我在CUDA+OpenGL互操作代码中分配的设备内存dev_inp。在进行错误检查时，我收到“无效设备指针”错误，程序在cudaFree(dev_inp)处停止执行，该调用位于我的renderScene()函数末尾。除此之外一切正常，但我担心内存泄漏。问题: a。为什么我无法释放已分配的本地设备内存？我已从像素缓冲区对象中取消映射cuda_resource，并取消注册了该资源。摘自CUDA C编程指南第B.17节：通过malloc()分配的内存无法使用运行时释放（即无法通过调用第3.2.2节中的任何内存释放函数来释放）。这

我正在尝试释放我在CUDA+OpenGL互操作代码中分配的设备内存

dev_inp

。在进行错误检查时，我收到

无效设备指针

错误，程序在

cudaFree(dev_inp) 处停止执行，该调用位于我的 renderScene()
函数末尾。除此之外一切正常，但我担心内存泄漏。
问题:
a。为什么我无法释放已分配的本地设备内存？我已从像素缓冲区对象中取消映射 cuda_resource
，并取消注册了该资源。
摘自 CUDA C 编程指南第 B.17 节：
通过 malloc() 分配的内存无法使用运行时释放（即无法通过调用第 3.2.2 节中的任何内存释放函数来释放）。

这就引出了另外两个问题：
b。我没有在内核中用 malloc
分配内存，因为我根本没有内核。因此，在这里使用 cudaFree
函数（技术上）应该可行，对吗？本地定义的指针所指向的内存应由程序员负责释放，还是 nvcc 编译器会在程序退出或超出局部作用域时负责释放？我不希望代码中出现内存泄漏，所以我觉得释放先前分配的内存会更安全。
c。在 renderScene()
函数末尾调用 cudaDeviceReset() 是否明智，以便销毁主 CUDA 上下文（根据 CUDA C 编程指南，其中的变量和指针也会一并销毁）？我看到 NVidia Visual Profiler 文档也提到了这一点。
不过当我调用它时，渲染似乎比平常慢。如果我能直接用 cudaFree
释放这里的内存就好了，但我似乎无法让它工作。
完整代码：
// Resolves an OpenGL extension entry point by name through wglGetProcAddress.
#define GET_PROC_ADDRESS( str ) wglGetProcAddress( str )

// OpenGL texture object name; written by glGenTextures in renderScene().
GLuint tex; 
// OpenGL pixel buffer object (PBO) name; written by glGenBuffers in renderScene().
GLuint pbo;
// CUDA handle for the PBO; filled in by cudaGraphicsGLRegisterBuffer in renderScene().
struct cudaGraphicsResource *cuda_resource;    

// Buffer-object entry points. They start out NULL and are resolved at run time
// with GET_PROC_ADDRESS inside renderScene(); they must not be called before that.
PFNGLBINDBUFFERARBPROC    glBindBuffer     = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers  = NULL;
PFNGLGENBUFFERSARBPROC    glGenBuffers     = NULL;
PFNGLBUFFERDATAARBPROC    glBufferData     = NULL;

// ==========================================================================================
// CUDA ERROR CHECKING CODE
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
   if (code != cudaSuccess) 
   {
      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
      if (abort) getchar();
   }
}

// ==========================================================================================

// Selects the CUDA device used for OpenGL interop: the device id returned by
// cutGetMaxGflopsDeviceId() is handed to cudaGLSetGLDevice(), and the returned
// status is checked through gpuErrchk.
void initCUDADevice()
{
    const int deviceId = cutGetMaxGflopsDeviceId();
    gpuErrchk(cudaGLSetGLDevice(deviceId));
}

// ==========================================================================================

// GLUT reshape callback (registered with glutReshapeFunc in main).
// Clears the color and depth buffers, loads the identity into whichever matrix
// is current, and makes the viewport cover the whole window.
//   w - new window width as passed by GLUT
//   h - new window height as passed by GLUT; a value of 0 is treated as 1
void changeSize(int w, int h) {

    //cudaDeviceReset();
    //initCUDADevice();

    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glLoadIdentity();

    // Clamp the height so the viewport below never gets a zero height.
    if (h == 0)
        h = 1;

    // An aspect ratio used to be computed here (in double precision) but was
    // never consumed, so the unused local has been removed.

    // Switch to the projection matrix; it is deliberately left unmodified
    // because the reset below is commented out.
    glMatrixMode(GL_PROJECTION);

    // Reset Matrix
    //glLoadIdentity();

    // Set the viewport to be the entire window
    glViewport(0, 0, w, h);

    // Get back to the modelview matrix
    glMatrixMode(GL_MODELVIEW);
}

// ==========================================================================================

void renderScene(void) {

    // Clear Color and Depth Buffers
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    // Reset transformations
    glLoadIdentity();

    // ====================================================================================
    // initiate GPU by setting it correctly 
    //initCUDADevice(); 

    // ====================================================================================
    // read the image that needs to be textured 

    Mat image, flipped;
    image = imread("K:/Ultrasound experiment images/PA_175.png", CV_LOAD_IMAGE_GRAYSCALE);   // Read the file from disk

    if(!image.data)                              // Check for invalid input
    {
        cout <<  "Could not open or find the image" << std::endl ;


    }

    cv::flip(image, flipped, 0);

    imshow("OpenCV - image", image);    // displays output

    // ====================================================================================
    // allocate the PBO, texture, and CUDA resource

    glBindBuffer    = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
    glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
    glGenBuffers    = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
    glBufferData    = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");

    // ====================================================================================
    // generate the pixel buffer object (PBO)

    // Generate a buffer ID called a PBO (Pixel Buffer Object)
    glGenBuffers(1, &pbo);

    // Make this the current UNPACK buffer (OpenGL is state-based)
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);

    // Allocate data for the buffer. 4-channel 8-bit image
    glBufferData(GL_PIXEL_UNPACK_BUFFER, sizeof(unsigned char) * flipped.rows * flipped.cols, NULL, GL_STREAM_DRAW);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

    gpuErrchk(cudaGraphicsGLRegisterBuffer(&cuda_resource, pbo, cudaGraphicsMapFlagsNone)); 

    // ====================================================================================
    // create the texture object 

    // enable 2D texturing
    glEnable(GL_TEXTURE_2D);

    // generate and bind the texture    
    glGenTextures(1, &tex);
    glBindTexture(GL_TEXTURE_2D, tex);

    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

    // put flipped.data at the end for cpu rendering 
    glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE,  image.cols, image.rows,  0, GL_LUMINANCE, GL_UNSIGNED_BYTE, 0 );

    // put tex at the end for cpu rendering 
    glBindTexture(GL_TEXTURE_2D, 0);

    // ====================================================================================
    // copy OpenCV flipped image data into the device pointer

    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

    unsigned char *dev_inp; 

    gpuErrchk( cudaMalloc((void**)&dev_inp, sizeof(unsigned char)*flipped.rows*flipped.cols) );

    gpuErrchk( cudaGraphicsMapResources(1, &cuda_resource, 0) );

    size_t size; 
    gpuErrchk( cudaGraphicsResourceGetMappedPointer((void **)&dev_inp, &size, cuda_resource) );

    gpuErrchk( cudaMemcpy(dev_inp, flipped.data, sizeof(unsigned char)*flipped.rows*flipped.cols, cudaMemcpyHostToDevice) );

    gpuErrchk( cudaGraphicsUnmapResources(1, &cuda_resource, 0) ); 

    // ====================================================================================
    // bind pbo and texture to render data now 

    glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo);
    //
    glBindTexture(GL_TEXTURE_2D, tex);

    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, flipped.cols, flipped.rows, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL);

    gpuErrchk( cudaGraphicsUnregisterResource(cuda_resource));
    gpuErrchk( cudaThreadSynchronize());

    //gpuErrchk(cudaFree(dev_inp));

    // ====================================================================================
    // map the texture coords to the vertex coords 

    glBegin(GL_QUADS);
    // Front Face
    glTexCoord2f(0.0f, 0.0f); glVertex3f(-1.0f, -1.0f,  1.0f);  // Bottom Left Of The Texture and Quad
    glTexCoord2f(1.0f, 0.0f); glVertex3f( 1.0f, -1.0f,  1.0f);  // Bottom Right Of The Texture and Quad
    glTexCoord2f(1.0f, 1.0f); glVertex3f( 1.0f,  1.0f,  1.0f);  // Top Right Of The Texture and Quad
    glTexCoord2f(0.0f, 1.0f); glVertex3f(-1.0f,  1.0f,  1.0f);  // Top Left Of The Texture and Quad

    glEnd();

    glFlush();  // force rendering

    glDisable(GL_TEXTURE_2D);

    //glutSwapBuffers();
    gpuErrchk(cudaFree(dev_inp));        // <--- Error here
    //cudaGraphicsUnregisterResource(cuda_resource);

}


// ==========================================================================================


// Program entry point: initialises GLUT, creates the window, installs the
// display and reshape callbacks and enters the GLUT event loop.
//   argc, argv - command line, forwarded to glutInit
// Returns 0 if the event loop ever returns.
int main(int argc, char **argv) {


    // init GLUT and create window
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_DEPTH | GLUT_RGB );
    glutInitWindowPosition(100,100);
    glutInitWindowSize(1024,256);
    glutCreateWindow("CUDA + OpenGL interop");


    // register callbacks
    glutDisplayFunc(renderScene);
    glutReshapeFunc(changeSize);
    //glutIdleFunc(renderScene);

    // enter GLUT event processing cycle
    glutMainLoop();

    // A non-zero exit status signals failure to the caller, so report success.
    return 0;
}

#define GET_PROC_ADDRESS( str ) wglGetProcAddress( str )
GLuint tex;
GLuint pbo;
struct cudaGraphicsResource *cuda_resource;
PFNGLBINDBUFFERARBPROC    glBindBuffer     = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers  = NULL;
PFNGLGENBUFFERSARBPROC    glGenBuffers     = NULL;
PFNGLBUFFERDATAARBPROC    glBufferData     = NULL;
// ==========================================================================================
// CUDA错误检查代码
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
   if (code != cudaSuccess)
   {
      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
      if (abort) getchar();
   }
}
// ==========================================================================================
void initCUDADevice() {
    gpuErrchk(cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ));
}
// ==========================================================================================
void changeSize(int w, int h) {
    //cudaDeviceReset();
    //initCUDADevice();
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glLoadIdentity();
    // 当窗口太短时，防止被零除
    // （您不能制作零宽度的窗口）。
    if (h == 0)
        h = 1;
    float ratio =  w * 1.0 / h;
    // 使用投影矩阵
    glMatrixMode(GL_PROJECTION);
    // 复位矩阵
    //glLoadIdentity();
    //// 将视口设置为整个窗口
    glViewport(0, 0, w, h);
    //// 回到模型视图
    glMatrixMode(GL_MODELVIEW);
}
// ==========================================================================================
void renderScene(void) {
    // 清除颜色和深度缓冲区
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    // 重置变换
    glLoadIdentity();
    // ====================================================================================
    // 通过正确设置来启动GPU
    //initCUDADevice();
    // ====================================================================================
    // 读取需要进行纹理处理的图像
    Mat image, flipped;
    image = imread("K:/Ultrasound experiment images/PA_175.png", CV_LOAD_IMAGE_GRAYSCALE);   // 从磁盘读取文件
    if(!image.data)                              // 检查输入是否无效
    {
        cout
（此处的代码副本被截断；完整代码见上文。）

这一行不是必需的，应该从代码中删除：
gpuErrchk( cudaMalloc((void**)&dev_inp, sizeof(unsigned char)*flipped.rows*flipped.cols) );

此行创建一块设备内存分配，并将指向该分配的指针赋给 dev_inp

问题出现在这里：
gpuErrchk( cudaGraphicsResourceGetMappedPointer((void **)&dev_inp, &size, cuda_resource) );

此行获取一个新指针，它指向另一块由 cuda_resource
对象派生出的不同分配，并将该指针放入 dev_inp
，覆盖了先前赋给它的指针（来自 cudaMalloc
）。此行获取的新指针已经有了底层的设备分配，此时不需要再单独/额外为其分配内存。
此时，如果您尝试释放 dev_inp
：
gpuErrchk(cudaFree(dev_inp));        // <--- Error here

您试图释放的并不是由 cudaMalloc 显式分配的内存，因此这是一次对无效设备指针的非法释放。既然上面的 cudaMalloc 调用应当删除，这意味着相应的 cudaFree
操作也应被取消：
gpuErrchk(cudaFree(dev_inp));        // <--- Error here

gpuErrchk(cudaFree(dev_inp));        // <--- Error here

评论：我明白了，现在说得通了，谢谢！但是，既然根据文档 cudaGraphicsResourceGetMappedPointer()
返回的是指向 cuda_resource 的指针，我可以释放那块内存吗？是否可以通过调用 cudaGraphicsUnregisterResource() 来释放内存
？
回复：您不需要释放这块内存。（在本例中）底层资源是 OpenGL 资源，您也不应该在 CUDA 代码中释放它，它应当由 OpenGL 管理。具体来说，请研究 cudaFree 的定义和行为：由于该指针（由 cudaGraphicsResourceGetMappedPointer
放置在 dev_inp
中的）不是通过调用 cudaMalloc
分配的，您无法通过调用 cudaFree 释放它。哦，
gpuErrchk(cudaFree(dev_inp));        // <--- Error here