Opengl cudaFree-无效的设备指针错误
标签:opengl, cuda
我正在尝试释放我在 CUDA + OpenGL 互操作代码中分配的设备内存 dev_inp。在进行错误检查时,我收到"无效的设备指针"(invalid device pointer)错误,程序在 renderScene() 函数末尾调用 cudaFree(dev_inp); 时停止执行。除此之外一切正常,但我担心内存泄漏。
问题:
a。为什么我无法释放已分配的本地设备内存?我从像素缓冲区对象中取消映射cuda_资源
,并取消注册该资源
从CUDA C编程指南的第B.17节:
通过(设备端)malloc() 分配的内存不能使用运行时 API 释放(即不能通过调用第 3.2.2 节中的任何释放内存的函数来释放)。
这就引出了另外两个问题:
b。我并没有在内核中用 malloc 分配内存,因为我根本没有内核。因此,使用 cudaFree 函数(技术上)应该可以在这里工作,对吗?分配给局部定义指针的内存是由程序员负责释放,还是由 nvcc 编译器在程序退出或指针超出局部作用域时负责释放?我不希望我的代码中出现内存泄漏,所以我觉得显式释放之前分配的内存会更安全。
c。在renderScene()
函数末尾调用cudaDeviceReset()
是否谨慎,以便销毁主CUDA上下文(以及其变量和指针,根据CUDA C编程指南)?我看到NVidia Visual Profiler文档也提到了这一点:
当我调用它时,渲染似乎比平常慢。如果我能简单地cudaFree
这里的内存就好了,但我似乎无法让它工作
完整代码:
// Resolves an OpenGL extension entry point by name through wglGetProcAddress().
#define GET_PROC_ADDRESS( str ) wglGetProcAddress( str )
// OpenGL texture name; filled in by glGenTextures() inside renderScene().
GLuint tex;
// OpenGL pixel buffer object (PBO) name; filled in by glGenBuffers() inside renderScene().
GLuint pbo;
// CUDA graphics-resource handle for the PBO; set by cudaGraphicsGLRegisterBuffer()
// inside renderScene().
struct cudaGraphicsResource *cuda_resource;
// Buffer-object entry points. They start out NULL and are assigned inside
// renderScene() via GET_PROC_ADDRESS before they are called.
PFNGLBINDBUFFERARBPROC glBindBuffer = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers = NULL;
PFNGLGENBUFFERSARBPROC glGenBuffers = NULL;
PFNGLBUFFERDATAARBPROC glBufferData = NULL;
// ==========================================================================================
// CUDA ERROR CHECKING CODE
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) getchar();
}
}
// ==========================================================================================
// Selects the CUDA device used for OpenGL interop: the id returned by
// cutGetMaxGflopsDeviceId() is passed to cudaGLSetGLDevice(), and the
// resulting status is checked with gpuErrchk.
void initCUDADevice() {
    const int deviceId = cutGetMaxGflopsDeviceId();
    gpuErrchk(cudaGLSetGLDevice(deviceId));
}
// ==========================================================================================
// GLUT reshape callback: clears the buffers, resets the current matrix and
// sets the viewport to cover the whole w x h window.
//
//   w - new window width in pixels
//   h - new window height in pixels (0 is treated as 1)
void changeSize(int w, int h) {
    //cudaDeviceReset();
    //initCUDADevice();
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glLoadIdentity();
    // Clamp a zero height to 1 so the viewport is never zero pixels tall.
    // (The previously computed aspect ratio was never used and has been removed.)
    if (h == 0)
        h = 1;
    // Switch to the projection matrix; it is intentionally left untouched here.
    glMatrixMode(GL_PROJECTION);
    //glLoadIdentity();
    // Set the viewport to be the entire window
    glViewport(0, 0, w, h);
    // Get back to the modelview matrix
    glMatrixMode(GL_MODELVIEW);
}
// ==========================================================================================
void renderScene(void) {
// Clear Color and Depth Buffers
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
// Reset transformations
glLoadIdentity();
// ====================================================================================
// initiate GPU by setting it correctly
//initCUDADevice();
// ====================================================================================
// read the image that needs to be textured
Mat image, flipped;
image = imread("K:/Ultrasound experiment images/PA_175.png", CV_LOAD_IMAGE_GRAYSCALE); // Read the file from disk
if(!image.data) // Check for invalid input
{
cout << "Could not open or find the image" << std::endl ;
}
cv::flip(image, flipped, 0);
imshow("OpenCV - image", image); // displays output
// ====================================================================================
// allocate the PBO, texture, and CUDA resource
glBindBuffer = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
glGenBuffers = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
glBufferData = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");
// ====================================================================================
// generate the pixel buffer object (PBO)
// Generate a buffer ID called a PBO (Pixel Buffer Object)
glGenBuffers(1, &pbo);
// Make this the current UNPACK buffer (OpenGL is state-based)
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);
// Allocate data for the buffer. 4-channel 8-bit image
glBufferData(GL_PIXEL_UNPACK_BUFFER, sizeof(unsigned char) * flipped.rows * flipped.cols, NULL, GL_STREAM_DRAW);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
gpuErrchk(cudaGraphicsGLRegisterBuffer(&cuda_resource, pbo, cudaGraphicsMapFlagsNone));
// ====================================================================================
// create the texture object
// enable 2D texturing
glEnable(GL_TEXTURE_2D);
// generate and bind the texture
glGenTextures(1, &tex);
glBindTexture(GL_TEXTURE_2D, tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
// put flipped.data at the end for cpu rendering
glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, image.cols, image.rows, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, 0 );
// put tex at the end for cpu rendering
glBindTexture(GL_TEXTURE_2D, 0);
// ====================================================================================
// copy OpenCV flipped image data into the device pointer
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
unsigned char *dev_inp;
gpuErrchk( cudaMalloc((void**)&dev_inp, sizeof(unsigned char)*flipped.rows*flipped.cols) );
gpuErrchk( cudaGraphicsMapResources(1, &cuda_resource, 0) );
size_t size;
gpuErrchk( cudaGraphicsResourceGetMappedPointer((void **)&dev_inp, &size, cuda_resource) );
gpuErrchk( cudaMemcpy(dev_inp, flipped.data, sizeof(unsigned char)*flipped.rows*flipped.cols, cudaMemcpyHostToDevice) );
gpuErrchk( cudaGraphicsUnmapResources(1, &cuda_resource, 0) );
// ====================================================================================
// bind pbo and texture to render data now
glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo);
//
glBindTexture(GL_TEXTURE_2D, tex);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, flipped.cols, flipped.rows, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL);
gpuErrchk( cudaGraphicsUnregisterResource(cuda_resource));
gpuErrchk( cudaThreadSynchronize());
//gpuErrchk(cudaFree(dev_inp));
// ====================================================================================
// map the texture coords to the vertex coords
glBegin(GL_QUADS);
// Front Face
glTexCoord2f(0.0f, 0.0f); glVertex3f(-1.0f, -1.0f, 1.0f); // Bottom Left Of The Texture and Quad
glTexCoord2f(1.0f, 0.0f); glVertex3f( 1.0f, -1.0f, 1.0f); // Bottom Right Of The Texture and Quad
glTexCoord2f(1.0f, 1.0f); glVertex3f( 1.0f, 1.0f, 1.0f); // Top Right Of The Texture and Quad
glTexCoord2f(0.0f, 1.0f); glVertex3f(-1.0f, 1.0f, 1.0f); // Top Left Of The Texture and Quad
glEnd();
glFlush(); // force rendering
glDisable(GL_TEXTURE_2D);
//glutSwapBuffers();
gpuErrchk(cudaFree(dev_inp)); // <--- Error here
//cudaGraphicsUnregisterResource(cuda_resource);
}
// ==========================================================================================
// Program entry point: creates the GLUT window, registers the display and
// reshape callbacks and enters the GLUT event loop.
//
// Returns 0 (success) if glutMainLoop() ever returns.
int main(int argc, char **argv) {
    // init GLUT and create window
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_DEPTH | GLUT_RGB );
    glutInitWindowPosition(100,100);
    glutInitWindowSize(1024,256);
    glutCreateWindow("CUDA + OpenGL interop");
    // register callbacks
    glutDisplayFunc(renderScene);
    glutReshapeFunc(changeSize);
    //glutIdleFunc(renderScene);
    // enter GLUT event processing cycle
    glutMainLoop();
    // A non-zero exit status would report failure to the caller.
    return 0;
}
#define GET_PROC_ADDRESS( str ) wglGetProcAddress( str )
GLuint tex;
GLuint pbo;
struct cudaGraphicsResource *cuda_resource;
PFNGLBINDBUFFERARBPROC glBindBuffer = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers = NULL;
PFNGLGENBUFFERSARBPROC glGenBuffers = NULL;
PFNGLBUFFERDATAARBPROC glBufferData = NULL;
// ==========================================================================================
// CUDA ERROR CHECKING CODE
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) getchar();
}
}
// ==========================================================================================
void initCUDADevice() {
gpuErrchk(cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ));
}
// ==========================================================================================
void changeSize(int w, int h) {
//cudaDeviceReset();
//initCUDADevice();
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glLoadIdentity();
// Prevent a divide by zero, when window is too short
// (you cant make a window of zero width).
if (h == 0)
h = 1;
float ratio = w * 1.0 / h;
// Use the Projection Matrix
glMatrixMode(GL_PROJECTION);
// Reset Matrix
//glLoadIdentity();
//// Set the viewport to be the entire window
glViewport(0, 0, w, h);
//// Get Back to the Modelview
glMatrixMode(GL_MODELVIEW);
}
// ==========================================================================================
void renderScene(void) {
// Clear Color and Depth Buffers
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
// Reset transformations
glLoadIdentity();
// ====================================================================================
// initiate GPU by setting it correctly
//initCUDADevice();
// ====================================================================================
// read the image that needs to be textured
Mat image, flipped;
image = imread("K:/Ultrasound experiment images/PA_175.png", CV_LOAD_IMAGE_GRAYSCALE); // Read the file from disk
if(!image.data) // Check for invalid input
{
cout << "Could not open or find the image" << std::endl ;
…(重复的代码清单在此处被截断,完整代码见上文)
回答:下面这一行不是必需的,应该从代码中删除:
gpuErrchk( cudaMalloc((void**)&dev_inp, sizeof(unsigned char)*flipped.rows*flipped.cols) );
此行创建一块设备分配,并将指向该分配的指针赋给 dev_inp。
问题出现在这里:
gpuErrchk( cudaGraphicsResourceGetMappedPointer((void **)&dev_inp, &size, cuda_resource) );
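此行从 cuda_resource 对象获取一个新指针,它指向另一块不同的分配,并把该指针放入 dev_inp,覆盖了先前(来自 cudaMalloc)分配的指针。此行获取的新指针已经有底层的设备分配,此时不需要再单独/额外为它分配内存。
此时,如果您尝试释放 dev_inp(如下所示),您释放的并不是程序通过 cudaMalloc 显式分配的内存,而是属于互操作资源、不应以这种方式释放的内存,因此会得到"无效的设备指针"错误;而最初由 cudaMalloc 分配的那块内存的指针已经丢失,造成泄漏: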
gpuErrchk(cudaFree(dev_inp)); // <--- Error here
这意味着相应的cudaFree
操作也应被取消:
gpuErrchk(cudaFree(dev_inp)); // <--- Error here
gpuErrchk(cudaFree(dev_inp));
评论:我明白了,现在它有意义了,谢谢!但是,既然根据文档 cudaGraphicsResourceGetMappedPointer() 返回的是指向 cuda_resource 的指针,我可以释放这块内存吗?是否可以通过调用 cudaGraphicsUnregisterResource() 来释放它?
回复:您不应该去释放这块内存。(在本例中)底层资源是 OpenGL 资源,您也不应该在 CUDA 代码中释放它,它应该由 OpenGL 负责管理。具体来说,请研究相关函数的定义和行为(原文此处的链接已丢失)。由于这个指针(由 cudaGraphicsResourceGetMappedPointer 放入 dev_inp 中)不是通过调用 cudaMalloc 分配的,您无法通过调用 cudaFree 来释放它。哦,
gpuErrchk(cudaFree(dev_inp)); // <--- Error here