Memory leaks AMD CodeXL:检测到由 clEnqueueNDRangeKernel 创建的 [Ref=1] 对象存在 OpenCL 内存泄漏

Memory leaks AMD CodeXL:检测到由 clEnqueueNDRangeKernel 创建的 [Ref=1] 对象存在 OpenCL 内存泄漏,memory-leaks,opencl,gpu,amd-processor,Memory Leaks,Opencl,Gpu,Amd Processor,我不知道为什么一直收到这个警告:我已经把代码读了很多遍,仍然找不出它的来源,因为我很确定(但显然并非如此)已经释放了所有内存。希望有比我更了解的人可以看看我的代码,并指出这种情况发生在哪里以及原因 谢谢 int runKernel( Image *anImage, PixelPacket *imagePixels, MagickSizeType imageSizeBytes, const char *k

我不知道为什么我一直收到这个警告,因为我已经阅读了很多次代码,我不知道这是从哪里来的,因为我很确定(但显然不是)释放了所有的内存。希望有人知道的比我多,可以看看我的代码,并指出发生这种情况的地方和原因

谢谢

int runKernel( Image *anImage, 
              PixelPacket *imagePixels, 
              MagickSizeType imageSizeBytes, 
              const char *kernelSource )
{

    cl_context myContext ;
    cl_command_queue myQueue ;
    cl_mem *outputImage ;
    cl_event clEvent ;
    int bitsPerChannel = anImage[0].depth ; 
    int width = anImage[0].columns ;
    int height = anImage[0].rows ;



    /****************************
    Setup the Opencl environment
    ****************************/


    // Use this to check the output of each API call
    cl_int status ;

    // Retrieve the number of platforms
    cl_uint numPlatforms = 0 ;
    status = clGetPlatformIDs( 0, NULL, &numPlatforms ) ;

    // Allocate enough space for each platform
    cl_platform_id *platforms = NULL ;
    platforms = (cl_platform_id *) malloc( numPlatforms * sizeof(cl_platform_id) ) ;

    // Fill in the platforms
    status = clGetPlatformIDs( numPlatforms, platforms, NULL ) ;

    // Retrieve the number of devices for the 1st platform
    cl_uint numDevices = 0 ;
    status = clGetDeviceIDs( platforms[0], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices ) ;

    // Allocate enough space for each device
    cl_device_id *devices ;
    devices = (cl_device_id *) malloc( numDevices * sizeof(cl_device_id) ) ;

    // Fill in the devices
    status = clGetDeviceIDs( platforms[0], CL_DEVICE_TYPE_ALL, numDevices, 
                devices, NULL ) ;

    // Create the context
    myContext = clCreateContext( NULL, numDevices, devices, NULL, NULL, &status ) ;

    // Create the command queue with the 1st device
    myQueue = clCreateCommandQueue( myContext, devices[0], 0, &status ) ;



    /****************************
    Create Images and Move Data
    ****************************/



    // Set format and descriptor to proper values according to image type
    cl_image_format *image_format = NULL ;
    cl_image_desc *image_desc = NULL ;
    get_cl_image_format( anImage, &image_desc, &image_format ) ;

    // Create the image sampler
    cl_sampler clSampler = clCreateSampler(
                            myContext,
                            CL_FALSE, //use pixel based addressing not normalized
                            CL_ADDRESS_CLAMP_TO_EDGE, // set equal to the pixel at the edge of the image
                            CL_FILTER_NEAREST, 
                            &status);

    // Set input Image region parameters
    size_t origin[3] = {0, 0, 0} ; // Offset within the image to copy from
    size_t region[3] = {width, height, 1} ; // Elements per dimension for 2d image 


    // Create cl memory object for the input image
    cl_mem_flags flagsRead = CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR ;

    cl_mem clInput = clCreateImage( myContext, flagsRead, 
                                ( const cl_image_format *)image_format, 
                                ( const cl_image_desc *)image_desc, 
                                imagePixels, 
                                &status ) ;


    // Allocate space for output image and create cl memory object 
    float *outputPixels =  (float *) malloc( imageSizeBytes ) ;
    cl_mem_flags flagsWrite = CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR ;

    cl_mem clOutput = clCreateImage( myContext, flagsWrite, 
                                    (const cl_image_format *)image_format, 
                                    ( const cl_image_desc *)image_desc, 
                                    outputPixels, 
                                    &status ) ;


    //Copy input image to the device
    status = clEnqueueWriteImage( myQueue, clInput, CL_FALSE, origin, region,
                                0, 0, imagePixels, 0, NULL, NULL ) ;



    /*****************************
    Compile the kernel from source
    *****************************/



    // kernelSource stores the kernel code and must be NULL terminated
    cl_program myProgram = clCreateProgramWithSource( myContext, 1, 
                                                    &kernelSource, 
                                                    NULL, 
                                                    &status ) ;

    // Compile the program  
    const char buildOptions[] = "-cl-std=CL1.2 -cl-mad-enable\0";
    status = clBuildProgram( myProgram, 1, devices, buildOptions, NULL, NULL ) ;

    // Create the kernel
    cl_kernel myKernel = clCreateKernel( myProgram, "convolution", &status ) ;



    /**********************************
    Set kernel args and run the program
    **********************************/



    // Set the kernel arguments

    clSetKernelArg( myKernel, 0, sizeof( cl_mem ), &clInput ) ;
    clSetKernelArg( myKernel, 1, sizeof( cl_mem ), &clOutput ) ;
    clSetKernelArg( myKernel, 2, sizeof( int ), &height ) ;
    clSetKernelArg( myKernel, 3, sizeof( int ), &width ) ;
    clSetKernelArg( myKernel, 4, sizeof( cl_sampler ), &clSampler ) ;

    //Execute the kernel
    status = clEnqueueTask( myQueue, myKernel, 0, NULL, NULL ) ;

    //Read the output buffer back to the host
    status = clEnqueueReadImage( myQueue, clOutput, CL_TRUE, origin, region, 0, 0, 
                               (void *) outputPixels, 0, NULL, &clEvent ) ;



    /**********************************
    Free Resources
    **********************************/



    /* Wait for the kernel to finish */ 
    clWaitForEvents( 1, &clEvent ) ;

    free( refImage ) ;
    free( platforms ) ;
    free( devices ) ;
    free( outputPixels ) ;
    free( image_desc ) ;
    free( image_format ) ;

    clReleaseSampler( clSampler ) ;
    clReleaseMemObject( clInput ) ;
    clReleaseMemObject( clOutput ) ;    
    clReleaseProgram( myProgram ) ;
    clReleaseCommandQueue( myQueue ) ;
    clReleaseKernel( myKernel ) ;   
    clReleaseContext( myContext ) ; 
    clReleaseEvent( clEvent ) ;

    return 0;
}

在释放事件(clEvent)之前就销毁队列、内核、上下文或程序,可能会导致该警告。您可以尝试以下方法:

// Suggested teardown order from the answer: release the event before the
// queue, kernel, context and program are released.
clReleaseEvent( clEvent ) ; // <<< THIS ONE FIRST
// Then the sampler and the two image memory objects.
clReleaseSampler( clSampler ) ;
clReleaseMemObject( clInput ) ;
clReleaseMemObject( clOutput ) ;    
// Finally the program, queue, kernel and the context itself.
clReleaseProgram( myProgram ) ;
clReleaseCommandQueue( myQueue ) ;
clReleaseKernel( myKernel ) ;   
clReleaseContext( myContext ) ; 

clReleaseEvent(clEvent); // 您的错误消息表明内存泄漏发生在 clEnqueueNDRangeKernel 中。除非我看漏了,否则代码里最接近的调用就是 clEnqueueTask,它内部可能调用了 clEnqueueNDRangeKernel。在完成 Tim 提出的更正后,请注释掉 clEnqueueTask 这一行,看看内存泄漏是否仍然存在。如果泄漏消失了,那么我怀疑即使您为返回事件传递了 NULL,clEnqueueTask 内部仍然创建了 cl_event 对象!我添加了 clEvent 来代替 NULL,然后立即释放它,这解决了泄漏问题。谢谢!我想知道这是否算是驱动程序中的错误?你所做的完全正确。“泄漏事件的是 clEnqueueTask,不是你。”Tim,我同意。我对规范的理解是,当为事件参数传递 NULL 时,任何函数都不应创建事件。供将来参考:OP 已经在 AMD 的开发者论坛上发布了这个问题。嘿,Tim,我试过了,但是没有效果。顺序似乎并不重要,因为我把它放到最后又试了一次,显示的结果仍然相同,不过还是感谢你的尝试!我不确定 CL 规范是怎么说的,但一般来说,如果要在其他实现上运行这段代码,对销毁顺序保持谨慎可能是个好主意。AMD 的实现允许任意的销毁顺序,这一点值得称赞!clRelease* 函数会递减引用计数。一旦该计数达到 0,并且依附于该对象的其他对象都已被删除,该对象就会被删除。因此,一个正确编写的实现应该在内部安排对象的删除顺序,而与您释放它们的顺序无关;但在它们被删除之前,您必须先释放它们。