C/CUDA-需要帮助编写一个程序来将图像存储在缓冲区中_C_Cuda

C/CUDA-需要帮助编写一个程序来将图像存储在缓冲区中

c cuda

C/CUDA-需要帮助编写一个程序来将图像存储在缓冲区中,c,cuda,C,Cuda,我是一个新的CUDA编程，我需要在编写一个程序，以存储在内存缓冲区的图像帮助。我尝试修改CUDA OpenGL互操作示例中的代码（在CUDA By example一书中给出），将两个图像一个接一个地存储在缓冲区中。如果我试图避免无限循环，但不确定是否成功，我应该如何编写程序？如果您能帮助您编写正确的程序，我们将不胜感激 #include "book.h" #include "cpu_bitmap.h" #include "cuda.h" #include <cuda_gl_interop.

我是 CUDA 编程的新手，需要帮助编写一个把图像存储到内存缓冲区的程序。我尝试修改《CUDA By Example》一书中给出的 CUDA–OpenGL 互操作示例代码，把两幅图像一个接一个地存入缓冲区。我已经尽量避免出现无限循环，但不确定是否成功。这个程序应该怎样写？如果您能帮我写出正确的程序，我将不胜感激。

#include "book.h"
#include "cpu_bitmap.h"
#include "cuda.h"
#include <cuda_gl_interop.h>

// OpenGL buffer-object entry points. They start out NULL and are filled in
// by main() through GET_PROC_ADDRESS before any of them is called.
PFNGLBINDBUFFERARBPROC    glBindBuffer     = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers  = NULL;
PFNGLGENBUFFERSARBPROC    glGenBuffers     = NULL;
PFNGLBUFFERDATAARBPROC    glBufferData     = NULL;

// Wraps a CUDA runtime call (or any cudaError_t expression) and reports a
// failure together with the file and line of the call site.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }

// Reports a CUDA error and, by default, terminates the program.
//
//   code  - status returned by a CUDA runtime call
//   file  - source file of the call site (normally __FILE__)
//   line  - source line of the call site (normally __LINE__)
//   abort - when true (the default) the process exits with `code` as its
//           exit status; when false the error is only printed
//
// `file` is const because __FILE__ is a string literal. Exiting on failure
// replaces the earlier system("pause"), which resumed execution afterwards
// and let later CUDA calls run on after a failed one.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code == cudaSuccess) return;

    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort) exit(code);
}

// Width and height, in pixels, of the square image and of the GLUT window.
#define     DIM    512

// Number of pixels in one DIM x DIM image.
#define IMAGESIZE_MAX (DIM*DIM) 

// OpenGL buffer object name; generated in main() and bound as the
// pixel-unpack buffer that glDrawPixels reads from.
GLuint  bufferObj;
// CUDA graphics-resource handle for bufferObj, filled in by
// cudaGraphicsGLRegisterBuffer in main().
cudaGraphicsResource *resource;

// based on ripple code, but uses uchar4 which is the type of data
// graphic inter op uses. see screenshot - basic2.png

// Fills a DIM x DIM RGBA image with a green pattern, one pixel per thread.
//
// Launch layout: 2D grid of 2D blocks with at least DIM threads along each
// axis (main uses 16x16 blocks on a DIM/16 x DIM/16 grid). Threads that fall
// outside the image exit without writing.
// ptr1 must point to at least DIM*DIM uchar4 elements of device memory.
__global__ void kernel( uchar4 *ptr1) 
{
    // map from threadIdx/BlockIdx to pixel position
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;

    // Guard so that a launch larger than the image cannot write out of bounds.
    if (x >= DIM || y >= DIM) return;

    // Row pitch is the image width, independent of the launch configuration.
    const int offset = x + y * DIM;

    // now calculate the value at that position
    const float fx = x/(float)DIM - 0.5f;
    const float fy = y/(float)DIM - 0.5f;

    // tanf is unbounded, so clamp to [0, 255] before narrowing: converting an
    // out-of-range float to unsigned char is undefined behaviour.
    float level = 128.0f + 127.0f * tanf( fabsf(fx*100.0f) - fabsf(fy*100.0f) );
    level = fminf( fmaxf( level, 0.0f ), 255.0f );
    const unsigned char green = (unsigned char)level;

    // One 4-byte store for the whole pixel: R, G, B, A.
    ptr1[offset] = make_uchar4( 0, green, 0, 255 );
}

// Fills a DIM x DIM RGBA image with an orange-tinted pattern (full red plus
// the same green pattern that kernel() computes), one pixel per thread.
//
// Launch layout: 2D grid of 2D blocks with at least DIM threads along each
// axis (main uses 16x16 blocks on a DIM/16 x DIM/16 grid). Threads that fall
// outside the image exit without writing.
// ptr2 must point to at least DIM*DIM uchar4 elements of device memory.
__global__ void kernel2( uchar4 *ptr2) 
{
    // map from threadIdx/BlockIdx to pixel position
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;

    // Guard so that a launch larger than the image cannot write out of bounds.
    if (x >= DIM || y >= DIM) return;

    // Row pitch is the image width, independent of the launch configuration.
    const int offset = x + y * DIM;

    // now calculate the value at that position
    const float fx = x/(float)DIM - 0.5f;
    const float fy = y/(float)DIM - 0.5f;

    // tanf is unbounded, so clamp to [0, 255] before narrowing: converting an
    // out-of-range float to unsigned char is undefined behaviour.
    float level = 128.0f + 127.0f * tanf( fabsf(fx*100.0f) - fabsf(fy*100.0f) );
    level = fminf( fmaxf( level, 0.0f ), 255.0f );
    const unsigned char green = (unsigned char)level;

    // The red channel was written as 1000, which does not fit in an unsigned
    // char and silently truncated to 232. Use the channel maximum instead.
    // NOTE(review): assumes "as bright as possible" was the intent - confirm.
    const unsigned char orange = 255;

    // One 4-byte store for the whole pixel: R, G, B, A.
    ptr2[offset] = make_uchar4( orange, green, 0, 255 );
}

// Copies one DIM x DIM image (DIM*DIM uchar4 pixels) from Ptr to pBuffer.
//
// Works with any 1D or 2D launch: each thread starts at its flattened global
// index and advances by the total number of launched threads (grid-stride
// loop), so all DIM*DIM pixels are copied exactly once whatever the grid size.
// Both pointers must reference at least DIM*DIM uchar4 elements of device
// memory.
//
// The previous loop never changed idx, so `idx != DIM*DIM` stayed true and
// every thread spun forever. The __syncthreads() is gone as well: the
// threads share no data here, and a barrier inside a loop that threads may
// leave at different times is not allowed.
__global__ void copy ( uchar4 *pBuffer, uchar4 *Ptr )
{
    const int threadsPerRow = blockDim.x * gridDim.x;
    const int totalThreads  = threadsPerRow * blockDim.y * gridDim.y;

    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;

    for (int idx = x + y * threadsPerRow; idx < DIM*DIM; idx += totalThreads)
    {
        pBuffer[idx] = Ptr[idx];
    }
}

// Copies the image at Ptr2 into pBuffer, shifted forward by four pixels:
// source pixel i lands at pBuffer[i + 4]. Pixels whose destination would fall
// at or beyond DIM*DIM are dropped, so the kernel never writes outside a
// buffer of DIM*DIM uchar4 elements.
//
// Works with any 1D or 2D launch: each thread starts at its flattened global
// index and advances by the total number of launched threads (grid-stride
// loop).
//
// The previous loop never advanced idx/bdx, so it could not terminate (its
// `break` condition contradicted the loop condition), and the last four
// pixels were written past the end of a DIM*DIM destination.
//
// NOTE(review): the +4 element offset is kept from the original code. If the
// intent was to place this image after a first one, the offset would need to
// be DIM*DIM and the destination buffer would have to hold two images -
// confirm with the caller.
__global__ void copy2 ( uchar4 *pBuffer, uchar4 *Ptr2 )
{
    const int kDstOffset = 4;   // destination shift, in pixels

    const int threadsPerRow = blockDim.x * gridDim.x;
    const int totalThreads  = threadsPerRow * blockDim.y * gridDim.y;

    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;

    for (int idx = x + y * threadsPerRow; idx + kDstOffset < DIM*DIM; idx += totalThreads)
    {
        pBuffer[idx + kDstOffset] = Ptr2[idx];
    }
}



// GLUT keyboard callback. Esc (ASCII 27) releases the CUDA/OpenGL resources
// and exits the process; every other key is ignored.
//
//   key  - ASCII code of the pressed key
//   x, y - mouse position reported by GLUT (unused)
void key_func( unsigned char key, int x, int y ) {
    (void)x;
    (void)y;

    if (key != 27) return;

    // clean up OpenGL and CUDA
    // The unregister call's status used to be discarded; check it like the
    // other CUDA calls in this file.
    HANDLE_ERROR( cudaGraphicsUnregisterResource( resource ) );
    glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
    glDeleteBuffers( 1, &bufferObj );
    exit(0);
}

// GLUT display callback: draws the DIM x DIM RGBA image and presents it.
void draw_func( void ) {
    // bufferObj is bound as the pixel-unpack buffer, so the last argument of
    // glDrawPixels is an offset into that buffer rather than a pointer to a
    // bitmap in client memory; zero selects the start of the buffer.
    const GLvoid *bufferOffset = 0;
    glDrawPixels( DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, bufferOffset );

    // The window is double-buffered: show the frame that was just drawn.
    glutSwapBuffers();
}


    // Renders two DIM x DIM images on the GPU, runs copy() and then copy2() to
    // move them into the CUDA-mapped OpenGL pixel buffer, and finally displays
    // that buffer in a GLUT window.
    //
    // Fixes relative to the earlier version:
    //   * cudaMalloc sizes are in bytes (DIM*DIM*sizeof(uchar4)); they used to
    //     be pixel counts, so the kernels wrote four times past each buffer;
    //   * the unused/duplicated allocations (pBuffer twice, pBufferCurrent) and
    //     the cudaMalloc of pBufferImage, whose pointer was immediately
    //     overwritten by the mapped pointer, are gone;
    //   * the scratch images are freed once they have been copied;
    //   * the function is properly closed and returns a status.
    int main( int argc, char **argv ) {
        cudaDeviceProp  prop;
        int dev;

        memset( &prop, 0, sizeof( cudaDeviceProp ) );
        prop.major = 1;
        prop.minor = 0;
        HANDLE_ERROR( cudaChooseDevice( &dev, &prop ) );

        // tell CUDA which dev we will be using for graphic interop
        // from the programming guide:  Interoperability with OpenGL
        //     requires that the CUDA device be specified by
        //     cudaGLSetGLDevice() before any other runtime calls.
        HANDLE_ERROR(  cudaGLSetGLDevice( dev ) );

        // these GLUT calls need to be made before the other OpenGL
        // calls, else we get a seg fault
        glutInit( &argc, argv );
        glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA );
        glutInitWindowSize( DIM, DIM );
        glutCreateWindow( "bitmap" );

        glBindBuffer    = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
        glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
        glGenBuffers    = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
        glBufferData    = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");

        // Size of one RGBA image in bytes; used for every allocation below.
        const size_t imageBytes = (size_t)DIM * DIM * sizeof(uchar4);

        // Create the OpenGL pixel buffer that glDrawPixels will read from.
        glGenBuffers( 1, &bufferObj );
        glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
        glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, imageBytes,
                      NULL, GL_DYNAMIC_DRAW_ARB );

        // Register the GL buffer with CUDA and map it to get a device pointer.
        HANDLE_ERROR( cudaGraphicsGLRegisterBuffer( &resource,
                                                    bufferObj,
                                                    cudaGraphicsMapFlagsNone ) );
        HANDLE_ERROR( cudaGraphicsMapResources( 1, &resource, NULL ) );

        uchar4 *pBufferImage = NULL;
        size_t  mappedBytes  = 0;
        HANDLE_ERROR( cudaGraphicsResourceGetMappedPointer( (void**)&pBufferImage,
                                                            &mappedBytes,
                                                            resource ) );
        if (mappedBytes < imageBytes) {
            fprintf( stderr, "mapped buffer is smaller than one image\n" );
            return 1;
        }

        // Scratch images in ordinary device memory, one per generator kernel.
        uchar4 *devPtr  = NULL;
        uchar4 *devPtr2 = NULL;
        gpuErrchk( cudaMalloc( (void**)&devPtr,  imageBytes ) );
        gpuErrchk( cudaMalloc( (void**)&devPtr2, imageBytes ) );

        // One thread per pixel: 16x16 blocks on a DIM/16 x DIM/16 grid.
        dim3    grids(DIM/16,DIM/16);
        dim3    threads(16,16);

        kernel<<<grids,threads>>>( devPtr );
        gpuErrchk( cudaPeekAtLastError() );
        gpuErrchk( cudaDeviceSynchronize() );

        kernel2<<<grids,threads>>>( devPtr2 );
        gpuErrchk( cudaPeekAtLastError() );
        gpuErrchk( cudaDeviceSynchronize() );

        // Move the first image, then the second, into the mapped GL buffer.
        copy<<<grids,threads>>>( pBufferImage, devPtr );
        gpuErrchk( cudaPeekAtLastError() );
        gpuErrchk( cudaDeviceSynchronize() );

        copy2<<<grids,threads>>>( pBufferImage, devPtr2 );
        gpuErrchk( cudaPeekAtLastError() );
        gpuErrchk( cudaDeviceSynchronize() );

        // Hand the buffer back to OpenGL before drawing from it.
        HANDLE_ERROR( cudaGraphicsUnmapResources( 1, &resource, NULL ) );

        // The scratch images are no longer needed once they have been copied.
        gpuErrchk( cudaFree( devPtr ) );
        gpuErrchk( cudaFree( devPtr2 ) );

        // set up GLUT and kick off main loop
        glutKeyboardFunc( key_func );
        glutDisplayFunc( draw_func );
        glutMainLoop();

        return 0;
    }

#包括“book.h”
#包括“cpu_bitmap.h”
#包括“cuda.h”
#包括
PFNGLBINDBUFFERARBPROC glBindBuffer=NULL；
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers=NULL；
PFNGLGENBUFFERSARBPROC GLGENBULTS=NULL；
PFNGLBUFFERDATAARBPROC glBufferData=NULL；
#定义gpuerchk（ans）{gpuAssert（（ans），_文件_，_行__）}
内联void gpuAssert（cudaError\u t代码，char*文件，int行，bool abort=true）
{
如果（代码！=cudaSuccess）
{
fprintf（标准，“GPUassert:%s%s%d\n”，cudaGetErrorString（代码）、文件、行）；
如果（中止）系统（“暂停”）；
}
}
#定义尺寸512
#定义图像大小_最大值（尺寸*尺寸）
GLuint bufferObj；
cudaGraphicsResource*资源；
//基于ripple代码，但使用uchar4，这是一种数据类型
//图形交互使用。请参见屏幕截图-basic2.png
__全局无效内核（uchar4*ptr1）
{
//从threadIdx/BlockIdx映射到像素位置
int x=threadIdx.x+blockIdx.x*blockDim.x；
int y=线程IDX.y+块IDX.y*块DIM.y；
int offset=x+y*blockDim.x*gridDim.x；
//现在计算该位置的值
浮动汇率=x/（浮动）尺寸-0.5f；
浮动fy=y/（浮动）尺寸-0.5f；
无符号字符绿色=128+127*tan（绝对值（fx*100）-绝对值（fy*100））；
//访问uchar4与无符号字符*
ptr1[offset].x=0；
ptr1[offset].y=绿色；
ptr1[offset].z=0；
ptr1[offset].w=255；
}
__全局无效内核2（uchar4*ptr2）
{
//从threadIdx/BlockIdx映射到像素位置
int x=threadIdx.x+blockIdx.x*blockDim.x；
int y=线程IDX.y+块IDX.y*块DIM.y；
int offset=x+y*blockDim.x*gridDim.x；
//现在计算该位置的值
浮动汇率=x/（浮动）尺寸-0.5f；
浮动fy=y/（浮动）尺寸-0.5f；
无符号字符绿色=128+127*tan（绝对值（fx*100）-绝对值（fy*100））；
无符号字符橙色=1000；
//访问uchar4与无符号字符*
ptr2[offset].x=橙色；
ptr2[offset].y=绿色；
ptr2[offset].z=0；
ptr2[offset].w=255；
}
__全局无效副本（uchar4*pBuffer，uchar4*Ptr）
{
int x=threadIdx.x+blockIdx.x*blockDim.x；
int y=线程IDX.y+块IDX.y*块DIM.y；
int idx=x+y*blockDim.x*gridDim.x；
while（idx！=DIM*DIM）
{
pBuffer[idx]=Ptr[idx]；
__同步线程（）；
}
}    
__全局无效副本2（uchar4*pBuffer，uchar4*Ptr2）
{  
int x=threadIdx.x+blockIdx.x*blockDim.x；
int y=线程IDX.y+块IDX.y*块DIM.y；
int idx=x+y*blockDim.x*gridDim.x；
int bdx=idx；
而（（idx

#include "book.h"
#include "cpu_bitmap.h"

//#include "cuda.h"
#include <cuda_gl_interop.h>

// Records which image was drawn last: main() sets it to 1 after drawing the
// green image, and key_func() sets it to 2 after orange and 1 after green.
int which_image;
// OpenGL buffer-object entry points. They start out NULL and are filled in
// by main() through GET_PROC_ADDRESS before any of them is called.
PFNGLBINDBUFFERARBPROC    glBindBuffer     = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers  = NULL;
PFNGLGENBUFFERSARBPROC    glGenBuffers     = NULL;
PFNGLBUFFERDATAARBPROC    glBufferData     = NULL;

// Width and height, in pixels, of the square image and of the GLUT window.
#define     DIM    512

// OpenGL buffer object name; generated in main() and bound as the
// pixel-unpack buffer that glDrawPixels reads from.
GLuint  bufferObj;
// CUDA graphics-resource handle for bufferObj, filled in by
// cudaGraphicsGLRegisterBuffer in main().
cudaGraphicsResource *resource;

// Launch configuration used by key_func(): 16x16 threads per block on a
// DIM/16 x DIM/16 grid, i.e. DIM threads along each axis.
dim3    mgrids(DIM/16,DIM/16);
dim3    mthreads(16,16);

// based on ripple code, but uses uchar4 which is the type of data
// graphic inter op uses. see screenshot - basic2.png
// Fills a DIM x DIM RGBA image with a green sine pattern, one pixel per
// thread.
//
// Launch layout: 2D grid of 2D blocks with at least DIM threads along each
// axis (this file uses 16x16 blocks on a DIM/16 x DIM/16 grid). Threads that
// fall outside the image exit without writing.
// ptr must point to at least DIM*DIM uchar4 elements of device memory.
__global__ void kernel_gr( uchar4 *ptr ) {
    // map from threadIdx/BlockIdx to pixel position
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;

    // Guard so that a launch larger than the image cannot write out of bounds.
    if (x >= DIM || y >= DIM) return;

    // Row pitch is the image width, independent of the launch configuration.
    const int offset = x + y * DIM;

    // now calculate the value at that position
    const float fx = x/(float)DIM - 0.5f;
    const float fy = y/(float)DIM - 0.5f;

    // sinf stays within [-1, 1], so the result lies in [1, 255] and fits an
    // unsigned char. The explicit single-precision functions keep the math
    // from being promoted to double.
    const unsigned char green = 128 + 127 *
                                sinf( fabsf(fx*100) - fabsf(fy*100) );

    // One 4-byte store for the whole pixel: R, G, B, A.
    ptr[offset] = make_uchar4( 0, green, 0, 255 );
}

// Fills a DIM x DIM RGBA image with an orange sine pattern (red = v,
// green = v/2, blue = 0), one pixel per thread.
//
// Launch layout: 2D grid of 2D blocks with at least DIM threads along each
// axis (this file uses 16x16 blocks on a DIM/16 x DIM/16 grid). Threads that
// fall outside the image exit without writing.
// ptr must point to at least DIM*DIM uchar4 elements of device memory.
__global__ void kernel_or( uchar4 *ptr ) {
    // map from threadIdx/BlockIdx to pixel position
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;

    // Guard so that a launch larger than the image cannot write out of bounds.
    if (x >= DIM || y >= DIM) return;

    // Row pitch is the image width, independent of the launch configuration.
    const int offset = x + y * DIM;

    // now calculate the value at that position
    const float fx = x/(float)DIM - 0.5f;
    const float fy = y/(float)DIM - 0.5f;

    // sinf stays within [-1, 1], so the result lies in [1, 255] and fits an
    // unsigned char. The explicit single-precision functions keep the math
    // from being promoted to double.
    const unsigned char orange = 128 + 127 *
                                 sinf( fabsf(fx*100) - fabsf(fy*100) );

    // One 4-byte store for the whole pixel: R, G, B, A.
    ptr[offset] = make_uchar4( orange, orange/2, 0, 255 );
}

// GLUT display callback: draws the DIM x DIM RGBA image and presents it.
static void draw_func( void ) {
    // bufferObj is bound as the pixel-unpack buffer, so the last argument of
    // glDrawPixels is an offset into that buffer rather than a pointer to a
    // bitmap in client memory; zero selects the start of the buffer.
    const GLvoid *bufferOffset = 0;
    glDrawPixels( DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, bufferOffset );

    // The window is double-buffered: show the frame that was just drawn.
    glutSwapBuffers();
}

// GLUT keyboard callback.
//
//   Space (32): maps the GL buffer into CUDA, redraws it with the image that
//               is not currently shown (orange if which_image is 1, green
//               otherwise), unmaps it and repaints the window.
//   Esc   (27): releases the CUDA/OpenGL resources and exits.
//   Other keys are ignored.
//
//   key  - ASCII code of the pressed key
//   x, y - mouse position reported by GLUT (unused)
static void key_func( unsigned char key, int x, int y ) {
    if (key == 32) {
        uchar4 *mapped;
        size_t  mappedBytes;

        // Borrow the GL buffer for the duration of the kernel.
        HANDLE_ERROR( cudaGraphicsMapResources( 1, &resource, NULL ) );
        HANDLE_ERROR(
          cudaGraphicsResourceGetMappedPointer( (void**)&mapped,
                                                &mappedBytes,
                                                resource) );

        const bool drawOrange = (which_image == 1);
        if (drawOrange)
            kernel_or<<<mgrids,mthreads>>>( mapped );
        else
            kernel_gr<<<mgrids,mthreads>>>( mapped );

        // Surface launch errors first, then wait for the kernel to finish.
        HANDLE_ERROR(cudaPeekAtLastError());
        HANDLE_ERROR(cudaDeviceSynchronize());

        if (drawOrange) {
            printf("orange\n");
            which_image = 2;
        } else {
            printf("green\n");
            which_image = 1;
        }

        // Give the buffer back to OpenGL before drawing from it.
        HANDLE_ERROR( cudaGraphicsUnmapResources( 1, &resource, NULL ) );
        draw_func();
    }
    else if (key == 27) {
        // clean up OpenGL and CUDA
        HANDLE_ERROR( cudaGraphicsUnregisterResource( resource ) );
        glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
        glDeleteBuffers( 1, &bufferObj );
        exit(0);
    }
}



// Sets up the CUDA device, a GLUT window and a CUDA-registered OpenGL pixel
// buffer, draws the green image into that buffer once, and then enters the
// GLUT main loop (Space toggles the image, Esc exits; see key_func).
//
// Changes relative to the earlier version: the initial kernel launch is now
// error-checked like the launches in key_func(), the file-level
// mgrids/mthreads launch configuration is used instead of a duplicate local
// copy, and the function returns a status.
int main( int argc, char **argv ) {
    cudaDeviceProp  prop;
    int dev;

    memset( &prop, 0, sizeof( cudaDeviceProp ) );
    prop.major = 1;
    prop.minor = 0;
    HANDLE_ERROR( cudaChooseDevice( &dev, &prop ) );

    // tell CUDA which dev we will be using for graphic interop
    // from the programming guide:  Interoperability with OpenGL
    //     requires that the CUDA device be specified by
    //     cudaGLSetGLDevice() before any other runtime calls.

    HANDLE_ERROR( cudaGLSetGLDevice( dev ) );

    // these GLUT calls need to be made before the other OpenGL
    // calls, else we get a seg fault
    glutInit( &argc, argv );
    glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA );
    glutInitWindowSize( DIM, DIM );
    glutCreateWindow( "bitmap" );

    glBindBuffer    = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
    glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
    glGenBuffers    = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
    glBufferData    = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");

    // the first three are standard OpenGL, the 4th is the CUDA reg
    // of the bitmap these calls exist starting in OpenGL 1.5
    glGenBuffers( 1, &bufferObj );
    glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
    glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4,
                  NULL, GL_DYNAMIC_DRAW_ARB );

    HANDLE_ERROR(
        cudaGraphicsGLRegisterBuffer( &resource,
                                      bufferObj,
                                      cudaGraphicsMapFlagsNone ) );

    // do work with the memory dst being on the GPU, gotten via mapping
    HANDLE_ERROR( cudaGraphicsMapResources( 1, &resource, NULL ) );
    uchar4* devPtr;
    size_t  size;
    HANDLE_ERROR(
        cudaGraphicsResourceGetMappedPointer( (void**)&devPtr,
                                              &size,
                                              resource) );

    // Draw the initial (green) image straight into the mapped GL buffer.
    kernel_gr<<<mgrids,mthreads>>>( devPtr );
    // A kernel launch returns no status of its own, so query it explicitly;
    // otherwise a bad launch would only show up as a blank window.
    HANDLE_ERROR( cudaPeekAtLastError() );

    HANDLE_ERROR( cudaGraphicsUnmapResources( 1, &resource, NULL ) );
    which_image = 1;

    // set up GLUT and kick off main loop
    glutKeyboardFunc( key_func );
    glutDisplayFunc( draw_func );
    glutMainLoop();

    return 0;
}