C/CUDA-需要帮助编写一个程序来将图像存储在缓冲区中
我是CUDA编程的新手,需要帮助编写一个将图像存储在内存缓冲区中的程序。我尝试修改CUDA OpenGL互操作示例中的代码(出自《CUDA By Example》一书),将两个图像一个接一个地存储在缓冲区中。我试图避免无限循环,但不确定是否成功。我应该如何编写这个程序?如果您能帮助我编写正确的程序,我将不胜感激。C/CUDA-需要帮助编写一个程序来将图像存储在缓冲区中,c,cuda,C,Cuda,我是CUDA编程的新手,需要帮助编写一个将图像存储在内存缓冲区中的程序。我尝试修改CUDA OpenGL互操作示例中的代码(出自《CUDA By Example》一书),将两个图像一个接一个地存储在缓冲区中。我试图避免无限循环,但不确定是否成功。我应该如何编写这个程序?如果您能帮助我编写正确的程序,我将不胜感激。 #include "book.h" #include "cpu_bitmap.h" #include "cuda.h" #include <cuda_gl_interop.
#include "book.h"
#include "cpu_bitmap.h"
#include "cuda.h"
#include <cuda_gl_interop.h>
// Function pointers for the OpenGL buffer-object entry points used by this
// program. They start out NULL and are filled in by main() from
// GET_PROC_ADDRESS lookups before the first buffer call is made.
PFNGLBINDBUFFERARBPROC glBindBuffer = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers = NULL;
PFNGLGENBUFFERSARBPROC glGenBuffers = NULL;
PFNGLBUFFERDATAARBPROC glBufferData = NULL;
// Wraps a CUDA runtime call (or any cudaError_t expression) and reports a
// failure together with the file and line of the call site.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }

// Reports a failed CUDA runtime call on stderr.
//
//   code  - status returned by the CUDA runtime call being checked
//   file  - source file of the call site (normally __FILE__, a string literal,
//           hence const char*)
//   line  - source line of the call site (normally __LINE__)
//   abort - when true (the default) the process terminates after reporting;
//           when false the error is only logged and execution continues
//
// Does nothing when code is cudaSuccess.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
{
    if (code == cudaSuccess)
        return;

    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);

    if (abort)
    {
        // Keep the console window open so the message can be read ("pause" is
        // a Windows shell command), then actually stop: continuing after a
        // failed allocation or launch only produces follow-on errors.
        system("pause");
        exit(code);
    }
}
// Width and height, in pixels, of the square image and of the GLUT window.
#define DIM 512
// Number of pixels in one DIM x DIM image (not referenced in the visible code).
#define IMAGESIZE_MAX (DIM*DIM)
// OpenGL buffer object that glDrawPixels reads from once it is bound as the
// pixel-unpack buffer.
GLuint bufferObj;
// CUDA handle for bufferObj, obtained by registering the buffer with CUDA in main().
cudaGraphicsResource *resource;
// based on ripple code, but uses uchar4 which is the type of data
// graphic inter op uses. see screenshot - basic2.png
// Fills a DIM x DIM RGBA image with a green pattern.
//
// Launch layout: a 2D grid of 2D blocks with one thread per pixel (the caller
// uses DIM/16 x DIM/16 blocks of 16 x 16 threads). Threads that fall outside
// the DIM x DIM image do nothing, so larger grids are safe.
//
//   ptr1 - device buffer holding at least DIM*DIM uchar4 pixels, row-major
__global__ void kernel( uchar4 *ptr1)
{
    // map from threadIdx/blockIdx to pixel position
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;
    if (x >= DIM || y >= DIM)
        return;
    const int offset = x + y * DIM;

    // now calculate the value at that position
    const float fx = x / (float)DIM - 0.5f;
    const float fy = y / (float)DIM - 0.5f;

    // tanf() is unbounded, so the intensity is clamped to [0, 255] before the
    // narrowing conversion; converting an out-of-range float to unsigned char
    // is undefined.
    const float wave  = tanf( fabsf(fx * 100.0f) - fabsf(fy * 100.0f) );
    const float level = fminf( fmaxf(128.0f + 127.0f * wave, 0.0f), 255.0f );
    const unsigned char green = (unsigned char)level;

    // opaque pixel with only the green channel set
    ptr1[offset] = make_uchar4(0, green, 0, 255);
}
// Fills a DIM x DIM RGBA image with the same pattern as kernel() in the green
// channel plus a constant red channel.
//
// Launch layout: a 2D grid of 2D blocks with one thread per pixel (the caller
// uses DIM/16 x DIM/16 blocks of 16 x 16 threads). Threads that fall outside
// the DIM x DIM image do nothing.
//
//   ptr2 - device buffer holding at least DIM*DIM uchar4 pixels, row-major
__global__ void kernel2( uchar4 *ptr2)
{
    // map from threadIdx/blockIdx to pixel position
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;
    if (x >= DIM || y >= DIM)
        return;
    const int offset = x + y * DIM;

    // now calculate the value at that position
    const float fx = x / (float)DIM - 0.5f;
    const float fy = y / (float)DIM - 0.5f;

    // tanf() is unbounded, so the intensity is clamped to [0, 255] before the
    // narrowing conversion; converting an out-of-range float to unsigned char
    // is undefined.
    const float wave  = tanf( fabsf(fx * 100.0f) - fabsf(fy * 100.0f) );
    const float level = fminf( fmaxf(128.0f + 127.0f * wave, 0.0f), 255.0f );
    const unsigned char green = (unsigned char)level;

    // The original initialiser was the literal 1000, which does not fit in an
    // unsigned char and wrapped to 232 (1000 mod 256). The effective value is
    // written out so the image is unchanged; use 255 for full-intensity red.
    const unsigned char orange = 232;

    ptr2[offset] = make_uchar4(orange, green, 0, 255);
}
// Copies one DIM x DIM image (DIM*DIM uchar4 pixels) from Ptr to pBuffer.
//
// Works with any 1D or 2D launch configuration: every thread starts at its own
// flattened global index and advances by the total number of launched threads
// until the image is covered, so each pixel is copied exactly once.
//
// The previous version looped on `idx != DIM*DIM` without ever changing idx,
// so every thread whose index was not exactly DIM*DIM spun forever (with a
// barrier inside the loop). No barrier is needed: threads never read each
// other's data.
//
//   pBuffer - destination, at least DIM*DIM uchar4 elements
//   Ptr     - source, at least DIM*DIM uchar4 elements
__global__ void copy ( uchar4 *pBuffer, uchar4 *Ptr )
{
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;

    // flattened global thread index and total thread count of the launch
    const int first  = x + y * blockDim.x * gridDim.x;
    const int stride = blockDim.x * gridDim.x * blockDim.y * gridDim.y;

    for (int idx = first; idx < DIM * DIM; idx += stride)
    {
        pBuffer[idx] = Ptr[idx];
    }
}
// Copies the DIM x DIM image in Ptr2 into pBuffer, shifted forward by four
// pixels: source pixel i lands at destination index i + 4.
//
// Works with any 1D or 2D launch configuration (each thread starts at its
// flattened global index and advances by the total number of launched threads).
//
// The previous version never modified idx/bdx inside its while loop, so every
// in-range thread looped forever and the `break` test could never be true.
// It also wrote pBuffer[idx+4] for idx up to DIM*DIM-1, i.e. up to four
// elements past a DIM*DIM-element destination; those writes are now skipped.
//
// NOTE(review): the offset is in uchar4 elements (pixels), not bytes. If the
// intent is to store this image *after* a first DIM x DIM image, the offset
// would have to be DIM*DIM and pBuffer would need room for 2*DIM*DIM pixels -
// confirm against the caller before changing it.
//
//   pBuffer - destination, assumed to hold DIM*DIM uchar4 elements
//   Ptr2    - source, at least DIM*DIM uchar4 elements
__global__ void copy2 ( uchar4 *pBuffer, uchar4 *Ptr2 )
{
    const int kDstShift = 4;   // destination offset in pixels, as in the original

    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;

    // flattened global thread index and total thread count of the launch
    const int first  = x + y * blockDim.x * gridDim.x;
    const int stride = blockDim.x * gridDim.x * blockDim.y * gridDim.y;

    for (int idx = first; idx < DIM * DIM; idx += stride)
    {
        const int dst = idx + kDstShift;
        if (dst < DIM * DIM)
        {
            pBuffer[dst] = Ptr2[idx];
        }
    }
}
// GLUT keyboard callback. Escape (key code 27) releases the CUDA/OpenGL
// resources and terminates the program; every other key is ignored.
//
//   key  - ASCII code of the pressed key
//   x, y - mouse position reported by GLUT (unused)
void key_func( unsigned char key, int x, int y ) {
    (void)x;
    (void)y;

    switch (key) {
        case 27:
            // clean up OpenGL and CUDA; the unregister result is checked like
            // every other CUDA call in this program instead of being discarded
            HANDLE_ERROR( cudaGraphicsUnregisterResource( resource ) );
            glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
            glDeleteBuffers( 1, &bufferObj );
            exit(0);
        default:
            break;
    }
}
// GLUT display callback: draws the DIM x DIM RGBA image and presents it.
void draw_func( void ) {
    // bufferObj is the pixel source here, so the last glDrawPixels argument is
    // an offset into that buffer object rather than a pointer to a bitmap;
    // the image starts at offset zero.
    const GLvoid *bufferOffset = 0;

    glDrawPixels( DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, bufferOffset );
    glutSwapBuffers();
}
// Program entry point: selects a CUDA device for OpenGL interop, creates the
// GLUT window and a pixel buffer object, registers and maps that buffer with
// CUDA, generates two images with kernel/kernel2, copies them into the mapped
// buffer with copy/copy2, unmaps the buffer and enters the GLUT main loop.
int main( int argc, char **argv ) {
cudaDeviceProp prop;
int dev;
// request any device of compute capability 1.0 or better
(memset( &prop, 0, sizeof( cudaDeviceProp ) ));
prop.major = 1;
prop.minor = 0;
HANDLE_ERROR( cudaChooseDevice( &dev, &prop ) );
// tell CUDA which dev we will be using for graphic interop
// from the programming guide: Interoperability with OpenGL
// requires that the CUDA device be specified by
// cudaGLSetGLDevice() before any other runtime calls.
HANDLE_ERROR( cudaGLSetGLDevice( dev ) );
// these GLUT calls need to be made before the other OpenGL
// calls, else we get a seg fault
glutInit( &argc, argv );
glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA );
glutInitWindowSize( DIM, DIM );
glutCreateWindow( "bitmap" );
// look up the buffer-object entry points at run time
// NOTE(review): the returned pointers are used below without a NULL check.
glBindBuffer = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
glGenBuffers = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
glBufferData = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");
// the first three are standard OpenGL, the 4th is the CUDA reg
// of the bitmap these calls exist starting in OpenGL 1.5
glGenBuffers( 1, &bufferObj );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
// the buffer object is DIM*DIM*4 bytes, i.e. exactly one DIM x DIM image of
// 4-byte pixels
glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4 ,
NULL, GL_DYNAMIC_DRAW_ARB );
// REGISTER THE GL BufferObj and CUDA Resource
HANDLE_ERROR(( cudaGraphicsGLRegisterBuffer( &resource,
bufferObj,
cudaGraphicsMapFlagsNone ) ));
// do work with the memory dst being on the GPU, gotten via mapping
HANDLE_ERROR( cudaGraphicsMapResources( 1, &resource, NULL ) );
uchar4* devPtr;
// NOTE(review): size and sizet are passed to cudaMalloc below as byte counts,
// but kernel/kernel2 are launched with one thread per pixel and each writes a
// uchar4 (4 bytes) at offsets up to DIM*DIM-1. DIM*DIM bytes is a quarter of
// what those writes need; DIM*DIM*sizeof(uchar4) looks intended.
size_t size = DIM*DIM;
size_t sizet = 2*DIM*DIM;
gpuErrchk(cudaMalloc ( (uchar4 **)&devPtr, size));
uchar4 *devPtr2;
gpuErrchk(cudaMalloc ( (uchar4 **)&devPtr2, size));
uchar4 *pBuffer;
gpuErrchk(cudaMalloc ( (uchar4 **)&pBuffer, size));
uchar4 *pBufferCurrent;
// NOTE(review): this allocates into pBuffer a second time, so the allocation
// made two lines above is no longer reachable; pBufferCurrent is declared but
// never assigned or used in the visible code.
gpuErrchk(cudaMalloc ( (uchar4 **)&pBuffer, size));
uchar4 *pBufferImage;
gpuErrchk(cudaMalloc ( (uchar4 **)&pBufferImage, sizet));
// REGISTER THE C BUFFER and CUDA Resource
// NOTE(review): this call overwrites pBufferImage with the address of the
// mapped GL buffer (dropping the cudaMalloc'd block above) and overwrites
// size with the size reported for the mapped buffer.
HANDLE_ERROR( cudaGraphicsResourceGetMappedPointer( (void**)&pBufferImage,
&size,
resource) );
// one thread per pixel: (DIM/16 x DIM/16) blocks of (16 x 16) threads
dim3 grids(DIM/16,DIM/16);
dim3 threads(16,16);
// generate the first image into devPtr
kernel<<<grids,threads>>>( devPtr );
gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() );
// generate the second image into devPtr2
kernel2<<<grids,threads>>>(devPtr2);
gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() );
// the loop body runs twice: a == 1 copies the first image with copy, a == 2
// copies the second image with copy2; both target the mapped GL buffer.
// The copies are launched as a 1D grid of 512 blocks x 512 threads, unlike
// the 2D grids/threads configuration used for kernel/kernel2.
int a = 1;
do
{
if (a==1)
{
copy<<< 512, 512>>>(pBufferImage, devPtr);
gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() );
}
if(a==2)
{
copy2<<< 512, 512>>>(pBufferImage, devPtr2);
gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() );
}
a++;
} while (a<=2);
// hand the buffer back to OpenGL before it is drawn
// NOTE(review): none of the cudaMalloc'd buffers is freed in the visible code.
HANDLE_ERROR ( cudaGraphicsUnmapResources( 1, &resource, NULL ) );
// set up GLUT and kick off main loop
glutKeyboardFunc( key_func );
glutDisplayFunc( draw_func );
glutMainLoop();
// NOTE(review): the closing brace of main() is missing from the visible source.
#包括“book.h”
#包括“cpu_bitmap.h”
#包括“cuda.h”
#包括
PFNGLBINDBUFFERARBPROC glBindBuffer=NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers=NULL;
PFNGLGENBUFFERSARBPROC GLGENBULTS=NULL;
PFNGLBUFFERDATAARBPROC glBufferData=NULL;
#定义gpuerchk(ans){gpuAssert((ans),_文件_,_行__)}
内联void gpuAssert(cudaError\u t代码,char*文件,int行,bool abort=true)
{
如果(代码!=cudaSuccess)
{
fprintf(标准,“GPUassert:%s%s%d\n”,cudaGetErrorString(代码)、文件、行);
如果(中止)系统(“暂停”);
}
}
#定义尺寸512
#定义图像大小_最大值(尺寸*尺寸)
GLuint bufferObj;
cudaGraphicsResource*资源;
//基于ripple代码,但使用uchar4,这是一种数据类型
//图形交互使用。请参见屏幕截图-basic2.png
__全局无效内核(uchar4*ptr1)
{
//从threadIdx/BlockIdx映射到像素位置
int x=threadIdx.x+blockIdx.x*blockDim.x;
int y=线程IDX.y+块IDX.y*块DIM.y;
int offset=x+y*blockDim.x*gridDim.x;
//现在计算该位置的值
浮动汇率=x/(浮动)尺寸-0.5f;
浮动fy=y/(浮动)尺寸-0.5f;
无符号字符绿色=128+127*tan(绝对值(fx*100)-绝对值(fy*100));
//访问uchar4与无符号字符*
ptr1[offset].x=0;
ptr1[offset].y=绿色;
ptr1[offset].z=0;
ptr1[offset].w=255;
}
__全局无效内核2(uchar4*ptr2)
{
//从threadIdx/BlockIdx映射到像素位置
int x=threadIdx.x+blockIdx.x*blockDim.x;
int y=线程IDX.y+块IDX.y*块DIM.y;
int offset=x+y*blockDim.x*gridDim.x;
//现在计算该位置的值
浮动汇率=x/(浮动)尺寸-0.5f;
浮动fy=y/(浮动)尺寸-0.5f;
无符号字符绿色=128+127*tan(绝对值(fx*100)-绝对值(fy*100));
无符号字符橙色=1000;
//访问uchar4与无符号字符*
ptr2[offset].x=橙色;
ptr2[offset].y=绿色;
ptr2[offset].z=0;
ptr2[offset].w=255;
}
__全局无效副本(uchar4*pBuffer,uchar4*Ptr)
{
int x=threadIdx.x+blockIdx.x*blockDim.x;
int y=线程IDX.y+块IDX.y*块DIM.y;
int idx=x+y*blockDim.x*gridDim.x;
while(idx!=DIM*DIM)
{
pBuffer[idx]=Ptr[idx];
__同步线程();
}
}
__全局无效副本2(uchar4*pBuffer,uchar4*Ptr2)
{
int x=threadIdx.x+blockIdx.x*blockDim.x;
int y=线程IDX.y+块IDX.y*块DIM.y;
int idx=x+y*blockDim.x*gridDim.x;
int bdx=idx;
而((idx
#include "book.h"
#include "cpu_bitmap.h"
//#include "cuda.h"
#include <cuda_gl_interop.h>
// Identifies the image currently held in the shared buffer: main() sets it to
// 1 after the initial kernel_gr launch, and key_func switches it between 1 and
// 2 each time the space bar is handled.
int which_image;
// Function pointers for the OpenGL buffer-object entry points; NULL until
// main() assigns them from GET_PROC_ADDRESS lookups.
PFNGLBINDBUFFERARBPROC glBindBuffer = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers = NULL;
PFNGLGENBUFFERSARBPROC glGenBuffers = NULL;
PFNGLBUFFERDATAARBPROC glBufferData = NULL;
// Width and height, in pixels, of the square image and of the GLUT window.
#define DIM 512
// OpenGL buffer object used as the pixel source for glDrawPixels.
GLuint bufferObj;
// CUDA handle for bufferObj, set when main() registers the buffer with CUDA.
cudaGraphicsResource *resource;
// Launch geometry used by key_func: (DIM/16 x DIM/16) blocks of (16 x 16)
// threads, i.e. one thread per pixel of the DIM x DIM image.
dim3 mgrids(DIM/16,DIM/16);
dim3 mthreads(16,16);
// based on ripple code, but uses uchar4 which is the type of data
// graphic inter op uses. see screenshot - basic2.png
// Writes the green image: one thread per pixel of a 2D launch, each storing an
// opaque pixel whose green channel follows a sine pattern.
//
//   ptr - device buffer of uchar4 pixels, indexed row-major with a row length
//         equal to the launch width (blockDim.x * gridDim.x)
__global__ void kernel_gr( uchar4 *ptr ) {
    // pixel coordinates of this thread and its position in the buffer
    const int col      = threadIdx.x + blockIdx.x * blockDim.x;
    const int row      = threadIdx.y + blockIdx.y * blockDim.y;
    const int rowPitch = blockDim.x * gridDim.x;
    const int pixel    = col + row * rowPitch;

    // coordinates shifted so the image centre is near (0, 0)
    const float u = col/(float)DIM - 0.5f;
    const float v = row/(float)DIM - 0.5f;

    // sine pattern mapped into the 8-bit channel range
    const float phase = abs(u*100) - abs(v*100);
    const unsigned char green = 128 + 127 * sin( phase );

    // store all four channels at once: red 0, green, blue 0, alpha 255
    ptr[pixel] = make_uchar4( 0, green, 0, 255 );
}
// Writes the orange image: one thread per pixel of a 2D launch, each storing
// an opaque pixel whose red channel follows a sine pattern and whose green
// channel is half of the red value.
//
//   ptr - device buffer of uchar4 pixels, indexed row-major with a row length
//         equal to the launch width (blockDim.x * gridDim.x)
__global__ void kernel_or( uchar4 *ptr ) {
    // pixel coordinates of this thread and its position in the buffer
    const int col      = threadIdx.x + blockIdx.x * blockDim.x;
    const int row      = threadIdx.y + blockIdx.y * blockDim.y;
    const int rowPitch = blockDim.x * gridDim.x;
    const int pixel    = col + row * rowPitch;

    // coordinates shifted so the image centre is near (0, 0)
    const float u = col/(float)DIM - 0.5f;
    const float v = row/(float)DIM - 0.5f;

    // sine pattern mapped into the 8-bit channel range
    const float phase = abs(u*100) - abs(v*100);
    const unsigned char orange = 128 + 127 * sin( phase );

    // store all four channels at once: red, green = red/2, blue 0, alpha 255
    ptr[pixel] = make_uchar4( orange, orange/2, 0, 255 );
}
// GLUT display callback: draws the current DIM x DIM image and swaps buffers.
static void draw_func( void ) {
    const GLsizei width  = DIM;
    const GLsizei height = DIM;

    // The final argument is zero because bufferObj is the pixel source: the
    // field is read as an offset into that buffer object, not as a pointer to
    // a bitmap.
    glDrawPixels( width, height, GL_RGBA, GL_UNSIGNED_BYTE, 0 );
    glutSwapBuffers();
}
// GLUT keyboard callback.
//   space (32)  - regenerates the shared buffer with the other image
//                 (orange if the green one is showing, green otherwise) and
//                 redraws
//   escape (27) - releases the CUDA/OpenGL resources and exits
// Any other key is ignored.
//
//   key  - ASCII code of the pressed key
//   x, y - mouse position reported by GLUT (unused)
static void key_func( unsigned char key, int x, int y ) {
    if (key == 32) {
        // map the GL buffer so the kernel can write straight into it
        HANDLE_ERROR( cudaGraphicsMapResources( 1, &resource, NULL ) );

        uchar4* mappedPixels;
        size_t mappedBytes;
        HANDLE_ERROR(
            cudaGraphicsResourceGetMappedPointer( (void**)&mappedPixels,
                                                  &mappedBytes,
                                                  resource) );

        // image 1 is on screen -> draw the orange one, otherwise the green one
        const bool toOrange = (which_image == 1);
        if (toOrange)
            kernel_or<<<mgrids,mthreads>>>( mappedPixels );
        else
            kernel_gr<<<mgrids,mthreads>>>( mappedPixels );
        HANDLE_ERROR(cudaPeekAtLastError());
        HANDLE_ERROR(cudaDeviceSynchronize());

        printf(toOrange ? "orange\n" : "green\n");
        which_image = toOrange ? 2 : 1;

        // give the buffer back to OpenGL, then show the new image
        HANDLE_ERROR( cudaGraphicsUnmapResources( 1, &resource, NULL ) );
        draw_func();
    }
    else if (key == 27) {
        // clean up OpenGL and CUDA
        HANDLE_ERROR( cudaGraphicsUnregisterResource( resource ) );
        glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
        glDeleteBuffers( 1, &bufferObj );
        exit(0);
    }
}
// Program entry point: selects a CUDA device for OpenGL interop, creates the
// GLUT window and a DIM x DIM RGBA pixel buffer object, registers it with
// CUDA, renders the initial green image into it and enters the GLUT main loop
// (key_func switches images, draw_func displays the buffer).
//
//   argc, argv - forwarded to glutInit
int main( int argc, char **argv ) {
    cudaDeviceProp prop;
    int dev;

    // request any device of compute capability 1.0 or better
    memset( &prop, 0, sizeof( cudaDeviceProp ) );
    prop.major = 1;
    prop.minor = 0;
    HANDLE_ERROR( cudaChooseDevice( &dev, &prop ) );

    // tell CUDA which dev we will be using for graphic interop
    // from the programming guide: Interoperability with OpenGL
    // requires that the CUDA device be specified by
    // cudaGLSetGLDevice() before any other runtime calls.
    HANDLE_ERROR( cudaGLSetGLDevice( dev ) );

    // these GLUT calls need to be made before the other OpenGL
    // calls, else we get a seg fault
    glutInit( &argc, argv );
    glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA );
    glutInitWindowSize( DIM, DIM );
    glutCreateWindow( "bitmap" );

    glBindBuffer    = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
    glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
    glGenBuffers    = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
    glBufferData    = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");

    // Calling through a NULL entry point would crash without any message, so
    // stop here if a lookup failed.
    if (!glBindBuffer || !glDeleteBuffers || !glGenBuffers || !glBufferData) {
        fprintf( stderr, "required OpenGL buffer-object functions are not available\n" );
        exit( EXIT_FAILURE );
    }

    // the first three are standard OpenGL, the 4th is the CUDA reg
    // of the bitmap these calls exist starting in OpenGL 1.5
    glGenBuffers( 1, &bufferObj );
    glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
    glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4,
                  NULL, GL_DYNAMIC_DRAW_ARB );
    HANDLE_ERROR(
        cudaGraphicsGLRegisterBuffer( &resource,
                                      bufferObj,
                                      cudaGraphicsMapFlagsNone ) );

    // do work with the memory dst being on the GPU, gotten via mapping
    HANDLE_ERROR( cudaGraphicsMapResources( 1, &resource, NULL ) );
    uchar4* devPtr;
    size_t size;
    HANDLE_ERROR(
        cudaGraphicsResourceGetMappedPointer( (void**)&devPtr,
                                              &size,
                                              resource) );

    // Same launch geometry as key_func (one thread per pixel). The launch is
    // checked like the launches in key_func so that a bad configuration or an
    // in-kernel fault is reported here rather than at some later CUDA call.
    kernel_gr<<<mgrids,mthreads>>>( devPtr );
    HANDLE_ERROR( cudaPeekAtLastError() );
    HANDLE_ERROR( cudaDeviceSynchronize() );

    HANDLE_ERROR( cudaGraphicsUnmapResources( 1, &resource, NULL ) );
    which_image = 1;   // the green image is the one now in the buffer

    // set up GLUT and kick off main loop
    glutKeyboardFunc( key_func );
    glutDisplayFunc( draw_func );
    glutMainLoop();
    return 0;
}