C++ 使用推力';s减少带有像素uchar4数据错误的运算符
我在从排序转换为减少排序时遇到了困难 我一直在 不存在从“uchar4”到“OutputType”的适当转换函数 当我尝试编译并运行此修改后的示例时:C++ 使用推力';s减少带有像素uchar4数据错误的运算符,c++,opengl,cuda,thrust,C++,Opengl,Cuda,Thrust,我在从排序转换为减少排序时遇到了困难 我一直在 不存在从“uchar4”到“OutputType”的适当转换函数 当我尝试编译并运行此修改后的示例时: thrust::reduce(tptr, tptr+(DIM*DIM), int(0), reduce_functor()); 是我修改函子的问题的关键。。。我试图避免添加字符,但返回像素的求和int值,以便稍后获得图像的平均颜色 #include <stdio.h> #include <stdlib.h> #inc
thrust::reduce(tptr, tptr+(DIM*DIM), int(0), reduce_functor());
是我修改函子的问题的关键。。。我试图避免添加字符,但返回像素的求和int值,以便稍后获得图像的平均颜色
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <GL/gl.h>
#include <GL/glut.h>
#include <cuda_gl_interop.h>
#include <GL/glext.h>
#include <GL/glx.h>
#include <thrust/device_ptr.h>
#include <thrust/reduce.h>
#define GET_PROC_ADDRESS( str ) glXGetProcAddress( (const GLubyte *)str )
static void HandleError( cudaError_t err, const char *file, int line ) {
if (err != cudaSuccess) {
printf( "%s in %s at line %d\n", cudaGetErrorString( err ), file, line );
exit( EXIT_FAILURE );
}
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
PFNGLBINDBUFFERARBPROC glBindBuffer = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers = NULL;
PFNGLGENBUFFERSARBPROC glGenBuffers = NULL;
PFNGLBUFFERDATAARBPROC glBufferData = NULL;
#define DIM 512
GLuint bufferObj;
cudaGraphicsResource *resource;
struct reduce_functor
{
__host__ __device__
int operator()(uchar4 left, uchar4 right) const
{
return (left.x + right.x) + (left.y + right.y) + (left.z + right.z);
}
};
// create a green/black pattern
__global__ void kernel( uchar4 *ptr ) {
// map from threadIdx/BlockIdx to pixel position
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x;
// now calculate the value at that position
float fx = x/(float)DIM - 0.5f;
float fy = y/(float)DIM - 0.5f;
unsigned char green = 128 + 127 * sin( abs(fx*100) - abs(fy*100) );
// accessing uchar4 vs unsigned char*
ptr[offset].x = 0;
ptr[offset].y = green;
ptr[offset].z = 0;
ptr[offset].w = 255;
}
static void draw_func( void ) {
glDrawPixels( DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, 0 );
glutSwapBuffers();
}
static void sort_pixels(){
cudaGraphicsMapResources( 1, &resource, NULL );
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);
thrust::device_ptr<uchar4> tptr = thrust::device_pointer_cast(devPtr);
thrust::reduce(tptr, tptr+(DIM*DIM), int(0), reduce_functor());
cudaGraphicsUnmapResources( 1, &resource, NULL );
draw_func();
}
static void key_func( unsigned char key, int x, int y ) {
switch (key) {
case 27:
HANDLE_ERROR( cudaGraphicsUnregisterResource( resource ) );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
glDeleteBuffers( 1, &bufferObj );
exit(0);
break;
case 32:
sort_pixels();
break;
default:
break;
}
}
int main(int argc, char *argv[]) {
cudaGLSetGLDevice( 0 );
glutInit( &argc, argv );
glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA );
glutInitWindowSize( DIM, DIM );
glutCreateWindow( "sort test" );
glBindBuffer = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
glGenBuffers = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
glBufferData = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");
glGenBuffers( 1, &bufferObj );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4, NULL, GL_DYNAMIC_DRAW_ARB );
cudaGraphicsGLRegisterBuffer( &resource, bufferObj, cudaGraphicsMapFlagsNone );
cudaGraphicsMapResources( 1, &resource, NULL );
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);
dim3 grid(DIM/16,DIM/16);
dim3 threads(16,16);
kernel<<<grid,threads>>>( devPtr );
cudaGraphicsUnmapResources( 1, &resource, NULL );
// set up GLUT and kick off main loop
glutKeyboardFunc( key_func );
glutDisplayFunc( draw_func );
glutMainLoop();
}
您应该将减少的结果指定给一个变量,否则它将丢失 您还应该确保为您正在使用的功能包括正确的推力头 查看
推力::reduce
我们看到reduce输出类型从为输入迭代器指定的类型中获取其类型(即,输入类型必须转换为输出类型)。因为你的函子混合了uchar4
和int
类型,所以推力不知道如何转换
可能有很多方法可以做你想做的事情。很明显,您只是想将每个像素的每个像素分量(R、G、B)相加
由于我们希望所有类型在归约(输入和输出)过程中匹配,一种方法是在进行求和归约(然后生成所需的int
结果)之前,使用输入类型并将其转换为int
数量
此代码显示了修改,并为我清晰地编译:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <GL/gl.h>
#include <GL/glut.h>
#include <cuda_gl_interop.h>
#include <GL/glext.h>
#include <GL/glx.h>
#include <thrust/device_ptr.h>
#include <thrust/transform_reduce.h>
#include <thrust/functional.h>
#define GET_PROC_ADDRESS( str ) glXGetProcAddress( (const GLubyte *)str )
static void HandleError( cudaError_t err, const char *file, int line ) {
if (err != cudaSuccess) {
printf( "%s in %s at line %d\n", cudaGetErrorString( err ), file, line );
exit( EXIT_FAILURE );
}
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
PFNGLBINDBUFFERARBPROC glBindBuffer = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers = NULL;
PFNGLGENBUFFERSARBPROC glGenBuffers = NULL;
PFNGLBUFFERDATAARBPROC glBufferData = NULL;
#define DIM 512
GLuint bufferObj;
cudaGraphicsResource *resource;
struct transform_functor
{
__host__ __device__
int operator()(uchar4 data) const
{
return (int)data.x + (int)data.y + (int)data.z;
}
};
// create a green/black pattern
__global__ void kernel( uchar4 *ptr ) {
// map from threadIdx/BlockIdx to pixel position
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x;
// now calculate the value at that position
float fx = x/(float)DIM - 0.5f;
float fy = y/(float)DIM - 0.5f;
unsigned char green = 128 + 127 * sin( abs(fx*100) - abs(fy*100) );
// accessing uchar4 vs unsigned char*
ptr[offset].x = 0;
ptr[offset].y = green;
ptr[offset].z = 0;
ptr[offset].w = 255;
}
static void draw_func( void ) {
glDrawPixels( DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, 0 );
glutSwapBuffers();
}
static void sort_pixels(){
cudaGraphicsMapResources( 1, &resource, NULL );
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);
thrust::device_ptr<uchar4> tptr = thrust::device_pointer_cast(devPtr);
int pix_sum = thrust::transform_reduce(tptr, tptr+(DIM*DIM), transform_functor(), int(0), thrust::plus<int>());
printf("sum = %d\n", pix_sum);
cudaGraphicsUnmapResources( 1, &resource, NULL );
draw_func();
}
static void key_func( unsigned char key, int x, int y ) {
switch (key) {
case 27:
HANDLE_ERROR( cudaGraphicsUnregisterResource( resource ) );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
glDeleteBuffers( 1, &bufferObj );
exit(0);
break;
case 32:
sort_pixels();
break;
default:
break;
}
}
int main(int argc, char *argv[]) {
cudaGLSetGLDevice( 0 );
glutInit( &argc, argv );
glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA );
glutInitWindowSize( DIM, DIM );
glutCreateWindow( "sort test" );
glBindBuffer = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
glGenBuffers = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
glBufferData = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");
glGenBuffers( 1, &bufferObj );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4, NULL, GL_DYNAMIC_DRAW_ARB );
cudaGraphicsGLRegisterBuffer( &resource, bufferObj, cudaGraphicsMapFlagsNone );
cudaGraphicsMapResources( 1, &resource, NULL );
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);
dim3 grid(DIM/16,DIM/16);
dim3 threads(16,16);
kernel<<<grid,threads>>>( devPtr );
cudaGraphicsUnmapResources( 1, &resource, NULL );
// set up GLUT and kick off main loop
glutKeyboardFunc( key_func );
glutDisplayFunc( draw_func );
glutMainLoop();
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <GL/gl.h>
#include <GL/glut.h>
#include <cuda_gl_interop.h>
#include <GL/glext.h>
#include <GL/glx.h>
#include <thrust/device_ptr.h>
#include <thrust/transform_reduce.h>
#include <thrust/functional.h>
#define GET_PROC_ADDRESS( str ) glXGetProcAddress( (const GLubyte *)str )
static void HandleError( cudaError_t err, const char *file, int line ) {
if (err != cudaSuccess) {
printf( "%s in %s at line %d\n", cudaGetErrorString( err ), file, line );
exit( EXIT_FAILURE );
}
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
PFNGLBINDBUFFERARBPROC glBindBuffer = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers = NULL;
PFNGLGENBUFFERSARBPROC glGenBuffers = NULL;
PFNGLBUFFERDATAARBPROC glBufferData = NULL;
#define DIM 512
GLuint bufferObj;
cudaGraphicsResource *resource;
struct transform_functor
{
__host__ __device__
int operator()(uchar4 data) const
{
return (int)data.x + (int)data.y + (int)data.z;
}
};
// create a green/black pattern
__global__ void kernel( uchar4 *ptr ) {
// map from threadIdx/BlockIdx to pixel position
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x;
// now calculate the value at that position
float fx = x/(float)DIM - 0.5f;
float fy = y/(float)DIM - 0.5f;
unsigned char green = 128 + 127 * sin( abs(fx*100) - abs(fy*100) );
// accessing uchar4 vs unsigned char*
ptr[offset].x = 0;
ptr[offset].y = green;
ptr[offset].z = 0;
ptr[offset].w = 255;
}
static void draw_func( void ) {
glDrawPixels( DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, 0 );
glutSwapBuffers();
}
static void sort_pixels(){
cudaGraphicsMapResources( 1, &resource, NULL );
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);
thrust::device_ptr<uchar4> tptr = thrust::device_pointer_cast(devPtr);
int pix_sum = thrust::transform_reduce(tptr, tptr+(DIM*DIM), transform_functor(), int(0), thrust::plus<int>());
printf("sum = %d\n", pix_sum);
cudaGraphicsUnmapResources( 1, &resource, NULL );
draw_func();
}
static void key_func( unsigned char key, int x, int y ) {
switch (key) {
case 27:
HANDLE_ERROR( cudaGraphicsUnregisterResource( resource ) );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
glDeleteBuffers( 1, &bufferObj );
exit(0);
break;
case 32:
sort_pixels();
break;
default:
break;
}
}
int main(int argc, char *argv[]) {
cudaGLSetGLDevice( 0 );
glutInit( &argc, argv );
glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA );
glutInitWindowSize( DIM, DIM );
glutCreateWindow( "sort test" );
glBindBuffer = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
glGenBuffers = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
glBufferData = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");
glGenBuffers( 1, &bufferObj );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4, NULL, GL_DYNAMIC_DRAW_ARB );
cudaGraphicsGLRegisterBuffer( &resource, bufferObj, cudaGraphicsMapFlagsNone );
cudaGraphicsMapResources( 1, &resource, NULL );
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);
dim3 grid(DIM/16,DIM/16);
dim3 threads(16,16);
kernel<<<grid,threads>>>( devPtr );
cudaGraphicsUnmapResources( 1, &resource, NULL );
// set up GLUT and kick off main loop
glutKeyboardFunc( key_func );
glutDisplayFunc( draw_func );
glutMainLoop();
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <GL/gl.h>
#include <GL/glut.h>
#include <cuda_gl_interop.h>
#include <GL/glext.h>
#include <GL/glx.h>
#include <thrust/device_ptr.h>
#include <thrust/transform_reduce.h>
#include <thrust/functional.h>
#include <thrust/tuple.h>
#define GET_PROC_ADDRESS( str ) glXGetProcAddress( (const GLubyte *)str )
static void HandleError( cudaError_t err, const char *file, int line ) {
if (err != cudaSuccess) {
printf( "%s in %s at line %d\n", cudaGetErrorString( err ), file, line );
exit( EXIT_FAILURE );
}
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
PFNGLBINDBUFFERARBPROC glBindBuffer = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers = NULL;
PFNGLGENBUFFERSARBPROC glGenBuffers = NULL;
PFNGLBUFFERDATAARBPROC glBufferData = NULL;
#define DIM 512
GLuint bufferObj;
cudaGraphicsResource *resource;
typedef thrust::tuple<int, int, int> tpl3;
struct transform_functor
{
__host__ __device__
tpl3 operator()(uchar4 data) const
{
tpl3 result;
result.get<0>() = (int)data.x;
result.get<1>() = (int)data.y;
result.get<2>() = (int)data.z;
return result;
}
};
struct reduce_functor
{
__host__ __device__
tpl3 operator()(tpl3 left, tpl3 right) const
{
tpl3 result;
result.get<0>() = left.get<0>() + right.get<0>();
result.get<1>() = left.get<1>() + right.get<1>();
result.get<2>() = left.get<2>() + right.get<2>();
return result;
}
};
// create a green/black pattern
__global__ void kernel( uchar4 *ptr ) {
// map from threadIdx/BlockIdx to pixel position
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x;
// now calculate the value at that position
float fx = x/(float)DIM - 0.5f;
float fy = y/(float)DIM - 0.5f;
unsigned char green = 128 + 127 * sin( abs(fx*100) - abs(fy*100) );
// accessing uchar4 vs unsigned char*
ptr[offset].x = 0;
ptr[offset].y = green;
ptr[offset].z = 0;
ptr[offset].w = 255;
}
static void draw_func( void ) {
glDrawPixels( DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, 0 );
glutSwapBuffers();
}
static void sort_pixels(){
cudaGraphicsMapResources( 1, &resource, NULL );
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);
thrust::device_ptr<uchar4> tptr = thrust::device_pointer_cast(devPtr);
tpl3 my_init;
my_init.get<0>() = 0;
my_init.get<1>() = 0;
my_init.get<2>() = 0;
tpl3 pix_sum = thrust::transform_reduce(tptr, tptr+(DIM*DIM), transform_functor(), my_init, reduce_functor());
printf("avg red = %f\n", (float)(pix_sum.get<0>())/(float)(DIM*DIM));
printf("avg grn = %f\n", (float)(pix_sum.get<1>())/(float)(DIM*DIM));
printf("avg blu = %f\n", (float)(pix_sum.get<2>())/(float)(DIM*DIM));
cudaGraphicsUnmapResources( 1, &resource, NULL );
draw_func();
}
static void key_func( unsigned char key, int x, int y ) {
switch (key) {
case 27:
HANDLE_ERROR( cudaGraphicsUnregisterResource( resource ) );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
glDeleteBuffers( 1, &bufferObj );
exit(0);
break;
case 32:
sort_pixels();
break;
default:
break;
}
}
int main(int argc, char *argv[]) {
cudaGLSetGLDevice( 0 );
glutInit( &argc, argv );
glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA );
glutInitWindowSize( DIM, DIM );
glutCreateWindow( "sort test" );
glBindBuffer = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
glGenBuffers = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
glBufferData = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");
glGenBuffers( 1, &bufferObj );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4, NULL, GL_DYNAMIC_DRAW_ARB );
cudaGraphicsGLRegisterBuffer( &resource, bufferObj, cudaGraphicsMapFlagsNone );
cudaGraphicsMapResources( 1, &resource, NULL );
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);
dim3 grid(DIM/16,DIM/16);
dim3 threads(16,16);
kernel<<<grid,threads>>>( devPtr );
cudaGraphicsUnmapResources( 1, &resource, NULL );
// set up GLUT and kick off main loop
glutKeyboardFunc( key_func );
glutDisplayFunc( draw_func );
glutMainLoop();
}