Visual studio 2010 CXX0030:错误:无法计算表达式,visual studio&&;库达
每当我尝试在我发布在下面的调试器中运行代码时,它总是在设备指针上显示一条错误消息,例如在变量check\d、check\re\h…中 CXX0030:错误:无法计算表达式 我是CUDA和visual studio的新手,因此非常感谢您的帮助Visual studio 2010 CXX0030:错误:无法计算表达式,visual studio&&;库达,visual-studio-2010,visual-studio-2012,cuda,Visual Studio 2010,Visual Studio 2012,Cuda,每当我尝试在我发布在下面的调试器中运行代码时,它总是在设备指针上显示一条错误消息,例如在变量check\d、check\re\h…中 CXX0030:错误:无法计算表达式 我是CUDA和visual studio的新手,因此非常感谢您的帮助 #include<stdio.h> //#include<stddef.h> //#include"sourceannotations.h" #include<cuda.h> //#include<cutil.h&g
#include<stdio.h>
//#include<stddef.h>
//#include"sourceannotations.h"
#include<cuda.h>
//#include<cutil.h>
#include<cuda_runtime_api.h>
#include<cuda_runtime.h>
#include<iostream>
#include<device_launch_parameters.h>
#include <cmath>
#include<cstdlib>
#include<time.h>
#include<string>
#include<vector>
#define PI 3.141592654
using namespace std;
// #define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
struct vertex
{
float x,y,z,h;
// struct triangle* _triangle ;
// struct octree* tree ;
vertex():x(0),y(0),z(0),h(1){};//,tree(NULL)/*, _triangle(NULL)*/{}
vertex(float x, float y, float z, float h=0):x(x),y(y),z(z),h(h){};//,/*_triangle(NULL),*/tree(NULL){}
};
struct triangle
{
vertex v1,v2,v3;
triangle(){
//v1._triangle = v2._triangle = v3._triangle = this;
}
};
//if the function is decleared as global then it is run by multiple threads parallelly
__global__ void VecAdd(/*int *A, int *B, int *C,*/ int *check)
{
//int count =0;
int idx = blockIdx.x+ threadIdx.x;
// int count=0;
//int tx = threadIdx.x;
//this is for checking the value of idx
check[idx] = idx;
//C[idx] = A[idx] + B[idx];
}
__global__ void check(float mat[][4],vertex *a,float *re,int *index)
{
// float re[4];
float sum =0;
int idx = blockIdx.x+ threadIdx.x;
// index[idx] = idx;
//int count=0;
//int tx = threadIdx.x;
/*for (int i=0; i<4; i++)
{*/
sum += mat[idx][0]* a->x;
sum += mat[idx][1]* a->y;
sum += mat[idx][2]* a->z;
sum += mat[idx][3];
/*sum += *((float*)mat+idx+4*0)* a->x;
sum += *((float*)mat+idx+4*1)* a->y;
sum += *((float*)mat+idx+4*2)* a->z;
sum += *((float*)mattr+idx+4*3);*/
/*}*/
re[idx] = sum;
}
int main()
{
//float res[4][4];
triangle t1;
t1.v1.x = 2;
t1.v1.y = 1.33512;
t1.v1.z = 5.849567;
t1.v2.x = 2;
t1.v2.y = -1.33512;
t1.v2.z = 5.849567;
t1.v3.x = 2;
t1.v3.y = 0;
t1.v3.z = 5;
vertex *check_d;
vertex *check_h;
float *check_re_d;
float *check_re_h;
float translation_check_d[4][4];
float translation_check_h[4][4] = {{1, 0, 0, -t1.v1.x},
{0, 1, 0, -t1.v1.y},
{0, 0, 1, -t1.v1.z},
{0 ,0 ,0, 1}};
check_h = new vertex(1,-4,3);
check_re_h = new float[4];
cudaMalloc((void**)&check_d,sizeof(vertex));
cudaMalloc((void**)&check_re_d,4*sizeof(float));
cudaMemcpy(check_d,check_h,sizeof(vertex),cudaMemcpyHostToDevice);
cudaMemcpy(check_re_d,check_re_h,4*sizeof(float),cudaMemcpyHostToDevice);
size_t dPitch;
cudaMallocPitch((void**)translation_check_d,&dPitch,4*sizeof(float),4);
cudaMemcpy2D(translation_check_d,dPitch,translation_check_h,dPitch,4*sizeof(float),4,cudaMemcpyHostToDevice);
int *index_h = NULL;
int *index_d = NULL;
index_h = new int[4];
cudaMalloc((void**)&index_d,4*sizeof(int));
cudaMemcpy(index_d,index_h,sizeof(int),cudaMemcpyHostToDevice);
check<<<1,4>>>(translation_check_d,check_d,check_re_d,index_d);
//VecAdd<<<10,1>>>(index_d);
cudaMemcpy(check_re_h,check_re_d,4*sizeof(float),cudaMemcpyDeviceToHost);
cudaMemcpy(index_h,index_d,4*sizeof(int),cudaMemcpyDeviceToHost);
std::cout<<"These are the value"<<"INDEX: "<<index_h[0]<<" x: "<<check_re_h[0]<<"\n";
std::cout<<"These are the value"<<"INDEX: "<<index_h[1]<<" x: "<<check_re_h[1]<<"\n";
std::cout<<"These are the value"<<"INDEX: "<<index_h[2]<<" x: "<<check_re_h[2]<<"\n";
cudaFree(check_d);
cudaFree(check_re_d);
cudaFree(index_d);
cudaFree(check_h);
cudaFree(check_re_h);
cudaFree(index_h);
int a;
cin>>a;
return 0;
}
#包括
//#包括
//#包括“sourceannotations.h”
#包括
//#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#定义PI 3.141592654
使用名称空间std;
//#定义checkCudaErrors(err)uu checkCudaErrors(err、uuuu文件、uuuuu行)
结构顶点
{
浮动x,y,z,h;
//结构三角形*\u三角形;
//结构八叉树*tree;
顶点():x(0),y(0),z(0),h(1){};//,树(NULL)/*,_三角形(NULL)*/{
顶点(浮动x,浮动y,浮动z,浮动h=0):x(x),y(y),z(z),h(h){};/,/*_三角形(NULL),*/树(NULL){}
};
结构三角形
{
顶点v1,v2,v3;
三角形(){
//v1._三角形=v2._三角形=v3._三角形=此;
}
};
//如果函数被声明为全局函数,那么它将由多个线程并行运行
__全局无效向量添加(/*int*A,int*B,int*C,*/int*check)
{
//整数计数=0;
intidx=blockIdx.x+threadIdx.x;
//整数计数=0;
//int tx=线程idx.x;
//这用于检查idx的值
检查[idx]=idx;
//C[idx]=A[idx]+B[idx];
}
__全局无效检查(float mat[][4],顶点*a,float*re,int*索引)
{
//浮动re[4];
浮点数和=0;
intidx=blockIdx.x+threadIdx.x;
//索引[idx]=idx;
//整数计数=0;
//int tx=线程idx.x;
/*对于(int i=0;ix;
sum+=mat[idx][1]*a->y;
sum+=mat[idx][2]*a->z;
sum+=mat[idx][3];
/*总和+=*((浮点*)mat+idx+4*0)*a->x;
总和+=*((浮点*)mat+idx+4*1)*a->y;
总和+=*((浮点*)mat+idx+4*2)*a->z;
总和+=*((浮点数*)mattr+idx+4*3);*/
/*}*/
re[idx]=总和;
}
int main()
{
//浮点数[4][4];
三角形t1;
t1.v1.x=2;
t1.v1.y=1.33512;
t1.v1.z=5.849567;
t1.v2.x=2;
t1.v2.y=-1.33512;
t1.v2.z=5.849567;
t1.v3.x=2;
t1.v3.y=0;
t1.v3.z=5;
顶点*检查d;
顶点*检查h;
浮动*检查;
浮动*检查;
浮动平移检查[4][4];
浮点转换检查[4][4]={{1,0,0,-t1.v1.x},
{0,1,0,-t1.v1.y},
{0,0,1,-t1.v1.z},
{0 ,0 ,0, 1}};
检查_h=新顶点(1,-4,3);
检查_re_h=新浮动[4];
cudamaloc((void**)和check_d,sizeof(vertex));
cudamaloc((无效**)和check_re__d,4*sizeof(浮动));
cudaMemcpy(check_d,check_h,sizeof(顶点),cudamemcpyhostodevice);
cudaMemcpy(check_re_d,check_re_h,4*sizeof(float),cudaMemcpyHostToDevice);
大小不一;
cudaMallocPitch((void**)translation\u check\u d和dPitch,4*sizeof(float),4);
cudaMemcpy2D(翻译检查,dPitch,翻译检查,dPitch,4*sizeof(float),4,cudaMemcpyHostToDevice);
int*index_h=NULL;
int*index_d=NULL;
索引_h=新整数[4];
cudamaloc((void**)和index_d,4*sizeof(int));
cudaMemcpy(索引d、索引h、大小f(int)、cudamemcpyhostodevice);
检查(翻译检查、检查、检查、索引);
//VecAdd(索引d);
cudaMemcpy(check_re_h,check_re_d,4*sizeof(float),cudaMemcpyDeviceToHost);
cudaMemcpy(索引h,索引d,4*sizeof(int),cudaMemcpyDeviceToHost);
std::cout您的代码出现了一些问题,首先是2D mem拷贝的设备内存分配(也包括倾斜分配)。下面是一个“固定”代码。请注意,我已在注释中澄清了修改
#include<stdio.h>
#include<cuda.h>
#include<cuda_runtime_api.h>
#include<cuda_runtime.h>
#include<iostream>
#include<device_launch_parameters.h>
#include <cmath>
#include<cstdlib>
#include<time.h>
#include<string>
#include<vector>
#include<conio.h>
#define PI 3.141592654
using namespace std;
/*****************/
/* VERTEX STRUCT */
/*****************/
struct vertex
{
float x,y,z,h;
vertex():x(0),y(0),z(0),h(1){};//,tree(NULL)/*, _triangle(NULL)*/{}
vertex(float x, float y, float z, float h=0):x(x),y(y),z(z),h(h {};//,/*_triangle(NULL),*/tree(NULL){}
};
/*******************/
/* TRIANGLE STRUCT */
/*******************/
struct triangle
{
vertex v1,v2,v3;
triangle(){ }
};
// The kernel function interface should contain also the pitch value. I have removed the int* index parameter (not needed now).
// I have also updated the mat parameter.
//__global__ void check(float mat[][4],vertex *a,float *re,int *index)
__global__ void check(float** mat,vertex *a,float *re,size_t pitch)
{
float sum = 0;
int idx = threadIdx.x;
float* row = (float*)((char*)mat + idx*pitch);
printf("row %i column 0 value %f \n",idx,row[0]);
printf("row %i column 1 value %f \n",idx,row[1]);
printf("row %i column 2 value %f \n",idx,row[2]);
printf("row %i column 3 value %f \n",idx,row[3]);
sum += mat[idx][0]* a->x;
sum += mat[idx][1]* a->y;
sum += mat[idx][2]* a->z;
sum += mat[idx][3];
re[idx] = sum;
}
/********/
/* MAIN */
/********/
int main()
{
triangle t1;
t1.v1.x = 2;
t1.v1.y = 1.33512;
t1.v1.z = 5.849567;
t1.v2.x = 2;
t1.v2.y = -1.33512;
t1.v2.z = 5.849567;
t1.v3.x = 2;
t1.v3.y = 0;
t1.v3.z = 5;
vertex* check_h = new vertex(1,-4,3);
vertex* check_d; cudaMalloc((void**)&check_d,sizeof(vertex));
float* check_re_h = new float[4];
float* check_re_d; cudaMalloc((void**)&check_re_d,4*sizeof(float));
cudaMemcpy(check_d,check_h,sizeof(vertex),cudaMemcpyHostToDevice);
cudaMemcpy(check_re_d,check_re_h,4*sizeof(float),cudaMemcpyHostToDevice);
float translation_check_h[4][4] = {{1, 0, 0, -t1.v1.x},{0, 1, 0, -t1.v1.y},{0, 0, 1, -t1.v1.z},{0 ,0 ,0, 1}};
//This is a host-side static array definition.
//float translation_check_d[4][4];
float** translation_check_d;
// This is a wrong usage of cudaMallocPitch. The correct syntax is cudaMallocPitch(void** devPtr, size_t* pitch, size_t widthInBytes, size_t height).
// size_t dPitch; cudaMallocPitch((void**)translation_check_d,&dPitch,4*sizeof(float),4);
size_t dPitch; cudaMallocPitch(&translation_check_d,&dPitch,4*sizeof(float),4);
// I have fixed also the cudaMemcpy2D call, see below.
//cudaMemcpy2D(translation_check_d,dPitch,translation_check_h,dPitch,4*sizeof(float),4,cudaMemcpyHostToDevice);
cudaMemcpy2D(translation_check_d, dPitch, translation_check_h, 4*sizeof(float), 4*sizeof(float), 4, cudaMemcpyHostToDevice);
// Useless
//int *index_h = new int[4];
//int *index_d = NULL; cudaMalloc((void**)&index_d,4*sizeof(int));
//cudaMemcpy(index_d,index_h,sizeof(int),cudaMemcpyHostToDevice);
//check<<<1,4>>>(translation_check_d,check_d,check_re_d,index_d);
check<<<1,4>>>(translation_check_d,check_d,check_re_d,dPitch);
// I haven't checked the rest, being it straightforward.
getch();
return 0;
}
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#定义PI 3.141592654
使用名称空间std;
/*****************/
/*顶点结构*/
/*****************/
结构顶点
{
浮动x,y,z,h;
顶点():x(0),y(0),z(0),h(1){};//,树(NULL)/*,_三角形(NULL)*/{
顶点(浮动x,浮动y,浮动z,浮动h=0):x(x),y(y),z(z),h(h{};/,/*_三角形(NULL),*/树(NULL){}
};
/*******************/
/*三角形结构*/
/*******************/
结构三角形
{
顶点v1,v2,v3;
三角形(){}
};
//内核函数接口也应该包含基音值。我已经删除了int*index参数(现在不需要)。
//我还更新了mat参数。
//__全局无效检查(float mat[][4],顶点*a,float*re,int*索引)
__全局无效检查(浮动**垫、顶点*a、浮动*re、大小\u t间距)
{
浮点数和=0;
int idx=threadIdx.x;
浮点*行=(浮点*)((字符*)mat+idx*节距);
printf(“行%i列0值%f\n”,idx,行[0]);
printf(“第%i行第1列值%f\n”,idx,第[1]行];
printf(“第%i行第2列值%f\n”,idx,第[2]行];
printf(“第%i行第3列值%f\n”,idx,第[3]行];
sum+=mat[idx][0]*a->x;
sum+=mat[idx][1]*a->y;
sum+=mat[idx][2]*a->z;
sum+=mat[idx][3];
re[idx]=总和;
}
/********/
/*主要*/
/********/
int main()
{
三角形t1;
t1.v1.x=2;
t1.v1.y=1.33512;
t1.v1.z=5.849567;
t1.v2.x=2;
t1.v2.y=-1.33512;
t1.v2.z=5.849567;
t1.v3.x=2;
t1.v3.y=0;
t1.v3.z=5;
顶点*check_h=新顶点(1,-4,3);
顶点*check_d;Cudamaloc((void**)和check_d,sizeof(顶点));
浮点数*检查浮点数=新浮点数[4];
浮动*检查;cudaMalloc((无效**)和检查,4*sizeof(浮动));
cudaMemcpy(check_d,check_h,sizeof(顶点),cudamemcpyhostodevice);
cudaMemcpy(check_re_d,check_re_h,4*sizeof(float),cudaMemcpyHostToDevice);
浮点转换检查[4][4]={{1,0,0,-t1.v1.x},{0,1,0,-t1.v1.y},{0,0,1,-t1.v1.z},{0,0,0,1};
//这是主机端静态数组定义。
//浮动平移检查[4][4];
浮动**平移检查;
//这是cudaMallocPitch的错误用法。正确的语法是cudaMallocPitch(void**devPtr,size\u t*