Visual Studio 2010 CXX0030: Error: expression cannot be evaluated (Visual Studio && CUDA)

Tags: visual-studio-2010, visual-studio-2012, cuda

Whenever I try to run the code posted below under the debugger, it always shows an error message on the device pointers, for example on the variables check_d, check_re_h, ...:

CXX0030: Error: expression cannot be evaluated

I am new to CUDA and Visual Studio, so any help would be much appreciated.

#include<stdio.h>
//#include<stddef.h>
//#include"sourceannotations.h"
#include<cuda.h>
//#include<cutil.h>
#include<cuda_runtime_api.h>
#include<cuda_runtime.h>
#include<iostream>
#include<device_launch_parameters.h>
#include <cmath>
#include<cstdlib>
#include<time.h>
#include<string>
#include<vector>
#define PI 3.141592654
using namespace std;
// #define checkCudaErrors(err)  __checkCudaErrors (err, __FILE__, __LINE__)
struct vertex
{
    float x,y,z,h;
    // struct triangle* _triangle ;
    // struct octree* tree ;
    vertex():x(0),y(0),z(0),h(1){};//,tree(NULL)/*, _triangle(NULL)*/{}
    vertex(float x, float y, float z, float h=0):x(x),y(y),z(z),h(h){};//,/*_triangle(NULL),*/tree(NULL){}


};
struct triangle
{
    vertex v1,v2,v3;
    triangle(){
        //v1._triangle = v2._triangle = v3._triangle = this;
    }
};
//if the function is decleared as global then it is run by multiple threads parallelly

__global__ void VecAdd(/*int *A, int *B, int *C,*/ int *check)
{
    //int count =0;
    int idx = blockIdx.x+ threadIdx.x;
    //  int count=0;
    //int tx = threadIdx.x;
    //this is for checking the value of idx 
    check[idx] = idx;
    //C[idx] = A[idx] + B[idx];


}
__global__ void check(float mat[][4],vertex *a,float *re,int *index)
{
    // float re[4];
    float sum =0;  
    int idx = blockIdx.x+ threadIdx.x;
    //   index[idx] = idx;
    //int count=0;
    //int tx = threadIdx.x;

    /*for (int i=0; i<4; i++)
      {*/

    sum +=  mat[idx][0]* a->x;
    sum +=  mat[idx][1]* a->y;
    sum +=  mat[idx][2]* a->z;
    sum +=  mat[idx][3];
    /*sum +=  *((float*)mat+idx+4*0)* a->x;
      sum +=  *((float*)mat+idx+4*1)* a->y;
      sum +=  *((float*)mat+idx+4*2)* a->z;
      sum +=  *((float*)mattr+idx+4*3);*/    


    /*}*/
    re[idx] = sum;

}
int main()
{
    //float res[4][4];

    triangle t1;

    t1.v1.x = 2;
    t1.v1.y = 1.33512;
    t1.v1.z = 5.849567;

    t1.v2.x = 2;
    t1.v2.y = -1.33512;
    t1.v2.z = 5.849567;

    t1.v3.x = 2;
    t1.v3.y = 0;
    t1.v3.z = 5;

    vertex *check_d;
    vertex *check_h;
    float *check_re_d;
    float *check_re_h;

    float translation_check_d[4][4];
    float translation_check_h[4][4] = {{1, 0, 0, -t1.v1.x},
        {0, 1, 0, -t1.v1.y},
        {0, 0, 1, -t1.v1.z},
        {0 ,0 ,0, 1}};

    check_h = new vertex(1,-4,3);
    check_re_h = new float[4];
    cudaMalloc((void**)&check_d,sizeof(vertex));
    cudaMalloc((void**)&check_re_d,4*sizeof(float));
    cudaMemcpy(check_d,check_h,sizeof(vertex),cudaMemcpyHostToDevice);
    cudaMemcpy(check_re_d,check_re_h,4*sizeof(float),cudaMemcpyHostToDevice);

    size_t dPitch;

    cudaMallocPitch((void**)translation_check_d,&dPitch,4*sizeof(float),4);

    cudaMemcpy2D(translation_check_d,dPitch,translation_check_h,dPitch,4*sizeof(float),4,cudaMemcpyHostToDevice);
    int *index_h = NULL;
    int *index_d = NULL;
    index_h = new int[4];
    cudaMalloc((void**)&index_d,4*sizeof(int));

    cudaMemcpy(index_d,index_h,sizeof(int),cudaMemcpyHostToDevice);

    check<<<1,4>>>(translation_check_d,check_d,check_re_d,index_d);
    //VecAdd<<<10,1>>>(index_d);
    cudaMemcpy(check_re_h,check_re_d,4*sizeof(float),cudaMemcpyDeviceToHost);
    cudaMemcpy(index_h,index_d,4*sizeof(int),cudaMemcpyDeviceToHost);
    std::cout<<"These are the value"<<"INDEX: "<<index_h[0]<<" x: "<<check_re_h[0]<<"\n";
    std::cout<<"These are the value"<<"INDEX: "<<index_h[1]<<" x: "<<check_re_h[1]<<"\n";
    std::cout<<"These are the value"<<"INDEX: "<<index_h[2]<<" x: "<<check_re_h[2]<<"\n";
    cudaFree(check_d);
    cudaFree(check_re_d);
    cudaFree(index_d);
    cudaFree(check_h);
    cudaFree(check_re_h);
    cudaFree(index_h);
    int a;
    cin>>a;
    return 0;
}
There were a few problems with your code, starting with the device memory allocation for the 2D memory copy (the pitched allocation). Below is a "fixed" version of the code; note that the modifications are pointed out in the comments.
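Before the full fix, a minimal, self-contained sketch of just the pitched-allocation pattern may help: cudaMallocPitch allocates padded rows and returns the pitch in bytes, cudaMemcpy2D copies the tightly packed host matrix into the padded device buffer, and the kernel recovers each row from the pitch. All names here (rowSumKernel, mat_h, out_d) are illustrative only, not part of the posted code.

#include <cstdio>
#include <cuda_runtime.h>

// Minimal sketch of the pitched-allocation pattern for a 4x4 float matrix.
__global__ void rowSumKernel(float *mat, size_t pitch, float *out)
{
    int row = threadIdx.x;
    // The pitch is given in bytes, so step through the buffer as char*.
    float *r = (float*)((char*)mat + row * pitch);
    out[row] = r[0] + r[1] + r[2] + r[3];
}

int main()
{
    float mat_h[4][4] = {{1,2,3,4},{5,6,7,8},{9,10,11,12},{13,14,15,16}};

    float *mat_d = NULL; size_t pitch = 0;
    cudaMallocPitch((void**)&mat_d, &pitch, 4*sizeof(float), 4);   // device rows may be padded
    cudaMemcpy2D(mat_d, pitch,                                     // destination and its pitch
                 mat_h, 4*sizeof(float),                           // source and its (packed) pitch
                 4*sizeof(float), 4, cudaMemcpyHostToDevice);      // width in bytes, height

    float *out_d = NULL, out_h[4];
    cudaMalloc((void**)&out_d, 4*sizeof(float));
    rowSumKernel<<<1,4>>>(mat_d, pitch, out_d);
    cudaMemcpy(out_h, out_d, 4*sizeof(float), cudaMemcpyDeviceToHost);
    for (int i = 0; i < 4; ++i) printf("row %d sum = %f\n", i, out_h[i]);

    cudaFree(mat_d); cudaFree(out_d);
    return 0;
}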

#include<stdio.h>  
#include<cuda.h>
#include<cuda_runtime_api.h>
#include<cuda_runtime.h>
#include<iostream>
#include<device_launch_parameters.h>
#include <cmath>
#include<cstdlib>
#include<time.h>
#include<string>
#include<vector>
#include<conio.h>

#define PI 3.141592654
using namespace std;

/*****************/
/* VERTEX STRUCT */
/*****************/
struct vertex
{
    float x,y,z,h;
    vertex():x(0),y(0),z(0),h(1){};//,tree(NULL)/*, _triangle(NULL)*/{}
    vertex(float x, float y, float z, float h=0):x(x),y(y),z(z),h(h){};//,/*_triangle(NULL),*/tree(NULL){}
};

/*******************/
/* TRIANGLE STRUCT */
/*******************/
struct triangle
{
    vertex v1,v2,v3;
    triangle(){ }
};

// The kernel function interface should contain also the pitch value. I have removed the int* index parameter (not needed now).
// I have also updated the mat parameter.
//__global__ void check(float mat[][4],vertex *a,float *re,int *index)
__global__ void check(float** mat,vertex *a,float *re,size_t pitch)
{
    float sum = 0;  
    int idx = threadIdx.x;

    float* row = (float*)((char*)mat + idx*pitch);

    printf("row %i column 0 value %f \n",idx,row[0]);
    printf("row %i column 1 value %f \n",idx,row[1]);
    printf("row %i column 2 value %f \n",idx,row[2]);
    printf("row %i column 3 value %f \n",idx,row[3]);

    // Index through the row pointer recovered from the pitch; indexing mat[idx][...]
    // directly would treat the pitched float data as an array of pointers.
    sum +=  row[0]* a->x;
    sum +=  row[1]* a->y;
    sum +=  row[2]* a->z;
    sum +=  row[3];

    re[idx] = sum;
 }

 /********/
 /* MAIN */
 /********/

 int main()
 {
     triangle t1;

     t1.v1.x = 2;
     t1.v1.y = 1.33512;
     t1.v1.z = 5.849567;

     t1.v2.x = 2;
     t1.v2.y = -1.33512;
     t1.v2.z = 5.849567;

     t1.v3.x = 2;
     t1.v3.y = 0;
     t1.v3.z = 5;

     vertex* check_h = new vertex(1,-4,3);
     vertex* check_d;   cudaMalloc((void**)&check_d,sizeof(vertex));
     float* check_re_h = new float[4];
     float* check_re_d; cudaMalloc((void**)&check_re_d,4*sizeof(float));

     cudaMemcpy(check_d,check_h,sizeof(vertex),cudaMemcpyHostToDevice);
     cudaMemcpy(check_re_d,check_re_h,4*sizeof(float),cudaMemcpyHostToDevice);

     float translation_check_h[4][4] = {{1, 0, 0, -t1.v1.x},{0, 1, 0, -t1.v1.y},{0, 0, 1, -t1.v1.z},{0 ,0 ,0, 1}};
     //This is a host-side static array definition. 
     //float translation_check_d[4][4];
     float** translation_check_d;

     // This is a wrong usage of cudaMallocPitch. The correct syntax is cudaMallocPitch(void** devPtr, size_t* pitch, size_t widthInBytes, size_t height).   
     // size_t dPitch; cudaMallocPitch((void**)translation_check_d,&dPitch,4*sizeof(float),4);
     size_t dPitch; cudaMallocPitch((void**)&translation_check_d,&dPitch,4*sizeof(float),4);
     // I have fixed also the cudaMemcpy2D call, see below.

      //cudaMemcpy2D(translation_check_d,dPitch,translation_check_h,dPitch,4*sizeof(float),4,cudaMemcpyHostToDevice);
     cudaMemcpy2D(translation_check_d, dPitch, translation_check_h, 4*sizeof(float), 4*sizeof(float), 4, cudaMemcpyHostToDevice);

     // Useless
     //int *index_h = new int[4];
     //int *index_d = NULL; cudaMalloc((void**)&index_d,4*sizeof(int));
     //cudaMemcpy(index_d,index_h,sizeof(int),cudaMemcpyHostToDevice);

     //check<<<1,4>>>(translation_check_d,check_d,check_re_d,index_d);
     check<<<1,4>>>(translation_check_d,check_d,check_re_d,dPitch);

     // I haven't checked the rest, being it straightforward.
     getch();
     return 0;
}
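The remaining copies are unchecked, so failures in the allocation, copy, or launch calls go unnoticed. A common pattern with the CUDA runtime API is to wrap each call in a small error-checking helper; the macro below (gpuErrchk is an assumed name, not something from the posted code) is one such sketch:

#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

// Hypothetical helper: abort with file/line information when a runtime call fails.
#define gpuErrchk(call)                                                     \
    do {                                                                    \
        cudaError_t err = (call);                                           \
        if (err != cudaSuccess) {                                           \
            fprintf(stderr, "CUDA error %s at %s:%d\n",                     \
                    cudaGetErrorString(err), __FILE__, __LINE__);           \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)

// Usage around the pitched allocation, the 2D copy, and the kernel launch:
//   gpuErrchk(cudaMallocPitch((void**)&translation_check_d, &dPitch, 4*sizeof(float), 4));
//   gpuErrchk(cudaMemcpy2D(translation_check_d, dPitch, translation_check_h,
//                          4*sizeof(float), 4*sizeof(float), 4, cudaMemcpyHostToDevice));
//   check<<<1,4>>>(translation_check_d, check_d, check_re_d, dPitch);
//   gpuErrchk(cudaGetLastError());       // reports launch configuration errors
//   gpuErrchk(cudaDeviceSynchronize());  // reports errors raised while the kernel ran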