C++ 错误:遇到非法内存访问

C++ 错误:遇到非法内存访问,c++,opencv,cuda,C++,Opencv,Cuda,我提出了疑问,我得到了答案。因此,我修改了代码bt,无法访问d_point[i]。我怎样才能访问它 __global__ void densefun(int *d_counters,float2 *d_points,int d_x_max,int d_y_max,int width,int height, int min_distance,int size) { int i = blockDim.x * blockIdx.x + threadIdx.x; if(i <= s

我提出了疑问,我得到了答案。因此,我修改了代码bt,无法访问d_point[i]。我怎样才能访问它

  __global__ void densefun(int *d_counters,float2 *d_points,int d_x_max,int d_y_max,int width,int height, int min_distance,int size)
{
  int i = blockDim.x * blockIdx.x + threadIdx.x;

   if(i <= size)
   {
        float2 point = (d_points)[i];
        int x = floorf(point.x);
        int y = floorf(point.y);
        printf(" ( %d  %d )",x,y);
        if(x < d_x_max && y < d_y_max)
        {  
            x /= min_distance;
            y /= min_distance;
            (d_counters)[y*width+x]++;
            __syncthreads();
        }
    }
}


void DenseSample(const Mat& grey, std::vector<Point2f>& points, const double quality, const int min_distance)
{
    int width = grey.cols/min_distance;
    int height = grey.rows/min_distance;
    Mat eig;
    cornerMinEigenVal(grey, eig, 3, 3);
    double maxVal = 0;
    minMaxLoc(eig, 0, &maxVal);
    const double threshold = maxVal*quality;
    std::vector<int> counters(width*height);
    int x_max = min_distance*width;
    int y_max = min_distance*height; 
    printf("in descriptor size:%ld ",points.size());   
    int *d_counters;
    float2 *d_points;
    cudaMalloc(&d_counters,counters.size()*width*height*sizeof(int));
    printf("in cuda point size:%d       ",points.size());
    cudaMalloc(&d_points,points.size()*sizeof(float2));
    cout<<"points.size() : "<<points.size()<<endl;
    cudaMemcpy(d_points, &points, points.size()*sizeof(float2), cudaMemcpyHostToDevice);
    int blk=cvFloor(points.size()/1024)+1;
    cout<<"blk : "<<blk<<endl;
    if(points.size()>0)
    {
        densefun<<<blk,1024>>>(d_counters,d_points,x_max,y_max,width,height,min_distance, points.size());
        cudaError_t err = cudaGetLastError();
        if (err != cudaSuccess) 
        printf("Error: %s\n", cudaGetErrorString(err));
        cudaMemcpy(&counters, d_counters, counters.size()* width*height*sizeof(int), cudaMemcpyDeviceToHost);
    }
    cudaFree(d_counters);
    cudaFree(d_points);
    points.clear();
    int index = 0;
    int offset = min_distance/2;
    for(int i = 0; i < height; i++)
    for(int j = 0; j < width; j++, index++) 
    {
        if(counters[index] <= 0)
        {
            int x = j*min_distance+offset;
            int y = i*min_distance+offset;
            if(eig.at<float>(y, x) > threshold)
            points.push_back(Point2f(float(x), float(y)));
        }
    }
}
\uuuuu全局\uuuuuu无效densefun(int*d_计数器、float2*d_点、int d_x_max、int d_y_max、int宽度、int高度、int最小距离、int大小)
{
int i=blockDim.x*blockIdx.x+threadIdx.x;

如果(i您创建了一个螺纹网格,其块长度
1024
,网格长度等于

int blk=cvFloor(points.size()/1024)+1;

这基本上意味着线程数将是大于
points.size()
的1024的倍数。在这种情况下,使用:

int i = blockDim.x * blockIdx.x + threadIdx.x;
float2 point = (d_points)[i];
无法成功,因为您几乎可以确定您将获得越界内存访问。请添加一些条件以确保不会发生这种情况

__global__ void densefun(int *d_counters,float2 *d_points,int d_x_max,int d_y_max,int width, int height, int min_distance)
{
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if(i < width * height)
  {
      //rest of the code
  }

}

如果要分配
float2
数组(或复制到该数组),则需要使用
sizeof(float2)

您如何知道d_点的访问实际上是问题的原因?我假设您想检查
x
y
是否需要分别在
d_x_max
d_max
的约束范围内。不应该
if(x
be
if(x
then?d_点的访问是问题的原因,因为我试图打印x和y的值,但没有打印。我尝试了if(xd\u点
分配足够的内存。我编辑了我的答案。您能解释一下吗将你问题中的代码日期定为当前版本?你能让这段代码可读吗?拜托,伙计,人们都在试图帮助你,你可能会付出一些努力,让代码看起来不那么混乱。
float2 *d_points;

cudaMalloc(&d_points,points.size()*sizeof(float));