使用 thrust::min_element 查找 cudaMalloc 分配的数组中最小元素的位置_Cuda_Thrust

使用 thrust::min_element 查找 cudaMalloc 分配的数组中最小元素的位置

cuda

使用 thrust::min_element 查找 cudaMalloc 分配的数组中最小元素的位置,cuda,thrust,Cuda,Thrust,我试图在某个CUDA数组中找到最小的元素 float *p; ... thrust::device_ptr<float> pWrapper(p); thrust::device_ptr<float> pos = thrust::min_element(pWrapper, pWrapper + MAXX * MAXY, thrust::minimum<float>()); 与此相反，当提供给 min_element 调用的类

我试图在某个CUDA数组中找到最小的元素

float *p;
...    
thrust::device_ptr<float> pWrapper(p);    
thrust::device_ptr<float> pos = 
               thrust::min_element(pWrapper, pWrapper + MAXX * MAXY, thrust::minimum<float>());

与此相反，当提供给 min_element 调用的类型是 device_ptr 时，min_element 的返回类型是 float *p（根据 device_vector 的定义模板）。从我刚才提供的代码片段中，我无法判断最小值所在的位置，也不知道如何从数组中把它取出来。

我试图从 min_element 的返回值中分别减去 p 和 pWrapper 的地址，但两者都不起作用。
我刚刚发现，我只需要对 min_element 的返回结果使用 * 操作符即可。
在你的帖子中，你考虑的是一种非常常见的情况：已有一个通过 cudaMalloc 分配的数组，并希望用 thrust::min_element 找到其最小元素的位置和值。下面我提供一个完整的例子，希望它对其他用户也有用。
基本上，下面的解决方案与你的思路相同，即用 thrust::device_ptr 封装 cudaMalloc 分配的线性内存；不过，最小元素的位置是通过 thrust::distance 得到的。
以下是完整的代码：
#include <cstdio>
#include <cstdlib>
#include <ctime>

#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>
#include <thrust/distance.h>
#include <thrust/extrema.h>

/********************/
/* CUDA ERROR CHECK */
/********************/
// Wraps a CUDA runtime call: forwards its cudaError_t result to gpuAssert
// together with the file and line of the call site.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }

// Reports a failed CUDA runtime call on stderr.
//
//   code  - status returned by the CUDA call being checked
//   file  - source file of the call site (normally __FILE__); taken as
//           const char* because __FILE__ is a string literal and cannot be
//           bound to a non-const char* in standard C++
//   line  - source line of the call site (normally __LINE__)
//   abort - when true (the default) the process exits, using the numeric
//           value of `code` as the exit status
//
// Does nothing when code == cudaSuccess.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
   if (code != cudaSuccess) 
   {
      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
      if (abort) exit(code);
   }
}

/********/
/* MAIN */
/********/
// Demo: find the position and value of the minimum element of a
// cudaMalloc'ed float array using thrust::min_element.
//
// Steps: fill a host array with rand() values, upload it to the device,
// wrap the raw device pointer in a thrust::device_ptr, run min_element,
// convert the returned iterator to an index with thrust::distance, and
// copy the single minimum value back to the host for printing.
int main() {

    const int N = 16;

    srand(time(NULL));

    // --- Host side memory allocation and initialization
    float *h_A = (float*)malloc(N * sizeof(float));
    if (h_A == NULL) {
        fprintf(stderr, "Host allocation of %d floats failed\n", N);
        return EXIT_FAILURE;
    }
    for (int i=0; i<N; i++) h_A[i] = rand();

    // --- Device side memory allocation and initialization
    float *d_A; gpuErrchk(cudaMalloc((void**)&d_A, N * sizeof(float)));
    gpuErrchk(cudaMemcpy(d_A, h_A, N * sizeof(float), cudaMemcpyHostToDevice));

    // --- Wrap the raw device pointer so Thrust treats it as device memory
    thrust::device_ptr<float> dp = thrust::device_pointer_cast(d_A);
    thrust::device_ptr<float> pos = thrust::min_element(dp, dp + N);

    // --- Offset of the minimum from the start of the array; the explicit cast
    //     documents the narrowing from the iterator difference type
    unsigned int pos_index = static_cast<unsigned int>(thrust::distance(dp, pos));
    float min_val;
    gpuErrchk(cudaMemcpy(&min_val, &d_A[pos_index], sizeof(float), cudaMemcpyDeviceToHost));

    // --- The host copy holds the same values that were uploaded to d_A
    for (int i=0; i<N; i++) printf("d_A[%i] = %f\n", i, h_A[i]);
    printf("\n");
    // --- Print the already-converted index: passing the raw result of
    //     thrust::distance to %i mismatches the format specifier
    printf("Position of the minimum element = %u; Value of the minimum element = %f\n", pos_index, min_val);

    // --- Release resources explicitly before tearing the device context down
    gpuErrchk(cudaFree(d_A));
    free(h_A);

    gpuErrchk(cudaDeviceReset());

    return 0;
}

#include <thrust/device_vector.h>
#include <thrust/extrema.h>
/********************/
/* CUDA ERROR CHECK */
/********************/
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
/********/
/* MAIN */
/********/
int main() {
const int N = 16;
srand(time(NULL));
// --- Host side memory allocation and initialization
float *h_A = (float*)malloc(N * sizeof(float));
for (int i=0; …（代码片段在此处被截断）
此处不应使用 thrust::minimum，而应使用 thrust::less（或者不指定比较器，因为 thrust::less 是默认值）。
#include <thrust/device_vector.h>
#include <thrust/extrema.h>

/********************/
/* CUDA ERROR CHECK */
/********************/
// Wraps a CUDA runtime call: forwards its cudaError_t result to gpuAssert
// together with the file and line of the call site.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }

// Reports a failed CUDA runtime call on stderr.
//
//   code  - status returned by the CUDA call being checked
//   file  - source file of the call site (normally __FILE__); taken as
//           const char* because __FILE__ is a string literal and cannot be
//           bound to a non-const char* in standard C++
//   line  - source line of the call site (normally __LINE__)
//   abort - when true (the default) the process exits, using the numeric
//           value of `code` as the exit status
//
// Does nothing when code == cudaSuccess.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
   if (code != cudaSuccess) 
   {
      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
      if (abort) exit(code);
   }
}

/********/
/* MAIN */
/********/
// Demo: find the position and value of the minimum element of a
// cudaMalloc'ed float array using thrust::min_element.
//
// Steps: fill a host array with rand() values, upload it to the device,
// wrap the raw device pointer in a thrust::device_ptr, run min_element,
// convert the returned iterator to an index with thrust::distance, and
// copy the single minimum value back to the host for printing.
int main() {

    const int N = 16;

    srand(time(NULL));

    // --- Host side memory allocation and initialization
    float *h_A = (float*)malloc(N * sizeof(float));
    if (h_A == NULL) {
        fprintf(stderr, "Host allocation of %d floats failed\n", N);
        return EXIT_FAILURE;
    }
    for (int i=0; i<N; i++) h_A[i] = rand();

    // --- Device side memory allocation and initialization
    float *d_A; gpuErrchk(cudaMalloc((void**)&d_A, N * sizeof(float)));
    gpuErrchk(cudaMemcpy(d_A, h_A, N * sizeof(float), cudaMemcpyHostToDevice));

    // --- Wrap the raw device pointer so Thrust treats it as device memory
    thrust::device_ptr<float> dp = thrust::device_pointer_cast(d_A);
    thrust::device_ptr<float> pos = thrust::min_element(dp, dp + N);

    // --- Offset of the minimum from the start of the array; the explicit cast
    //     documents the narrowing from the iterator difference type
    unsigned int pos_index = static_cast<unsigned int>(thrust::distance(dp, pos));
    float min_val;
    gpuErrchk(cudaMemcpy(&min_val, &d_A[pos_index], sizeof(float), cudaMemcpyDeviceToHost));

    // --- The host copy holds the same values that were uploaded to d_A
    for (int i=0; i<N; i++) printf("d_A[%i] = %f\n", i, h_A[i]);
    printf("\n");
    // --- Print the already-converted index: passing the raw result of
    //     thrust::distance to %i mismatches the format specifier
    printf("Position of the minimum element = %u; Value of the minimum element = %f\n", pos_index, min_val);

    // --- Release resources explicitly before tearing the device context down
    gpuErrchk(cudaFree(d_A));
    free(h_A);

    gpuErrchk(cudaDeviceReset());

    return 0;
}