C++ 带有原始指针的max_元素索引_C++_Cuda_Thrust

C++ 带有原始指针的max_元素索引

c++ cuda

C++ 带有原始指针的max_元素索引,c++,cuda,thrust,C++,Cuda,Thrust,我一直在研究推力，我偶然发现了一个几乎（但不完全）回答了我的问题：答案中的示例运行良好，但如何对原始指针执行相同的操作？让我们假设我认为这段代码是正确的（为了简单起见，忽略内核配置）： float*d_A；常数无符号整数noElems=10； cudaMalloc（&d_A，noElems*sizeof（浮动））；初始设备向量（d_A）；推力：：装置\u ptr d\u ptr=推力：：装置\u指针\u投射（d\u A）；推力：：设备向量：：迭代器iter= 推力：最大元素（d_ptr

我一直在研究 Thrust 库，偶然发现了一个几乎（但不完全）回答了我的问题的帖子：

答案中的示例运行良好，但如何对原始指针执行相同的操作？让我们假设我认为这段代码是正确的（为了简单起见，忽略内核配置）：

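float* d_A;
const unsigned int noElems = 10;
cudaMalloc(&d_A, noElems * sizeof(float));
initDeviceVector<<<1, noElems>>>(d_A);
thrust::device_ptr<float> d_ptr = thrust::device_pointer_cast(d_A);
thrust::device_vector<float>::iterator iter =
  thrust::max_element(d_ptr, d_ptr + noElems);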

我不太明白如何使用iter和原始指针提取位置

谢谢您的时间。

可能有很多方法可以做到这一点。不过，直接基于您的代码，只要先把 `iter` 的值转换为合适的设备指针，就可以将它与 `d_ptr` 进行比较（两者相减即可得到最大元素的位置）。

下面的完整示例演示了这一点：

$ cat t436.cu
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/extrema.h>
#include <stdio.h>


/**
 * Kernel: writes (i % 7), converted to float, into data[i].
 *
 * Launch layout: 1-D grid of 1-D blocks, one thread per element. Threads whose
 * global index is >= n return without writing, so the grid may be larger than
 * the array. No shared memory is used.
 *
 * @param data device pointer to the array to fill
 * @param n    number of valid elements in data. The default (~0u) effectively
 *             disables the guard, which keeps launches that pass only `data`
 *             behaving as before; such callers must launch exactly as many
 *             threads as there are elements.
 */
__global__ void initDeviceVector(float *data, unsigned int n = ~0u){
  const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx < n) {
    data[idx] = static_cast<float>(idx % 7);
  }
}

/**
 * Reports a failed CUDA runtime call on stderr.
 *
 * @param err  status returned by the runtime call
 * @param what short description of the call, used in the message
 * @return true when err is cudaSuccess, false otherwise
 */
static bool cudaOk(cudaError_t err, const char *what){
  if (err == cudaSuccess) {
    return true;
  }
  fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
  return false;
}

/**
 * Fills a raw device array, locates its maximum with thrust::max_element and
 * prints the zero-based position of that maximum as "pos = <index>".
 *
 * @return 0 on success, 1 if a CUDA runtime call fails
 */
int main(){
  const unsigned int noElems = 10;
  float* d_A = NULL;

  if (!cudaOk(cudaMalloc(&d_A, noElems * sizeof(float)), "cudaMalloc")) {
    return 1;
  }

  // Ceil-div so the grid covers noElems for any block size; the kernel's
  // bounds guard discards the surplus threads.
  const unsigned int threadsPerBlock = 256;
  const unsigned int numBlocks = (noElems + threadsPerBlock - 1) / threadsPerBlock;
  initDeviceVector<<<numBlocks, threadsPerBlock>>>(d_A, noElems);

  // A launch returns no status itself: cudaGetLastError reports bad launch
  // configurations, cudaDeviceSynchronize reports faults raised while running.
  if (!cudaOk(cudaGetLastError(), "initDeviceVector launch") ||
      !cudaOk(cudaDeviceSynchronize(), "initDeviceVector execution")) {
    cudaFree(d_A);
    return 1;
  }

  // Wrap the raw pointer so Thrust dispatches to the device backend.
  thrust::device_ptr<float> d_ptr = thrust::device_pointer_cast(d_A);

  // For device_ptr inputs max_element hands back a device_ptr, so the index is
  // plain pointer subtraction; no device_vector iterator type is involved.
  thrust::device_ptr<float> maxPtr = thrust::max_element(d_ptr, d_ptr + noElems);
  const int pos = static_cast<int>(maxPtr - d_ptr);

  printf("pos = %d\n", pos);

  cudaFree(d_A);
  return 0;
}

$ nvcc -arch=sm_20 -o t436 t436.cu
$ ./t436
pos = 6
$

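$ cat t436.cu
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/extrema.h>
#include <stdio.h>

__global__ void initDeviceVector(float *data){
  int idx = threadIdx.x+blockDim.x*blockIdx.x;
  data[idx] = idx%7;
}

int main(){
  float* d_A;
  const unsigned int noElems = 10;
  cudaMalloc(&d_A, noElems * sizeof(float));
  initDeviceVector<<<1, noElems>>>(d_A);
  thrust::device_ptr<float> d_ptr = thrust::device_pointer_cast(d_A);
  thrust::device_vector<float>::iterator iter = thrust::max_element(d_ptr, d_ptr + noElems);
  int pos = thrust::device_pointer_cast(&(iter[0])) - d_ptr;
  printf("pos = %d\n", pos);
  return 0;
}
$ nvcc -arch=sm_20 -o t436 t436.cu
$ ./t436
pos = 6
$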

提示：指针本身就是一个随机访问迭代器。

没错，我想我知道应该怎么做（只需用 iter 减去 d_ptr），但我不知道具体怎么写。显然，由于两者的数据类型不同，直接写 iter - d_ptr 是行不通的。例如，有没有办法从 iter 和 d_ptr 得到一个整数值？我查阅了文档，但没能从中找到相关信息。

如果我正确理解了您的问题，您需要两个操作：1）将原始指针转换为 `thrust::device_ptr`，以便把原始指针传给 `thrust::max_element`；2）将迭代器转换回指针，以便之后可以指向最大元素的值。这两个帖子对你有帮助吗：还有，

谢谢您，就是这样。如果可以的话，还有一个小问题：在我的实验中，我把 `max_element` 函数的结果直接存到了 `thrust::device_ptr` 中，它似乎也能正常工作。Thrust 会隐式地执行指针转换吗？

是的，它会，而且这样做能大大简化代码。

$ cat t436.cu
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/extrema.h>
#include <stdio.h>


/**
 * Kernel: writes (i % 7), converted to float, into data[i].
 *
 * Launch layout: 1-D grid of 1-D blocks, one thread per element. Threads whose
 * global index is >= n return without writing, so the grid may be larger than
 * the array. No shared memory is used.
 *
 * @param data device pointer to the array to fill
 * @param n    number of valid elements in data. The default (~0u) effectively
 *             disables the guard, which keeps launches that pass only `data`
 *             behaving as before; such callers must launch exactly as many
 *             threads as there are elements.
 */
__global__ void initDeviceVector(float *data, unsigned int n = ~0u){
  const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx < n) {
    data[idx] = static_cast<float>(idx % 7);
  }
}

/**
 * Reports a failed CUDA runtime call on stderr.
 *
 * @param err  status returned by the runtime call
 * @param what short description of the call, used in the message
 * @return true when err is cudaSuccess, false otherwise
 */
static bool cudaOk(cudaError_t err, const char *what){
  if (err == cudaSuccess) {
    return true;
  }
  fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
  return false;
}

/**
 * Fills a raw device array, locates its maximum with thrust::max_element and
 * prints the zero-based position of that maximum as "pos = <index>".
 *
 * @return 0 on success, 1 if a CUDA runtime call fails
 */
int main(){
  const unsigned int noElems = 10;
  float* d_A = NULL;

  if (!cudaOk(cudaMalloc(&d_A, noElems * sizeof(float)), "cudaMalloc")) {
    return 1;
  }

  // Ceil-div so the grid covers noElems for any block size; the kernel's
  // bounds guard discards the surplus threads.
  const unsigned int threadsPerBlock = 256;
  const unsigned int numBlocks = (noElems + threadsPerBlock - 1) / threadsPerBlock;
  initDeviceVector<<<numBlocks, threadsPerBlock>>>(d_A, noElems);

  // A launch returns no status itself: cudaGetLastError reports bad launch
  // configurations, cudaDeviceSynchronize reports faults raised while running.
  if (!cudaOk(cudaGetLastError(), "initDeviceVector launch") ||
      !cudaOk(cudaDeviceSynchronize(), "initDeviceVector execution")) {
    cudaFree(d_A);
    return 1;
  }

  // Wrap the raw pointer so Thrust dispatches to the device backend.
  thrust::device_ptr<float> d_ptr = thrust::device_pointer_cast(d_A);

  // For device_ptr inputs max_element hands back a device_ptr, so the index is
  // plain pointer subtraction; no device_vector iterator type is involved.
  thrust::device_ptr<float> maxPtr = thrust::max_element(d_ptr, d_ptr + noElems);
  const int pos = static_cast<int>(maxPtr - d_ptr);

  printf("pos = %d\n", pos);

  cudaFree(d_A);
  return 0;
}

$ nvcc -arch=sm_20 -o t436 t436.cu
$ ./t436
pos = 6
$