Cuda推力-最大vec3_Cuda_Vector Graphics_Thrust_Glm Math

CUDA Thrust：求 glm::vec3 数组中的最大元素

cuda

Cuda推力-最大vec3,cuda,vector-graphics,thrust,glm-math,Cuda,Vector Graphics,Thrust,Glm Math,当我想对浮点数组执行缩减时，我通常会执行以下操作： float res = *thrust::max_element(thrust::device, thrust::device_ptr<float>(dDensities), thrust::device_ptr<float>(dDensities+numParticles) ); 有可能吗？是的，有可能。如果您还不熟悉，可能需要阅读如

当我想对浮点数组执行归约（例如求最大值）时，我通常会执行以下操作：

    // Wrap the raw pointer range [dDensities, dDensities + numParticles) in
    // thrust::device_ptr iterators, then dereference the iterator returned by
    // max_element to copy the largest float into res.
    // NOTE(review): presumably dDensities is a device allocation holding at
    // least numParticles floats — confirm against the caller.
    thrust::device_ptr<float> first(dDensities);
    thrust::device_ptr<float> last(dDensities + numParticles);
    float res = *thrust::max_element(thrust::device, first, last);

对 glm::vec3 数组（按向量长度比较）也能这样做吗？

是的，有可能。如果您还不熟悉 Thrust，可能需要先阅读 Thrust 快速入门指南。

如果您查看 Thrust 的 extrema 文档，您会注意到 `thrust::max_element` 有几种不同的重载形式（大多数 Thrust 算法也是如此）。其中一个重载接受二元比较函子。我们可以定义一个比较函子，让它按您想要的方式进行比较。

下面是一个简单的工作示例：

$ cat t134.cu
#include <thrust/extrema.h>
#include <thrust/device_ptr.h>
#include <glm/glm.hpp>
#include <iostream>

// Binary "less-than" functor for thrust::max_element (and similar algorithms).
//
// Orders two values by the squared Euclidean length of their (x, y, z)
// members. The squared length is compared directly: square root is monotonic
// on non-negative values, so the ordering is the same and the sqrt is skipped.
//
// T may be any type exposing arithmetic members x, y and z.
// Returns true when t1 is strictly shorter than t2.
struct comp
{
  template <typename T>
  __host__ __device__
  // const-qualified and taking const references: the comparison never mutates
  // its operands, and this lets the functor be invoked on const objects and
  // with temporaries.
  bool operator()(const T &t1, const T &t2) const {
    return ((t1.x*t1.x+t1.y*t1.y+t1.z*t1.z) < (t2.x*t2.x+t2.y*t2.y+t2.z*t2.z));
  }
};

// Demo driver: copies three glm::vec3 values to the device, finds the one
// with the largest squared length via thrust::max_element + comp, and prints it.
// Returns 0 on success, 1 if a CUDA runtime call fails.
int main(){

  // const so the array bound below is a constant expression (standard C++
  // array instead of a compiler-extension VLA).
  const int numParticles = 3;
  glm::vec3 d[numParticles];
  d[0].x = 0; d[0].y = 0; d[0].z = 0;
  d[1].x = 2; d[1].y = 2; d[1].z = 2;
  d[2].x = 1; d[2].y = 1; d[2].z = 1;

  glm::vec3 *dDensities = 0;
  cudaError_t err = cudaMalloc(&dDensities, numParticles*sizeof(glm::vec3));
  if (err != cudaSuccess) {
    std::cerr << "cudaMalloc failed: " << cudaGetErrorString(err) << std::endl;
    return 1;
  }
  err = cudaMemcpy(dDensities, d, numParticles*sizeof(glm::vec3), cudaMemcpyHostToDevice);
  if (err != cudaSuccess) {
    std::cerr << "cudaMemcpy failed: " << cudaGetErrorString(err) << std::endl;
    cudaFree(dDensities);
    return 1;
  }

  // Dereferencing the returned device iterator copies the winning element
  // into the host-side variable res.
  thrust::device_ptr<glm::vec3> first(dDensities);
  glm::vec3 res = *thrust::max_element(thrust::device,
            first,
            first + numParticles,
            comp()
            );

  // Release the device buffer; the result has already been copied into res.
  cudaFree(dDensities);

  std::cout << "max element x: " << res.x << " y: " << res.y << " z: " << res.z << std::endl;
  return 0;
}
$ nvcc -arch=sm_61 -o t134 t134.cu
$ ./t134
max element x: 2 y: 2 z: 2
$

$cat t134.cu
#包括
#包括
#包括
#包括
结构组件
{
模板
__主机设备__
布尔运算符（）（T&t1、T&t2）{
返回（（t1.x*t1.x+t1.y*t1.y+t1.z*t1.z）<（t2.x*t2.x+t2.y*t2.y+t2.z*t2.z））；
}
};
int main（）{
int numParticles=3；
glm:：Vec3D[numParticles]；
d[0].x=0；d[0].y=0；d[0].z=0；
d[1].x=2；d[1].y=2；d[1].z=2；
d[2].x=1；d[2].y=1；d[2].z=1；
glm:：vec3*dDensities；
cudaMalloc（&dDensities，numParticles*sizeof（glm:：vec3））；
cudaMemcpy（密度，d，numParticles*sizeof（glm:：vec3），cudaMemcpyHostToDevice）；
glm:：vec3 res=*推力：：最大元素（推力：：设备，
推力：装置（密度），
推力：装置（密度+粒子），
comp（）
);
标准：：cout
len = sqrtf(v.x*v.x + v.y*v.y + v.z*v.z);

$ cat t134.cu
#include <thrust/extrema.h>
#include <thrust/device_ptr.h>
#include <glm/glm.hpp>
#include <iostream>

// Binary "less-than" functor for thrust::max_element (and similar algorithms).
//
// Orders two values by the squared Euclidean length of their (x, y, z)
// members. The squared length is compared directly: square root is monotonic
// on non-negative values, so the ordering is the same and the sqrt is skipped.
//
// T may be any type exposing arithmetic members x, y and z.
// Returns true when t1 is strictly shorter than t2.
struct comp
{
  template <typename T>
  __host__ __device__
  // const-qualified and taking const references: the comparison never mutates
  // its operands, and this lets the functor be invoked on const objects and
  // with temporaries.
  bool operator()(const T &t1, const T &t2) const {
    return ((t1.x*t1.x+t1.y*t1.y+t1.z*t1.z) < (t2.x*t2.x+t2.y*t2.y+t2.z*t2.z));
  }
};

// Demo driver: copies three glm::vec3 values to the device, finds the one
// with the largest squared length via thrust::max_element + comp, and prints it.
// Returns 0 on success, 1 if a CUDA runtime call fails.
int main(){

  // const so the array bound below is a constant expression (standard C++
  // array instead of a compiler-extension VLA).
  const int numParticles = 3;
  glm::vec3 d[numParticles];
  d[0].x = 0; d[0].y = 0; d[0].z = 0;
  d[1].x = 2; d[1].y = 2; d[1].z = 2;
  d[2].x = 1; d[2].y = 1; d[2].z = 1;

  glm::vec3 *dDensities = 0;
  cudaError_t err = cudaMalloc(&dDensities, numParticles*sizeof(glm::vec3));
  if (err != cudaSuccess) {
    std::cerr << "cudaMalloc failed: " << cudaGetErrorString(err) << std::endl;
    return 1;
  }
  err = cudaMemcpy(dDensities, d, numParticles*sizeof(glm::vec3), cudaMemcpyHostToDevice);
  if (err != cudaSuccess) {
    std::cerr << "cudaMemcpy failed: " << cudaGetErrorString(err) << std::endl;
    cudaFree(dDensities);
    return 1;
  }

  // Dereferencing the returned device iterator copies the winning element
  // into the host-side variable res.
  thrust::device_ptr<glm::vec3> first(dDensities);
  glm::vec3 res = *thrust::max_element(thrust::device,
            first,
            first + numParticles,
            comp()
            );

  // Release the device buffer; the result has already been copied into res.
  cudaFree(dDensities);

  std::cout << "max element x: " << res.x << " y: " << res.y << " z: " << res.z << std::endl;
  return 0;
}
$ nvcc -arch=sm_61 -o t134 t134.cu
$ ./t134
max element x: 2 y: 2 z: 2
$