用CUDA计算协方差

用CUDA计算协方差,cuda,covariance,pca,Cuda,Covariance,Pca,我正在使用CUDA实现基于主成分分析(PCA)的人脸识别。我使用orl人脸数据库,计算平均图像和归一化图像我在计算协方差矩阵时遇到了一个问题 __global__ void mean(int* i_data, int num, int size, int* o_data, int WIDTH, int HEIGHT, int* normalized) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y

我正在使用CUDA实现基于主成分分析(PCA)的人脸识别。我使用orl人脸数据库,计算平均图像和归一化图像我在计算协方差矩阵时遇到了一个问题

__global__ void mean(int* i_data, int num, int size, int* o_data, int WIDTH, int HEIGHT, int* normalized)
{
  int x = threadIdx.x + blockIdx.x * blockDim.x;
  int y = threadIdx.y + blockIdx.y * blockDim.y;
  int idx = x + y * WIDTH;
  int r = 0;
  int idx_z=0;
  for (int z = 0; z < num; ++z)
  {
   idx_z = z * WIDTH*HEIGHT + idx;
   r += i_data[ idx_z ];
  }
   o_data[ idx ] = int(r/num);
   for (int z = 0; z < num; ++z)
  {
   idx_z = z * WIDTH*HEIGHT + idx;
   normalized[idx_z]  = abs(i_data[idx_z] - o_data[idx]); 
  }
}

dim3 dimBlock = dim3(8,4,1);
dim3 dimGrid = dim3(ceil(rows/dimBlock.x) , ceil(cols/dimBlock.y));

mean<<<dimGrid,dimBlock>>>(dev_images, IMAGE_NUM,size,dev_output,rows,cols,dev_normalized);
\uuuuu全局\uuuuuu无效平均值(int*i\u数据、int-num、int-size、int*o\u数据、int-WIDTH、int-HEIGHT、int*normalized)
{
int x=threadIdx.x+blockIdx.x*blockDim.x;
int y=线程IDX.y+块IDX.y*块DIM.y;
int idx=x+y*宽度;
int r=0;
int idx_z=0;
对于(int z=0;z

数据库图像的大小为
(92112)

您的代码对我来说没有任何意义

CUDA中的协方差计算可以通过结合使用cuBLAS和推力来轻松执行。考虑到
N
实现
K
随机变量,协方差估计公式如下

其中,
qjk
j,k=1,…,k
是协方差估计值,
Xj
Xk
加上
overbar
是根据可用实现估计的随机变量均值

下面,我将报告一个完整的示例:

#include <cublas_v2.h>

#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/generate.h>
#include <thrust/reduce.h>
#include <thrust/functional.h>
#include <thrust/random.h>
#include <thrust/sequence.h>

#include <stdio.h>
#include <iostream>

#include "Utilities.cuh"
#include "TimingGPU.cuh"

/*************************************/
/* CONVERT LINEAR INDEX TO ROW INDEX */
/*************************************/
template <typename T>
struct linear_index_to_row_index : public thrust::unary_function<T,T> {

    T Ncols; // --- Number of columns

    __host__ __device__ linear_index_to_row_index(T Ncols) : Ncols(Ncols) {}

    __host__ __device__ T operator()(T i) { return i / Ncols; }
};

/********/
/* MAIN */
/********/
int main()
{
    const int Nsamples = 3;     // --- Number of realizations for each random variable (number of rows of the X matrix)
    const int NX    = 4;        // --- Number of random variables (number of columns of the X matrix)

    // --- Random uniform integer distribution between 10 and 99
    thrust::default_random_engine rng;
    thrust::uniform_int_distribution<int> dist(10, 99);

    // --- Matrix allocation and initialization
    thrust::device_vector<float> d_X(Nsamples * NX);
    for (size_t i = 0; i < d_X.size(); i++) d_X[i] = (float)dist(rng);

    // --- cuBLAS handle creation
    cublasHandle_t handle;
    cublasSafeCall(cublasCreate(&handle));

    /*************************************************/
    /* CALCULATING THE MEANS OF THE RANDOM VARIABLES */
    /*************************************************/
    // --- Array containing the means multiplied by Nsamples
    thrust::device_vector<float> d_means(NX);

    thrust::device_vector<float> d_ones(Nsamples, 1.f);

    float alpha = 1.f / (float)Nsamples;
    float beta  = 0.f;
    cublasSafeCall(cublasSgemv(handle, CUBLAS_OP_T, Nsamples, NX, &alpha, thrust::raw_pointer_cast(d_X.data()), Nsamples, 
                               thrust::raw_pointer_cast(d_ones.data()), 1, &beta, thrust::raw_pointer_cast(d_means.data()), 1));

    /**********************************************/
    /* SUBTRACTING THE MEANS FROM THE MATRIX ROWS */
    /**********************************************/
    thrust::transform(
                d_X.begin(), d_X.end(),
                thrust::make_permutation_iterator(
                        d_means.begin(),
                        thrust::make_transform_iterator(thrust::make_counting_iterator(0), linear_index_to_row_index<int>(Nsamples))),
                d_X.begin(),
                thrust::minus<float>());    

    /*************************************/
    /* CALCULATING THE COVARIANCE MATRIX */
    /*************************************/
    thrust::device_vector<float> d_cov(NX * NX);

    alpha = 1.f;
    cublasSafeCall(cublasSgemm(handle, CUBLAS_OP_T, CUBLAS_OP_N, NX, NX, Nsamples, &alpha,
                               thrust::raw_pointer_cast(d_X.data()), Nsamples, thrust::raw_pointer_cast(d_X.data()), Nsamples, &beta,
                               thrust::raw_pointer_cast(d_cov.data()), NX));

    // --- Final normalization by Nsamples - 1
    thrust::transform(
                d_cov.begin(), d_cov.end(),
                thrust::make_constant_iterator((float)(Nsamples-1)),
                d_cov.begin(),
                thrust::divides<float>());  

    for(int i = 0; i < NX * NX; i++) std::cout << d_cov[i] << "\n";

    return 0;
}
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括“Utilities.cuh”
#包括“TimingGPU.cuh”
/*************************************/
/*将线性索引转换为行索引*/
/*************************************/
模板
结构线性索引到行索引:公共推力::一元函数{
T Ncols;//--列数
__主机设备线性索引到行索引(T Ncols):Ncols(Ncols){
__主机设备操作符()(ti){return i/Ncols;}
};
/********/
/*主要*/
/********/
int main()
{
const int Nsamples=3;//--每个随机变量的实现数(X矩阵的行数)
const int NX=4;//--随机变量数(X矩阵的列数)
//---10到99之间的随机均匀整数分布
推力:默认随机发动机转速;
推力:均匀分布区(10,99);
//---矩阵分配和初始化
推力:设备矢量d(Nsamples*NX);
对于(size_t i=0;i对于(int i=0;i
//! Calculates one or more than one coVarianceMatrix given data.
//  There can be many classes since many covariance matrixes.
/*!
    \param inMatrix This vector contains matrix data in major storage. 
    Forexample if inMatrix=[1 2 3 4 5 6] and trialSizes=[2] this means matrix we will work on a matrix like :
        |1 4 |
        |2 5 |
        |3 6 | -> 2 Trials, 3 Features. Columns contains feature rows contains trials (samples)
    \param trialSizes There can be many classes since many covariance matrixes. Samples from all classes will be given with inMatrix.
    But we need to know how many trials(samples) we have for each class. 
    For example if inMatrix=[1 2 3 4 5 6 7 8 9 10 11 12] and trialSizes=[2,2] 
    this means matrix we will work on a matrix like :
        |1 4 |  |7 10 |
        |2 5 |  |8 11 |
        |3 6 |  |9 12 |  --> Total number of trials(samples which is total rowCount) 2 + 2 = 4 , 
                             So colSize = inMatrix.size()/4 = 3(feature vector size)
                         --> There is two element in trialSize vec so each vector has to samples
*/
void multiQDACovianceCalculator(std::vector<float>& inMatrix, std::vector<int>& trialSizes)
{
    cublasHandle_t handle; // CUBLAS context
    int classCount = trialSizes.size();
    int rowSize = std::accumulate(trialSizes.begin(), trialSizes.end(), 0);
    int dimensionSize = inMatrix.size() / rowSize;
    float alpha = 1.0f;
    float beta = 0.0f; // bet =1

    thrust::device_vector<float> d_cov1(dimensionSize * dimensionSize);
    thrust::device_vector<float> d_cov2(dimensionSize * dimensionSize);
    thrust::device_vector<float> d_covResult(dimensionSize * dimensionSize);

    thrust::device_vector<float> d_wholeMatrix(inMatrix);
    thrust::device_vector<float> d_meansVec(dimensionSize); // rowVec of means of trials
    float *meanVecPtr = thrust::raw_pointer_cast(d_meansVec.data());
    float *device2DMatrixPtr = thrust::raw_pointer_cast(d_wholeMatrix.data());
    auto maxTrialNumber = *std::max_element(trialSizes.begin(), trialSizes.end());
    thrust::device_vector<float> deviceVector(maxTrialNumber, 1.0f);

    cublasCreate(&handle);
    // Inside of for loop  one covariance matrix calculated each time
    for (int i = 0; i < trialSizes.size(); i++)
    {
        // X*transpose(X) / N
        alpha = 1.0f / trialSizes[i];
        cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_T, dimensionSize, dimensionSize, trialSizes[i], &alpha,
            device2DMatrixPtr, dimensionSize, device2DMatrixPtr, dimensionSize, &beta,
            thrust::raw_pointer_cast(d_cov1.data()), dimensionSize);

        // Mean vector of each column
        alpha = 1.0f;
        cublasSgemv(handle, CUBLAS_OP_N, dimensionSize, trialSizes[i], &alpha, device2DMatrixPtr,
            dimensionSize, thrust::raw_pointer_cast(deviceVector.data()), 1, &beta, meanVecPtr, 1);

        // MeanVec * transpose(MeanVec) / N*N
        alpha = 1.0f / (trialSizes[i] * trialSizes[i]);
        cublasSgemm(handle, CUBLAS_OP_T, CUBLAS_OP_N, dimensionSize, dimensionSize, 1, &alpha,
            meanVecPtr, 1, meanVecPtr, 1, &beta,
            thrust::raw_pointer_cast(d_cov2.data()), dimensionSize);

        alpha = 1.0f;
        beta = -1.0f;
        //  (X*transpose(X) / N) -  (MeanVec * transpose(MeanVec) / N*N)
        cublasSgeam(handle, CUBLAS_OP_N, CUBLAS_OP_N, dimensionSize, dimensionSize, &alpha,
            thrust::raw_pointer_cast(d_cov1.data()), dimensionSize, &beta, thrust::raw_pointer_cast(d_cov2.data()), 
            dimensionSize, thrust::raw_pointer_cast(d_covResult.data()), dimensionSize);

        // Go to other class and calculate its covarianceMatrix
        device2DMatrixPtr += trialSizes[i] * dimensionSize;
    }
    printVector(d_covResult);
    cublasDestroy(handle);
}
/!计算给定数据的一个或多个协方差矩阵。
//因为协方差矩阵很多,所以可以有很多类。
/*!
\param inMatrix此向量包含主存储器中的矩阵数据。
例如,如果inMatrix=[1 2 3 4 5 6]和trialSizes=[2],这意味着我们将处理一个矩阵,如:
|1 4 |
|2 5 |
|3 6 |->2个试验,3个特征。列包含特征行包含试验(样本)
\由于协方差矩阵很多,所以可以有很多类。所有类的样本都将用inMatrix给出。
但我们需要知道每门课有多少个实验(样本)。
例如,如果inMatrix=[1 2 3 4 5 6 7 8 9 10 11 12]和trialSizes=[2,2]
这意味着我们将在矩阵上工作,如:
|1 4 |  |7 10 |
|2 5 |  |8 11 |
|3 6 | | 9 12 |-->总计n