Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/python/352.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
为什么Python在SVD方面比LAPACK和C更快_Python_C_Performance_Numpy_Lapack - Fatal编程技术网

为什么Python在SVD方面比LAPACK和C更快

为什么Python在SVD方面比LAPACK和C更快,python,c,performance,numpy,lapack,Python,C,Performance,Numpy,Lapack,我最近需要从一些C代码中计算SVD。鉴于LAPACK的稳定性和广泛接受度,我决定使用它。代码的运行速度似乎比我想象的要慢得多。我相信Python和Numpy是通过LAPACK计算SVD的,所以我决定将速度与Python进行比较。令我惊讶的是,Python代码在计算SVD方面比C代码快得多。事实上,Python代码计算完整SVD的速度大约与C/LAPACK代码计算半个SVD的速度一样快 这是我第一次真正体验从C调用Fortran,所以我想我是做错了什么导致了速度的减慢,但我不知道这是什么 问题:为

我最近需要从一些C代码中计算SVD。鉴于LAPACK的稳定性和广泛接受度,我决定使用它。代码的运行速度似乎比我想象的要慢得多。我相信Python和Numpy是通过LAPACK计算SVD的,所以我决定将速度与Python进行比较。令我惊讶的是,Python代码在计算SVD方面比C代码快得多。事实上,Python代码计算完整SVD的速度大约与C/LAPACK代码计算半个SVD的速度一样快

这是我第一次真正体验从C调用Fortran,所以我想我是做错了什么导致了速度的减慢,但我不知道这是什么

问题:为什么Python代码比C代码快得多

最后,我需要在C中使用更快的SVD,因此下面的问题是:

问题:是否有一个稳定的性能C库用于计算部分奇异值分解(最好是稀疏矩阵)

下面是一些用于计时测试的最小代码。所使用的矩阵没有任何特殊意义,我只需要一些可以用C和Python轻松生成的东西

test_svd.c

#include <stdlib.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

/*
 * Prototype for the externally defined dgesvdx routine (the range-selecting
 * SVD driver this file links against).  Every argument, scalars included, is
 * handed over by address.  Parameter meanings are assumed to follow the
 * LAPACK reference documentation for DGESVDX -- confirm there before
 * changing any call site.
 */
extern void dgesvdx(char* jobu, char* jobv, char* range,
                    int* m, int* n, double* a, int* lda,
                    double* vl, double* vu, int* il, int* iu, int* ns,
                    double* s, double* u, int* ldu, double* vt, int* ldvt,
                    double* work, int* lwork, int* iwork, int* info);

/*
 * Prototype for the externally defined dgesdd routine (the SVD driver used
 * when svd() is called with use_dgesdd set).  Every argument, scalars
 * included, is handed over by address.  Parameter meanings are assumed to
 * follow the LAPACK reference documentation for DGESDD -- confirm there
 * before changing any call site.
 */
extern void dgesdd(char* jobz,
                   int* m, int* n, double* a, int* lda,
                   double* s, double* u, int* ldu, double* vt, int* ldvt,
                   double* work, int* lwork, int* iwork, int* info);

/* File-local boolean type: false has the value 0 and true the value 1. */
typedef enum bool {
  false = 0,
  true = 1
} bool;

/*
 * Compute singular values/vectors of `a` with dgesdd or dgesvdx.
 *
 * a             input matrix of num_rows*num_cols doubles; never modified,
 *               the routines work on a private copy.
 * num_rows      number of rows of a (must be > 0).
 * num_cols      number of columns of a (must be > 0).
 * num_sing_vals number of singular triplets requested (must be > 0).  It is
 *               used as the upper index of the "I" range for dgesvdx and as
 *               the leading dimension passed alongside `u` for both routines.
 * u, s, vt      caller-allocated output buffers.  Note that the calls hand
 *               num_cols/num_rows to the routines as m/n and pass `vt` in the
 *               routine's U slot and `u` in its VT slot, i.e. the dimensions
 *               and factors are swapped relative to the prototypes.
 * use_dgesdd    true selects dgesdd, false selects dgesvdx.
 *
 * Returns true on success.  On any failure (bad argument, allocation
 * failure, non-zero info from the routine) a diagnostic is written to stderr
 * and false is returned.  All scratch memory is released on every path.
 */
bool svd(double* a, 
         int num_rows,
         int num_cols,
         int num_sing_vals,
         double* u,
         double* s,
         double* vt,
         bool use_dgesdd) {
  bool ok = false;
  double* a_work = NULL;
  int* iwork = NULL;
  double* work = NULL;
  int lwork = -1;          // -1 requests a workspace-size query
  int min_m_n;
  int info = 0;
  double vl = 0;
  double vu = 1;
  int il = 1;
  int iu = num_sing_vals;
  int ns = num_sing_vals;
  double worksize = 0;
  size_t a_bytes;
  size_t iwork_len;

  if(num_rows <= 0 || num_cols <= 0 || num_sing_vals <= 0) {
    fprintf(stderr,"svd: dimensions must be positive\n");
    return(false);
  }

  //////////////////////////////////////////////////////
  // Compute and allocate optimal workspace
  //////////////////////////////////////////////////////
  min_m_n = (num_cols < num_rows) ? num_cols : num_rows;
  // size_t arithmetic: num_cols*num_rows can exceed INT_MAX for large inputs
  a_bytes = (size_t)num_cols*(size_t)num_rows*sizeof(double);
  // DGESDD documents IWORK as 8*min(M,N) integers (the previous 7*min(M,N)
  // was one block short); the 24*min(M,N) used for dgesvdx is kept as is.
  iwork_len = (size_t)(use_dgesdd ? 8 : 24)*(size_t)min_m_n;

  // scratch copy of a so the caller's matrix is not overwritten
  a_work = (double*)malloc(a_bytes);
  iwork = (int*)malloc(iwork_len*sizeof(int));
  if(a_work == NULL || iwork == NULL) {
    fprintf(stderr,"svd: out of memory\n");
    goto cleanup;
  }

  // workspace query: with lwork == -1 the routine reports the size in worksize
  if(use_dgesdd) {
    dgesdd("S",&num_cols,&num_rows,a_work,&num_cols,s,vt,&num_cols,u,&num_sing_vals,&worksize,&lwork,iwork,&info);
  } else {
    dgesvdx("V","V","I",&num_cols,&num_rows,a_work,&num_cols,&vl,&vu,&il,&iu,&ns,s,vt,&num_cols,u,&num_sing_vals,&worksize,&lwork,iwork,&info);
  }
  if(info) {
    // errno is not set by these routines, so report info instead of perror()
    fprintf(stderr,"error computing lwork (info = %d)\n",info);
    goto cleanup;
  }

  // allocate work
  lwork = (int)worksize;
  if(lwork < 1) {
    fprintf(stderr,"svd: workspace query returned an invalid size\n");
    goto cleanup;
  }
  work = (double*)malloc((size_t)lwork*sizeof(double));
  if(work == NULL) {
    fprintf(stderr,"svd: out of memory\n");
    goto cleanup;
  }

  //////////////////////////////////////////////////////
  // Compute the svd
  //////////////////////////////////////////////////////
  memcpy(a_work,a,a_bytes);
  if(use_dgesdd) {
    dgesdd("S",&num_cols,&num_rows,a_work,&num_cols,s,vt,&num_cols,u,&num_sing_vals,work,&lwork,iwork,&info);
  } else {
    ns = num_sing_vals;
    dgesvdx("V","V","I",&num_cols,&num_rows,a_work,&num_cols,&vl,&vu,&il,&iu,&ns,s,vt,&num_cols,u,&num_sing_vals,work,&lwork,iwork,&info);
  }
  if(info<0) {
    fprintf(stderr,"invalid argument in SVD (info = %d)\n",info);
  } else if(info>0) {
    fprintf(stderr,"SVD did not converge (info = %d)\n",info);
  } else {
    ok = true;
  }

cleanup:
  // single exit point so the scratch buffers are released on every path
  free(iwork);
  free(work);
  free(a_work);
  return(ok);
}

/*
 * Run one timed svd() call and print "<label> <seconds> seconds".
 *
 * Allocates zeroed output buffers (u: num_rows*num_sing_vals,
 * s: min(num_rows,num_cols), vt: num_sing_vals*num_cols), times the call with
 * clock(), and frees the buffers again.  Returns the result of svd(), or
 * false when the buffers could not be allocated.
 */
static bool time_svd(const char* label,
                     double* A,
                     int num_rows,
                     int num_cols,
                     int num_sing_vals,
                     bool use_dgesdd) {
  int min_m_n = (num_rows < num_cols) ? num_rows : num_cols;
  double* u = (double*)calloc((size_t)num_rows*(size_t)num_sing_vals,sizeof(double));
  double* s = (double*)calloc((size_t)min_m_n,sizeof(double));
  double* vt = (double*)calloc((size_t)num_sing_vals*(size_t)num_cols,sizeof(double));
  bool success = false;

  if(u != NULL && s != NULL && vt != NULL) {
    clock_t tic = clock();
    success = svd(A,num_rows,num_cols,num_sing_vals,u,s,vt,use_dgesdd);
    clock_t toc = clock();
    printf("%s %lf seconds\n",label,(double)(toc-tic)/CLOCKS_PER_SEC);
    if(!success) {
      fprintf(stderr,"%s failed\n",label);
    }
  } else {
    fprintf(stderr,"%s: out of memory\n",label);
  }

  free(u);
  free(s);
  free(vt);
  return success;
}

/*
 * Benchmark driver: builds a 1000x1000 row-major test matrix and times a full
 * dgesdd SVD, a full dgesvdx SVD and a 200-value partial dgesvdx SVD.
 * Exits with EXIT_FAILURE if the matrix cannot be allocated or any of the
 * three runs fails.
 */
int main(int argc, char** argv) {
  (void)argc;
  (void)argv;

  int num_rows = 1000;
  int num_cols = 1000;
  int size = num_rows*num_cols;
  // M_PI is not part of ISO C; derive pi from the standard library instead
  const double pi = acos(-1.0);

  double* A = (double*)calloc((size_t)size,sizeof(double));
  if(A == NULL) {
    fprintf(stderr,"out of memory allocating the test matrix\n");
    return EXIT_FAILURE;
  }
  for(int r = 0; r < num_rows; ++r) {
    for(int c = 0; c < num_cols; ++c) {
      int idx = r*num_cols+c;
      // The cosine term uses the transposed linear index c*num_rows+r so the
      // matrix matches the one built by the companion Python script
      // (the earlier c*num_rows+c ignored the row entirely).
      double value = sin(5*pi*idx/((double)size))+cos(10*pi*(c*num_rows+r)/((double)size));
      double magnitude = fabs(value);
      // keep only entries with 0.75 <= |value| <= 0.9
      A[idx] = (magnitude < 0.75 || magnitude > 0.9) ? 0 : value;
    }
  }

  int num_sing_vals = (num_rows < num_cols) ? num_rows : num_cols;
  int first_few_sv = 200;
  bool all_ok = true;

  all_ok = time_svd("full dgesdd",A,num_rows,num_cols,num_sing_vals,true) && all_ok;
  all_ok = time_svd("full dgesvdx",A,num_rows,num_cols,num_sing_vals,false) && all_ok;
  all_ok = time_svd("partial dgesvdx",A,num_rows,num_cols,first_few_sv,false) && all_ok;

  free(A);
  return all_ok ? EXIT_SUCCESS : EXIT_FAILURE;
}
这为我提供了以下定时输出:

full dgesdd 8.003578 seconds
full dgesvdx 17.077815 seconds
partial dgesvdx 1.498775 seconds
与以下python代码相比:

test_svd.py

import numpy, time
dim = 1000
term1 = numpy.sin((5*numpy.pi*numpy.arange(dim**2).reshape(dim,dim))/(dim**2))
term2 = numpy.cos((10*numpy.pi*numpy.arange(dim**2).reshape(dim,dim).T)/(dim**2))
A = term1+term2
A = numpy.where(numpy.abs(A)<0.75,0,A)
A = numpy.where(numpy.abs(A)>0.9,0,A)
tic = time.time()
U,S,Vt = numpy.linalg.svd(A)
toc = time.time()
print "full svd", toc - tic, "seconds"
两者计算出的实际奇异值(singular values)是一致的。然而,Python中的完整SVD在耗时上与C中的部分SVD相当,这一事实让我感到困惑


如果有关系的话,我的Python是用GCC 6.2.1编译的Python 2.7.12,我的GCC版本也是6.2.1。这个函数(numpy中的)很可能是用Fortran实现的。@BlackBear说得对——
numpy
svd
实现实际上是优化的。许多核心Numpy数学函数都是用C编写的,包含在一个C库中,该库旨在实现跨库集成(例如,SciPy使用它)。我在这里看到了LAPACK和Numpy之间的较小差异。1.42s(LAPACK)和1.17s(numpy)。您是否正在使用不适合您的体系结构的LAPACK构建?我确实觉得numpy获胜有点令人惊讶,但它确实有一个优点,就是它的输入形式的多样性要小得多。正如其他评论所观察到的,这不是Python打败C/Fortran,而是一个C/Fortran实现打败了另一个C/Fortran实现。欢迎使用Stackoverflow!有不同的算法可用于计算奇异值分解。根据DGESDD使用分治方法(DBDSDC)
DGESVDX
使用特征值问题(DBDSVDX),
DGESVD
使用QR分解,
DGESVJ
可能能够处理微小的奇异值。。。综上所述,numpy很可能回到LAPACK的
DGESDD
@francis在第二次调用Fortran代码之前有一个memcpy。第一次调用不应该引用a_work,它只是用来确定数组大小。该函数很可能是在Fortran中实现的。@BlackBear说得没错,
numpy
svd
实现实际上处于优化状态。许多核心Numpy数学函数都是用C编写的,包含在一个C库中,该库旨在实现跨库集成(例如,SciPy使用它)。我在这里看到了LAPACK和Numpy之间的较小差异。1.42s(LAPACK)和1.17s(numpy)。您是否正在使用不适合您的体系结构的LAPACK构建?我确实觉得numpy获胜有点令人惊讶,但它确实有一个优点,就是它的输入形式的多样性要小得多。正如其他评论所观察到的,这不是Python打败C/Fortran,而是一个C/Fortran实现打败了另一个C/Fortran实现。欢迎使用Stackoverflow!有不同的算法可用于计算奇异值分解。根据DGESDD使用分治方法(DBDSDC)
DGESVDX
使用特征值问题(DBDSVDX),
DGESVD
使用QR分解,
DGESVJ
可能能够处理微小的奇异值。。。综上所述,numpy很可能回到LAPACK的
DGESDD
@francis在第二次调用Fortran代码之前有一个memcpy。第一次调用不应该引用a_work,它只是用来确定数组大小。
import numpy, time
dim = 1000
term1 = numpy.sin((5*numpy.pi*numpy.arange(dim**2).reshape(dim,dim))/(dim**2))
term2 = numpy.cos((10*numpy.pi*numpy.arange(dim**2).reshape(dim,dim).T)/(dim**2))
A = term1+term2
A = numpy.where(numpy.abs(A)<0.75,0,A)
A = numpy.where(numpy.abs(A)>0.9,0,A)
tic = time.time()
U,S,Vt = numpy.linalg.svd(A)
toc = time.time()
print "full svd", toc - tic, "seconds"
python2 test_svd.py
full svd 2.4460 seconds