C 如何优化

C 如何优化,c,C,我要做的是接受以下代码: char naive_smooth_descr[] = "naive_smooth: Naive baseline implementation"; void naive_smooth(int dim, pixel *src, pixel *dst) { int i, j; for (i = 0; i < dim; i++) for (j = 0; j < dim; j++) dst[RIDX(i, j, d

我要做的是接受以下代码:

/* Human-readable label for the baseline implementation below. */
char naive_smooth_descr[] = "naive_smooth: Naive baseline implementation";

/*
 * naive_smooth - baseline smoothing pass.
 *
 * Visits every (row, column) position of a dim x dim image in row-major
 * order and stores the value returned by avg() for that position into dst.
 *
 * dim  side length of the square image
 * src  input image, passed straight through to avg()
 * dst  output image, written at index RIDX(row, column, dim)
 */
void naive_smooth(int dim, pixel *src, pixel *dst)
{
    int row = 0;

    while (row < dim) {
        int col;

        for (col = 0; col < dim; col++) {
            dst[RIDX(row, col, dim)] = avg(dim, row, col, src);
        }
        row++;
    }
}
然后把其中的函数调用 avg(dim, i, j, src) 替换为该函数本身的代码。avg() 的定义如下:

/*
 * avg - combine the pixels surrounding (i, j) into a single pixel.
 *
 * Walks the 3x3 window centred on (i, j), clipped to the image so that no
 * index falls outside [0, dim-1], and feeds every pixel in that window to
 * accumulate_sum(). The accumulated sum is then converted back to a pixel
 * by assign_sum_to_pixel().
 *
 * NOTE(review): the helpers are not visible here; presumably they add up
 * the channels and divide by the sample count -- confirm.
 *
 * dim  side length of the square image
 * i    row of the centre pixel
 * j    column of the centre pixel
 * src  image to read from
 *
 * Returns the pixel produced by assign_sum_to_pixel().
 */
static pixel avg(int dim, int i, int j, pixel *src)
{
    /* Window limits, clipped at the image border. */
    const int row_lo = max(i - 1, 0);
    const int row_hi = min(i + 1, dim - 1);
    const int col_lo = max(j - 1, 0);
    const int col_hi = min(j + 1, dim - 1);
    pixel_sum acc;
    pixel result;
    int r, c;

    initialize_pixel_sum(&acc);

    for (r = row_lo; r <= row_hi; r++) {
        for (c = col_lo; c <= col_hi; c++) {
            accumulate_sum(&acc, src[RIDX(r, c, dim)]);
        }
    }

    assign_sum_to_pixel(&result, acc);
    return result;
}
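static pixel avg(int dim, int i, int j, pixel *src)
{
    int ii, jj;
    pixel_sum sum;
    pixel current_pixel;
    initialize_pixel_sum(&sum);

    for (ii = max(i-1, 0); ii <= min(i+1, dim-1); ii++)
        for (jj = max(j-1, 0); jj <= min(j+1, dim-1); jj++)
            accumulate_sum(&sum, src[RIDX(ii, jj, dim)]);

    assign_sum_to_pixel(&current_pixel, sum);
    return current_pixel;
}

如果您的代码库很小,只包含 10–12 个函数,可以尝试在每个函数前面加上关键字 inline。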

第二个选项,使用内联所有函数调用的编译器选项,不要手动执行(这就是编译器存在的原因)。您使用的编译器是什么?您可以联机查找其内联所有函数调用的选项(如果有)

第三,如果您使用 GCC 编译代码,可以为函数指定 always_inline 属性:

/*
 * Forward declaration asking GCC to inline avg() at every call site.
 * The inline keyword is spelled out because GCC documents always_inline
 * for functions declared inline and warns ("always_inline function might
 * not be inlinable") when the keyword is missing.
 */
static inline pixel avg (int dim, int i, int j, pixel *src) __attribute__((always_inline));

使用内联和宏:

  • 如果使用 C99 编译器或 C++ 编译器,可以使用 inline 关键字。但是,它并不保证调用一定会被实际代码替换:只有当编译器认为这样更高效时才会内联。
  • 否则,如果您使用的是纯C89,那么
    avg()
    必须是一个宏。然后您可以保证将函数“call”替换为实际代码

  • 我必须说,我赞同上面的做法:应当确保启用编译器优化和内联……但如果您仍想要针对您这个具体问题的答案,我认为您想要的是:

    /*
     * Inner loop of naive_smooth() with the avg() call expanded in place:
     * the body of avg() is pasted where the placeholder comment sits.
     */
    for (j = 0; j < dim; j++)
    {
    
        /* ...avg() code body except for the return... */ 
    
        /* current_pixel is the value avg() would have returned. */
        dst[RIDX(i, j, dim)] = current_pixel;
    }
    
    for (j = 0; j < dim; j++) { /* …avg() 的代码体(return 语句除外)… */ dst[RIDX(i, j, dim)] = current_pixel; }
    我展开了循环的开始和结束,以消除代码中的min()和max():

    /*
     * smooth_B - smooth a dim x dim image without per-pixel min()/max() clamping.
     *
     * Every dst[i][j] receives, per channel (red, green, blue), the integer
     * average of src[i][j] and those of its eight neighbours that lie inside
     * the image: 4 samples at a corner, 6 along an edge, 9 in the interior.
     * Corners, edges and interior are handled by separate straight-line code,
     * so the loops contain no bounds tests.
     *
     * dim  side length of both square images
     * src  input image; only read
     * dst  output image; only the red, green and blue members are written
     *
     * NOTE(review): presumably src and dst never overlap -- confirm with callers.
     */
    void smooth_B(int dim, struct pixel src[dim][dim], struct pixel dst[dim][dim]){
      /* The peeled code below reads row 1 and column 1, so it needs dim >= 2. */
      if( dim<1 ){
        return;
      }
      if( dim==1 ){
        /* A lone pixel has no neighbours: its average is the pixel itself. */
        dst[0][0].red  =src[0][0].red;
        dst[0][0].green=src[0][0].green;
        dst[0][0].blue =src[0][0].blue;
        return;
      }

      /* Top-left corner. */
      dst[0][0].red  =(src[0][0].red  +src[1][0].red  +src[0][1].red  +src[1][1].red  )/4;
      dst[0][0].green=(src[0][0].green+src[1][0].green+src[0][1].green+src[1][1].green)/4;
      dst[0][0].blue =(src[0][0].blue +src[1][0].blue +src[0][1].blue +src[1][1].blue )/4;
      /* Top edge, corners excluded. */
      for( int j=1; j<dim-1; j++){
        dst[0][j].red  =(src[0][j-1].red  +src[1][j-1].red  +src[0][j].red  +src[1][j].red  +src[0][j+1].red  +src[1][j+1].red  )/6;
        dst[0][j].green=(src[0][j-1].green+src[1][j-1].green+src[0][j].green+src[1][j].green+src[0][j+1].green+src[1][j+1].green)/6;
        dst[0][j].blue =(src[0][j-1].blue +src[1][j-1].blue +src[0][j].blue +src[1][j].blue +src[0][j+1].blue +src[1][j+1].blue )/6;
      }
      /* Top-right corner. */
      dst[0][dim-1].red  =(src[0][dim-2].red  +src[1][dim-2].red  +src[0][dim-1].red  +src[1][dim-1].red  )/4;
      dst[0][dim-1].green=(src[0][dim-2].green+src[1][dim-2].green+src[0][dim-1].green+src[1][dim-1].green)/4;
      dst[0][dim-1].blue =(src[0][dim-2].blue +src[1][dim-2].blue +src[0][dim-1].blue +src[1][dim-1].blue )/4;

      for( int i=1; i<dim-1; i++){
        /* Left edge of row i. */
        dst[i][0].red  =(src[i-1][0].red  +src[i-1][1].red  +src[i][0].red  +src[i][1].red  +src[i+1][0].red  +src[i+1][1].red  )/6;
        dst[i][0].green=(src[i-1][0].green+src[i-1][1].green+src[i][0].green+src[i][1].green+src[i+1][0].green+src[i+1][1].green)/6;
        dst[i][0].blue =(src[i-1][0].blue +src[i-1][1].blue +src[i][0].blue +src[i][1].blue +src[i+1][0].blue +src[i+1][1].blue )/6;
        /*
         * Interior of row i. The bound is dim-1, not dim: column dim-1 is the
         * right edge handled below, and reading src[..][j+1] there would step
         * past the end of the row.
         */
        for( int j=1; j<dim-1; j++){
          dst[i][j].red  =(src[i-1][j-1].red  +src[i][j-1].red  +src[i+1][j-1].red  +src[i-1][j].red  +src[i][j].red  +src[i+1][j].red  +src[i-1][j+1].red  +src[i][j+1].red  +src[i+1][j+1].red  )/9;
          dst[i][j].green=(src[i-1][j-1].green+src[i][j-1].green+src[i+1][j-1].green+src[i-1][j].green+src[i][j].green+src[i+1][j].green+src[i-1][j+1].green+src[i][j+1].green+src[i+1][j+1].green)/9;
          dst[i][j].blue =(src[i-1][j-1].blue +src[i][j-1].blue +src[i+1][j-1].blue +src[i-1][j].blue +src[i][j].blue +src[i+1][j].blue +src[i-1][j+1].blue +src[i][j+1].blue +src[i+1][j+1].blue )/9;
        }
        /* Right edge of row i. */
        dst[i][dim-1].red  =(src[i-1][dim-2].red  +src[i][dim-2].red  +src[i+1][dim-2].red  +src[i-1][dim-1].red  +src[i][dim-1].red  +src[i+1][dim-1].red  )/6;
        dst[i][dim-1].green=(src[i-1][dim-2].green+src[i][dim-2].green+src[i+1][dim-2].green+src[i-1][dim-1].green+src[i][dim-1].green+src[i+1][dim-1].green)/6;
        dst[i][dim-1].blue =(src[i-1][dim-2].blue +src[i][dim-2].blue +src[i+1][dim-2].blue +src[i-1][dim-1].blue +src[i][dim-1].blue +src[i+1][dim-1].blue )/6;
      }
      /* Bottom-left corner. */
      dst[dim-1][0].red  =(src[dim-2][0].red  +src[dim-2][1].red  +src[dim-1][0].red  +src[dim-1][1].red  )/4;
      dst[dim-1][0].green=(src[dim-2][0].green+src[dim-2][1].green+src[dim-1][0].green+src[dim-1][1].green)/4;
      dst[dim-1][0].blue =(src[dim-2][0].blue +src[dim-2][1].blue +src[dim-1][0].blue +src[dim-1][1].blue )/4;
      /*
       * Bottom edge, corners excluded. The bound is dim-1, not dim: at
       * j == dim-1 the read of src[dim-1][j+1] would land past the end of
       * the whole image.
       */
      for( int j=1; j<dim-1; j++){
        dst[dim-1][j].red  =(src[dim-2][j-1].red  +src[dim-1][j-1].red  +src[dim-2][j].red  +src[dim-1][j].red  +src[dim-2][j+1].red  +src[dim-1][j+1].red  )/6;
        dst[dim-1][j].green=(src[dim-2][j-1].green+src[dim-1][j-1].green+src[dim-2][j].green+src[dim-1][j].green+src[dim-2][j+1].green+src[dim-1][j+1].green)/6;
        dst[dim-1][j].blue =(src[dim-2][j-1].blue +src[dim-1][j-1].blue +src[dim-2][j].blue +src[dim-1][j].blue +src[dim-2][j+1].blue +src[dim-1][j+1].blue )/6;
      }
      /* Bottom-right corner. */
      dst[dim-1][dim-1].red  =(src[dim-2][dim-2].red  +src[dim-1][dim-2].red  +src[dim-2][dim-1].red  +src[dim-1][dim-1].red  )/4;
      dst[dim-1][dim-1].green=(src[dim-2][dim-2].green+src[dim-1][dim-2].green+src[dim-2][dim-1].green+src[dim-1][dim-1].green)/4;
      dst[dim-1][dim-1].blue =(src[dim-2][dim-2].blue +src[dim-1][dim-2].blue +src[dim-2][dim-1].blue +src[dim-1][dim-1].blue )/4;
    }
    
    void smooth_B(int dim, struct pixel src[dim][dim], struct pixel dst[dim][dim]){
    dst[0][0].red  =(src[0][0].red  +src[1][0].red  +src[0][1].red  +src[1][1].red  )/4;
    dst[0][0].green=(src[0][0].green+src[1][0].green+src[0][1].green+src[1][1].green)/4;
    dst[0][0].blue =(src[0][0].blue +src[1][0].blue +src[0][1].blue +src[1][1].blue )/4;
    
    for( int j=1; j<dim-1; j++){ /* ……其余部分与上文的完整代码相同…… */

    实际上……这正是我想要的……那么,我现在对 avg() 代码体里调用的其余函数基本上也照此处理吗?

    你为什么要这么费力地做微优化?你能从中得到什么?

    David 的问题尤其重要,因为这个算法可以通过滚动(增量)计算平均值、而不是每次都从头计算来加速。如果你只是为了好玩,那就尽管去做;但如果你真的想以最小的代价让代码跑得更快,请换用更高效的算法。
    /*
     * Inner loop of naive_smooth() with the avg() call expanded in place:
     * the body of avg() is pasted where the placeholder comment sits.
     */
    for (j = 0; j < dim; j++)
    {
    
        /* ...avg() code body except for the return... */ 
    
        /* current_pixel is the value avg() would have returned. */
        dst[RIDX(i, j, dim)] = current_pixel;
    }
    
    /*
     * smooth_B - smooth a dim x dim image without per-pixel min()/max() clamping.
     *
     * Every dst[i][j] receives, per channel (red, green, blue), the integer
     * average of src[i][j] and those of its eight neighbours that lie inside
     * the image: 4 samples at a corner, 6 along an edge, 9 in the interior.
     * Corners, edges and interior are handled by separate straight-line code,
     * so the loops contain no bounds tests.
     *
     * dim  side length of both square images
     * src  input image; only read
     * dst  output image; only the red, green and blue members are written
     *
     * NOTE(review): presumably src and dst never overlap -- confirm with callers.
     */
    void smooth_B(int dim, struct pixel src[dim][dim], struct pixel dst[dim][dim]){
      /* The peeled code below reads row 1 and column 1, so it needs dim >= 2. */
      if( dim<1 ){
        return;
      }
      if( dim==1 ){
        /* A lone pixel has no neighbours: its average is the pixel itself. */
        dst[0][0].red  =src[0][0].red;
        dst[0][0].green=src[0][0].green;
        dst[0][0].blue =src[0][0].blue;
        return;
      }

      /* Top-left corner. */
      dst[0][0].red  =(src[0][0].red  +src[1][0].red  +src[0][1].red  +src[1][1].red  )/4;
      dst[0][0].green=(src[0][0].green+src[1][0].green+src[0][1].green+src[1][1].green)/4;
      dst[0][0].blue =(src[0][0].blue +src[1][0].blue +src[0][1].blue +src[1][1].blue )/4;
      /* Top edge, corners excluded. */
      for( int j=1; j<dim-1; j++){
        dst[0][j].red  =(src[0][j-1].red  +src[1][j-1].red  +src[0][j].red  +src[1][j].red  +src[0][j+1].red  +src[1][j+1].red  )/6;
        dst[0][j].green=(src[0][j-1].green+src[1][j-1].green+src[0][j].green+src[1][j].green+src[0][j+1].green+src[1][j+1].green)/6;
        dst[0][j].blue =(src[0][j-1].blue +src[1][j-1].blue +src[0][j].blue +src[1][j].blue +src[0][j+1].blue +src[1][j+1].blue )/6;
      }
      /* Top-right corner. */
      dst[0][dim-1].red  =(src[0][dim-2].red  +src[1][dim-2].red  +src[0][dim-1].red  +src[1][dim-1].red  )/4;
      dst[0][dim-1].green=(src[0][dim-2].green+src[1][dim-2].green+src[0][dim-1].green+src[1][dim-1].green)/4;
      dst[0][dim-1].blue =(src[0][dim-2].blue +src[1][dim-2].blue +src[0][dim-1].blue +src[1][dim-1].blue )/4;

      for( int i=1; i<dim-1; i++){
        /* Left edge of row i. */
        dst[i][0].red  =(src[i-1][0].red  +src[i-1][1].red  +src[i][0].red  +src[i][1].red  +src[i+1][0].red  +src[i+1][1].red  )/6;
        dst[i][0].green=(src[i-1][0].green+src[i-1][1].green+src[i][0].green+src[i][1].green+src[i+1][0].green+src[i+1][1].green)/6;
        dst[i][0].blue =(src[i-1][0].blue +src[i-1][1].blue +src[i][0].blue +src[i][1].blue +src[i+1][0].blue +src[i+1][1].blue )/6;
        /*
         * Interior of row i. The bound is dim-1, not dim: column dim-1 is the
         * right edge handled below, and reading src[..][j+1] there would step
         * past the end of the row.
         */
        for( int j=1; j<dim-1; j++){
          dst[i][j].red  =(src[i-1][j-1].red  +src[i][j-1].red  +src[i+1][j-1].red  +src[i-1][j].red  +src[i][j].red  +src[i+1][j].red  +src[i-1][j+1].red  +src[i][j+1].red  +src[i+1][j+1].red  )/9;
          dst[i][j].green=(src[i-1][j-1].green+src[i][j-1].green+src[i+1][j-1].green+src[i-1][j].green+src[i][j].green+src[i+1][j].green+src[i-1][j+1].green+src[i][j+1].green+src[i+1][j+1].green)/9;
          dst[i][j].blue =(src[i-1][j-1].blue +src[i][j-1].blue +src[i+1][j-1].blue +src[i-1][j].blue +src[i][j].blue +src[i+1][j].blue +src[i-1][j+1].blue +src[i][j+1].blue +src[i+1][j+1].blue )/9;
        }
        /* Right edge of row i. */
        dst[i][dim-1].red  =(src[i-1][dim-2].red  +src[i][dim-2].red  +src[i+1][dim-2].red  +src[i-1][dim-1].red  +src[i][dim-1].red  +src[i+1][dim-1].red  )/6;
        dst[i][dim-1].green=(src[i-1][dim-2].green+src[i][dim-2].green+src[i+1][dim-2].green+src[i-1][dim-1].green+src[i][dim-1].green+src[i+1][dim-1].green)/6;
        dst[i][dim-1].blue =(src[i-1][dim-2].blue +src[i][dim-2].blue +src[i+1][dim-2].blue +src[i-1][dim-1].blue +src[i][dim-1].blue +src[i+1][dim-1].blue )/6;
      }
      /* Bottom-left corner. */
      dst[dim-1][0].red  =(src[dim-2][0].red  +src[dim-2][1].red  +src[dim-1][0].red  +src[dim-1][1].red  )/4;
      dst[dim-1][0].green=(src[dim-2][0].green+src[dim-2][1].green+src[dim-1][0].green+src[dim-1][1].green)/4;
      dst[dim-1][0].blue =(src[dim-2][0].blue +src[dim-2][1].blue +src[dim-1][0].blue +src[dim-1][1].blue )/4;
      /*
       * Bottom edge, corners excluded. The bound is dim-1, not dim: at
       * j == dim-1 the read of src[dim-1][j+1] would land past the end of
       * the whole image.
       */
      for( int j=1; j<dim-1; j++){
        dst[dim-1][j].red  =(src[dim-2][j-1].red  +src[dim-1][j-1].red  +src[dim-2][j].red  +src[dim-1][j].red  +src[dim-2][j+1].red  +src[dim-1][j+1].red  )/6;
        dst[dim-1][j].green=(src[dim-2][j-1].green+src[dim-1][j-1].green+src[dim-2][j].green+src[dim-1][j].green+src[dim-2][j+1].green+src[dim-1][j+1].green)/6;
        dst[dim-1][j].blue =(src[dim-2][j-1].blue +src[dim-1][j-1].blue +src[dim-2][j].blue +src[dim-1][j].blue +src[dim-2][j+1].blue +src[dim-1][j+1].blue )/6;
      }
      /* Bottom-right corner. */
      dst[dim-1][dim-1].red  =(src[dim-2][dim-2].red  +src[dim-1][dim-2].red  +src[dim-2][dim-1].red  +src[dim-1][dim-1].red  )/4;
      dst[dim-1][dim-1].green=(src[dim-2][dim-2].green+src[dim-1][dim-2].green+src[dim-2][dim-1].green+src[dim-1][dim-1].green)/4;
      dst[dim-1][dim-1].blue =(src[dim-2][dim-2].blue +src[dim-1][dim-2].blue +src[dim-2][dim-1].blue +src[dim-1][dim-1].blue )/4;
    }