C 查找缓存性能

C 查找缓存性能,c,caching,C,Caching,这就是代码缓存 #include <sys/times.h> #include <stdio.h> #define CACHE_MIN (1024) /* smallest cache (in words) */ #define CACHE_MAX (1024*1024) /* largest cache */ #define STRIDE_MIN 1 /* smallest stride (in words) */ #define STRIDE_MAX 128 /*

这是用于测试缓存性能的代码:

#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>

/* Benchmark parameters; array sizes and strides are counted in int-sized words. */
#define CACHE_MIN  (1 << 10)  /* lower bound of the array-size sweep (words) */
#define CACHE_MAX  (1 << 20)  /* upper bound of the array-size sweep (words) */
#define STRIDE_MIN 1          /* first stride tried (words) */
#define STRIDE_MAX 128        /* last stride tried (words) */
#define SAMPLE     10         /* repeat factor so each timed pass lasts longer */
/* Divisor used to turn times() ticks into seconds.
   NOTE(review): fixed value; the actual tick rate is system-dependent. */
#define CLK_TCK    60

/* Buffer the benchmark strides through; sized for the largest sweep. */
int x[CACHE_MAX];

/*
 * Return the user CPU time consumed so far by this process, in seconds.
 *
 * The value comes from times(), which reports clock ticks.  The number of
 * ticks per second is system-dependent, so it is queried once through
 * sysconf(_SC_CLK_TCK) and cached instead of relying on a fixed constant
 * (a wrong constant silently rescales every measurement).
 */
double get_seconds (void) {
  /* Legacy value used only if sysconf() cannot report the tick rate. */
  enum { FALLBACK_TICKS_PER_SEC = 60 };
  static long ticks_per_sec = 0; /* cached so timing calls stay cheap */
  struct tms rusage;

  if (ticks_per_sec <= 0) {
    ticks_per_sec = sysconf (_SC_CLK_TCK);
    if (ticks_per_sec <= 0)
      ticks_per_sec = FALLBACK_TICKS_PER_SEC;
  }

  times (&rusage); /* UNIX utility: time in clock ticks */
  return (double) rusage.tms_utime / (double) ticks_per_sec;
}

/*
 * Cache stride benchmark driver.
 *
 * For every array size from CACHE_MIN to CACHE_MAX (doubling) and every
 * stride from STRIDE_MIN to STRIDE_MAX (doubling):
 *   1. repeatedly increment every stride-th element of x until at least one
 *      second of user time has been accumulated, counting the passes;
 *   2. run the same loop nest without the array access for the same number
 *      of passes and subtract that time as loop overhead;
 *   3. print the array size and stride in bytes and the resulting time per
 *      access in nanoseconds.
 *
 * Returns 0.
 */
int main (void) {

  register int i, index, stride, limit;
  /* Unsigned and initialised: the dummy accumulation below would otherwise
     read an indeterminate value and could overflow a signed int. */
  register unsigned temp = 0;
  static volatile unsigned sink; /* receives temp so its value is observably used */
  int steps, tsteps, csize;
  double sec0, sec; /* timing variables */
  double accesses;  /* total timed array accesses for one (size, stride) pair */

  for (csize = CACHE_MIN; csize <= CACHE_MAX; csize = csize * 2) {
    for (stride = STRIDE_MIN; stride <= STRIDE_MAX; stride = stride * 2) {
      sec = 0; /* initialize timer */
      limit = csize - stride + 1; /* cache size this loop */
      steps = 0;
      do { /* repeat until collect 1 second */
        sec0 = get_seconds (); /* start timer */
        for (i = SAMPLE * stride; i != 0; i = i - 1) /* larger sample */
          for (index = 0; index < limit; index = index + stride)
            x[index] = x[index] + 1; /* cache access */
        steps = steps + 1; /* count while loop iterations */
        sec = sec + (get_seconds () - sec0); /* end timer */
      } while (sec < 1.0); /* until collect 1 second */

      /* Repeat empty loop to subtract loop overhead */
      tsteps = 0; /* used to match number of while iterations */
      do { /* repeat until same number of iterations as above */
        sec0 = get_seconds (); /* start timer */
        for (i = SAMPLE * stride; i != 0; i = i - 1) /* larger sample */
          for (index = 0; index < limit; index = index + stride)
            temp = temp + index; /* dummy code */
        tsteps = tsteps + 1; /* count while iterations */
        sec = sec - (get_seconds () - sec0); /* - overhead */
      } while (tsteps < steps); /* until equal to number of iterations */

      sink = temp; /* outside the timed regions */

      /* Accumulate the access count in double: the product of the four
         factors can exceed INT_MAX when many passes fit into one second. */
      accesses = (double) steps * SAMPLE * stride * ((limit - 1) / stride + 1);

      if (stride == STRIDE_MIN)
        printf ("\n"); /* extra line to separate array sizes */
      /* sizeof yields size_t; cast so the arguments match the %d conversions. */
      printf ("Size(bytes): %7d Stride(bytes): %4d read+write: %4.0f ns\n",
              (int) (csize * sizeof (int)), (int) (stride * sizeof (int)),
              sec * 1e9 / accesses);
    }
  } /* end of both outer for loops */

  return 0;
}
现在我试图找出缓存命中和未命中的速度,以及一级缓存的大小和一级缓存的块大小

一级缓存大小和块大小不是只有 4 KB 吗?我不知道如何测出速度,有什么想法吗?

  • 使用线程关联(thread affinity)将性能测试线程绑定到单个特定核心。这样可以消除线程在不同处理器内核之间迁移的影响,否则这种迁移会导致错误的结果
  • 使用时间戳计数器,以CPU周期为单位进行测量。它是x86 CPU上可用的最细粒度的计时器
  • 不要忘记从测量结果中减去时间测量本身的开销
  • 在反汇编器中检查编译器生成的代码,以确保编译器没有引入不必要的优化(例如,将变量放入CPU寄存器而不是内存中)
  • CPU缓存和缓存线的大小在很大程度上取决于特定的CPU型号,并且可能会有很大的差异。请查阅您所用CPU的文档

  • 使用线程关联(thread affinity)将性能测试线程绑定到单个特定核心。这样可以消除线程在不同处理器内核之间迁移的影响,否则这种迁移会导致错误的结果
  • 使用时间戳计数器,以CPU周期为单位进行测量。它是x86 CPU上可用的最细粒度的计时器
  • 不要忘记从测量结果中减去时间测量本身的开销
  • 在反汇编器中检查编译器生成的代码,以确保编译器没有引入不必要的优化(例如,将变量放入CPU寄存器而不是内存中)
  • CPU缓存和缓存线的大小在很大程度上取决于特定的CPU型号,并且可能会有很大的差异。请查阅您所用CPU的文档

    Size(bytes):    4096 Stride(bytes):    4 read+write:    1 ns
    Size(bytes):    4096 Stride(bytes):    8 read+write:    0 ns
    Size(bytes):    4096 Stride(bytes):   16 read+write:    0 ns
    Size(bytes):    4096 Stride(bytes):   32 read+write:    0 ns
    Size(bytes):    4096 Stride(bytes):   64 read+write:    0 ns
    Size(bytes):    4096 Stride(bytes):  128 read+write:    0 ns
    Size(bytes):    4096 Stride(bytes):  256 read+write:    0 ns
    Size(bytes):    4096 Stride(bytes):  512 read+write:    0 ns
    
    Size(bytes):    8192 Stride(bytes):    4 read+write:    0 ns
    Size(bytes):    8192 Stride(bytes):    8 read+write:    1 ns
    Size(bytes):    8192 Stride(bytes):   16 read+write:    0 ns
    Size(bytes):    8192 Stride(bytes):   32 read+write:    1 ns
    Size(bytes):    8192 Stride(bytes):   64 read+write:    0 ns
    Size(bytes):    8192 Stride(bytes):  128 read+write:    0 ns
    Size(bytes):    8192 Stride(bytes):  256 read+write:    1 ns
    Size(bytes):    8192 Stride(bytes):  512 read+write:    0 ns
    
    Size(bytes):   16384 Stride(bytes):    4 read+write:    1 ns
    Size(bytes):   16384 Stride(bytes):    8 read+write:    1 ns
    Size(bytes):   16384 Stride(bytes):   16 read+write:    1 ns
    Size(bytes):   16384 Stride(bytes):   32 read+write:    0 ns
    Size(bytes):   16384 Stride(bytes):   64 read+write:    1 ns
    Size(bytes):   16384 Stride(bytes):  128 read+write:    0 ns
    Size(bytes):   16384 Stride(bytes):  256 read+write:    0 ns
    Size(bytes):   16384 Stride(bytes):  512 read+write:    0 ns
    
    Size(bytes):   32768 Stride(bytes):    4 read+write:    1 ns
    Size(bytes):   32768 Stride(bytes):    8 read+write:    1 ns
    Size(bytes):   32768 Stride(bytes):   16 read+write:    0 ns
    Size(bytes):   32768 Stride(bytes):   32 read+write:    0 ns
    Size(bytes):   32768 Stride(bytes):   64 read+write:    1 ns
    Size(bytes):   32768 Stride(bytes):  128 read+write:    0 ns
    Size(bytes):   32768 Stride(bytes):  256 read+write:    0 ns
    Size(bytes):   32768 Stride(bytes):  512 read+write:    0 ns
    
    Size(bytes):   65536 Stride(bytes):    4 read+write:    0 ns
    Size(bytes):   65536 Stride(bytes):    8 read+write:    0 ns
    Size(bytes):   65536 Stride(bytes):   16 read+write:    1 ns
    Size(bytes):   65536 Stride(bytes):   32 read+write:    1 ns
    Size(bytes):   65536 Stride(bytes):   64 read+write:    2 ns
    Size(bytes):   65536 Stride(bytes):  128 read+write:    2 ns
    Size(bytes):   65536 Stride(bytes):  256 read+write:    1 ns
    Size(bytes):   65536 Stride(bytes):  512 read+write:    1 ns
    
    Size(bytes):  131072 Stride(bytes):    4 read+write:    0 ns
    Size(bytes):  131072 Stride(bytes):    8 read+write:    0 ns
    Size(bytes):  131072 Stride(bytes):   16 read+write:    0 ns
    Size(bytes):  131072 Stride(bytes):   32 read+write:    1 ns
    Size(bytes):  131072 Stride(bytes):   64 read+write:    2 ns
    Size(bytes):  131072 Stride(bytes):  128 read+write:    2 ns
    Size(bytes):  131072 Stride(bytes):  256 read+write:    2 ns
    Size(bytes):  131072 Stride(bytes):  512 read+write:    1 ns
    
    Size(bytes):  262144 Stride(bytes):    4 read+write:    0 ns
    Size(bytes):  262144 Stride(bytes):    8 read+write:    0 ns
    Size(bytes):  262144 Stride(bytes):   16 read+write:    0 ns
    Size(bytes):  262144 Stride(bytes):   32 read+write:    1 ns
    Size(bytes):  262144 Stride(bytes):   64 read+write:    2 ns
    Size(bytes):  262144 Stride(bytes):  128 read+write:    2 ns
    Size(bytes):  262144 Stride(bytes):  256 read+write:    2 ns
    Size(bytes):  262144 Stride(bytes):  512 read+write:    1 ns
    
    Size(bytes):  524288 Stride(bytes):    4 read+write:    0 ns
    Size(bytes):  524288 Stride(bytes):    8 read+write:    0 ns
    Size(bytes):  524288 Stride(bytes):   16 read+write:    1 ns
    Size(bytes):  524288 Stride(bytes):   32 read+write:    1 ns
    Size(bytes):  524288 Stride(bytes):   64 read+write:    3 ns
    Size(bytes):  524288 Stride(bytes):  128 read+write:    3 ns
    Size(bytes):  524288 Stride(bytes):  256 read+write:    3 ns
    Size(bytes):  524288 Stride(bytes):  512 read+write:    3 ns
    
    Size(bytes): 1048576 Stride(bytes):    4 read+write:    1 ns
    Size(bytes): 1048576 Stride(bytes):    8 read+write:    1 ns
    Size(bytes): 1048576 Stride(bytes):   16 read+write:    1 ns
    Size(bytes): 1048576 Stride(bytes):   32 read+write:    1 ns
    Size(bytes): 1048576 Stride(bytes):   64 read+write:    3 ns
    Size(bytes): 1048576 Stride(bytes):  128 read+write:    3 ns
    Size(bytes): 1048576 Stride(bytes):  256 read+write:    3 ns
    Size(bytes): 1048576 Stride(bytes):  512 read+write:    3 ns
    
    Size(bytes): 2097152 Stride(bytes):    4 read+write:    1 ns
    Size(bytes): 2097152 Stride(bytes):    8 read+write:    1 ns
    Size(bytes): 2097152 Stride(bytes):   16 read+write:    1 ns
    Size(bytes): 2097152 Stride(bytes):   32 read+write:    1 ns
    Size(bytes): 2097152 Stride(bytes):   64 read+write:    3 ns
    Size(bytes): 2097152 Stride(bytes):  128 read+write:    3 ns
    Size(bytes): 2097152 Stride(bytes):  256 read+write:    3 ns
    Size(bytes): 2097152 Stride(bytes):  512 read+write:    3 ns
    
    Size(bytes): 4194304 Stride(bytes):    4 read+write:    1 ns
    Size(bytes): 4194304 Stride(bytes):    8 read+write:    1 ns
    Size(bytes): 4194304 Stride(bytes):   16 read+write:    1 ns
    Size(bytes): 4194304 Stride(bytes):   32 read+write:    2 ns
    Size(bytes): 4194304 Stride(bytes):   64 read+write:    3 ns
    Size(bytes): 4194304 Stride(bytes):  128 read+write:    3 ns
    Size(bytes): 4194304 Stride(bytes):  256 read+write:    3 ns
    Size(bytes): 4194304 Stride(bytes):  512 read+write:    3 ns