C Linux模块:性能计数器不工作

C Linux模块:性能计数器不工作,c,linux,module,kernel,perf,C,Linux,Module,Kernel,Perf,我想监控最后一级缓存中的缓存请求编号。我根据教程编写了一个Linux模块来获取这些信息 它可以编译并运行,但输出结果始终为0。换句话说,当我使用rdmsr时,它总是给我edx=0,eax=0。我甚至在中尝试了演示代码,输出仍然是0 我为这个问题纠结了整整一周。有人能帮我指出我在节目中犯的错误吗 我知道有一些现有的程序在做同样的事情,但我必须知道如何自己编写代码,因为我想在Xen hypervisor中监视缓存请求。我不能在Xen中使用这些工具,除非我将这些工具合并到Xen的虚拟机监控程序中,这似

我想监控最后一级缓存中的缓存请求编号。我根据教程编写了一个Linux模块来获取这些信息

它可以编译并运行,但输出结果始终为0。换句话说,当我使用rdmsr时,它总是给我edx=0,eax=0。我甚至尝试了教程中的演示代码,输出仍然是0。

我为这个问题纠结了整整一周。有人能帮我指出我在程序中犯的错误吗?

我知道有一些现有的程序在做同样的事情,但我必须知道如何自己编写代码,因为我想在Xen hypervisor中监视缓存请求。我不能在Xen中使用这些工具,除非我将这些工具合并到Xen的虚拟机监控程序中,这似乎需要更多的工作

/*
 * Record the cache miss rate of Intel Sandybridge cpu
 * To confirm the event is correctly set!
 */
#include <linux/module.h>   /* Needed by all modules */
#include <linux/kernel.h>   /* Needed for KERN_INFO */

/*
 * Event-select MSR addresses (IA32_PERFEVTSEL0..3).
 * The value is loaded into %ecx before wrmsr to pick the selector to program.
 */
#define PERFEVTSEL0    0x186
#define PERFEVTSEL1    0x187
#define PERFEVTSEL2    0x188
#define PERFEVTSEL3    0x189

/*
 * Counter MSR addresses (IA32_PMC0..3), one per selector above:
 * PMCn counts the event programmed into PERFEVTSELn.
 *
 * PMC2 used to be 0xc2 (the same address as PMC1) and PMC3 0xc3, so code
 * that programmed PERFEVTSEL2 and then read "PMC2" was really reading the
 * counter that belongs to PERFEVTSEL1.
 */
#define PMC0    0xc1
#define PMC1    0xc2
#define PMC2    0xc3
#define PMC3    0xc4

/*
 * Event numbers and unit masks used to build a PERFEVTSEL value.
 * Source noted by the author: Intel Software Developer Manual, page 2549.
 * The L1I/L1D entries are flagged by the author as not yet confirmed,
 * except where an individual line says otherwise.
 */

/* ---- L1 instruction cache ---- */
#define L1I_ALLHIT_EVENT        0x80
#define L1I_ALLHIT_MASK         0x01
#define L1I_ALLMISS_EVENT       0x80        /* confirmed */
#define L1I_ALLMISS_MASK        0x02        /* confirmed */

/* ---- L1 data cache ----
 * There is no single "all requests missed" mask, so total misses have to
 * be computed as load misses + store misses.
 */
#define L1D_ALLREQ_EVENT        0x43
#define L1D_ALLREQ_MASK         0x01
#define L1D_LDMISS_EVENT        0x40
#define L1D_LDMISS_MASK         0x01
#define L1D_STMISS_EVENT        0x28
#define L1D_STMISS_MASK         0x01

/* ---- L2 (private, per core) ---- confirmed */
#define L2_ALLCODEREQ_MASK      0x30
#define L2_ALLCODEMISS_MASK     0x20

/* The "all request"/"all miss" masks currently alias the code-only masks;
 * the author's notes list 0xFF and 0xAA as the alternatives. */
#define L2_ALLREQ_EVENT         0x24
#define L2_ALLREQ_MASK          L2_ALLCODEREQ_MASK
#define L2_ALLMISS_EVENT        0x24
#define L2_ALLMISS_MASK         L2_ALLCODEMISS_MASK

/* ---- L3 (shared) ---- confirmed; uses the last-level-cache event/masks */
#define L3_ALLREQ_EVENT         0x2E
#define L3_ALLREQ_MASK          0x4F
#define L3_ALLMISS_EVENT        0x2E
#define L3_ALLMISS_MASK         0x41

/* Flag bits of a PERFEVTSEL value: bit 16 = USR, bit 17 = OS. */
#define USR_BIT             (0x01UL << 16)
#define OS_BIT              (0x01UL << 17)

/*
 * Set/clear the USR or OS flag in the lvalue `eax`.
 * Every macro operates on its own argument (CLEAR_MSR_USR_BIT used to
 * ignore its parameter and touch a variable literally named `eax`), and
 * each expansion is fully parenthesized so it is safe inside expressions.
 */
#define SET_MSR_USR_BIT(eax)    ((eax) |= USR_BIT)
#define CLEAR_MSR_USR_BIT(eax)  ((eax) &= (~USR_BIT))
#define SET_MSR_OS_BIT(eax)     ((eax) |= OS_BIT)
#define CLEAR_MSR_OS_BIT(eax)   ((eax) &= (~OS_BIT))

/*
 * OR an event number (bits 0-7) and a unit mask (bits 8-15) into `eax`.
 * Arguments are parenthesized so compound expressions such as `a | b`
 * are shifted/combined as a whole.
 */
#define SET_EVENT_MASK(eax, event, umask)    ((eax) |= ((event) | ((umask) << 8)))

/* EN flag (bit 22): when set, the counter starts counting. */
#define MSR_ENFLAG      (0x1<<22)


/*
 * rtxen_write_msr - program an MSR with two back-to-back wrmsr instructions
 *                   (32-bit instruction form, v3).
 * @eax: low 32 bits to write; MSR_ENFLAG is always OR-ed in before the
 *       second write.
 * @ecx: number of the MSR to write.
 *
 * Step 1 writes 0 (edx:eax = 0:0) to MSR @ecx.
 * Step 2 writes 0:(@eax | MSR_ENFLAG) to the same MSR.
 * The high 32 bits (edx) are always written as zero.
 *
 * NOTE(review): because MSR_ENFLAG is forced on here, a caller cannot use
 * this function to leave the EN bit clear, even if it masks the bit out of
 * @eax first.
 * NOTE(review): the callers in this file pass an event-select MSR number
 * (0x187 / PERFEVTSEL2), so step 1 zeroes the selector, not a PMCx counter
 * register - confirm that is what "clear counter" is meant to achieve.
 */
static inline void rtxen_write_msr(uint32_t eax, uint32_t ecx)
{
    /* step 1: write zero to the MSR selected by ecx */
   __asm__ __volatile__ ("movl %0, %%ecx\n\t"
        "xorl %%edx, %%edx\n\t"
        "xorl %%eax, %%eax\n\t"
        "wrmsr\n\t"
        : /* no outputs */
        : "m" (ecx)
        : "eax", "ecx", "edx" /* all clobbered */);

   /* the enable flag is set unconditionally, whatever the caller passed */
   eax |= MSR_ENFLAG;

   /* step 2: write the requested value (with EN set) to the same MSR */
   __asm__("movl %0, %%ecx\n\t" /* ecx contains the number of the MSR to set */
        "xorl %%edx, %%edx\n\t"/* edx contains the high bits to set the MSR to */
        "movl %1, %%eax\n\t" /* eax contains the low bits to set the MSR to */
        "wrmsr\n\t"
        : /* no outputs */
        : "m" (ecx), "m" (eax)
        : "eax", "ecx", "edx" /* clobbered */);
}

/*
 * rtxen_read_msr - execute rdmsr for the MSR whose number is stored in *ecx.
 * @ecx: in  - pointer to the MSR number (only read, never written).
 * @eax: out - receives the value rdmsr leaves in %eax.
 * @edx: out - receives the value rdmsr leaves in %edx.
 */
static inline void rtxen_read_msr(uint32_t *ecx, uint32_t *eax, uint32_t *edx)
{
    uint32_t lo;
    uint32_t hi;

    __asm__ __volatile__("rdmsr"
                         : "=a" (lo), "=d" (hi)
                         : "c" (*ecx));

    *eax = lo;
    *edx = hi;
}

/*
 * delay - generate some memory traffic by storing to a 1000-byte stack array.
 *
 * The array is volatile: nothing ever reads it back, so without the
 * qualifier an optimizing compiler is free to drop every store (and the
 * whole loop) as dead code, leaving nothing between enabling the counter
 * and reading it.
 */
static inline void delay(void)
{
    volatile char tmp[1000];
    int i;

    for (i = 0; i < 1000; i++)
    {
        /* only the store matters; the value is truncated to a char */
        tmp[i] = (char)(i * 2);
    }
}

/*
 * Which measurement init_module() should set up.
 * Only UOPS and L3 have a case in init_module(); the other values end up
 * in its default branch.
 */
enum cache_level {
    UOPS = 0,
    L1I  = 1,
    L1D  = 2,
    L2   = 3,
    L3   = 4
};

/*
 * init_module - module entry point: program one performance counter, read it
 * back once and log the result.
 *
 * `op` selects the measurement and is hard-coded to UOPS, so the L3 branch
 * below is never taken as written.
 *
 * Return: always 0.
 */
int init_module(void)
{
    enum cache_level op;
    uint32_t eax, edx, ecx;
    uint64_t l3_all;
    op = UOPS;
    switch(op)
    {
    case UOPS:
        /*
         * 0x0001010E = event 0x0E (bits 0-7), umask 0x01 (bits 8-15) and
         * bit 16 (USR_BIT). Bit 17 (OS_BIT) is not set.
         * NOTE(review): this code runs inside init_module; confirm whether
         * a USR-only selection can count anything between the write and
         * the read below.
         */
        eax = 0x0001010E;
        eax |= MSR_ENFLAG;
        ecx = 0x187;    /* same value as PERFEVTSEL1 */
        printk(KERN_INFO "UOPS Demo: write_msr: eax=%#010x, ecx=%#010x\n", eax, ecx);
        rtxen_write_msr(eax, ecx);
        ecx = 0xc2;     /* same value as PMC1 */
        /* placeholder values; rtxen_read_msr() overwrites both */
        eax = 1;
        edx = 2;
        rtxen_read_msr(&ecx, &eax, &edx);
        printk(KERN_INFO "UOPS Demo: read_msr: edx=%#010x, eax=%#010x\n", edx, eax);
        break;
    case L3: 
        /* build the selector: USR + OS flags, L3 "all request" event/umask, EN */
        eax = 0;
        SET_MSR_USR_BIT(eax);
        SET_MSR_OS_BIT(eax);
        SET_EVENT_MASK(eax, L3_ALLREQ_EVENT, L3_ALLREQ_MASK);
        eax |= MSR_ENFLAG;
        ecx = PERFEVTSEL2;
        printk(KERN_INFO "before wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
        rtxen_write_msr(eax, ecx);
        printk(KERN_INFO "after wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
        printk(KERN_INFO "L3 all request set MSR PMC2\n");
        printk(KERN_INFO "delay by access an array\n");
        /* do some work between enabling the counter and reading it */
        delay();
        ecx = PMC2;
        /* placeholder values; rtxen_read_msr() overwrites both */
        eax = 1;
        edx = 2;
        printk(KERN_INFO "rdmsr: ecx=%#010x\n", ecx);
        rtxen_read_msr(&ecx, &eax, &edx); /* all three arguments are passed by address */
        /* combine edx (high half) and eax (low half) into one 64-bit count */
        l3_all = ( ((uint64_t) edx << 32) | eax );
        /* the hex form is printed through unsigned long, so it shows only
         * the low 32 bits where unsigned long is 32 bits wide */
        printk(KERN_INFO "rdmsr: L3 all request is %llu (%#010lx)\n", l3_all, (unsigned long)l3_all);
        break;
    default:
        printk(KERN_INFO "operation not implemented yet\n");   
    }
    /* 
     * A non 0 return means init_module failed; module can't be loaded. 
     */
    return 0;
}

/*
 * cleanup_module - module exit hook.
 * Logs a farewell line and does nothing else.
 */
void cleanup_module(void) {
    printk(KERN_INFO
           "Goodbye world 1.\n");
}

在@Manuel Selva的帮助下,我终于解决了这个问题

设置性能计数器的正确流程为:

步骤1:通过在eax中设置EN位来设置msr并启用计数器

步骤2:通过写入msr停止计数器

步骤3:读取计数器

我错过了第二步,这就是为什么它总是给我0。如果我想在停止计数器之前读取计数器,则报告0是有意义的

switch语句的正确代码如下所示:

 /*
  * Revised switch body for init_module(): after enabling a counter, the
  * selector is written a second time with the EN bit cleared in eax before
  * the counter is read.
  * NOTE(review): rtxen_write_msr() ORs MSR_ENFLAG into its eax argument
  * before its final wrmsr, so the value that reaches the MSR in the "stop"
  * writes below still has EN set - confirm the counter is really stopped.
  */
 switch(op)
    {
    case UOPS:
        /* 0x0051010E = 0x0001010E plus bit 20 and bit 22 (MSR_ENFLAG) */
        eax = 0x0051010E;
        eax |= MSR_ENFLAG;
        ecx = 0x187;    /* same value as PERFEVTSEL1 */
        printk(KERN_INFO "UOPS Demo: write_msr: eax=%#010x, ecx=%#010x\n", eax, ecx);
        rtxen_write_msr(eax, ecx);
        /* stop counting: same selector value with bit 22 (EN) cleared */
        eax = 0x0011010E;
        rtxen_write_msr(eax,ecx);
        ecx = 0xc2;     /* same value as PMC1 */
        /* placeholder values; rtxen_read_msr() overwrites both */
        eax = 1;
        edx = 2;
        rtxen_read_msr(&ecx, &eax, &edx);
        printk(KERN_INFO "UOPS Demo: read_msr: edx=%#010x, eax=%#010x\n", edx, eax);
        break;
    case L3: 
        /* build the selector: USR + OS flags, L3 "all request" event/umask, EN */
        eax = 0;
        SET_MSR_USR_BIT(eax);
        SET_MSR_OS_BIT(eax);
        SET_EVENT_MASK(eax, L3_ALLREQ_EVENT, L3_ALLREQ_MASK);
        eax |= MSR_ENFLAG;
        eax |= (1<<20); /* INT bit: counter overflow */
        ecx = PERFEVTSEL2;
        printk(KERN_INFO "before wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
        rtxen_write_msr(eax, ecx);
        printk(KERN_INFO "after wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
        printk(KERN_INFO "L3 all request set MSR PMC2\n");
        printk(KERN_INFO "delay by access an array\n");
        /* do some work between enabling the counter and stopping it */
        delay();
        /* stop step: clear EN in the local copy and write the selector again */
        eax &= (~MSR_ENFLAG);
        rtxen_write_msr(eax, ecx);
        printk(KERN_INFO "stop the counter, eax=%#010x\n", eax);
        ecx = PMC2;
        /* placeholder values; rtxen_read_msr() overwrites both */
        eax = 1;
        edx = 2;
        printk(KERN_INFO "rdmsr: ecx=%#010x\n", ecx);
        rtxen_read_msr(&ecx, &eax, &edx); /* all three arguments are passed by address */
        /* combine edx (high half) and eax (low half) into one 64-bit count */
        l3_all = ( ((uint64_t) edx << 32) | eax );
        printk(KERN_INFO "rdmsr: L3 all request is %llu (%#010lx)\n", l3_all, (unsigned long)l3_all);
        break;
    default:
        printk(KERN_INFO "operation not implemented yet\n");   
    }
 switch(op)
    {
    case UOPS:
        eax = 0x0051010E;
        eax |= MSR_ENFLAG;
        ecx = 0x187;
        printk(KERN_INFO "UOPS Demo: write_msr: eax=%#010x, ecx=%#010x\n", eax, ecx);
        rtxen_write_msr(eax, ecx);
        //停止计数
        eax = 0x0011010E;
        rtxen_write_msr(eax,ecx);
        ecx = 0xc2;
        eax = 1;
        edx = 2;
        rtxen_read_msr(&ecx, &eax, &edx);
        printk(KERN_INFO "UOPS Demo: read_msr: edx=%#010x, eax=%#010x\n", edx, eax);
        break;
    case L3:
        eax = 0;
        SET_MSR_USR_BIT(eax);
        SET_MSR_OS_BIT(eax);
        SET_EVENT_MASK(eax, L3_ALLREQ_EVENT, L3_ALLREQ_MASK);
        eax |= MSR_ENFLAG;
        eax |= (1<<20); //INT位:计数器溢出
        // ……(此处原文被截断,其余部分与上面的代码相同)

您是否检查了所遵循的教程是否使用了与您相同的体系结构?另一种方法是使用linux导出的系统调用perf_event_open,它可以在用户级代码中完成。

@ManuelSelva,非常感谢您的建议!我确实查阅了《英特尔程序员手册》和我的计算机的体系结构。我认为事件编号和mask应该是正确的。现在我不确定我的代码流程是否正确。例如,当我发出wrmsr时,我是否必须做其他事情?此外,我不确定当我发出wrmsr命令时是否为eax设置了正确的值。顺便说一句,我必须自己编写代码而不是使用linux调用,因为我最终会将代码放入虚拟化hypervisor。如果我使用perf_event_open函数,我将不得不包含很多依赖项。

我从来没有手动编写过汇编代码来写入和读取MSR。不过,我使用MSR内核模块编写了一个简单的C程序来使用PMU。也许你可以先尝试这种方法,来检查你的问题是否来自对MSR读写的错误使用。

不客气,我很高兴知道我的评论对你有帮助。
 /*
  * Revised switch body for init_module(): after enabling a counter, the
  * selector is written a second time with the EN bit cleared in eax before
  * the counter is read.
  * NOTE(review): rtxen_write_msr() ORs MSR_ENFLAG into its eax argument
  * before its final wrmsr, so the value that reaches the MSR in the "stop"
  * writes below still has EN set - confirm the counter is really stopped.
  */
 switch(op)
    {
    case UOPS:
        /* 0x0051010E = 0x0001010E plus bit 20 and bit 22 (MSR_ENFLAG) */
        eax = 0x0051010E;
        eax |= MSR_ENFLAG;
        ecx = 0x187;    /* same value as PERFEVTSEL1 */
        printk(KERN_INFO "UOPS Demo: write_msr: eax=%#010x, ecx=%#010x\n", eax, ecx);
        rtxen_write_msr(eax, ecx);
        /* stop counting: same selector value with bit 22 (EN) cleared */
        eax = 0x0011010E;
        rtxen_write_msr(eax,ecx);
        ecx = 0xc2;     /* same value as PMC1 */
        /* placeholder values; rtxen_read_msr() overwrites both */
        eax = 1;
        edx = 2;
        rtxen_read_msr(&ecx, &eax, &edx);
        printk(KERN_INFO "UOPS Demo: read_msr: edx=%#010x, eax=%#010x\n", edx, eax);
        break;
    case L3: 
        /* build the selector: USR + OS flags, L3 "all request" event/umask, EN */
        eax = 0;
        SET_MSR_USR_BIT(eax);
        SET_MSR_OS_BIT(eax);
        SET_EVENT_MASK(eax, L3_ALLREQ_EVENT, L3_ALLREQ_MASK);
        eax |= MSR_ENFLAG;
        eax |= (1<<20); /* INT bit: counter overflow */
        ecx = PERFEVTSEL2;
        printk(KERN_INFO "before wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
        rtxen_write_msr(eax, ecx);
        printk(KERN_INFO "after wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx);
        printk(KERN_INFO "L3 all request set MSR PMC2\n");
        printk(KERN_INFO "delay by access an array\n");
        /* do some work between enabling the counter and stopping it */
        delay();
        /* stop step: clear EN in the local copy and write the selector again */
        eax &= (~MSR_ENFLAG);
        rtxen_write_msr(eax, ecx);
        printk(KERN_INFO "stop the counter, eax=%#010x\n", eax);
        ecx = PMC2;
        /* placeholder values; rtxen_read_msr() overwrites both */
        eax = 1;
        edx = 2;
        printk(KERN_INFO "rdmsr: ecx=%#010x\n", ecx);
        rtxen_read_msr(&ecx, &eax, &edx); /* all three arguments are passed by address */
        /* combine edx (high half) and eax (low half) into one 64-bit count */
        l3_all = ( ((uint64_t) edx << 32) | eax );
        printk(KERN_INFO "rdmsr: L3 all request is %llu (%#010lx)\n", l3_all, (unsigned long)l3_all);
        break;
    default:
        printk(KERN_INFO "operation not implemented yet\n");   
    }