如何在GEM5中使用ARM PMU?

如何在GEM5中使用ARM PMU?,arm,simulation,cpu-registers,arm64,gem5,Arm,Simulation,Cpu Registers,Arm64,Gem5,我在初始化gem5中的PMU时遇到问题,该系统使用cpu hpi中的starter\u fs.py。 我遵照这篇文章的指示,设法解决了我的问题。我添加了补丁并配置了系统。我没有使用perf。我尝试直接访问寄存器并读取它们。如我所见,GEM5只实现了一些寄存器事件。我们是否可以添加其他内容以及: 例如,未实施EXC_。以下是添加它们的方法吗 self.addEvent(ProbeEvent(self,0x09,cpu,“EXC_take”)) #0x09:EXC_已执行 此外,读取pmu事件寄存器

我在初始化 gem5 中的 PMU 时遇到了问题,该系统使用 starter_fs.py,CPU 类型为 hpi。我按照这篇文章的说明操作,解决了最初的问题:我打上了补丁并配置好了系统。我没有使用 perf,而是尝试直接访问并读取这些寄存器。据我所见,gem5 只实现了部分 PMU 事件。我们能否添加其他事件?例如,EXC_TAKEN 尚未实现。下面这样写是添加它的正确方法吗?

self.addEvent(ProbeEvent(self, 0x09, cpu, "EXC_TAKEN"))

# 0x09: EXC_TAKEN

此外,我已经能够读取 PMU 事件寄存器并取得事件计数,但周期计数寄存器 PMCCNTR 始终返回零。gem5 是如何递增该寄存器的?读取周期计数寄存器的正确步骤是什么?

我使用perf读取的代码如下:

#include <errno.h>
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/perf_event.h>
    
    
    
    /* Number of graph vertices; dimension of AdjMatrix and of rgnNodes. */
    #define NUM_NODES                          100
    /* Sentinel value: marks "no edge" in AdjMatrix and an unset
       distance/predecessor in rgnNodes. */
    #define NONE                               9999
    
    
    
    
    /*
     * Per-vertex search state kept in rgnNodes.
     *   iDist - distance recorded for the vertex (NONE while unset)
     *   iPrev - vertex it was reached from (NONE while unset)
     */
    typedef struct _NODE
    {
      int iDist;
      int iPrev;
    } NODE;
    
    /*
     * One entry of the singly linked work queue.
     *   iNode - vertex to expand
     *   iDist - distance carried with the entry
     *   iPrev - predecessor vertex carried with the entry
     *   qNext - next entry in the queue, NULL at the tail
     */
    typedef struct _QITEM
    {
      int iNode;
      int iDist;
      int iPrev;
      struct _QITEM *qNext;
    } QITEM;
    
    /* Front of the work queue; NULL while the queue is empty. */
    QITEM *qHead = NULL;

    /* Edge costs indexed [from][to]; an entry equal to NONE means no edge. */
    int AdjMatrix[NUM_NODES][NUM_NODES];

    /* Number of entries currently linked into the queue. */
    int g_qCount = 0;

    /* Search state for every vertex, reset at the start of each dijkstra() call. */
    NODE rgnNodes[NUM_NODES];

    /* File-scope scratch variables used by dijkstra(). */
    int ch;
    int iPrev;
    int iNode;
    int i;
    int iCost;
    int iDist;
    
    
    /*
     * Path-printing hook.  The recursive walk over the predecessor chain and
     * the per-node printf are disabled in this build, so the only observable
     * effect is flushing stdout.
     *
     *   rgnNodes - table of per-vertex search state
     *   chNode   - vertex whose path would be printed
     */
    void print_path (NODE *rgnNodes, int chNode)
    {
      const int has_predecessor = (rgnNodes[chNode].iPrev != NONE);

      /* Recursion into the predecessor is intentionally switched off. */
      (void) has_predecessor;

      fflush(stdout);
    }
    
    
    /*
     * Append a new entry to the tail of the global work queue (qHead) and
     * bump g_qCount.
     *
     *   iNode - vertex to expand later
     *   iDist - distance to store with the entry
     *   iPrev - predecessor vertex to store with the entry
     *
     * Terminates the process with exit status 1 if the entry cannot be
     * allocated.  The entry is released by dequeue().
     */
    void enqueue (int iNode, int iDist, int iPrev)
    {
      /* No cast and sizeof on the object: a missing malloc prototype is then
         diagnosed by the compiler instead of being silently hidden. */
      QITEM *qNew = malloc(sizeof *qNew);
      QITEM **ppTail = &qHead;

      if (!qNew)
        {
          fprintf(stderr, "Out of memory.\n");
          exit(1);
        }

      qNew->iNode = iNode;
      qNew->iDist = iDist;
      qNew->iPrev = iPrev;
      qNew->qNext = NULL;

      /* Walk to the link that terminates the list (qHead itself when the
         queue is empty) and hook the new entry in there. */
      while (*ppTail)
        {
          ppTail = &(*ppTail)->qNext;
        }
      *ppTail = qNew;

      g_qCount++;
    }
    
    
    void dequeue (int *piNode, int *piDist, int *piPrev)
    {
      QITEM *qKill = qHead;
      
      if (qHead)
        {
          //                 ASSERT(g_qCount);
          *piNode = qHead->iNode;
          *piDist = qHead->iDist;
          *piPrev = qHead->iPrev;
          qHead = qHead->qNext;
          free(qKill);
          g_qCount--;
        }
    }
    
    
    /* Report how many entries are currently in the work queue. */
    int qcount (void)
    {
      const int pending = g_qCount;

      return pending;
    }
    
    /*
     * Compute the cheapest path cost from chStart to chEnd over AdjMatrix.
     *
     * rgnNodes is reset on every call and afterwards holds, for each vertex
     * reached, the best distance found and the vertex it was reached from.
     * The file-scope variables ch, i, iNode, iDist, iPrev and iCost are used
     * as scratch storage, exactly as before.
     *
     *   chStart - source vertex, 0 <= chStart < NUM_NODES
     *   chEnd   - destination vertex, 0 <= chEnd < NUM_NODES
     *
     * Returns the path cost, 0 when chStart == chEnd, or NONE when chEnd was
     * not reached or an argument is out of range.  (The function used to fall
     * off its end without returning a value.)
     */
    int dijkstra(int chStart, int chEnd)
    {
      for (ch = 0; ch < NUM_NODES; ch++)
        {
          rgnNodes[ch].iDist = NONE;
          rgnNodes[ch].iPrev = NONE;
        }

      /* Reject indices that would address outside rgnNodes / AdjMatrix. */
      if (chStart < 0 || chStart >= NUM_NODES ||
          chEnd < 0 || chEnd >= NUM_NODES)
        {
          return NONE;
        }

      if (chStart == chEnd)
        {
          /* Nothing to search; rgnNodes stays in its reset state. */
          return 0;
        }

      rgnNodes[chStart].iDist = 0;
      rgnNodes[chStart].iPrev = NONE;

      enqueue (chStart, 0, NONE);

      while (qcount() > 0)
        {
          dequeue (&iNode, &iDist, &iPrev);
          for (i = 0; i < NUM_NODES; i++)
            {
              iCost = AdjMatrix[iNode][i];
              if (iCost == NONE)
                {
                  continue;   /* no edge iNode -> i */
                }

              /* Relax the edge when i is unseen or this route is cheaper,
                 then queue i so its own neighbours get re-examined. */
              if ((NONE == rgnNodes[i].iDist) ||
                  (rgnNodes[i].iDist > (iCost + iDist)))
                {
                  rgnNodes[i].iDist = iDist + iCost;
                  rgnNodes[i].iPrev = iNode;
                  enqueue (i, iDist + iCost, iNode);
                }
            }
        }

      return rgnNodes[chEnd].iDist;
    }
    
    /*
     * Read the 64-bit value of a perf counter file descriptor.
     * On success *value is updated; on failure a diagnostic is written to
     * stderr and *value keeps its previous contents.
     */
    static void read_cycle_counter (int fd, uint64_t *value)
    {
      uint64_t sample = 0;
      ssize_t got = read(fd, &sample, sizeof sample);

      if (got == (ssize_t) sizeof sample)
        {
          *value = sample;
        }
      else
        {
          fprintf(stderr, "perf counter read fail: %s\n",
                  got < 0 ? strerror(errno) : "short read");
        }
    }

    /*
     * Benchmark driver: opens a raw perf counter, loads a NUM_NODES x
     * NUM_NODES adjacency matrix from the file named by argv[1], runs 100
     * shortest-path queries and prints the counter readings.
     *
     * Returns EXIT_SUCCESS, or EXIT_FAILURE when the matrix file is missing,
     * unreadable or malformed.  A failure to open or read the perf counter is
     * reported on stderr but does not stop the run.
     */
    int main(int argc, char *argv[]) {
      uint64_t num_cycles_nominal = 0;
      uint64_t num_cycles_attack = 0;
      uint64_t counter_cpu_cycles = 0;
      int i, j, k;
      FILE *fp;
      int perf_fd_cpu_cycles;
      /* static storage => every field not set below starts out as zero */
      static struct perf_event_attr attr_cpu_cycles;

      attr_cpu_cycles.size = sizeof(attr_cpu_cycles);
      attr_cpu_cycles.exclude_kernel = 1;
      attr_cpu_cycles.exclude_hv = 1;
      attr_cpu_cycles.exclude_callchain_kernel = 1;
      attr_cpu_cycles.type = PERF_TYPE_RAW;
      /* NOTE(review): raw event 0x11 is presumably CPU_CYCLES in the ARM PMU
         event numbering - confirm against the target core. */
      attr_cpu_cycles.config = 0x11;

      /* Open the file descriptor corresponding to this counter. The counter
         should start at this moment. */
      perf_fd_cpu_cycles = (int) syscall(__NR_perf_event_open, &attr_cpu_cycles, 0, -1, -1, 0);
      if (perf_fd_cpu_cycles == -1)
        {
          fprintf(stderr, "perf_event_open fail %d %d: %s\n", perf_fd_cpu_cycles, errno, strerror(errno));
        }

      /* argv[1] is only valid when a file name was actually supplied. */
      if (argc < 2)
        {
          fprintf(stderr, "Usage: dijkstra <filename>\n");
          return EXIT_FAILURE;
        }

      /* open the adjacency matrix file */
      fp = fopen (argv[1], "r");
      if (!fp)
        {
          fprintf(stderr, "cannot open %s: %s\n", argv[1], strerror(errno));
          return EXIT_FAILURE;
        }

      /* load the full NUM_NODES x NUM_NODES matrix */
      for (i = 0; i < NUM_NODES; i++)
        {
          for (j = 0; j < NUM_NODES; j++)
            {
              if (fscanf(fp, "%d", &k) != 1)
                {
                  fprintf(stderr, "%s: bad or missing matrix entry [%d][%d]\n", argv[1], i, j);
                  fclose(fp);
                  return EXIT_FAILURE;
                }
              AdjMatrix[i][j] = k;
            }
        }
      fclose(fp);

      /* First sample: cycles spent up to and including the matrix load. */
      read_cycle_counter(perf_fd_cpu_cycles, &counter_cpu_cycles);
      printf("Number of cpu_cycles before: %" PRIu64 "\n", counter_cpu_cycles);
      num_cycles_nominal = counter_cpu_cycles;

      /* Second sample, taken right after the first one. */
      read_cycle_counter(perf_fd_cpu_cycles, &counter_cpu_cycles);
      printf("Number of cpu_cycles after attack: %" PRIu64 "\n", counter_cpu_cycles);
      num_cycles_attack = counter_cpu_cycles - num_cycles_nominal;

      /* run 100 shortest-path queries; the destination wraps around
         NUM_NODES */
      for (i = 0, j = NUM_NODES / 2; i < 100; i++, j++)
        {
          j = j % NUM_NODES;
          dijkstra(i, j);
        }

      /* Final sample, then release the counter. */
      read_cycle_counter(perf_fd_cpu_cycles, &counter_cpu_cycles);
      close(perf_fd_cpu_cycles);
      printf("Number of cpu_cycles end: %" PRIu64 "\n", counter_cpu_cycles);
      num_cycles_nominal = counter_cpu_cycles - num_cycles_attack;
      printf("Number of cpu_cycles nominal: %" PRIu64 "\n", num_cycles_nominal);
      printf("Number of cpu_cycles attack: %" PRIu64 "\n", num_cycles_attack);

      return EXIT_SUCCESS;
    }
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#定义NUM_节点100
#定义无9999
结构节点
{
国际主义者;
int iPrev;
};
typedef结构节点;
结构
{
int-iNode;
国际主义者;
int iPrev;
结构_QITEM*qNext;
};
typedef struct_QITEM QITEM;
QITEM*qHead=NULL;
int AdjMatrix[NUM_NODES][NUM_NODES];
int g_qCount=0;
节点rgnNodes[NUM_节点];
int-ch;
int-iPrev,iNode;
int i、iCost、iDist;
void print\u路径(节点*rgnnode,int chNode)
{
if(rgnNodes[chNode].iPrev!=无)
{
//打印路径(rgnNodes,rgnNodes[chNode].iPrev);
}
//printf(“%d”,chNode);
fflush(stdout);
}
无效队列(int-iNode、int-iDist、int-iPrev)
{
QITEM*qNew=(QITEM*)malloc(sizeof(QITEM));
QITEM*qLast=qHead;
如果(!qNew)
{
//fprintf(stderr,“内存不足”。\n”);
出口(1);
}
qNew->iNode=iNode;
qNew->iDist=iDist;
qNew->iPrev=iPrev;
qNew->qNext=NULL;
如果(!qLast)
{
qHead=qNew;
}
其他的
{
而(qLast->qNext)qLast=qLast->qNext;
qLast->qNext=qNew;
}
g_qCount++;
//断言(g_qCount);
}
无效出列(int*piNode,int*piDist,int*piPrev)
{
QITEM*qKill=qHead;
if(qHead)
{
//断言(g_qCount);
*piNode=qHead->iNode;
*piDist=qHead->iDist;
*piPrev=qHead->iPrev;
qHead=qHead->qNext;
免费(qKill);
g_qCount--;
}
}
int qcount(无效)
{
返回(g_qCount);
}
迪杰克斯特拉国际机场(chStart国际机场、chEnd国际机场)
{
对于(ch=0;ch0)
{
退出队列(&iNode,&iDist,&iPrev);
对于(i=0;i(iCost+iDist)))
{
rgnNodes[i].iDist=iDist+iCost;
rgnNodes[i].iPrev=iNode;
排队(i,iDist+iCost,iNode);
}
}
}
}
//printf(“最短路径成本为%d.”,rgnNodes[chEnd].iDist);
//printf(“路径为:”);
//打印路径(rgnNodes,chEnd);
//printf(“\n”);
}
}
int main(int argc,char*argv[]){
int-diff=0;
uint64\u t num\u循环\u标称=0;
uint64\u t num\u cycles\u attack=0;
uint64计数器cpu周期=0;
//系统(“/加载模块”);
int i,j,k;
文件*fp;
静态int性能fd cpu周期;
静态结构性能事件属性属性cpu周期;
attr\u cpu\u cycles.size=sizeof(attr\u cpu\u cycles);
attr_cpu_cycles.exclude_kernel=1;
属性cpu周期。排除hv=1;
attr\u cpu\u cycles.exclude\u callchain\u kernel=1;
attr\u cpu\u cycles.type=性能类型\u原始;
attr_cpu_cycles.config=0x11;
/*打开与此计数器对应的文件描述符。计数器
应该从这个时候开始*/
如果((性能fd\u cpu\u周期=系统调用(\uu NR\u性能事件\u打开,&attr\u cpu\u周期,0,-1,-1,0))=-1)
fprintf(标准,“性能事件打开失败%d%d:%s\n”,性能fd\u cpu\u周期,错误号,strerror(错误号));

如果(argc
对于尚未实现的事件,已经提交了一个工单。顺便说一句:我很快会尝试实现其中的一些。与已实现的事件一样,您必须先在 CPU 源代码中找到应当递增这些事件的正确位置,并在那里添加一个探测点(probe point)。对于已实现的事件,我已经可以按照
man perf_event_open
中的示例通过内核 API 读取它们,该 API 会间接访问 PMU。内核可能会完成一些必要的初始化。如果可能,请直接使用
perf_event_open
。您好,谢谢您的回答。我的问题是,一些计数器没有递增。ie周期和指令已经实现,但是不递增。我将给出一个示例:'./dijkstra的性能计数器统计