2014-02-08 15 views
4

我想監視最後一級緩存(LLC)中的緩存請求數量。我根據這篇教程(here)編寫了一個Linux模塊來獲取這些信息。(問題標題:Linux模塊:性能計數器不起作用)

它可以編譯運行,但輸出結果總是0。換句話說,當我使用rdmsr時,它總是給我edx = 0,eax = 0。我甚至嘗試了教程(tutorial)中的演示代碼,輸出仍然是0。

我在這個問題上停留了整整一週。任何人都可以幫我指出我在程序中犯的錯誤嗎?

我知道有一些現有的程序在做同樣的事情,但我必須知道如何自己編寫代碼,因爲我想監視Xen hypervisor中的緩存請求。我不能在Xen中使用這些工具,除非我將這些工具整合到Xen的虛擬機管理程序中,這似乎有更多的工作。

/* 
* Record the cache miss rate of Intel Sandybridge cpu 
* To confirm the event is correctly set! 
*/ 
#include <linux/module.h> /* Needed by all modules */ 
#include <linux/kernel.h> /* Needed for KERN_INFO */ 

/*
 * MSR numbers of the 4 performance event selectors.
 * The chosen number is loaded into %ecx for the wrmsr instruction.
 */
#define PERFEVTSEL0 0x186
#define PERFEVTSEL1 0x187
#define PERFEVTSEL2 0x188
#define PERFEVTSEL3 0x189

/*
 * MSR numbers of the 4 performance counters, one per selector above
 * (PERFEVTSELn programs PMCn). Like the selectors they are consecutive,
 * so PMC2 must not share 0xc2 with PMC1: reading "PMC2" would otherwise
 * return the counter that belongs to PERFEVTSEL1.
 */
#define PMC0 0xc1
#define PMC1 0xc2
#define PMC2 0xc3
#define PMC3 0xc4

/*
 * Event numbers and unit masks, taken from the Intel Software Developer
 * Manual (page 2549 according to the original author).
 * Entries marked "confirmed" were checked by the author; the remaining
 * L1I/L1D values have not been confirmed.
 */

/* ---- L1 instruction cache ---- */
#define L1I_ALLHIT_EVENT    0x80
#define L1I_ALLHIT_MASK     0x01
#define L1I_ALLMISS_EVENT   0x80    /* confirmed */
#define L1I_ALLMISS_MASK    0x02    /* confirmed */

/* ---- L1 data cache ---- */
/* There is no single "all requests missed" mask: add load misses and store misses. */
#define L1D_ALLREQ_EVENT    0x43
#define L1D_ALLREQ_MASK     0x01
#define L1D_LDMISS_EVENT    0x40
#define L1D_LDMISS_MASK     0x01
#define L1D_STMISS_EVENT    0x28
#define L1D_STMISS_MASK     0x01

/* ---- L2, private to each core (confirmed) ---- */
#define L2_ALLCODEREQ_MASK  0x30
#define L2_ALLCODEMISS_MASK 0x20

/*
 * NOTE(review): the "ALL" masks are aliases of the code-only masks above.
 * The original comments mention 0xFF (requests) and 0xAA (misses) as the
 * all-request values; confirm which one is intended before relying on them.
 */
#define L2_ALLREQ_EVENT     0x24
#define L2_ALLREQ_MASK      L2_ALLCODEREQ_MASK
#define L2_ALLMISS_EVENT    0x24
#define L2_ALLMISS_MASK     L2_ALLCODEMISS_MASK

/* ---- L3, shared (confirmed): uses the last-level-cache event and masks ---- */
#define L3_ALLREQ_EVENT     0x2E
#define L3_ALLREQ_MASK      0x4F
#define L3_ALLMISS_EVENT    0x2E
#define L3_ALLMISS_MASK     0x41

/*
 * Flag bits 16 and 17 of the value written to an event selector.
 * NOTE(review): presumably the USR / OS privilege-level filters of the
 * selector register - confirm against the Intel SDM.
 */
#define USR_BIT    (0x01UL << 16)
#define OS_BIT    (0x01UL << 17)

/*
 * Set / clear one flag bit in the selector value passed as `eax`.
 * Every macro operates on its own argument (the original CLEAR_MSR_USR_BIT
 * misspelled its parameter and therefore always modified a variable that
 * happened to be called `eax`), and arguments are parenthesized so that
 * expression arguments are safe.
 */
#define SET_MSR_USR_BIT(eax)   ((eax) |= USR_BIT)
#define CLEAR_MSR_USR_BIT(eax) ((eax) &= (~USR_BIT))
#define SET_MSR_OS_BIT(eax)    ((eax) |= OS_BIT)
#define CLEAR_MSR_OS_BIT(eax)  ((eax) &= (~OS_BIT))

/* OR the event number (bits 0-7) and the unit mask (bits 8-15) into `eax`. */
#define SET_EVENT_MASK(eax, event, umask) ((eax) |= ((event) | ((umask) << 8)))

/* MSR EN flag (bit 22): when set, the counter starts counting. */
#define MSR_ENFLAG  (0x1<<22)


/*
 * Program the MSR numbered `ecx` (32-bit instruction variant, v3).
 *
 * Two wrmsr instructions are issued for the same MSR:
 *   1. edx:eax = 0:0, which zeroes the MSR itself (the register selected
 *      by `ecx`, not a separate counter register);
 *   2. edx:eax = 0:(eax | MSR_ENFLAG).
 *
 * NOTE(review): MSR_ENFLAG is always OR-ed into the low word, so a caller
 * cannot use this helper to write a value with that bit cleared.
 */
static inline void rtxen_write_msr(uint32_t eax, uint32_t ecx)
{
    const uint32_t zero = 0;
    const uint32_t low_word = eax | MSR_ENFLAG;

    /* step 1: clear the MSR */
    __asm__ __volatile__ ("wrmsr"
     : /* no outputs */
     : "c" (ecx), "a" (zero), "d" (zero));

    /* step 2: ecx = MSR number, edx = high bits (0), eax = low bits */
    __asm__ __volatile__ ("wrmsr"
     : /* no outputs */
     : "c" (ecx), "a" (low_word), "d" (zero));
}

/*
 * Read the MSR whose number is stored in *ecx with rdmsr.
 * The low 32 bits of the result are stored in *eax, the high 32 bits
 * in *edx. *ecx is only read.
 */
static inline void rtxen_read_msr(uint32_t* ecx, uint32_t *eax, uint32_t* edx)
{
    uint32_t msr_number = *ecx;
    uint32_t low_word;
    uint32_t high_word;

    __asm__ __volatile__ ("rdmsr"
     : "=a" (low_word), "=d" (high_word)
     : "c" (msr_number));

    *eax = low_word;
    *edx = high_word;
}

/*
 * Generate some memory traffic by writing every byte of a 1000-byte
 * stack buffer.
 *
 * The buffer is volatile: it is never read back, so without the qualifier
 * an optimizing compiler may remove the stores (and the loop) completely,
 * leaving nothing for the performance counter to observe. The elements
 * are unsigned so that storing i * 2 (up to 1998) is a well-defined
 * truncation.
 */
static inline void delay(void)
{
    volatile unsigned char tmp[1000];
    int i;

    for (i = 0; i < 1000; i++)
    {
        tmp[i] = (unsigned char)(i * 2);
    }
}

/*
 * Selects which measurement init_module() runs.
 * Only UOPS and L3 have a branch there; the other values end up in the
 * "operation not implemented yet" default.
 */
enum cache_level
{
    UOPS = 0,   /* demo event */
    L1I  = 1,   /* L1 instruction cache */
    L1D  = 2,   /* L1 data cache */
    L2   = 3,   /* L2 cache */
    L3   = 4    /* L3 cache */
};

int init_module(void) 
{ 
    enum cache_level op; 
    uint32_t eax, edx, ecx; 
    uint64_t l3_all; 
    op = UOPS; 
    switch(op) 
    { 
    case UOPS: 
     eax = 0x0001010E; 
     eax |= MSR_ENFLAG; 
     ecx = 0x187; 
     printk(KERN_INFO "UOPS Demo: write_msr: eax=%#010x, ecx=%#010x\n", eax, ecx); 
     rtxen_write_msr(eax, ecx); 
     ecx = 0xc2; 
     eax = 1; 
     edx = 2; 
     rtxen_read_msr(&ecx, &eax, &edx); 
     printk(KERN_INFO "UOPS Demo: read_msr: edx=%#010x, eax=%#010x\n", edx, eax); 
     break; 
    case L3: 
     eax = 0; 
     SET_MSR_USR_BIT(eax); 
     SET_MSR_OS_BIT(eax); 
     SET_EVENT_MASK(eax, L3_ALLREQ_EVENT, L3_ALLREQ_MASK); 
     eax |= MSR_ENFLAG; 
     ecx = PERFEVTSEL2; 
     printk(KERN_INFO "before wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx); 
     rtxen_write_msr(eax, ecx); 
     printk(KERN_INFO "after wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx); 
     printk(KERN_INFO "L3 all request set MSR PMC2\n"); 
     printk(KERN_INFO "delay by access an array\n"); 
     delay(); 
     ecx = PMC2; 
     eax = 1; 
     edx = 2; 
     printk(KERN_INFO "rdmsr: ecx=%#010x\n", ecx); 
     rtxen_read_msr(&ecx, &eax, &edx); /*need to pass into address!*/ 
     l3_all = (((uint64_t) edx << 32) | eax); 
     printk(KERN_INFO "rdmsr: L3 all request is %llu (%#010lx)\n", l3_all, (unsigned long)l3_all); 
     break; 
    default: 
     printk(KERN_INFO "operation not implemented yet\n"); 
    } 
    /* 
    * A non 0 return means init_module failed; module can't be loaded. 
    */ 
    return 0; 
} 

/*
 * Module exit hook: logs a farewell message and nothing else.
 * It does not write any MSR, so whatever init_module() programmed is
 * left as it is.
 */
void cleanup_module(void)
{
    printk(KERN_INFO "Goodbye world 1.\n");
    return;
}

我得到的結果是:

[ 1780.946584] UOPS Demo: write_msr: eax=0x0001010e, ecx=0x00000187 
[ 1780.946590] UOPS Demo: read_msr: edx=0x00000000, eax=0x00000000 
[ 1818.595055] Goodbye world 1. 
[ 1821.153947] UOPS Demo: write_msr: eax=0x0041010e, ecx=0x00000187 
[ 1821.153950] UOPS Demo: read_msr: edx=0x00000000, eax=0x00000000 
+1

你確認過你所參考的教程使用的架構與你的機器相同嗎?另一種方法是使用Linux導出的系統調用perf_event_open(http://web.eece.maine.edu/~vweaver/projects/perf_events/perf_event_open.html),它可以從用戶級代碼中調用。 –

+0

@ManuelSelva 非常感謝你的建議!我確實查閱了英特爾程序員手冊,也核對了我的機器的架構。我認爲事件編號和掩碼應該是正確的。現在我不確定我的代碼流程是否正確。例如,在發出wrmsr之後,我是否還必須做些其他事情?另外,我也不確定在發出wrmsr指令時,是否爲eax設置了正確的值。 – Mike

+0

順便說一下,我必須編寫代碼而不是使用linux調用,因爲我最終會將代碼放入虛擬化管理程序中。如果我使用perf_event_open函數,我將不得不包含很多依賴。 – Mike

回答

5

在@Manuel Selva的幫助下,我終於解決了這個問題!

設置性能計數器(perf. counter)的正確流程是:

第1步:設置msr並通過在eax中設置EN位來啓用計數器;

第2步:通過寫MSR停止計數器;

第3步:讀取計數器。

我漏掉了第2步,這就是爲什麼它總是給我0。如果在停止計數器之前就去讀取它,報告0是說得通的。

switch語句的正確的代碼如下:

/*
 * Corrected measurement sequence from the answer: write the selector with
 * the EN bit, write the selector a second time to stop counting, and only
 * then read the counter.
 *
 * NOTE(review): rtxen_write_msr() as listed in the question ORs MSR_ENFLAG
 * into its eax argument before the final wrmsr, so the "stop" writes below
 * still reach the MSR with bit 22 set unless that helper was changed too -
 * confirm.
 */
switch(op) 
    { 
    case UOPS: 
     /* Same event/umask as the question (0x0E / 0x01) with bits 16, 20 and 22 set;
      * bit 22 is MSR_ENFLAG, so the OR on the next line does not change the value. */
     eax = 0x0051010E; 
     eax |= MSR_ENFLAG; 
     ecx = 0x187; 
     printk(KERN_INFO "UOPS Demo: write_msr: eax=%#010x, ecx=%#010x\n", eax, ecx); 
     rtxen_write_msr(eax, ecx); 
     /* Stop counting: the same selector value with bit 22 (EN) cleared. */
     eax = 0x0011010E; 
     rtxen_write_msr(eax,ecx); 
     /* Read counter MSR 0xc2; eax/edx are preset to dummy values that rdmsr overwrites. */
     ecx = 0xc2; 
     eax = 1; 
     edx = 2; 
     rtxen_read_msr(&ecx, &eax, &edx); 
     printk(KERN_INFO "UOPS Demo: read_msr: edx=%#010x, eax=%#010x\n", edx, eax); 
     break; 
    case L3: 
     /* Build the selector value: USR and OS bits, L3 "all requests" event and mask, EN flag. */
     eax = 0; 
     SET_MSR_USR_BIT(eax); 
     SET_MSR_OS_BIT(eax); 
     SET_EVENT_MASK(eax, L3_ALLREQ_EVENT, L3_ALLREQ_MASK); 
     eax |= MSR_ENFLAG; 
     eax |= (1<<20); /* INT bit (bit 20): counter overflow */
     ecx = PERFEVTSEL2; 
     printk(KERN_INFO "before wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx); 
     rtxen_write_msr(eax, ecx); 
     printk(KERN_INFO "after wrmsr: eax=%#010x, ecx=%#010x\n", eax, ecx); 
     printk(KERN_INFO "L3 all request set MSR PMC2\n"); 
     printk(KERN_INFO "delay by access an array\n"); 
     delay(); 
     /* Stop counting: clear the EN flag and write the selector again (ecx is still PERFEVTSEL2). */
     eax &= (~MSR_ENFLAG); 
     rtxen_write_msr(eax, ecx); 
     printk(KERN_INFO "stop the counter, eax=%#010x\n", eax); 
     /* NOTE(review): the question's macro list defines PMC2 as 0xc2, the same number as PMC1,
      * while the selector used above is PERFEVTSEL2 - verify the counter MSR number. */
     ecx = PMC2; 
     eax = 1; 
     edx = 2; 
     printk(KERN_INFO "rdmsr: ecx=%#010x\n", ecx); 
     rtxen_read_msr(&ecx, &eax, &edx); /* the helper takes the MSR number and both outputs by address */
     /* Combine the halves: edx is the high 32 bits, eax the low 32 bits. */
     l3_all = (((uint64_t) edx << 32) | eax); 
     printk(KERN_INFO "rdmsr: L3 all request is %llu (%#010lx)\n", l3_all, (unsigned long)l3_all); 
     break; 
    default: 
     printk(KERN_INFO "operation not implemented yet\n"); 
    }