2011-08-02 48 views
1

更新2011年9月12日與hs_err_pid識別碰撞* .log和GDB

我能得到的核心文件,並立即dissabled崩潰的指令。根據建議,我跟蹤了r28的值(順便說一下,沒有註冊表項記錄到hs_err_pid * .log)並檢查值是從哪裏來的(參見下面的w/< ---)。但是,我無法確定r32的價值。

未命中對齊的原因是r28是一個8字節的整數加載到一個4字節的整數r31?

;;; 1053    if(Transfer(len) == FALSE) { 
0xc00000000c0c55c0:2 <TFM::PrintTrace(..)+0x32>:  adds   r44=0x480,r32;; <--- 
0xc00000000c0c55d0:0 <TFM::PrintTrace(..)+0x40>:  ld8   r43=[ret2] 
0xc00000000c0c55d0:1 <TFM::PrintTrace(..)+0x41>: (p6) st4   [r35]=ret3 
0xc00000000c0c55d0:2 <TFM::PrintTrace(..)+0x42>:  adds   r48=28,r33 
0xc00000000c0c55e0:0 <TFM::PrintTrace(..)+0x50>:  mov   ret0=0;; 
0xc00000000c0c55e0:1 <TFM::PrintTrace(..)+0x51>:  ld8.c.clr r62=[r45] 
0xc00000000c0c55e0:2 <TFM::PrintTrace(..)+0x52>:  cmp.eq.unc p6,p1=r0,r62 
;;; 1056      throw MutexLock ; 
0xc00000000c0c55f0:0 <TFM::PrintTrace(..)+0x60>:  nop.m  0x0 
0xc00000000c0c55f0:1 <TFM::PrintTrace(..)+0x61>:  nop.m  0x0 
0xc00000000c0c55f0:2 <TFM::PrintTrace(..)+0x62>: (p6) br.cond.dpnt.many _NZ10TFM07PrintTraceEPi+0x800;; 
;;; 1057    } 
0xc00000000c0c5600:0 <TFM::PrintTrace(..)+0x70>:  adds   r41=0x488,r32 
0xc00000000c0c5600:1 <TFM::PrintTrace(..)+0x71>:  adds   r40=0x490,r32 
0xc00000000c0c5600:2 <TFM::PrintTrace(..)+0x72>:  br.call.dptk.many rp=0xc00000000c080620;; 
;;; 1060    dwDataLen = len ; 
0xc00000000c0c5610:0 <TFM::PrintTrace(..)+0x80>:  ld8   r16=[r44] <--- 
0xc00000000c0c5610:1 <TFM::PrintTrace(..)+0x81>:  mov   gp=r36 
0xc00000000c0c5610:2 <TFM::PrintTrace(..)+0x82>: (p1) mov   r62=8;; 
0xc00000000c0c5620:0 <TFM::PrintTrace(..)+0x90>:  cmp.eq.unc p6=r0,r16 
0xc00000000c0c5620:1 <TFM::PrintTrace(..)+0x91>:  nop.m  0x0 
0xc00000000c0c5620:2 <TFM::PrintTrace(..)+0x92>: (p6) br.cond.dpnt.many _NZ10TFM07PrintTraceEPi+0xda0;; 
0xc00000000c0c5630:0 <TFM::PrintTrace(..)+0xa0>:  adds   r21=16,r16 <--- 
0xc00000000c0c5630:1 <TFM::PrintTrace(..)+0xa1>: (p1) mov   r62=8;; 
0xc00000000c0c5630:2 <TFM::PrintTrace(..)+0xa2>:  nop.i  0x0 
0xc00000000c0c5640:0 <TFM::PrintTrace(..)+0xb0>:  ld8   r42=[r21];; <--- 
0xc00000000c0c5640:1 <TFM::PrintTrace(..)+0xb1>:  cmp.eq.unc p6=r0,r42 
0xc00000000c0c5640:2 <TFM::PrintTrace(..)+0xb2>:  nop.i  0x0 
0xc00000000c0c5650:0 <TFM::PrintTrace(..)+0xc0>:  nop.m  0x0 
0xc00000000c0c5650:1 <TFM::PrintTrace(..)+0xc1>:  mov   r47=5 
0xc00000000c0c5650:2 <TFM::PrintTrace(..)+0xc2>: (p6) br.cond.dpnt.many _NZ10TFM07PrintTraceEPi+0xdf0;; 
0xc00000000c0c5660:0 <TFM::PrintTrace(..)+0xd0>:  ld4.a  r27=[r48] 
;;; 1064      if(dwDataLen <= dwViewLen) { 
0xc00000000c0c5660:1 <TFM::PrintTrace(..)+0xd1>:  adds   r28=28,r42 <-- 
0xc00000000c0c5660:2 <TFM::PrintTrace(..)+0xd2>:  cmp.ne.unc p6=r0,r46;; 
0xc00000000c0c5670:0 <TFM::PrintTrace(..)+0xe0>:  ld4.sa  r26=[r28], 
0xc00000000c0c5670:1 <TFM::PrintTrace(..)+0xe1>: (p6) ld4   r31=[r28] <-- instruction that crashed 

讓我知道是否需要寄存器值。我想我可以使用gdb的info reg命令獲取寄存器值。 這是信息寄存器的結果(我排除了prXXX和brXXX的值),我不知道如何將這些映射到上面的反彙編指令。

gr1: 0x9fffffffbf716588 
    gr2: 0x9fffffff5f667c00 
    gr3: 0x9fffffff5f667c00 
    gr4: 0x6000000000e0b000 
    gr5: 0x9fffffff8adfe2e0 
    gr6: 0x9fffffff8ada9000 
    gr7: 0x9fffffff8ad7a000 
    gr8:    0x1 
    gr9: 0x9fffffff8adfd0f0 
gr10:     0 
gr11: 0xc000000000000690 
gr12: 0x9fffffff8adfd140 
gr13: 0x6000000001681510 
gr14: 0x9fffffffbf7d8e98 
gr15:    0x1a 
gr16: 0x60000000044dac60 
gr17:    0x1f 
gr18:     0 
gr19: 0x9fffffff8ad023f0 
gr20: 0x9fffffff8adfd0e0 
gr21: 0x60000000044dac70 
gr22: 0x9fffffff5f668000 
gr23:    0xd 
gr24:    0x1 
gr25: 0xc0000000004341f0 
gr26:    NaT 
gr27:    0x63 
gr28: 0xc00000000c5f801c 
gr29: 0xc00000000029db20 
gr30: 0xc00000000029db20 
gr31:    0x288 
gr32: 0x60000000044796d0 
gr33: 0x6000000001a78910 
gr34:    0x7e 
gr35: 0x6000000001d03a90 
gr36: 0x9fffffffbf716588 
gr37: 0xc000000000000c9d 
gr38: 0xc00000000c0c4f70 
gr39:    0x9 
gr40: 0x6000000004479b60 
gr41: 0x6000000004479b58 
gr42: 0xc00000000c5f8000 
gr43: 0x9fffffffbf7144e0 
gr44: 0x6000000004479b50 
gr45: 0x6000000004479b68 
gr46: 0x6000000001d03a90 
gr47:    0x5 
gr48: 0x6000000001a7892c 
gr49: 0x9fffffff8adfe110 
gr50: 0xc000000000000491 
gr51: 0xc00000000c0c5520 
gr52: 0xc00000000c07dd10 
gr53: 0x9fffffff8adfe120 
gr54: 0x9fffffff8adfe0a0 
gr55: 0xc00000000000058e 
gr56: 0xc00000000042be40 
gr57:    0x39 
gr58:    0x3 
gr59:    0x33 
gr60:     0 
gr61: 0x9fffffffbf7d2598 
gr62:    0x8 
gr63: 0x9fffffffbf716588 
gr64: 0xc000000000000f22 
gr65: 0xc00000000c0c5610 

這是一個更新我以前的帖子。由於我提供的核心文件的副本 ,我用gdb檢查覈心文件,並執行 以下命令:BT

1)

2)幀n < - 發生中止其中框架

3)disas

這裏是結果。

(gdb) bt 
#0 0xc0000000001e5350:0 in _lwp_kill+0x30() 
    from /usr/lib/hpux64/libpthread.so.1 
#1 0xc00000000014c7b0:0 in pthread_kill+0x9d0() 
    from /usr/lib/hpux64/libpthread.so.1 
#2 0xc0000000002e4080:0 in raise+0xe0() from /usr/lib/hpux64/libc.so.1 
#3 0xc0000000003f47f0:0 in abort+0x170() from /usr/lib/hpux64/libc.so.1 
#4 0xc00000000e65e0d0:0 in os::abort() 
    at /CLO/Components/JAVA_HOTSPOT/Src/src/os/hp-ux/vm/os_hp-ux.cpp:2033 
#5 0xc00000000eb473e0:0 in VMError::report_and_die() 
    at /CLO/Components/JAVA_HOTSPOT/Src/src/share/vm/utilities/vmError.cpp:1008 
#6 0xc00000000e66fc90:0 in os::Hpux::JVM_handle_hpux_signal() 
    at /CLO/Components/JAVA_HOTSPOT/Src/src/os_cpu/hp-ux_ia64/vm/os_hp-ux_ia64.cpp:1051 
#7 <signal handler called> 
#8 0xc00000000c0c5670:1 in TFMTrace::PrintTrace() at tfmtrace.cpp:1064 
#9 0xc00000000c0c4f70:0 in FMLogger::WriteLog() at fmlogger.cpp:90 
... 

(gdb) frame 8 
#8 0xc00000000c0c5670:1 in TFMTrace::PrintTrace() at tfmtrace.cpp:1064 
1064       if(dwDataLen <= dwViewLen) { 
Current language: auto; currently c++ 

(gdb) disas $pc-16*4 $pc+16*4 
... 
0xc00000000c0c5660:0 <TFMTrace::PrintTrace(...)+0xd0> :  ld4.a  r27=[r48]  MII, 
;;; 1064    if(dwDataLen <= dwViewLen) { 
0xc00000000c0c5660:1 <TFMTrace::PrintTrace(...)+0xd1> :  adds   r28=28,r42 
0xc00000000c0c5660:2 <TFMTrace::PrintTrace(...)+0xd2> :  cmp.ne.unc p6=r0,r46;; 
0xc00000000c0c5670:0 <TFMTrace::PrintTrace(...)+0xe0> :  ld4.sa  r26=[r28]  MMI, 
0xc00000000c0c5670:1 <TFMTrace::PrintTrace(...)+0xe1> : (p6) ld4   r31=[r28] 
0xc00000000c0c5670:2 <TFMTrace::PrintTrace(...)+0xe2> :  adds   r46=24,r42;; 
0xc00000000c0c5680:0 <TFMTrace::PrintTrace(...)+0xf0> : (p6) st4  [r35]=r31   MI,I 
0xc00000000c0c5680:1 <TFMTrace::PrintTrace(...)+0xf1> :  adds   r59=36,r42;; 
0xc00000000c0c5680:2 <TFMTrace::PrintTrace(...)+0xf2> :  nop.i   0x0 
0xc00000000c0c5690:0 <TFMTrace::PrintTrace(...)+0x100>:  ld4.c.clr r27=[r48]  MIB, 
;;; 1066    dwLen = dwTrcLen ; 
0xc00000000c0c5690:1 <TFMTrace::PrintTrace(...)+0x101>:  cmp4.eq.unc p6,p8=99,r27 
0xc00000000c0c5690:2 <TFMTrace::PrintTrace(...)+0x102>:  nop.b   0x0;; 
0xc00000000c0c56a0:0 <TFMTrace::PrintTrace(...)+0x110>: (p8) ld4.c.clr r26=[r28]  MMI 
;;; 1067    } 
0xc00000000c0c56a0:1 <TFMTrace::PrintTrace(...)+0x111>: (p6) st4  [r48]=r47 
0xc00000000c0c56a0:2 <TFMTrace::PrintTrace(...)+0x112>:  cmp4.geu.unc p7=r26,r27 
End of assemb 

回答

0

在本機代碼「正常」的崩潰會導致這樣的報告:

C [libc.so.6+0x88368] strstr+0x64a 

注意小(在這種情況下strstr)從功能偏移到崩潰點。

在你的情況,JVM決定地址oxc00000000f675671裏面libtracejni.so,但它可以找到最接近的功能是非常從碰撞點(0x5065eff9 == 1.2 GB的距離)。

你的圖書館真的很大嗎?

如果真的有那麼大的話,你有可能已經把它去掉了,所以符號_NZ10TFM07PrintTraceEPi實際上與問題沒有任何關係(在代碼中是1.2GB)。

您需要知道在崩潰時地址oxc00000000f675671處的代碼是什麼。通常hs_err_pid*.log包含所有共享庫的加載地址列表。找到libtracejni.so的加載地址,從pc中減去它。這應該給你一個類似於0x400...675671的地址,你應該可以在你的未解析版本中查找libtracejni.so

另請注意,崩潰地址以ASCII "C8G"結尾,這可能是也可能不是巧合。

更新2011/08/05。

現在你知道哪個指令墜毀:

0x4000000000099670:1 <TFMTrace::PrintTrace(...)+0xe1>: (p6) ld4   r31=[r28] 

這是從內存r28指向4字節整數的負載。

接下來的問題是:崩潰點的r28的值是什麼(應該記錄在hs_err * .log中),以及它來自哪裏(完整的TFM::PrintTrace的反彙編會告訴你)。

+0

查看後續原始帖子。庫的大小是** 2.1MB **。 – ryanb

+0

查看9月12日更新。我能夠獲得核心文件。 – ryanb