2015-04-12 42 views
0

我是來自昨天的彙編程序新手,所以我決定編寫簡單的(正如我所希望的)函數 - memcpy和memset。(N)Asm:memset不能正常工作

Memcpy運行良好,但memset不是,只有有時。一定有小錯,但我找不到它。

memset的

; NASM syntax 


%include 'types.asm' 


SEGMENT .CODE 
section .text 

global trn_memset 
global _trn_memset 


; ========================================================== 
; memset 
; void trn_memset(void* dest, char val, unsigned plong sz) 
_trn_memset: 
trn_memset: 
    %ifdef TRN_64BIT 
     getarg tax, 1 
     getarg tcx, 2 
     getarg tbx, 3 
    %else 
     mov tax, [tsp + ((2 * sizeof_LONG) + sizeof_INT)] ; size 
     mov tcx, [tsp + ( sizeof_LONG + sizeof_INT)] ; tcx = char 
     mov tbx, [tsp + ( sizeof_LONG    )] ; tbx = destination 
    %endif 

    ; dest = [tsp + (1 * sizeof_LONG)] 
    ; src = [tsp + (2 * sizeof_LONG)] 
    ; sz = [tsp + (3 * sizeof_LONG)] 
    mov tdx, 0       ; remainder 
    mov tcx, 8       ; tax/tcx 
    mov tax, [tsp + (3 * sizeof_LONG)]  ; size 
    div tcx 
    ; 
    mov tbx, [tsp + (1 * sizeof_LONG)]  ; tbx = destination 
    mov tcx, [tsp + (2 * sizeof_LONG)]  ; tcx = val 
    push tax       ; save it to stack, we will use al 
    mov al, cl 
    ; create 32bit int from 8bit value 
    ; ((((((val << 8) | val) << 8) | val) << 8) | val) 
    mov cl, al 
    shl tcx, 8 
    mov cl, al 
    shl tcx, 8 
    mov cl, al 
    shl tcx, 8 
    mov cl, al 
    .cmp_remainder: 
    ;cmp tdx, 0 
    or tdx, tdx 
    jz .aftercmp_remainder 
    dec tdx 
    mov [tbx], cl 
    inc tbx 
    jmp .cmp_remainder 
    .aftercmp_remainder: 
    pop tdx       ; restore result from stack to tdx 
             ; because now value in tdx is useless 
    .cmp_result: 
    ;cmp tdx, 0 
    or tdx, tdx 
    jz .aftercmp_result 
    dec tdx 
    ; 2x 32bit set 
    mov [tbx], ecx 
    add tbx, 4 
    ; 
    mov [tbx], ecx 
    add tbx, 4 
    ; 
    jmp .cmp_result 
    .aftercmp_result: 
    mov tax, [tsp + (1 * sizeof_LONG)]  ; tax = destination 
    ret 

types.asm

%ifdef WINDOWS 
    %define TRN_WIN 
%else 
    ; TODO make it better 
    %define TRN_NIX 
%endif 


; C Variable types 
%ifdef TRN_64BIT 
    %define CHAR BYTE 
    %define SHORT WORD 
    %define INT DWORD 
    %define LONG QWORD 
    %define LLONG QWORD 

    %define DCHAR db 
    %define DSHORT dw 
    %define DINT dd 
    %define DLONG dq 
    %define DLLONG dq 

    %define sizeof_CHAR 1 
    %define sizeof_SHORT 2 
    %define sizeof_INT 4 
    %define sizeof_LONG 8 
    %define sizeof_LLONG 8 

    %define sizeof_DCHAR 1 
    %define sizeof_DSHORT 2 
    %define sizeof_DINT 4 
    %define sizeof_DLONG 8 
    %define sizeof_DLLONG 8 
%else 
    %define CHAR BYTE 
    %define SHORT WORD 
    %define INT DWORD 
    %define LONG DWORD 
    %define LLONG QWORD 

    %define DCHAR db 
    %define DSHORT dw 
    %define DINT dd 
    %define DLONG dd 
    %define DLLONG dq 

    %define sizeof_CHAR 1 
    %define sizeof_SHORT 2 
    %define sizeof_INT 4 
    %define sizeof_LONG 4 
    %define sizeof_LLONG 8 

    %define sizeof_DCHAR 1 
    %define sizeof_DSHORT 2 
    %define sizeof_DINT 4 
    %define sizeof_DLONG 4 
    %define sizeof_DLLONG 8 
%endif 


; Registers 
%ifdef TRN_64BIT 
    %define tax rax 
    %define tbx rbx 
    %define tcx rcx 
    %define tdx rdx 
    %define tsp rsp 
%else 
    %define tax eax 
    %define tbx ebx 
    %define tcx ecx 
    %define tdx edx 
    %define tsp esp 
%endif 


; Getting arguments 
; - 64bit nix: rdi, rsi, rdx, rcx, r8, r9 
; - 64bit win: RCX, RDX, R8, R9 
; getarg(out, arg_index) 
; - out can be memory or register 
; - arg_index is starting from 1 
; - bytes_sz - 1,2,4 or 8 bytes 

%ifdef TRN_64BIT 
    %ifdef TRN_WIN 
     %macro getarg 2 
      %if %2 = 1 
       mov %1, rcx 
      %elif %2 = 2 
       mov %1, rdx 
      %elif %2 = 3 
       mov %1, r8 
      %elif %2 = 4 
       mov %1, r9 
      %elif %2 > 4 
       mov %1, [tsp + ((%2 - 4) * 8)] 
      %endif 
     %endmacro 
    %else 
     %macro getarg 2 
      %if %2 = 1 
       mov %1, rdi 
      %elif %2 = 2 
       mov %1, rsi 
      %elif %2 = 3 
       mov %1, rdx 
      %elif %2 = 4 
       mov %1, rcx 
      %elif %2 = 5 
       mov %1, r8 
      %elif %2 = 6 
       mov %1, r9 
      %elif %2 > 6 
       mov %1, [tsp + ((%2 - 6) * 8)] 
      %endif 
     %endmacro 
    %endif 
%endif 



; Custom commands 
%macro pushsz 1 
    sub tsp, %1 
%endmacro 

%macro popsz 1 
    add tsp, %1 
%endmacro 


%macro pushreg 0 
    push LONG tdx 
    push LONG tcx 
    push LONG tbx 
    push LONG tax 
%endmacro 

%macro popreg 0 
    pop LONG tax 
    pop LONG tbx 
    pop LONG tcx 
    pop LONG tdx 
%endmacro 

%macro print 1 
    push LONG %1 
    call printf 
    pop LONG eax 
%endmacro 

編輯

現在,我真的不知道我做錯了。我在那裏有什麼錯誤嗎? 32位

_trn_memset: 
trn_memset: 
    mov eax, DWORD [esp + 12]  ; size 
    mov ecx, DWORD [esp + 8]  ; tcx = val 
    mov ebx, DWORD [esp + 4]  ; tbx = destination 

    .memset_loop: 
    or eax, eax 
    jz .aftermemset_loop 
    mov [ebx], BYTE cl 
    dec eax 
    inc ebx 
    .aftermemset_loop: 
    mov eax, DWORD [esp + 4]   ; Return destionation 
    ret 

當我編譯的內核&測試程序使用C的memset的和彙編語言編寫的memcpy,它的工作,但是當我包括裝配memset的版本,有打印時壞的格式。

+1

你嘗試縮小的問題來與調試器?另外,如果你有工作C版本,你可以通過編譯器輸出程序集來創建程序集版本,然後調整結果以獲得友好的標籤和效率。 – lurker

+0

調試器?我不知道如何,我正在通過終端進行編譯。但請參閱更新 – Possible

+0

您有很多代碼。你需要以某種方式縮小問題的範圍。如果您沒有或不知道如何使用調試器,還有其他技術,例如插入代碼位以顯示中間寄存器值以查看它們是否有意義。 – lurker

回答

0

代碼是好的,除了在開始時所有使用的寄存器(除了第一個eax/rax)必須是push才能堆棧,並且在返回之前它們必須是pop

所以,最簡單的可能的memset爲32位的樣子:

_trn_memset: 
trn_memset: 
    push ebp 
    mov ebp, esp 
    add ebp, 4 ; We pushed one register to stack, count it 
    push ebx ; Save used registers 
    push ecx 
    mov eax, DWORD [ebp + 12]  ; size 
    mov ecx, DWORD [ebp + 8]  ; tcx = val 
    mov ebx, DWORD [ebp + 4]  ; tbx = destination 

    .memset_loop: 
    or eax, eax ; Fast compare to 0 
    jz .aftermemset_loop 
    mov [ebx], BYTE cl 
    dec eax 
    inc ebx 
    .aftermemset_loop: 
    mov eax, DWORD [ebp + 4]   ; Return destionation 
    pop ecx ; Restore used registers 
    pop ebx 
    pop ebp 
    ret