我是來自昨天的彙編程序新手,所以我決定編寫簡單的(正如我所希望的)函數 - memcpy和memset。(N)Asm:memset不能正常工作
; NASM syntax
%include 'types.asm'
section .text
global trn_memset
global _trn_memset
; ==========================================================
; memset
; void trn_memset(void* dest, char val, unsigned plong sz)
%ifdef TRN_64BIT
getarg tax, 1
getarg tcx, 2
getarg tbx, 3
mov tax, [tsp + ((2 * sizeof_LONG) + sizeof_INT)] ; size
mov tcx, [tsp + ( sizeof_LONG + sizeof_INT)] ; tcx = char
mov tbx, [tsp + ( sizeof_LONG )] ; tbx = destination
; dest = [tsp + (1 * sizeof_LONG)]
; src = [tsp + (2 * sizeof_LONG)]
; sz = [tsp + (3 * sizeof_LONG)]
mov tdx, 0 ; remainder
mov tcx, 8 ; tax/tcx
mov tax, [tsp + (3 * sizeof_LONG)] ; size
div tcx
mov tbx, [tsp + (1 * sizeof_LONG)] ; tbx = destination
mov tcx, [tsp + (2 * sizeof_LONG)] ; tcx = val
push tax ; save it to stack, we will use al
mov al, cl
; create 32bit int from 8bit value
; ((((((val << 8) | val) << 8) | val) << 8) | val)
mov cl, al
shl tcx, 8
mov cl, al
shl tcx, 8
mov cl, al
shl tcx, 8
mov cl, al
;cmp tdx, 0
or tdx, tdx
jz .aftercmp_remainder
dec tdx
mov [tbx], cl
inc tbx
jmp .cmp_remainder
pop tdx ; restore result from stack to tdx
; because now value in tdx is useless
;cmp tdx, 0
or tdx, tdx
jz .aftercmp_result
dec tdx
; 2x 32bit set
mov [tbx], ecx
add tbx, 4
mov [tbx], ecx
add tbx, 4
jmp .cmp_result
mov tax, [tsp + (1 * sizeof_LONG)] ; tax = destination
%ifdef WINDOWS
%define TRN_WIN
; TODO make it better
%define TRN_NIX
; C Variable types
%ifdef TRN_64BIT
%define CHAR BYTE
%define SHORT WORD
%define INT DWORD
%define LONG QWORD
%define DCHAR db
%define DSHORT dw
%define DINT dd
%define DLONG dq
%define DLLONG dq
%define sizeof_CHAR 1
%define sizeof_SHORT 2
%define sizeof_INT 4
%define sizeof_LONG 8
%define sizeof_LLONG 8
%define sizeof_DCHAR 1
%define sizeof_DSHORT 2
%define sizeof_DINT 4
%define sizeof_DLONG 8
%define sizeof_DLLONG 8
%define CHAR BYTE
%define SHORT WORD
%define INT DWORD
%define LONG DWORD
%define DCHAR db
%define DSHORT dw
%define DINT dd
%define DLONG dd
%define DLLONG dq
%define sizeof_CHAR 1
%define sizeof_SHORT 2
%define sizeof_INT 4
%define sizeof_LONG 4
%define sizeof_LLONG 8
%define sizeof_DCHAR 1
%define sizeof_DSHORT 2
%define sizeof_DINT 4
%define sizeof_DLONG 4
%define sizeof_DLLONG 8
; Registers
%ifdef TRN_64BIT
%define tax rax
%define tbx rbx
%define tcx rcx
%define tdx rdx
%define tsp rsp
%define tax eax
%define tbx ebx
%define tcx ecx
%define tdx edx
%define tsp esp
; Getting arguments
; - 64bit nix: rdi, rsi, rdx, rcx, r8, r9
; - 64bit win: RCX, RDX, R8, R9
; getarg(out, arg_index)
; - out can be memory or register
; - arg_index is starting from 1
; - bytes_sz - 1,2,4 or 8 bytes
%ifdef TRN_64BIT
%ifdef TRN_WIN
%macro getarg 2
%if %2 = 1
mov %1, rcx
%elif %2 = 2
mov %1, rdx
%elif %2 = 3
mov %1, r8
%elif %2 = 4
mov %1, r9
%elif %2 > 4
mov %1, [tsp + ((%2 - 4) * 8)]
%macro getarg 2
%if %2 = 1
mov %1, rdi
%elif %2 = 2
mov %1, rsi
%elif %2 = 3
mov %1, rdx
%elif %2 = 4
mov %1, rcx
%elif %2 = 5
mov %1, r8
%elif %2 = 6
mov %1, r9
%elif %2 > 6
mov %1, [tsp + ((%2 - 6) * 8)]
; Custom commands
%macro pushsz 1
sub tsp, %1
%macro popsz 1
add tsp, %1
%macro pushreg 0
push LONG tdx
push LONG tcx
push LONG tbx
push LONG tax
%macro popreg 0
pop LONG tax
pop LONG tbx
pop LONG tcx
pop LONG tdx
%macro print 1
push LONG %1
call printf
pop LONG eax
現在,我真的不知道我做錯了。我在那裏有什麼錯誤嗎? 32位
mov eax, DWORD [esp + 12] ; size
mov ecx, DWORD [esp + 8] ; tcx = val
mov ebx, DWORD [esp + 4] ; tbx = destination
or eax, eax
jz .aftermemset_loop
mov [ebx], BYTE cl
dec eax
inc ebx
mov eax, DWORD [esp + 4] ; Return destionation
你嘗試縮小的問題來與調試器?另外,如果你有工作C版本,你可以通過編譯器輸出程序集來創建程序集版本,然後調整結果以獲得友好的標籤和效率。 – lurker
調試器?我不知道如何,我正在通過終端進行編譯。但請參閱更新 – Possible
您有很多代碼。你需要以某種方式縮小問題的範圍。如果您沒有或不知道如何使用調試器,還有其他技術,例如插入代碼位以顯示中間寄存器值以查看它們是否有意義。 – lurker