2012-03-10 45 views
8

我正在嘗試學習 x86_64 彙編,今天在嘗試標準輸入輸出時,偶然發現了這篇帖子:Learning assembly - echo program name。我該如何用同樣的方式(使用 SYSCALL 指令)從 STDIN 讀取輸入?特別是,如果我知道輸入始終是一個整數,並且想把它讀入一個寄存器,該怎麼做?如何在 x86_64 彙編中讀取 STDIN 的輸入?

編輯: @Daniel Kozar 的答案幫助我理解了 STDIN 和 STDOUT 在 Linux 上是如何配合 SYSCALL 指令工作的。我試着寫了一個小程序,它從控制檯輸入中讀取一個數字,並打印與該數字對應的 ASCII 字符。例如,輸入 65 應該得到輸出 A,後面再跟一個換行符。希望它也能幫到其他人 :-)

;-----------------------------------------------------------------------
; Target: x86-64 Linux, NASM syntax, raw syscalls (no libc).
;
; Reads exactly two ASCII decimal digits from stdin, combines them into
; the value tens*10 + units, and writes the single byte with that value
; to stdout followed by a newline (input "65" prints "A").
;
; Exit status: 0 on success, 1 if read() did not deliver two bytes.
;-----------------------------------------------------------------------
SYS_READ    equ 0
SYS_WRITE   equ 1
SYS_EXIT    equ 60
STDIN       equ 0
STDOUT      equ 1
NUM_DIGITS  equ 2                   ; number of input characters consumed
NEWLINE     equ 10
SCRATCH     equ 16                  ; bytes of stack reserved as I/O buffer

section .text
    global _start

_start:
    sub     rsp, SCRATCH            ; own buffer: do not scribble over argc/argv

    ; read(STDIN, rsp, NUM_DIGITS)
    mov     edi, STDIN
    mov     rsi, rsp
    mov     edx, NUM_DIGITS
    mov     eax, SYS_READ
    syscall
    cmp     rax, NUM_DIGITS
    jne     .fail                   ; -errno, EOF or short read: nothing to convert

    ; eax = (buf[0] - '0') * 10 + (buf[1] - '0'); only the low byte is used
    movzx   eax, byte [rsp]         ; tens character, zero-extended
    sub     eax, '0'
    lea     eax, [rax + rax*4]      ; tens * 5
    movzx   ecx, byte [rsp + 1]     ; units character, zero-extended
    sub     ecx, '0'
    lea     eax, [rcx + rax*2]      ; tens * 10 + units

    ; Build "<char>\n" in the buffer and emit it with a single write.
    mov     [rsp], al
    mov     byte [rsp + 1], NEWLINE

    ; write(STDOUT, rsp, 2)
    mov     edi, STDOUT
    mov     rsi, rsp
    mov     edx, 2
    mov     eax, SYS_WRITE
    syscall

    xor     edi, edi                ; exit status 0
.exit:
    mov     eax, SYS_EXIT
    syscall                         ; does not return

.fail:
    mov     edi, 1                  ; exit status 1
    jmp     .exit

編輯2:這是我的嘗試:從標準輸入讀取一個無符號 32 位十進制整數,將它存儲爲整數以便進行計算,然後再把它寫回標準輸出。

;-----------------------------------------------------------------------
; Target: x86-64 Linux, NASM syntax, raw syscalls (no libc).
;
; Reads up to MAX_DIGITS bytes from stdin, parses the leading run of
; ASCII decimal digits into an unsigned integer, then writes
; "\n<number in decimal>\n" to stdout.
;
; Parsing stops at the first non-digit byte (normally the newline) or
; when the bytes returned by read() are exhausted. An empty input is
; treated as the value 0.
;
; Exit status: 0 on success, 1 if read() reported an error.
;-----------------------------------------------------------------------
SYS_READ    equ 0
SYS_WRITE   equ 1
SYS_EXIT    equ 60
STDIN       equ 0
STDOUT      equ 1
MAX_DIGITS  equ 10                  ; 4294967295 (largest u32) has 10 digits
NEWLINE     equ 10
BUF_SIZE    equ 32                  ; input at the front, output built from the back

section .text
     global _start

_start:
     sub     rsp, BUF_SIZE          ; own buffer: do not scribble over argc/argv

; Read from STDIN: read(STDIN, rsp, MAX_DIGITS)
     mov     edi, STDIN
     mov     rsi, rsp
     mov     edx, MAX_DIGITS
     mov     eax, SYS_READ
     syscall
     test    rax, rax
     js      .fail                  ; negative result is -errno

; ASCII to integer conversion
;   rax = value accumulated so far
;   rcx = input bytes not yet examined
;   rsi = address of the next input byte
     mov     rcx, rax
     mov     rsi, rsp
     xor     eax, eax
.parse:
     test    rcx, rcx
     jz      .parsed                ; consumed everything read() gave us
     movzx   edx, byte [rsi]        ; load ONE byte, zero-extended
     sub     edx, '0'
     cmp     edx, 9
     ja      .parsed                ; unsigned compare: newline / non-digit ends the number
     lea     rax, [rax + rax*4]     ; value * 5
     lea     rax, [rdx + rax*2]     ; value * 10 + digit
     inc     rsi
     dec     rcx
     jmp     .parse

; Integer to ASCII conversion. Digits come out least-significant first,
; so the text is built backwards from the end of the buffer.
;   rsi = address of the first byte of the text produced so far
.parsed:
     lea     rsi, [rsp + BUF_SIZE - 1]
     mov     byte [rsi], NEWLINE    ; trailing newline
     mov     ecx, 10                ; divisor for base 10
.format:
     xor     edx, edx               ; div divides rdx:rax, so rdx must be 0
     div     rcx                    ; rax = quotient, rdx = next digit
     add     edx, '0'
     dec     rsi
     mov     [rsi], dl
     test    rax, rax
     jnz     .format                ; runs at least once, so 0 prints as "0"
     dec     rsi
     mov     byte [rsi], NEWLINE    ; leading newline

; write(STDOUT, rsi, end_of_buffer - rsi)
     lea     rdx, [rsp + BUF_SIZE]
     sub     rdx, rsi
     mov     edi, STDOUT
     mov     eax, SYS_WRITE
     syscall

; Exit
     xor     edi, edi               ; exit status 0
.exit:
     mov     eax, SYS_EXIT
     syscall                        ; does not return

.fail:
     mov     edi, 1                 ; exit status 1
     jmp     .exit
+1

你在使用什麼操作系統?Windows?DOS?Linux? – Gabe 2012-03-10 13:26:58

+0

使用'syscall'取決於操作系統。 – hirschhornsalz 2012-03-10 13:49:27

+0

我正在使用Linux。確切的代碼適用於我。 – 2012-03-10 13:53:31

回答

5

首先:彙編語言中沒有變量,只有指向某些數據的標籤。數據在設計上是無類型的——至少在真正的彙編器中是這樣,HLA(高級彙編器,例如 MASM)除外。

從標準輸入讀取是通過使用系統調用read來實現的。我假設你已經閱讀過你提到的文章,並且知道如何在x64 Linux中調用系統調用。假設您正在使用NASM(或類似於其語法的東西),並且您希望將stdin的輸入存儲在地址buffer(您已預留BUFSIZE字節的內存),則執行系統調用將如下所示:

; read(STDIN, buffer, BUFSIZE) via a raw Linux x86-64 syscall.
; `buffer` and `BUFSIZE` are expected to be defined by the surrounding program.
xor eax, eax            ; rax <- 0 (read syscall number)
xor edi, edi            ; rdi <- 0 (stdin file descriptor)
lea rsi, [rel buffer]   ; rsi <- address of the buffer (RIP-relative, PIE-safe)
mov edx, BUFSIZE        ; rdx <- size of the buffer
syscall                 ; execute; result is returned in rax (negative = -errno)

返回時,rax 將包含系統調用的結果。如果您想進一步瞭解它的工作原理,請查閱 man 2 read。

解析彙編語言中的整數並不那麼簡單。由於read只給出顯示在標準輸入上的純二進制數據,因此您需要自己轉換整數值。請記住,鍵盤上鍵入的內容將作爲ASCII碼(或者您可能使用的任何其他編碼 - 我在此假設爲ASCII)發送到應用程序。因此,您需要將數據從ASCII編碼的十進制轉換爲二進制。

把這樣的表示轉換成普通無符號整數的函數,用 C 語言寫出來大致如下:

/*
 * Parse an unsigned decimal number from ASCII text.
 *
 * str    - text to parse; it does not need to be NUL-terminated.
 * strlen - number of bytes available at str.
 *
 * Returns the value of the leading run of decimal digits found in the
 * first strlen bytes of str. Parsing stops at the first byte that is not
 * '0'..'9' (for example a trailing newline), so empty or non-numeric
 * input yields 0. Values too large for unsigned int wrap around, as
 * unsigned arithmetic does.
 */
unsigned int parse_ascii_decimal(const char *str, unsigned int strlen)
{
    unsigned int ret = 0;
    unsigned int i;

    /* Left to right: each new digit shifts the previous value up one decimal place. */
    for (i = 0; i < strlen; ++i)
    {
        /* Bytes below '0' become huge after the unsigned cast, so one
           comparison rejects everything outside '0'..'9'. */
        unsigned int digit = (unsigned int)((unsigned char)str[i] - '0');

        if (digit > 9)
            break;
        ret = ret * 10 + digit;
    }
    return ret;
}

把它轉換成彙編(並擴展爲支持有符號數)就留給讀者作爲練習了。 :)

最後但並非最不重要的一點——write 系統調用要求您始終傳入一個指向緩衝區的指針,該緩衝區存放着要寫入給定文件描述符的數據。因此,如果你想輸出一個換行符,除了創建一個包含換行符序列的緩衝區外別無它法。

+0

你太棒了!謝謝!是的,我正在使用 NASM。所以,我可以不分配緩衝區,而是直接讀到堆棧上嗎?比如 mov rsi,[rsp + 8]?順便說一下,我會去做那個練習的 ;-) – 2012-03-11 01:39:59

+0

'mov rsi,[rsp + 8]' 會把堆棧上的實際內容移動到寄存器中。你想要的是地址,在這種情況下,'lea rsi,[rsp + 8]' 可以正常工作。是的,您也可以用堆棧來滿足所有的讀/寫需求。 – 2012-03-11 12:42:42

相關問題