80
\$\begingroup\$

Story time: A week ago I found a question about optimizing Assembly code, then I remembered how awesome Brainfuck was, and the match was made very quickly. I decided to write a Brainfuck Interpreter in Assembly!

For this I have used the NASM assembler with x86 Assembly and is intended to run on an Intel CPU and on the Windows OS. I'm sure you can make it run on other combinations if you really want to. I'm also using the Borland 5.5 C Compiler, which is quite old, but the Assembly Book used it and I decided not to deviate from it.

You can compile the code with the following:

  1. nasm -f obj bf-interpreter.asm
  2. bcc32 bf-interpreter.obj

I wrote the interpreter with performance as main focus, yet I have not read a lot about optimization yet, so I'm not surprised if it turns out to be quite horrible code. As the code is linking with the C stdlib I'm able to call methods from the C stdlib, and essentially only the EBX, EDI and ESI registers are considered "safe", that means they won't be overwritten by calls to the C stdlib, my implementation uses that convention a lot.

This version of the BF Interpreter reads from a file that is passed as argument to the program.

extern _malloc, _calloc, _printf, _fdopen, _fprintf, _scanf, _getchar, _putchar, _fopen, _fseek, _ftell, _rewind, _fgetc, _fclose

%define STDERR                  2
%define SEEK_END                2
%define EOF                     -1    

%define BF_MEMORY_CELL_AMOUNT   30000

%define BF_PROGRAM_END          255

%define JUMP_PAST_CODE          91
%define JUMP_BACK_CODE          93

segment _DATA public align=4 class=DATA use32

format_int              db      "%d", 0

write_mode              db      "w", 0
read_mode               db      "r", 0

bfprogram_jump_table    times 43 dd run_program_loop_end, 
                        dd bfprogram_memory_inc, 
                        dd bfprogram_input, 
                        dd bfprogram_memory_dec,
                        dd bfprogram_output,
                        times 13 dd run_program_loop_end,
                        dd bfprogram_pointer_left,
                        dd run_program_loop_end,
                        dd bfprogram_pointer_right,
                        times 28 dd run_program_loop_end,
                        dd bfprogram_jump_past,
                        dd run_program_loop_end,
                        dd bfprogram_jump_back,
                        times 34 dd run_program_loop_end,
                        times 127 dd run_program_loop_end,      ; 128 (127 + next line) invalid ASCII characters
                        dd run_program_done                     ; if jump address is 255 (BF_PROGRAM_END), then we're done'

error_noargument        db      "Fatal: No argument was provided.", 0
error_notexist          db      "Fatal: The file does not exist.", 0
error_outofmemory       db      "Fatal: The Operating System does not have enough memory available.", 0

segment _BSS public align=4 class=BSS use32

file_name               resd 1

bf_program_size         resd 1
bf_program              resd 1    

bf_memory               resd 1

group DGROUP _BSS _DATA

segment _TEXT public align=1 class=CODE use32
        global  _main
_main:
    mov     ebp, esp                            ; save original stack pointer
;
; read command line arguments
;
    mov     eax, [ebp + 4]                      ; argc

    cmp     eax, 1
    je      error_exit_noargument

    mov     eax, [ebp + 8]                      ; *argv    
    mov     eax, [eax + 4]                      ; argv[1]

    mov     [file_name], eax
;
; open file
;
    push    read_mode
    push    eax
    call    _fopen
    add     esp, 8

    test    eax, eax
    jz      error_exit_notexist

    mov     edi, eax                            ; store file pointer
;
; get file size
;
    push    SEEK_END
    push    0
    push    edi
    call    _fseek
    add     esp, 12

    push    edi
    call    _ftell
    add     esp, 4

    inc     eax                                 ; reserve one extra byte for the BF_PROGRAM_END code
    mov     [bf_program_size], eax
;
; rewind file
;
    push    edi
    call    _rewind
    add     esp, 4
;
; read Brainfuck program from file
;            
    push    dword [bf_program_size]
    call    _malloc
    add     esp, 4

    test    eax, eax
    jz      error_exit_outofmemory

    mov     [bf_program], eax    
    mov     esi, eax

store_program_loop:
    push    edi
    call    _fgetc
    add     esp, 4

    cmp     eax, EOF                            ; stop reading when end of file reached
    jz      short store_program_done

    mov     [esi], al

    inc     esi
    jmp     short store_program_loop

store_program_done:
    mov     [esi], byte BF_PROGRAM_END          ; store program end special code
;
; close file
;
    push    edi
    call    _fclose
    add     esp, 4
;
; zero-initialize BF memory cells
;
    push    dword 1
    push    BF_MEMORY_CELL_AMOUNT
    call    _calloc
    add     esp, 8

    test    eax, eax
    jz      error_exit_outofmemory

    mov     [bf_memory], eax
;
; run the BF program
;
    mov     esi, eax                            ; current memory address
    mov     edi, [bf_program]                   ; current program address    

run_program_loop:        
    movzx   eax, byte [edi]

    jmp     [bfprogram_jump_table + 4*eax]      ; addresses are dword, ASCII is translated to byte offsets

run_program_loop_end:
    inc     edi

    jmp     short run_program_loop

run_program_done:
    jmp     normal_exit

bfprogram_pointer_right:
    inc     esi

    jmp     run_program_loop_end

bfprogram_pointer_left:
    dec     esi

    jmp     run_program_loop_end

bfprogram_memory_inc:
    mov     al, [esi]
    inc     al
    mov     [esi], al

    jmp     run_program_loop_end

bfprogram_memory_dec:
    mov     al, [esi]
    dec     al
    mov     [esi], al

    jmp     run_program_loop_end

bfprogram_output:
    mov     al, [esi]

    push    eax                                 ; safe to do because eax is 000000xxh before the prior mov
    call    _putchar
    add     esp, 4

    jmp     run_program_loop_end

bfprogram_input:
    call    _getchar

    mov     [esi], al

    jmp     run_program_loop_end

bfprogram_jump_past:
    mov     al, [esi]

    test    al, al                              ; check if memory cell is zero
    jnz     run_program_loop_end                ; if not zero, move to next instruction
;
; find matching ]
;
    mov     ebx, 1                              ; when counter reaches zero the ] is found where we need to jump past

bfprogram_jump_past_loop:
    inc     edi
    mov     al, [edi]

    cmp     al, JUMP_PAST_CODE
    jz      short bfprogram_jump_past_loop_found_jump_past

    cmp     al, JUMP_BACK_CODE
    jz      short bfprogram_jump_past_loop_found_jump_back

    jmp     short bfprogram_jump_past_loop

bfprogram_jump_past_loop_found_jump_past:
    inc     ebx

    jmp     short bfprogram_jump_past_loop

bfprogram_jump_past_loop_found_jump_back:
    dec     ebx

    test    ebx, ebx
    jz      run_program_loop_end                ; jumped over matching ]

    jmp     short bfprogram_jump_past_loop

bfprogram_jump_back:
    mov     al, [esi]

    test    al, al                              ; check if memory cell is zero
    jz      run_program_loop_end                ; if zero, move to next instruction
;
; find matching [
;
    mov     ebx, 1                              ; when counter reaches zero the [ is found where we need to jump back to

bfprogram_jump_back_loop:
    dec     edi
    mov     al, [edi]

    cmp     al, JUMP_BACK_CODE
    jz      short bfprogram_jump_back_loop_found_jump_back

    cmp     al, JUMP_PAST_CODE
    jz      short bfprogram_jump_back_loop_found_jump_past

    jmp     short bfprogram_jump_back_loop

bfprogram_jump_back_loop_found_jump_back:
    inc     ebx

    jmp     short bfprogram_jump_back_loop

bfprogram_jump_back_loop_found_jump_past:
    dec     ebx

    test    ebx, ebx
    jz      run_program_loop_end                ; jumped back to matching [

    jmp     short bfprogram_jump_back_loop

error_exit_noargument:
    push    write_mode
    push    2
    call    _fdopen
    add     esp, 8

    push    error_noargument
    push    eax
    call    _fprintf
    add     esp, 8
    mov     eax, -1

    jmp     short exit

error_exit_notexist:
    push    write_mode
    push    2
    call    _fdopen
    add     esp, 8

    push    error_notexist
    push    eax
    call    _fprintf
    add     esp, 8
    mov     eax, -2

    jmp     short exit

error_exit_outofmemory:
    push    write_mode
    push    2
    call    _fdopen
    add     esp, 8

    push    error_outofmemory
    push    eax
    call    _fprintf
    add     esp, 8
    mov     eax, -3

    jmp     short exit

normal_exit:
    mov     eax, 0

exit:
    ret
\$\endgroup\$
6
  • 26
    \$\begingroup\$ Next up: a x86 interpreter written in brainfuck! \$\endgroup\$
    – Cort Ammon
    Commented Nov 15, 2016 at 2:04
  • 2
    \$\begingroup\$ Upvote for the first sentence alone. \$\endgroup\$
    – Buhb
    Commented Nov 15, 2016 at 8:21
  • 4
    \$\begingroup\$ "I wrote the interpreter with performance as main focus" -- You may be interested to know that optimising implementations typically do not interpret and execute characters one by one, but rather convert the whole program into machine code, performing optimisations during that conversion (e.g. converting ++ to increment by 2, rather than 2 separate increment instructions, converting [-] to just set the cell to 0 without any loop, etc.), and then execute the generated code. But that would require a big rewrite and for what you're after, what you have is more than enough. \$\endgroup\$
    – hvd
    Commented Nov 15, 2016 at 13:23
  • 2
    \$\begingroup\$ An interpreter in x86 machine code already exists, thanks to your friendly PPCG neighbors. If you can get yours shorter, feel free to post it there. \$\endgroup\$
    – mbomb007
    Commented Nov 15, 2016 at 16:55
  • \$\begingroup\$ we want a bf jit engine that generates assembly. This project was waaaaay to easy \$\endgroup\$
    – pm100
    Commented Nov 16, 2016 at 2:09

2 Answers 2

45
\$\begingroup\$
  • The comment ; *argv should be ; argv, since you are not yet dereferencing the pointer.

  • After a cmp instruction, you should prefer je over jz, since it is nicer to the human reader.

    Oh, the old times, where you had to tell the assembler to jmp short because it couldn't figure it out on its own. :)

  • In the run the BF program section, I would have changed esi and edi so that the s register points to the source code and the d register points to the data. But that is just for fun.

  • In bfprogram_memory_inc, you can just say inc byte [esi]. The inc instruction has an r/m8 encoding that allows incrementing a value in memory directly, without needing the indirection.

  • The safe to do optimization is nice.

  • Since you rely on the ASCII encoding anyway, you should define JUMP_PAST_CODE as '[' instead of 91, if possible. Not every reader knows the ASCII codes by heart.

  • Does NASM support local labels? That would make the label names in bfprogram_jump_past_loop a little shorter and thereby easier to read.

  • Instead of calling _fdopen, is stderr a linker-visible symbol so that you can access it directly?

  • Since your error messages don't contain percent characters, you should call fputs instead of fprintf.

  • The error messages should include a trailing newline.

    (Or are they missing because Windows adds this newline anyway? If so, then it's ok, since it is not necessary to write strictly conforming ANSI C code when you target a specific platform.)

  • The bfprogram_jump_table is very large. You can probably make it smaller by encoding the jump target relative to a known location, which should only take a single byte per entry. times 43 db 0, db bfprogram_memory_inc - run_program_loop_end, ....

  • format_int seems to be unused.

  • STDERR should be called STDERR_FILENO (although that name comes from POSIX, not from Windows, it is still widely known).

  • Since 0xFF is a valid instruction in a Brainfuck program, using it as the EOF marker is not a good idea. When loading the code from the file, you could replace every character above 93 or below 43 with 44. This change would also allow you to make the jump table smaller.

Overall, it's a nice little program and the code does the obvious thing. It's a pleasure to read.

\$\endgroup\$
1
  • 4
    \$\begingroup\$ If by local labels you mean .foo_bar:, then yes; NASM does support them. \$\endgroup\$
    – SirPython
    Commented Nov 14, 2016 at 22:53
4
\$\begingroup\$

Jump instructions are slow and should be minimized. One jump instruction in the interpreter loop can be removed as follows:

    dec      edi       ; on first pass, compensate for inc edi
run_program_loop:      ; jump here after executing an opcode
run_program_loop_end:  ; old label for reference
    inc     edi
    movzx   eax, byte [edi]

    jmp     [bfprogram_jump_table + 4*eax]      ; addresses are dword, ASCII is translated to byte offsets
\$\endgroup\$

Not the answer you're looking for? Browse other questions tagged or ask your own question.