Compiler construction 从硬件编写低级语言
我感兴趣的是如何编译/创建一种非常简单的语言(即brainfuck),而无需借助unix系统调用等高级函数。我想在一些依赖CPU的低级汇编中为该语言编写一个编译器,这样我就可以用简单的语言提供源代码,并最终得到一个二进制代码。不确定这是否清楚,但基本问题是如何在没有硬件中尚未出现的任何帮助的情况下将源代码转换为二进制代码 编辑:一个更简洁的问题陈述 给定: -硬件(主板/CPU等) 未给出: -UNIX/DOS -C/FORTRAN/任何其他语言 我该如何实现像brainfuck这样的简单语言 我知道有很多更实用的编译方法,但出于教育目的,我对此感兴趣Compiler construction 从硬件编写低级语言,compiler-construction,low-level,hardware-programming,Compiler Construction,Low Level,Hardware Programming,我感兴趣的是如何编译/创建一种非常简单的语言(即brainfuck),而无需借助unix系统调用等高级函数。我想在一些依赖CPU的低级汇编中为该语言编写一个编译器,这样我就可以用简单的语言提供源代码,并最终得到一个二进制代码。不确定这是否清楚,但基本问题是如何在没有硬件中尚未出现的任何帮助的情况下将源代码转换为二进制代码 编辑:一个更简洁的问题陈述 给定: -硬件(主板/CPU等) 未给出: -UNIX/DOS -C/FORTRAN/任何其他语言 我该如何实现像brainfuck这样的简单语言
对不起,如果这个问题是多余的或显而易见的——我不是计算机科学家,所以也许只是不知道该用哪些正确的术语才能在网上找到解决办法。如果有人能提供有关该主题的链接或资料,我将不胜感激。 学习编译器的经典教材是《龙书》(Compilers: Principles, Techniques, and Tools)。不过,它面向的是更……复杂的语言。你可能并不想涉及上下文无关文法解析之类的内容(尽管我还是推荐这本书:它很难,但非常棒)。
考虑到这一点,您可能希望首先为一种非常简单的语言找到一个解释器或编译器——可能是Brainfuck本身,可能是一些更有用的东西,比如Scheme实现。阅读,分析,了解它的作用。实现编译器使用的任何较低级别的库函数,调整其代码生成器以输出您想要针对的任何品牌的机器代码,就完成了。查看wikipedia上的描述这不是一项困难的任务。我可能还是会用一些你知道的语言开始,也许喜欢,也许不喜欢。C是个不错的选择。文件I/O是一个小型或大型项目,取决于平台等。稍后,请使用该语言的“源代码”进行编译。对于该源中的每个字符,执行该任务
> ++ptr;
< --ptr;
+ ++*ptr;
etc
减号
ldr r1,[r0]
sub r1,#1
str r1,[r0]
r0 是保存 ptr 的寄存器,r1 只是用于中转的临时寄存器
如果您确实反对使用printf之类的调用,那么将此代码的输出设置为字节数组,这些字节是asm源的ascii,输出每个字符a、d、d、空格、r、0、逗号、#、1、cr、lf等。在asm和一些高级语言中相当容易实现。如果您想直接使用二进制,那么只需输出机器代码,甚至更容易
将源字符串放入这个编译器,并将输出放入某个文件中,以便以后执行,这可能需要进行系统调用。如果在同一平台上运行,可以避免输出为文件,并且可以执行自修改代码,即在某个地址构建机器代码,然后在完成解析后跳转到该地址执行
编写这个答案所花费的时间,比用C或asm实现解决方案所花费的时间要长很多倍。 你到底有什么困难?你可以很容易地将brainfuck源代码编译成DOS的.COM应用程序(你还需要NASM,或者一些额外的代码来生成指令操作码并计算跳转偏移)。下面是一个稍加修改的bf解释器,它已被改写成某种意义上的编译器:
// file: bfcompil.c
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// Maximum number of Brainfuck commands main() stores from the input file.
#define MAX_CODE_SIZE 30000
// Command buffer: main() appends every accepted command character here.
char code[MAX_CODE_SIZE];
// Cursor main() walks over code[] while emitting the assembly listing.
char* pc = &code[0];
// One past the last command stored in code[].
char* pcEnd = &code[0];
// NOTE(review): MAX_DATA_SIZE, data[] and pd are not referenced by main() in
// this file; presumably left over from the interpreter this tool was derived
// from - confirm before removing.
#define MAX_DATA_SIZE 30000
char data[MAX_DATA_SIZE] = { 0 };
char* pd = &data[0];
// Structures for quick bracket matching
// brStack holds the code[] indices of '[' commands still waiting for a ']'.
unsigned brStack[MAX_CODE_SIZE];
// Number of entries currently on brStack.
unsigned brSptr = 0;
// brMatch[i] is the code[] index of the bracket that pairs with the bracket
// at code[i]; main() fills it in for both the '[' and the ']' of each pair.
unsigned brMatch[MAX_CODE_SIZE];
/*
 * bfcompil: translate a Brainfuck source file into NASM source for a DOS
 * .COM program, written to stdout.
 *
 * argv[1] names the Brainfuck file. White space is skipped, the eight
 * Brainfuck commands are stored in code[] (with bracket pairs resolved into
 * brMatch[] on the fly) and every other byte is rejected. Once the whole
 * file has been validated, a fixed runtime (tape clearing, print and input
 * routines) is emitted, followed by one short instruction sequence per
 * command.
 *
 * Returns EXIT_SUCCESS when the listing was written, EXIT_FAILURE on a usage
 * error, an unreadable or invalid input file, or a failed write to stdout.
 */
int main(int argc, char** argv)
{
    FILE* f = NULL;
    int ch;

    if (argc != 2)
    {
        // The trailing space after "for" keeps the two literals from fusing
        // into "fora".
        fprintf(stderr, "usage:\n bfcompil <brainfuck-source-code-file>\n"
                        "bfcompil will output NASM-compilable source code for "
                        "a DOS program\n");
        return EXIT_FAILURE;
    }

    if ((f = fopen(argv[1], "rb")) == NULL)
    {
        fprintf(stderr, "can't open file \"%s\" for reading\n", argv[1]);
        return EXIT_FAILURE;
    }

    while ((ch = getc(f)) != EOF)
    {
        // strchr() also matches the terminating NUL of the set string, so a
        // NUL byte has to be excluded explicitly or it would be accepted.
        if (ch != '\0' && strchr(" \t\r\n", ch) != NULL) // skip white space
        {
            continue;
        }
        else if (ch != '\0' && strchr("><+-.,[]", ch) != NULL) // store valid commands
        {
            if (pcEnd >= &code[sizeof(code)])
            {
                fprintf(stderr, "too many commands in file \"%s\", expected at most "
                                "%u commands\n", argv[1], (unsigned)sizeof(code));
                fclose(f);
                return EXIT_FAILURE;
            }

            if (ch == '[')
            {
                // Remember where this loop opens until its ']' shows up.
                brStack[brSptr++] = (unsigned)(pcEnd - &code[0]);
            }
            else if (ch == ']')
            {
                if (brSptr == 0)
                {
                    fprintf(stderr, "unmatched ']' in file \"%s\"\n", argv[1]);
                    fclose(f);
                    return EXIT_FAILURE;
                }

                // Link the pair in both directions.
                brSptr--;
                brMatch[brStack[brSptr]] = (unsigned)(pcEnd - &code[0]);
                brMatch[pcEnd - &code[0]] = brStack[brSptr];
            }

            *pcEnd++ = ch;
        }
        else // fail on invalid commands
        {
            fprintf(stderr, "unexpected character '%c' in file \"%s\", valid command "
                            "set is: \"><+-.,[]\"\n", ch, argv[1]);
            fclose(f);
            return EXIT_FAILURE;
        }
    }

    // getc() returns EOF for read errors as well; do not compile a program
    // that was only partially read.
    if (ferror(f))
    {
        fprintf(stderr, "error reading file \"%s\"\n", argv[1]);
        fclose(f);
        return EXIT_FAILURE;
    }

    fclose(f);

    if (brSptr != 0)
    {
        fprintf(stderr, "unmatched '[' in file \"%s\"\n", argv[1]);
        return EXIT_FAILURE;
    }

    if (pcEnd == &code[0])
    {
        fprintf(stderr, "no commands found in file \"%s\"\n", argv[1]);
        return EXIT_FAILURE;
    }

    // Fixed prologue: clear the tape, jump over the helpers, then the print
    // routine. The text contains no '%', so fputs() emits exactly what the
    // printf() form would. Keeping each variant in its own statement also
    // keeps the preprocessor conditional below out of a call's argument list.
    fputs("; how to compile: nasm -f bin <input file with this code.asm> -o "
          "<output executable.com>\n\n"
          "org 0x100\n"
          "bits 16\n\n"
          " mov bx, data\n"
          " mov di, bx\n"
          " mov cx, 30000\n"
          " xor al, al\n"
          " cld\n"
          " rep stosb\n\n"
          " jmp code\n\n"
          "print:\n"
          " mov ah, 2\n"
          " cmp byte [bx], 10\n"
          " jne lprint1\n"
          " mov dl, 13\n"
          " int 0x21\n"
          "lprint1:\n"
          " mov dl, [bx]\n"
          " int 0x21\n"
          " ret\n\n",
          stdout);

#if 1
    // buffered input
    fputs("input:\n"
          " cmp byte [kbdbuf+1], 0\n"
          " jne linput1\n"
          " mov ah, 0xa\n"
          " mov dx, kbdbuf\n"
          " int 0x21\n"
          " inc byte [kbdbuf+1]\n"
          "linput1:\n"
          " mov al, [kbdbuf+2]\n"
          " cmp al, 13\n"
          " jne linput4\n"
          " mov al, 10\n"
          "linput4:\n"
          " mov [bx], al\n"
          " mov si, kbdbuf+3\n"
          " mov di, kbdbuf+2\n"
          " xor cx, cx\n"
          " dec byte [kbdbuf+1]\n"
          " mov cl, [kbdbuf+1]\n"
          " jz linput3\n"
          "linput2:\n"
          " lodsb\n"
          " stosb\n"
          " loop linput2\n"
          "linput3:\n"
          " ret\n\n",
          stdout);
#else
    // unbuffered input
    fputs("input:\n"
          " mov ah, 1\n"
          " int 0x21\n"
          " cmp al, 13\n"
          " jne linput\n"
          " mov al, 10\n"
          "linput:\n"
          " mov [bx], al\n"
          " ret\n\n",
          stdout);
#endif

    fputs("code:\n\n", stdout);

    // One instruction sequence per command; loop labels are named after the
    // command's index in code[] so each bracket can jump to its partner.
    for (pc = &code[0]; pc < pcEnd; pc++)
    {
        switch (*pc)
        {
        case '>':
            fputs(" inc bx\n", stdout);
            break;
        case '<':
            fputs(" dec bx\n", stdout);
            break;
        case '+':
            fputs(" inc byte [bx]\n", stdout);
            break;
        case '-':
            fputs(" dec byte [bx]\n", stdout);
            break;
        case '.':
            fputs(" call print\n", stdout);
            break;
        case ',':
            fputs(" call input\n", stdout);
            break;
        case '[':
            printf("label%u:\n", (unsigned)(pc - &code[0]));
            fputs(" cmp byte [bx], 0\n", stdout);
            printf(" je label%u\n", (unsigned)brMatch[pc - &code[0]]);
            break;
        case ']':
            printf(" jmp label%u\n", brMatch[pc - &code[0]]);
            printf("label%u:\n", (unsigned)(pc - &code[0]));
            break;
        default:
            // Not reachable: only the eight commands are ever stored.
            break;
        }
    }

    fputs("\n ret\n\n", stdout);
    fputs("kbdbuf:\n"
          " db 254\n"
          " db 0\n"
          " times 256 db 0\n\n",
          stdout);
    fputs("data:\n", stdout);

    // A full disk or closed pipe must not be reported as success.
    if (fflush(stdout) != 0 || ferror(stdout))
    {
        fprintf(stderr, "error writing output\n");
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}
更新:上述代码中基于DOS的输入和输出例程可以被直接访问屏幕缓冲区和键盘端口所取代。键盘代码还需要处理键盘中断。在x86个人电脑上做这件事并不难。你真的可以实现一个编译器,让一种语言在没有操作系统的裸硬件上运行
您还应该看看Forth,因为这正是适合这种环境的语言类型,而且很容易实现:比C容易得多,比brainfuck难得多,有点像汇编
更新2:这里有一个小型(约1KB大小)brainfuck解释器,它不使用任何DOS或BIOS功能:
; file: bfint.asm
; compile: nasm.exe -f bin bfint.asm -o bfint.com
; run in: DOS, DosBox or equivalent
; 16-bit code assembled for load offset 0x100 (the file header builds it as a
; flat binary .com).
bits 16
org 0x100
section .text
; Text screen geometry used by the video routines: columns and rows.
SCREEN_WIDTH equ 80
SCREEN_HEIGHT equ 25
; Number of word entries in the keyboard ring buffer. The indices are wrapped
; with "and ..., SCAN_BUF_SIZE - 1", so this has to stay a power of two.
SCAN_BUF_SIZE equ 256
; Capacities, in bytes, of the Brainfuck program buffer and of the data tape.
MAX_CODE_SIZE equ 20000
MAX_DATA_SIZE equ 30000
; Program entry. The string instructions used throughout expect DF = 0.
cld
; set new keyboard (IRQ1) ISR
; ES = 0 addresses the interrupt vector table; vector 9 is written as
; offset (Irq1Isr) followed by segment (CS).
; NOTE(review): the previous vector is neither saved nor restored anywhere in
; this file.
push byte 0
pop es
cli ; update ISR address w/ ints disabled
mov word [es:9*4], Irq1Isr
mov [es:9*4+2], cs
sti
; ES = CS from here on; the rep stosb / rep movsb in Execute and TestProgram
; write through ES.
push cs
pop es
; Editor loop. Restart clears the screen, prints the banner and empties the
; program buffer; WaitForKey then collects keystrokes into Code until the
; program is run with a double Enter (or the built-in test is run with "0").
Restart:
call ClearScreen
mov si, MsgHello
call PrintStr
mov word [CodeSize], 0
mov byte [EnterCount], 0
WaitForKey:
; GetKey returns AH = scan code, AL = character (0 for non-character keys).
call GetKey
; Escape erases code
cmp ah, 1 ; Escape
je Restart
; Non-characters are ignored
cmp al, 0 ; non-character key
je WaitForKey
; Enter is "printed" but not stored, use for formatting
cmp al, 10 ; Enter
je KeyEnter
; Any key other than Enter breaks an Enter-Enter sequence.
mov byte [EnterCount], 0
; Backspace deletes last character
cmp al, 8 ; Backspace
je KeyBackspace
; Space is printed but not stored, use for formatting
cmp al, " " ; Space
je PrintOnly
; 0 runs a test program
cmp al, "0"
je TestProgram
; Other characters are stored as code
; They are not validated here; Execute reports anything that is not a
; Brainfuck command.
mov bx, [CodeSize]
cmp bx, MAX_CODE_SIZE
jae ErrCodeTooBig
mov [Code + bx], al
inc word [CodeSize]
PrintOnly:
; Echo the key and wait for the next one.
call PrintChar
jmp WaitForKey
ErrCodeTooBig:
; Buffer full: report it and discard everything typed so far.
mov si, MsgCodeTooBig
call PrintStr
mov word [CodeSize], 0
jmp WaitForKey
KeyEnter:
; The first Enter only moves to a new line; a second consecutive Enter runs
; the program.
call PrintChar
inc byte [EnterCount]
cmp byte [EnterCount], 1
je WaitForKey
mov byte [EnterCount], 0
call Execute
jmp WaitForKey
KeyBackspace:
; Erase the last cell on screen, then drop the last stored command if any.
; NOTE(review): Space and Enter are echoed but never stored, so a Backspace
; right after one of them still removes the last stored command.
call PrintChar
cmp word [CodeSize], 0
je WaitForKey
dec word [CodeSize]
jmp WaitForKey
TestProgram:
; Copy TestCode..TestCodeEnd into Code (ES:DI destination, DS:SI source),
; set CodeSize to its length and run it.
mov si, TestCode
mov di, Code
mov cx, TestCodeEnd - TestCode
mov [CodeSize], cx
rep movsb
call Execute
jmp WaitForKey
; Interpret the CodeSize bytes stored at Code.
; Register roles while running:
;   SI = address of Code          BP = index of the current instruction
;   DI = address of Data          BX = index of the current data cell
; The data tape is zeroed first. On exit CodeSize is reset to 0 and
; MsgCompleted is printed.
; NOTE(review): BX is never range-checked, so "<" and ">" can move the data
; index outside Data.
; NOTE(review): [bp+si] uses SS as its default segment; this presumably relies
; on SS == DS as in a .com program - confirm.
Execute:
mov si, Code ; code start
xor bp, bp ; instruction index
mov di, Data ; data start
mov cx, MAX_DATA_SIZE
xor al, al
rep stosb
; rep stosb advanced DI by MAX_DATA_SIZE; point it back at the tape start.
sub di, MAX_DATA_SIZE
xor bx, bx ; data index
ExecuteLoop:
cmp bp, [CodeSize]
jae ExecuteDone
mov al, [bp+si]
cmp al, ">"
je IncPtr
cmp al, "<"
je DecPtr
cmp al, "+"
je IncData
cmp al, "-"
je DecData
cmp al, "."
je PrintData
cmp al, ","
je InputData
cmp al, "["
je While
cmp al, "]"
je EndWhile
; Not a Brainfuck command: print the message followed by the offending
; character (PrintStr preserves AL) and a newline, then stop.
mov si, MsgInvalidChar
call PrintStr
call PrintChar
mov al, 10
call PrintChar
jmp ExecuteDone
IncPtr:
inc bx
jmp ExecuteContinue
DecPtr:
dec bx
jmp ExecuteContinue
IncData:
inc byte [bx+di]
jmp ExecuteContinue
DecData:
dec byte [bx+di]
jmp ExecuteContinue
PrintData:
mov al, [bx+di]
call PrintChar
jmp ExecuteContinue
InputData:
; Wait for a key that produces a character (AL != 0) and store it.
call GetKey
or al, al
jz InputData
mov [bx+di], al
jmp ExecuteContinue
While:
; "[": enter the loop body when the cell is non-zero; otherwise move BP to
; the matching "]" so that the inc below steps past it.
cmp byte [bx+di], 0
jne ExecuteContinue
mov ax, 1
mov dx, "[]"
call FindMatchingBracket
ExecuteContinue:
inc bp
jmp ExecuteLoop
EndWhile:
; "]": scan backwards to the matching "[" and resume AT it (no inc), so the
; loop condition is evaluated again by While.
mov ax, -1
mov dx, "]["
call FindMatchingBracket
jmp ExecuteLoop
ExecuteDone:
mov word [CodeSize], 0
mov si, MsgCompleted
; Tail jump: PrintStr's ret returns to Execute's caller.
jmp PrintStr
; Move BP from a bracket to its matching partner.
; Inputs:
; SI = address of the code buffer
; BP = index of the bracket to start from
; AX = step added to BP per iteration (+1 forward, -1 backward)
; DL = bracket character that opens a nesting level in the scan direction
; DH = bracket character that closes a nesting level
; Outputs:
; BP = index of the matching bracket
; CX is used as the nesting depth and is clobbered.
; NOTE(review): there is no bounds check; with an unbalanced program the scan
; keeps going past the stored code.
FindMatchingBracket:
xor cx, cx
FindMatchingBracket1:
cmp byte [bp+si], dl
jne FindMatchingBracket2
inc cx
jmp FindMatchingBracket3
FindMatchingBracket2:
cmp byte [bp+si], dh
jne FindMatchingBracket3
dec cx
; Depth back to zero: BP is at the partner of the starting bracket.
jnz FindMatchingBracket3
ret
FindMatchingBracket3:
add bp, ax
jmp FindMatchingBracket1
; Write one character to the text screen at the software cursor (CursorPos,
; a byte offset into video segment 0xb800) and move the cursor.
; Codes 8, 10 and 13 are handled as backspace, newline and carriage return;
; every other value is stored as-is.
; All general registers and ES are preserved.
; Inputs:
; AL = ASCII character code
PrintChar:
; assuming it's a color text mode (not monochrome or graphics)
pusha
push es
push word 0xb800
pop es
mov bx, [CursorPos]
cmp al, 8
je PrintCharBackSpace
cmp al, 10
je PrintCharBackLF
cmp al, 13
je PrintCharBackCR
; Ordinary character: only the character byte of the cell is written, the
; attribute byte is left alone. AdvanceCursorPosition updates CursorPos and
; the hardware cursor itself.
mov [es:bx], al
call AdvanceCursorPosition
jmp PrintCharDone
PrintCharBackSpace:
; move the cursor back and erase the last character
; Nothing happens when the cursor is already at offset 0.
or bx, bx
jz PrintCharDone
dec bx
dec bx
mov word [es:bx], 0x0720
jmp PrintCharSetCursorPos
PrintCharBackLF:
; move the cursor to the beginning of the next line - '\n' behavior
; If the next row would be past the last row, stay on this row and scroll
; the screen instead; either way fall into the carriage-return code.
add bx, SCREEN_WIDTH * 2
cmp bx, SCREEN_WIDTH * SCREEN_HEIGHT * 2
jc PrintCharBackCR
sub bx, SCREEN_WIDTH * 2
call ScrollUp
PrintCharBackCR:
; move the cursor to the beginning of the current line - '\r' behavior
; BX = (BX / bytes-per-row) * bytes-per-row, i.e. rounded down to row start.
mov ax, SCREEN_WIDTH * 2
xchg ax, bx
xor dx, dx
div bx
mul bx
mov bx, ax
PrintCharSetCursorPos:
mov [CursorPos], bx
; SetCursorPosition takes a cell index, not a byte offset.
shr bx, 1
call SetCursorPosition
PrintCharDone:
; Shared epilogue, also jumped to by ClearScreen and ScrollUp; it expects the
; stack to hold ES on top of a pusha frame.
PopEsAllRet:
pop es
popa
ret
; Fill the whole text screen with blank cells (0x0720) and move both the
; software cursor (CursorPos) and the hardware cursor to the top-left corner.
; All general registers and ES are preserved via the shared PopEsAllRet
; epilogue in PrintChar.
ClearScreen:
; assuming it's a color text mode (not monochrome or graphics)
pusha
push es
push word 0xb800
pop es
xor di, di
mov cx, SCREEN_WIDTH * SCREEN_HEIGHT
mov ax, 0x0720 ; character = space, color = lightgray on black
rep stosw
xor bx, bx
mov [CursorPos], bx
call SetCursorPosition
jmp PopEsAllRet
; Scroll the text screen up by one row: rows 1..SCREEN_HEIGHT-1 are copied
; over rows 0..SCREEN_HEIGHT-2 and the last row is filled with blank cells.
; CursorPos is not modified here. All general registers, DS and ES are
; preserved.
ScrollUp:
; assuming it's a color text mode (not monochrome or graphics)
pusha
push es
push ds
; Both DS and ES point at video memory for the movsw copy.
push word 0xb800
pop es
push es
pop ds
mov si, SCREEN_WIDTH * 2
xor di, di
mov cx, SCREEN_WIDTH * (SCREEN_HEIGHT - 1)
rep movsw
; DI now points at the start of the last row; blank it.
mov cx, SCREEN_WIDTH
mov ax, 0x0720 ; character = space, color = lightgray on black
rep stosw
; Restore DS here so the stack matches what PopEsAllRet expects.
pop ds
jmp PopEsAllRet
; Print a string by passing each byte up to (not including) the terminating
; zero to PrintChar. All general registers are preserved, including SI and AL.
; Inputs:
; DS:SI = address of NUL-terminated ASCII string
PrintStr:
pusha
PrintStr1:
lodsb
or al, al
jz PrintStrDone
call PrintChar
jmp PrintStr1
PrintStrDone:
popa
ret
; Program the hardware text cursor through the CRT controller at port 0x3d4.
; Index 0x0f receives the low byte of the cell index and index 0x0e the high
; byte. All general registers are preserved.
; Inputs:
; BX = Y * SCREEN_WIDTH + X
SetCursorPosition:
; assuming it's a color text mode (not monochrome or graphics)
pusha
%if 0
; Disabled variant: separate byte writes to the index port (0x3d4) and the
; data port (0x3d5).
mov dx, 0x3d4
mov al, 0x0f
out dx, al
inc dx
mov al, bl
out dx, al
dec dx
mov al, 0x0e
out dx, al
inc dx
mov al, bh
out dx, al
%else
; Active variant: one word write per register, AL = index, AH = value.
mov dx, 0x3d4
mov al, 0x0f
mov ah, bl
out dx, ax
; AL becomes 0x0e, the index for the high byte.
dec al
mov ah, bh
out dx, ax
%endif
popa
ret
; Move the software cursor (CursorPos) forward by one cell and update the
; hardware cursor to match. When the new position would be past the end of the
; screen, the position is moved back by one row and the screen is scrolled up.
; All general registers are preserved.
AdvanceCursorPosition:
; assuming it's a color text mode (not monochrome or graphics)
pusha
mov ax, [CursorPos]
; Each cell is two bytes (character + attribute).
inc ax
inc ax
cmp ax, SCREEN_WIDTH * SCREEN_HEIGHT * 2
jc AdvanceCursorPosition1
sub ax, SCREEN_WIDTH * 2
call ScrollUp
AdvanceCursorPosition1:
mov [CursorPos], ax
; Convert the byte offset to a cell index in BX for SetCursorPosition.
shr ax, 1
xchg ax, bx
call SetCursorPosition
popa
ret
; Take the oldest entry from the keyboard ring buffer that Irq1Isr fills,
; spinning until one is available. BX and SI are preserved.
; Outputs:
; AH = scan code
; AL = character
GetKey:
push bx
push si
GetKeyRepeat:
; The buffer is empty while the write and read indices are equal.
mov ax, [ScanWriteIdx]
mov si, [ScanReadIdx]
sub ax, si
jz GetKeyRepeat
; Entries are words, so the byte offset is index * 2 (BX + SI with BX = SI).
mov bx, si
mov ax, [ScanBuf + bx + si]
; Advance the read index, wrapping at SCAN_BUF_SIZE.
inc si
and si, SCAN_BUF_SIZE - 1
mov [ScanReadIdx], si
pop si
pop bx
ret
; Keyboard interrupt handler installed on vector 9 at program start.
; Reads one scan code from port 0x60, tracks the two Shift keys in [Shift]
; (bit 0 = left, bit 1 = right) and, for key presses, appends a word to the
; ScanBuf ring buffer: high byte = scan code, low byte = translated character
; (0 when the key has no character). Key releases only update [Shift].
; All registers and DS are preserved.
; NOTE(review): the write index is never compared with ScanReadIdx, so a full
; buffer is not detected.
Irq1Isr:
pusha
push ds
; The interrupted code may have had any DS; the variables live in CS.
push cs
pop ds
; read keyboard scan code
in al, 0x60
cmp al, 0x2a ; Left Shift down
jne Irq1Isr1
or byte [Shift], 1
Irq1Isr1:
cmp al, 0x36 ; Right Shift down
jne Irq1Isr2
or byte [Shift], 2
Irq1Isr2:
cmp al, 0xaa ; Left Shift up
jne Irq1Isr3
and byte [Shift], ~1
Irq1Isr3:
cmp al, 0xb6 ; Right Shift up
jne Irq1Isr4
and byte [Shift], ~2
Irq1Isr4:
test al, 0x80
jnz Irq1IsrEois ; key released
; Keep the raw scan code in AH; AL becomes the character.
mov ah, al
; Only scan codes 0..57 have an entry in the translation tables.
cmp al, 58
jc Irq1Isr5
xor al, al ; don't translate non-character keys
jmp Irq1Isr7
Irq1Isr5:
; Pick the unshifted or shifted table and look up AL in it.
mov bx, ScanToChar
cmp byte [Shift], 0
je Irq1Isr6
add bx, ScanToCharShift - ScanToChar
Irq1Isr6:
xlatb
Irq1Isr7:
; Store AX at ScanBuf[write index] (word entries, hence BX + DI with
; DI = BX) and advance the index, wrapping at SCAN_BUF_SIZE.
mov bx, [ScanWriteIdx]
mov di, bx
mov [ScanBuf + bx + di], ax
inc bx
and bx, SCAN_BUF_SIZE - 1
mov [ScanWriteIdx], bx
Irq1IsrEois:
%if 0
; send EOI to XT keyboard
in al, 0x61
mov ah, al
or al, 0x80
out 0x61, al
mov al, ah
out 0x61, al
%endif
; send EOI to master PIC
mov al, 0x20
out 0x20, al
pop ds
popa
iret
; Scan-code-to-character tables used by Irq1Isr via xlatb. Each table has one
; byte per scan code 0..57 (58 entries, matching the "cmp al, 58" check in the
; handler); 0 marks a key without a character. Enter is mapped to 10.
; Unshifted layout:
ScanToChar:
db 0 ; unused
db 0 ; Escape
db "1234567890-="
db 8 ; Backspace
db 9 ; Tab
db "qwertyuiop[]"
db 10 ; Enter
db 0 ; Ctrl
db "asdfghjkl;'`"
db 0 ; Left Shift
db "\zxcvbnm,./"
db 0 ; Right Shift
db 0 ; Print Screen
db 0 ; Alt
db " " ; Space
; Layout used while either Shift bit is set; same order as ScanToChar.
ScanToCharShift:
db 0 ; unused
db 0 ; Escape
db "!@#$%^&*()_+"
db 8 ; Backspace
db 9 ; Tab
db "QWERTYUIOP{}"
db 10 ; Enter
db 0 ; Ctrl
db 'ASDFGHJKL:"~'
db 0 ; Left Shift
db "|ZXCVBNM<>?"
db 0 ; Right Shift
db 0 ; Print Screen
db 0 ; Alt
db " " ; Space
; Zero-terminated strings for PrintStr. Byte 10 is handled by PrintChar as
; "go to the start of the next line".
MsgHello:
db "Brainfuck Interpreter", 10, 10
db "Press 0 to run test code OR", 10
db "Type your code.", 10
db "Use Esc to erase it all or Backspace to delete last character.", 10
db "Press Enter twice to run it.", 10, 10, 0
MsgCodeTooBig:
db 10, "Code's too big", 10, 0
MsgCompleted:
db 10, "Code's completed", 10, 0
MsgInvalidChar:
db 10, "Invalid character: ", 0
; Shift state maintained by Irq1Isr: bit 0 = left Shift, bit 1 = right Shift.
Shift db 0
; Software cursor: byte offset into the video segment (two bytes per cell).
CursorPos dw 0
; Keyboard ring buffer indices: GetKey advances the read index, Irq1Isr the
; write index.
ScanReadIdx dw 0
ScanWriteIdx dw 0
; Number of consecutive Enter presses seen by the editor loop.
EnterCount db 0
; Number of bytes currently stored in Code.
CodeSize dw 0
; Built-in program copied into Code by TestProgram (everything between
; TestCode and TestCodeEnd).
TestCode:
; Hello World!
db "++++++++++[>+++++++>++++++++++>+++>+<<<<-]>++.>+.+++++++..+++.>++.<<+++++++++++++++.>.+++.------.--------.>+.>."
; Squares of 0 through 100
; db "++++[>+++++<-]>[<+++++>-]+<+[>[>+>+<<-]++>>[<<+>>-]>>>[-]++>[-]+>>>+[[-]++++++>>>]<<<[[<++++++++<++>>-]+<.<[>----<-]<]<<[>>>>>[>>>[-]+++++++++<[>-<-]+++++++++>[-[<->-]+[<<<]]<[>+<-]>]<<-]<<-]"
; ROT13
; db "+[,+[-[>+>+<<-]>[<+>-]+>>++++++++[<-------->-]<-[<[-]>>>+[<+<+>>-]<[>+<-]<[<++>>>+[<+<->>-]<[>+<-]]>[<]<]>>[-]<<<[[-]<[>>+>+<<<-]>>[<<+>>-]>>++++++++[<-------->-]<->>++++[<++++++++>-]<-<[>>>+<<[>+>[-]<<-]>[<+>-]>[<<<<<+>>>>++++[<++++++++>-]>-]<<-<-]>[<<<<[-]>>>>[<<<<->>>>-]]<<++++[<<++++++++>>-]<<-[>>+>+<<<-]>>[<<+>>-]+>>+++++[<----->-]<-[<[-]>>>+[<+<->>-]<[>+<-]<[<++>>>+[<+<+>>-]<[>+<-]]>[<]<]>>[-]<<<[[-]<<[>>+>+<<<-]>>[<<+>>-]+>------------[<[-]>>>+[<+<->>-]<[>+<-]<[<++>>>+[<+<+>>-]<[>+<-]]>[<]<]>>[-]<<<<<------------->>[[-]+++++[<<+++++>>-]<<+>>]<[>++++[<<++++++++>>-]<-]>]<[-]++++++++[<++++++++>-]<+>]<.[-]+>>+<]>[[-]<]<]"
TestCodeEnd:
; Uninitialized storage.
section .bss
; Keyboard ring buffer: SCAN_BUF_SIZE word entries (scan code + character).
ScanBuf:
resw SCAN_BUF_SIZE
; Brainfuck program entered by the user or copied from TestCode.
Code:
resb MAX_CODE_SIZE
; Data tape; Execute zeroes it before every run.
Data:
resb MAX_DATA_SIZE
;文件:bfint.asm
; 编译:nasm.exe-f bin bfint.asm-o bfint.com
; 磨合:DOS、DosBox或同等产品
第16位
组织0x100
第节.案文
屏幕宽度等于80
屏幕高度等于25
扫描大小等于256
最大代码大小等于20000
最大数据大小等于30000
cld
; 设置新键盘(IRQ1)ISR
推送字节0
流行音乐
cli;禁用ints时更新ISR地址
mov字[es:9*4],Irq1Isr
mov[es:9*4+2],cs
性病
推送cs
流行音乐
重新启动:
呼叫清除屏幕
莫夫·西女士
调用PrintStr
mov字[CodeSize],0
mov字节[EnterCount],0
WaitForKey:
调用GetKey
; 转义删除代码
cmp-ah,1;逃跑
日本脑炎重启
; 忽略非字符
cmp-al,0;非字符键
杰维特福基
; 输入为“打印”但未存储,用于格式化
cmp-al,10;进入
je键盘输入
mov字节[EnterCount],0
; 退格删除最后一个字符
cmp-al,8;退格
键退格
; 空间已打印但未存储,用于格式化
cmp-al,“;空间
je打印版
; 0运行测试程序
cmp铝,“0”
日本脑炎测试计划
; 其他字符存储为代码
mov bx,[代码大小]
cmp bx,最大代码大小
jae ErrCodeTooBig
mov[代码+bx],al
inc字[码大小]
仅打印:
调用PrintChar
韦特福基
ErrCodeTooBig:
mov si,MsgCodeTooBig
调用PrintStr
mov字[CodeSize],0
韦特福基
键盘输入:
调用PrintChar
inc字节[输入计数]
cmp字节[Ente
++++++++++[>+++++++>++++++++++>+++>+<<<<-]>++.>+.+++++++..+++.>++.<<+++++++++++++++.>.+++.------.--------.>+.>.
; how to compile: nasm -f bin <input file with this code.asm> -o <output executable.com>
org 0x100
bits 16
mov bx, data
mov di, bx
mov cx, 30000
xor al, al
cld
rep stosb
jmp code
print:
mov ah, 2
cmp byte [bx], 10
jne lprint1
mov dl, 13
int 0x21
lprint1:
mov dl, [bx]
int 0x21
ret
input:
cmp byte [kbdbuf+1], 0
jne linput1
mov ah, 0xa
mov dx, kbdbuf
int 0x21
inc byte [kbdbuf+1]
linput1:
mov al, [kbdbuf+2]
cmp al, 13
jne linput4
mov al, 10
linput4:
mov [bx], al
mov si, kbdbuf+3
mov di, kbdbuf+2
xor cx, cx
dec byte [kbdbuf+1]
mov cl, [kbdbuf+1]
jz linput3
linput2:
lodsb
stosb
loop linput2
linput3:
ret
code:
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
label10:
cmp byte [bx], 0
je label41
inc bx
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc bx
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc bx
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc bx
inc byte [bx]
dec bx
dec bx
dec bx
dec bx
dec byte [bx]
jmp label10
label41:
inc bx
inc byte [bx]
inc byte [bx]
call print
inc bx
inc byte [bx]
call print
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
call print
call print
inc byte [bx]
inc byte [bx]
inc byte [bx]
call print
inc bx
inc byte [bx]
inc byte [bx]
call print
dec bx
dec bx
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
inc byte [bx]
call print
inc bx
call print
inc byte [bx]
inc byte [bx]
inc byte [bx]
call print
dec byte [bx]
dec byte [bx]
dec byte [bx]
dec byte [bx]
dec byte [bx]
dec byte [bx]
call print
dec byte [bx]
dec byte [bx]
dec byte [bx]
dec byte [bx]
dec byte [bx]
dec byte [bx]
dec byte [bx]
dec byte [bx]
call print
inc bx
inc byte [bx]
call print
inc bx
call print
ret
kbdbuf:
db 254
db 0
times 256 db 0
data:
Hello World!
; file: bfint.asm
; compile: nasm.exe -f bin bfint.asm -o bfint.com
; run in: DOS, DosBox or equivalent
bits 16
org 0x100
section .text
SCREEN_WIDTH equ 80
SCREEN_HEIGHT equ 25
SCAN_BUF_SIZE equ 256
MAX_CODE_SIZE equ 20000
MAX_DATA_SIZE equ 30000
cld
; set new keyboard (IRQ1) ISR
push byte 0
pop es
cli ; update ISR address w/ ints disabled
mov word [es:9*4], Irq1Isr
mov [es:9*4+2], cs
sti
push cs
pop es
Restart:
call ClearScreen
mov si, MsgHello
call PrintStr
mov word [CodeSize], 0
mov byte [EnterCount], 0
WaitForKey:
call GetKey
; Escape erases code
cmp ah, 1 ; Escape
je Restart
; Non-characters are ignored
cmp al, 0 ; non-character key
je WaitForKey
; Enter is "printed" but not stored, use for formatting
cmp al, 10 ; Enter
je KeyEnter
mov byte [EnterCount], 0
; Backspace deletes last character
cmp al, 8 ; Backspace
je KeyBackspace
; Space is printed but not stored, use for formatting
cmp al, " " ; Space
je PrintOnly
; 0 runs a test program
cmp al, "0"
je TestProgram
; Other chracters are stored as code
mov bx, [CodeSize]
cmp bx, MAX_CODE_SIZE
jae ErrCodeTooBig
mov [Code + bx], al
inc word [CodeSize]
PrintOnly:
call PrintChar
jmp WaitForKey
ErrCodeTooBig:
mov si, MsgCodeTooBig
call PrintStr
mov word [CodeSize], 0
jmp WaitForKey
KeyEnter:
call PrintChar
inc byte [EnterCount]
cmp byte [EnterCount], 1
je WaitForKey
mov byte [EnterCount], 0
call Execute
jmp WaitForKey
KeyBackspace:
call PrintChar
cmp word [CodeSize], 0
je WaitForKey
dec word [CodeSize]
jmp WaitForKey
TestProgram:
mov si, TestCode
mov di, Code
mov cx, TestCodeEnd - TestCode
mov [CodeSize], cx
rep movsb
call Execute
jmp WaitForKey
Execute:
mov si, Code ; code start
xor bp, bp ; instruction index
mov di, Data ; data start
mov cx, MAX_DATA_SIZE
xor al, al
rep stosb
sub di, MAX_DATA_SIZE
xor bx, bx ; data index
ExecuteLoop:
cmp bp, [CodeSize]
jae ExecuteDone
mov al, [bp+si]
cmp al, ">"
je IncPtr
cmp al, "<"
je DecPtr
cmp al, "+"
je IncData
cmp al, "-"
je DecData
cmp al, "."
je PrintData
cmp al, ","
je InputData
cmp al, "["
je While
cmp al, "]"
je EndWhile
mov si, MsgInvalidChar
call PrintStr
call PrintChar
mov al, 10
call PrintChar
jmp ExecuteDone
IncPtr:
inc bx
jmp ExecuteContinue
DecPtr:
dec bx
jmp ExecuteContinue
IncData:
inc byte [bx+di]
jmp ExecuteContinue
DecData:
dec byte [bx+di]
jmp ExecuteContinue
PrintData:
mov al, [bx+di]
call PrintChar
jmp ExecuteContinue
InputData:
call GetKey
or al, al
jz InputData
mov [bx+di], al
jmp ExecuteContinue
While:
cmp byte [bx+di], 0
jne ExecuteContinue
mov ax, 1
mov dx, "[]"
call FindMatchingBracket
ExecuteContinue:
inc bp
jmp ExecuteLoop
EndWhile:
mov ax, -1
mov dx, "]["
call FindMatchingBracket
jmp ExecuteLoop
ExecuteDone:
mov word [CodeSize], 0
mov si, MsgCompleted
jmp PrintStr
FindMatchingBracket:
xor cx, cx
FindMatchingBracket1:
cmp byte [bp+si], dl
jne FindMatchingBracket2
inc cx
jmp FindMatchingBracket3
FindMatchingBracket2:
cmp byte [bp+si], dh
jne FindMatchingBracket3
dec cx
jnz FindMatchingBracket3
ret
FindMatchingBracket3:
add bp, ax
jmp FindMatchingBracket1
; Inputs:
; AL = ASCII character code
PrintChar:
; assuming it's a color text mode (not monochrome or graphics)
pusha
push es
push word 0xb800
pop es
mov bx, [CursorPos]
cmp al, 8
je PrintCharBackSpace
cmp al, 10
je PrintCharBackLF
cmp al, 13
je PrintCharBackCR
mov [es:bx], al
call AdvanceCursorPosition
jmp PrintCharDone
PrintCharBackSpace:
; move the cursor back and erase the last character
or bx, bx
jz PrintCharDone
dec bx
dec bx
mov word [es:bx], 0x0720
jmp PrintCharSetCursorPos
PrintCharBackLF:
; move the cursor to the beginning of the next line - '\n' behavior
add bx, SCREEN_WIDTH * 2
cmp bx, SCREEN_WIDTH * SCREEN_HEIGHT * 2
jc PrintCharBackCR
sub bx, SCREEN_WIDTH * 2
call ScrollUp
PrintCharBackCR:
; move the cursor to the beginning of the current line - '\r' behavior
mov ax, SCREEN_WIDTH * 2
xchg ax, bx
xor dx, dx
div bx
mul bx
mov bx, ax
PrintCharSetCursorPos:
mov [CursorPos], bx
shr bx, 1
call SetCursorPosition
PrintCharDone:
PopEsAllRet:
pop es
popa
ret
ClearScreen:
; assuming it's a color text mode (not monochrome or graphics)
pusha
push es
push word 0xb800
pop es
xor di, di
mov cx, SCREEN_WIDTH * SCREEN_HEIGHT
mov ax, 0x0720 ; character = space, color = lightgray on black
rep stosw
xor bx, bx
mov [CursorPos], bx
call SetCursorPosition
jmp PopEsAllRet
ScrollUp:
; assuming it's a color text mode (not monochrome or graphics)
pusha
push es
push ds
push word 0xb800
pop es
push es
pop ds
mov si, SCREEN_WIDTH * 2
xor di, di
mov cx, SCREEN_WIDTH * (SCREEN_HEIGHT - 1)
rep movsw
mov cx, SCREEN_WIDTH
mov ax, 0x0720 ; character = space, color = lightgray on black
rep stosw
pop ds
jmp PopEsAllRet
; Inputs:
; DS:SI = address of NUL-terminated ASCII string
PrintStr:
pusha
PrintStr1:
lodsb
or al, al
jz PrintStrDone
call PrintChar
jmp PrintStr1
PrintStrDone:
popa
ret
; Inputs:
; BX = Y * SCREEN_WIDTH + X
SetCursorPosition:
; assuming it's a color text mode (not monochrome or graphics)
pusha
%if 0
mov dx, 0x3d4
mov al, 0x0f
out dx, al
inc dx
mov al, bl
out dx, al
dec dx
mov al, 0x0e
out dx, al
inc dx
mov al, bh
out dx, al
%else
mov dx, 0x3d4
mov al, 0x0f
mov ah, bl
out dx, ax
dec al
mov ah, bh
out dx, ax
%endif
popa
ret
AdvanceCursorPosition:
; assuming it's a color text mode (not monochrome or graphics)
pusha
mov ax, [CursorPos]
inc ax
inc ax
cmp ax, SCREEN_WIDTH * SCREEN_HEIGHT * 2
jc AdvanceCursorPosition1
sub ax, SCREEN_WIDTH * 2
call ScrollUp
AdvanceCursorPosition1:
mov [CursorPos], ax
shr ax, 1
xchg ax, bx
call SetCursorPosition
popa
ret
; Outputs:
; AH = scan code
; AL = character
GetKey:
push bx
push si
GetKeyRepeat:
mov ax, [ScanWriteIdx]
mov si, [ScanReadIdx]
sub ax, si
jz GetKeyRepeat
mov bx, si
mov ax, [ScanBuf + bx + si]
inc si
and si, SCAN_BUF_SIZE - 1
mov [ScanReadIdx], si
pop si
pop bx
ret
Irq1Isr:
pusha
push ds
push cs
pop ds
; read keyboard scan code
in al, 0x60
cmp al, 0x2a ; Left Shift down
jne Irq1Isr1
or byte [Shift], 1
Irq1Isr1:
cmp al, 0x36 ; Right Shift down
jne Irq1Isr2
or byte [Shift], 2
Irq1Isr2:
cmp al, 0xaa ; Left Shift up
jne Irq1Isr3
and byte [Shift], ~1
Irq1Isr3:
cmp al, 0xb6 ; Right Shift up
jne Irq1Isr4
and byte [Shift], ~2
Irq1Isr4:
test al, 0x80
jnz Irq1IsrEois ; key released
mov ah, al
cmp al, 58
jc Irq1Isr5
xor al, al ; don't translate non-character keys
jmp Irq1Isr7
Irq1Isr5:
mov bx, ScanToChar
cmp byte [Shift], 0
je Irq1Isr6
add bx, ScanToCharShift - ScanToChar
Irq1Isr6:
xlatb
Irq1Isr7:
mov bx, [ScanWriteIdx]
mov di, bx
mov [ScanBuf + bx + di], ax
inc bx
and bx, SCAN_BUF_SIZE - 1
mov [ScanWriteIdx], bx
Irq1IsrEois:
%if 0
; send EOI to XT keyboard
in al, 0x61
mov ah, al
or al, 0x80
out 0x61, al
mov al, ah
out 0x61, al
%endif
; send EOI to master PIC
mov al, 0x20
out 0x20, al
pop ds
popa
iret
ScanToChar:
db 0 ; unused
db 0 ; Escape
db "1234567890-="
db 8 ; Backspace
db 9 ; Tab
db "qwertyuiop[]"
db 10 ; Enter
db 0 ; Ctrl
db "asdfghjkl;'`"
db 0 ; Left Shift
db "\zxcvbnm,./"
db 0 ; Right Shift
db 0 ; Print Screen
db 0 ; Alt
db " " ; Space
ScanToCharShift:
db 0 ; unused
db 0 ; Escape
db "!@#$%^&*()_+"
db 8 ; Backspace
db 9 ; Tab
db "QWERTYUIOP{}"
db 10 ; Enter
db 0 ; Ctrl
db 'ASDFGHJKL:"~'
db 0 ; Left Shift
db "|ZXCVBNM<>?"
db 0 ; Right Shift
db 0 ; Print Screen
db 0 ; Alt
db " " ; Space
MsgHello:
db "Brainfuck Interpreter", 10, 10
db "Press 0 to run test code OR", 10
db "Type your code.", 10
db "Use Esc to erase it all or Backspace to delete last character.", 10
db "Press Enter twice to run it.", 10, 10, 0
MsgCodeTooBig:
db 10, "Code's too big", 10, 0
MsgCompleted:
db 10, "Code's completed", 10, 0
MsgInvalidChar:
db 10, "Invalid character: ", 0
Shift db 0
CursorPos dw 0
ScanReadIdx dw 0
ScanWriteIdx dw 0
EnterCount db 0
CodeSize dw 0
TestCode:
; Hello World!
db "++++++++++[>+++++++>++++++++++>+++>+<<<<-]>++.>+.+++++++..+++.>++.<<+++++++++++++++.>.+++.------.--------.>+.>."
; Squares of 0 through 100
; db "++++[>+++++<-]>[<+++++>-]+<+[>[>+>+<<-]++>>[<<+>>-]>>>[-]++>[-]+>>>+[[-]++++++>>>]<<<[[<++++++++<++>>-]+<.<[>----<-]<]<<[>>>>>[>>>[-]+++++++++<[>-<-]+++++++++>[-[<->-]+[<<<]]<[>+<-]>]<<-]<<-]"
; ROT13
; db "+[,+[-[>+>+<<-]>[<+>-]+>>++++++++[<-------->-]<-[<[-]>>>+[<+<+>>-]<[>+<-]<[<++>>>+[<+<->>-]<[>+<-]]>[<]<]>>[-]<<<[[-]<[>>+>+<<<-]>>[<<+>>-]>>++++++++[<-------->-]<->>++++[<++++++++>-]<-<[>>>+<<[>+>[-]<<-]>[<+>-]>[<<<<<+>>>>++++[<++++++++>-]>-]<<-<-]>[<<<<[-]>>>>[<<<<->>>>-]]<<++++[<<++++++++>>-]<<-[>>+>+<<<-]>>[<<+>>-]+>>+++++[<----->-]<-[<[-]>>>+[<+<->>-]<[>+<-]<[<++>>>+[<+<+>>-]<[>+<-]]>[<]<]>>[-]<<<[[-]<<[>>+>+<<<-]>>[<<+>>-]+>------------[<[-]>>>+[<+<->>-]<[>+<-]<[<++>>>+[<+<+>>-]<[>+<-]]>[<]<]>>[-]<<<<<------------->>[[-]+++++[<<+++++>>-]<<+>>]<[>++++[<<++++++++>>-]<-]>]<[-]++++++++[<++++++++>-]<+>]<.[-]+>>+<]>[[-]<]<]"
TestCodeEnd:
section .bss
ScanBuf:
resw SCAN_BUF_SIZE
Code:
resb MAX_CODE_SIZE
Data:
resb MAX_DATA_SIZE