diff options
| author | uvok | 2026-01-12 19:16:17 +0100 |
|---|---|---|
| committer | uvok | 2026-01-12 19:16:17 +0100 |
| commit | 7e2ecc6e1f9e49439696adc72dcbefd699168e44 (patch) | |
| tree | a72a262bc8742137258237f19d72933d6d4cf0dd /nandgame/assembler | |
| parent | 9b402d2bd6eb5d5d37e892d0066b2d2990341889 (diff) | |
Add first version of assembler
Diffstat (limited to 'nandgame/assembler')
| -rw-r--r-- | nandgame/assembler/lexer.py | 67 | ||||
| -rw-r--r-- | nandgame/assembler/parser.py | 115 | ||||
| -rw-r--r-- | nandgame/assembler/tlexer.py | 14 | ||||
| -rw-r--r-- | nandgame/assembler/tparser.py | 8 |
4 files changed, 204 insertions, 0 deletions
diff --git a/nandgame/assembler/lexer.py b/nandgame/assembler/lexer.py
new file mode 100644
index 0000000..eb035f2
--- /dev/null
+++ b/nandgame/assembler/lexer.py
@@ -0,0 +1,67 @@
+# PLY lexer for the nandgame assembler.
+import ply.lex as lex
+
+# List of token names. This is always required
+tokens = (
+    'OP', 'JUMP', 'COMMA', 'COLON', 'SYMBOL',
+    'NUMBER', 'HEXNUMBER', 'DOT', 'REG',
+)
+
+# Regular expression rules for simple tokens
+t_COMMA = r','
+t_COLON = r':'
+t_DOT = r'\.'
+
+# Mnemonics are anchored with \b so that a label such as "address" or
+# "increment" is lexed as one SYMBOL, not as OP plus a leftover SYMBOL.
+def t_OP(t):
+    r"\b(mov|and|dec|hlt|add|sub|inc)\b"
+    return t
+
+# Register names: A, AD, AM, ADM, D, DM, M, or a lone underscore.
+def t_REG(t):
+    r"\b(AD?M?|DM?|M|_)\b"
+    return t
+
+# Jump conditions; \b keeps labels like "jmptable" in one piece.
+def t_JUMP(t):
+    r"\b(jmp|jlt|jgt|jle|jge|jeq|jne)\b"
+    return t
+
+# Must come before t_NUMBER: otherwise "#0x1F" is cut into NUMBER 0
+# followed by the SYMBOL "x1F" and hex literals can never match.
+def t_HEXNUMBER(t):
+    r'\#0x[0-9a-fA-F]+'
+    t.value = int(t.value[1:], 16)
+    return t
+
+# Decimal immediate such as "#123"; the token value becomes an int.
+def t_NUMBER(t):
+    r'\#\d+'
+    t.value = int(t.value[1:])
+    return t
+
+# Labels and names; "*" (not "+") so one-letter names like "x" work.
+def t_SYMBOL(t):
+    r'[a-z][A-Za-z0-9_]*'
+    return t
+
+# Define a rule so we can track line numbers
+def t_newline(t):
+    r'\n+'
+    t.lexer.lineno += len(t.value)
+
+# A string containing ignored characters (spaces and tabs)
+t_ignore = ' \t'
+
+# Comments run from ";" to the end of the line and produce no token.
+def t_COMMENT(t):
+    r';.*'
+
+# Error handling rule: report the offending character and resume.
+def t_error(t):
+    print("!!! Illegal character '%s'" % t.value[0])
+    t.lexer.skip(1)
+
+# Build the lexer
+lexer = lex.lex()
diff --git a/nandgame/assembler/parser.py b/nandgame/assembler/parser.py
new file mode 100644
index 0000000..b2cc3f9
--- /dev/null
+++ b/nandgame/assembler/parser.py
@@ -0,0 +1,115 @@
+import sys
+
+import ply.yacc as yacc
+from ply.lex import LexToken
+
+# Get the token map from the lexer. This is required.
+from lexer import tokens
+
+# Error rule for syntax errors.  PLY passes None when the input ends
+# before the grammar is satisfied.
+def p_error(p: LexToken):
+    if p:
+        print(f"Unexpected {p.value} on line {p.lineno}")
+    else:
+        print("Unexpected end of file.")
+
+# Start symbol (the first grammar rule in the file).  Its value is
+# what parser.parse() returns: the list of parsed instructions.
+def p_program(p):
+    '''program : instruction_list
+               | empty'''
+    # 'empty' sets no value, so an empty source file yields [].
+    p[0] = p[1] or []
+
+def p_empty(p):
+    '''empty :'''
+
+# Left-recursive list: appends each instruction in source order.
+def p_inss(p):
+    '''instruction_list : instruction_list instruction
+                        | instruction
+    '''
+    if len(p) == 2:
+        p[0] = [p[1]]
+    else:
+        p[0] = p[1] + [p[2]]
+
+# NOTE: right-recursive variant.  No other rule refers to
+# instruction_list2, so yacc reports it as unreachable.
+def p_inss2(p):
+    '''instruction_list2 : instruction instruction_list2
+                        | instruction
+    '''
+    if len(p) == 2:
+        p[0] = [p[1]]
+    else:
+        p[0] = [p[1]] + p[2]
+
+    print(f"INSes2: {len(p)}")
+    print(f" {p[1]}")
+    if len(p) > 2:
+        print(f" {p[2]}")
+
+# Every alternative already built its tuple; pass it through.
+def p_instruction(p):
+    '''instruction : noarg
+                   | onearg
+                   | twoarg
+                   | jumpdest
+    '''
+    print(f"INS: {p[1]}")
+    p[0] = p[1]
+
+# Label definition "name:" -> 1-tuple (name,).
+def p_jumpdest(p):
+    '''jumpdest : SYMBOL COLON'''
+    p[0] = (p[1], )
+
+# Instructions are 5-tuples: (opcode, jump, dest, arg1, arg2).
+def p_twoarg(p):
+    '''twoarg : op REG COMMA argument COMMA argument'''
+    p[0] = (*p[1], p[2], p[4], p[6])
+
+def p_argument(p):
+    '''argument : number
+                | REG
+                | SYMBOL
+    '''
+    # Without this the operand is dropped and every arg is None.
+    p[0] = p[1]
+
+def p_onearg(p):
+    '''onearg : op REG COMMA argument'''
+    p[0] = (*p[1], p[2], p[4], None)
+
+def p_noarg(p):
+    '''noarg : op'''
+    p[0] = (*p[1], None, None, None)
+
+# NOTE: not an alternative of 'instruction', so this diagnostic
+# never runs.
+def p_invalid_arg(p):
+    '''invalid_arg : op argument'''
+    print(f"Invalid opcode, destination missing: {p[1:]}")
+    sys.exit(1)
+
+# Opcode with optional jump suffix: "op" or "op.jump" -> (op, jump).
+def p_op(p):
+    '''op : OP DOT JUMP
+          | OP
+    '''
+    if len(p) == 2:
+        p[0] = (p[1], None)
+    else:
+        p[0] = (p[1], p[3])
+
+# NUMBER and HEXNUMBER already carry int values from the lexer.
+def p_number(p):
+    '''number : NUMBER
+              | HEXNUMBER
+    '''
+    p[0] = p[1]
+
+# Build the LALR tables; importing this module triggers the build.
+parser = yacc.yacc()
diff --git a/nandgame/assembler/tlexer.py b/nandgame/assembler/tlexer.py
new file mode 100644
index 0000000..6203b1a
--- /dev/null
+++ b/nandgame/assembler/tlexer.py
@@ -0,0 +1,14 @@
+import sys
+
+from lexer import
lexer
+
+# Open in text mode: universal newlines turn CRLF into LF, so a
+# source saved with Windows line endings does not hit t_error on
+# every '\r'.  Non-ASCII input still raises UnicodeDecodeError.
+with open(sys.argv[1], encoding='ascii') as f:
+    lexer.input(f.read())
+
+# Dump the token stream, one LexToken per line.  A PLY lexer is
+# iterable and stops once token() reports the end of the input.
+for tok in lexer:
+    print(tok)
diff --git a/nandgame/assembler/tparser.py b/nandgame/assembler/tparser.py
new file mode 100644
index 0000000..c5b2e9f
--- /dev/null
+++ b/nandgame/assembler/tparser.py
@@ -0,0 +1,8 @@
+import sys
+
+from parser import parser
+
+# Text mode normalises CRLF so '\r' never reaches the lexer.
+with open(sys.argv[1], encoding='ascii') as f:
+    source = f.read()
+parser.parse(source, tracking=True)
|
