"""PLY lexer for the nandgame assembler.

Recovered from the patch "Add first version of assembler" (commit
7e2ecc6e1f9e49439696adc72dcbefd699168e44, author uvok,
Mon, 12 Jan 2026 19:16:17 +0100), which creates
``nandgame/assembler/lexer.py``.

PLY uses the docstring of every ``t_*`` function as that token's regular
expression, so the rules below are documented with ``#`` comments instead
of docstrings.  Function rules are tried in the order they are defined,
which is why the order of the definitions in this file matters.
"""

import ply.lex as lex

# List of token names. This is always required
tokens = (
    'OP',
    'JUMP',
    'COMMA',
    'COLON',
    'SYMBOL',
    'NUMBER',
    'HEXNUMBER',
    'DOT',
    'REG',
)

# Regular expression rules for simple tokens
t_COMMA = r','
t_COLON = r':'
t_DOT = r'\.'


# Instruction mnemonics.  The word boundaries keep identifiers that merely
# start with a mnemonic (e.g. ``inc_loop``, ``android``) from being split
# into an OP followed by leftover text; they fall through to SYMBOL.
def t_OP(t):
    r"\b(?:mov|and|dec|hlt|add|sub|inc)\b"
    return t


# Register names: any of A, D, M combined in that order (A, AD, AM, ADM,
# D, DM, M), or the placeholder ``_``.
def t_REG(t):
    r"\b(AD?M?|DM?|M|_)\b"
    return t


# Jump mnemonics, word-bounded for the same reason as OP
# (e.g. ``jmp_table`` must lex as a single SYMBOL).
def t_JUMP(t):
    r"\b(?:jmp|jlt|jgt|jle|jge|jeq|jne)\b"
    return t


# Hexadecimal immediate such as ``#0x1F``; the value becomes an int.
# This rule MUST be defined before t_NUMBER: PLY tries function rules in
# definition order, and t_NUMBER would otherwise consume the ``#0`` prefix
# and leave ``x1F`` behind to be lexed as a SYMBOL.
def t_HEXNUMBER(t):
    r'\#0x[0-9a-fA-F]+'
    # int() with base 16 accepts the leading "0x" itself.
    t.value = int(t.value[1:], 16)
    return t


# Decimal immediate such as ``#42``; the value becomes an int.
def t_NUMBER(t):
    r'\#\d+'
    t.value = int(t.value[1:])
    return t


# Symbol / label name: a lowercase letter followed by letters, digits or
# underscores.
# NOTE(review): the ``+`` requires at least two characters, so a
# single-letter symbol such as ``x`` is reported as an illegal character.
# Confirm whether that is intended before relaxing it to ``*``.
def t_SYMBOL(t):
    r'[a-z][A-Za-z0-9_]+'
    return t


# Define a rule so we can track line numbers
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)


# A string containing ignored characters (spaces and tabs)
t_ignore = ' \t'


# Comments run from ``;`` to the end of the line.  Nothing is returned, so
# the matched text is discarded and no token is produced.
def t_COMMENT(t):
    r';.*'


# Error handling rule: report the offending character and skip past it so
# lexing can continue with the rest of the input.
def t_error(t):
    print("!!! Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)


# Build the lexer
lexer = lex.lex()