diff options
| author | uvok | 2026-01-12 19:16:17 +0100 |
|---|---|---|
| committer | uvok | 2026-01-12 19:16:17 +0100 |
| commit | 7e2ecc6e1f9e49439696adc72dcbefd699168e44 (patch) | |
| tree | a72a262bc8742137258237f19d72933d6d4cf0dd /nandgame/assembler/lexer.py | |
| parent | 9b402d2bd6eb5d5d37e892d0066b2d2990341889 (diff) | |
Add first version of assembler
Diffstat (limited to 'nandgame/assembler/lexer.py')
| -rw-r--r-- | nandgame/assembler/lexer.py | 67 |
1 files changed, 67 insertions, 0 deletions
"""PLY lexer for the nandgame assembler (nandgame/assembler/lexer.py).

PLY reads each token's regular expression from the rule function's
docstring, so the ``t_*`` functions below are documented with ``#``
comments rather than real docstrings.  Function rules are tried in the
order in which they are defined, which is why the order of the
definitions in this module matters.
"""

import ply.lex as lex

# List of token names. This is always required by PLY.
tokens = (
    'OP',
    'JUMP',
    'COMMA',
    'COLON',
    'SYMBOL',
    'NUMBER',
    'HEXNUMBER',
    'DOT',
    'REG',
)

# Regular expression rules for simple (single character) tokens.
t_COMMA = r','
t_COLON = r':'
t_DOT = r'\.'


# Instruction mnemonics.  The \b anchors keep identifiers that merely start
# with a mnemonic (e.g. "android", "increment") from being split into an OP
# token followed by a SYMBOL token.
def t_OP(t):
    r"\b(?:mov|and|dec|hlt|add|sub|inc)\b"
    return t


# Register operands: A, AD, AM, ADM, D, DM, M, or "_".
def t_REG(t):
    r"\b(AD?M?|DM?|M|_)\b"
    return t


# Jump mnemonics, anchored on word boundaries for the same reason as t_OP.
def t_JUMP(t):
    r"\b(?:jmp|jlt|jgt|jle|jge|jeq|jne)\b"
    return t


# Hexadecimal immediate such as "#0x1F".  This rule must be defined BEFORE
# t_NUMBER: otherwise t_NUMBER matches the leading "#0" and the remaining
# "x1F" is lexed as a SYMBOL.  The "#" is escaped because PLY compiles its
# patterns in verbose mode, where a bare "#" would start a regex comment.
def t_HEXNUMBER(t):
    r'\#0x[0-9a-fA-F]+'
    # int() with base 16 accepts the "0x" prefix, so only "#" is stripped.
    t.value = int(t.value[1:], 16)
    return t


# Decimal immediate such as "#42"; the token value becomes the integer.
def t_NUMBER(t):
    r'\#\d+'
    t.value = int(t.value[1:])
    return t


# Symbols / labels: a lowercase letter followed by one or more letters,
# digits or underscores.
# NOTE(review): single-character symbols are rejected by this pattern --
# confirm whether "+" should be "*".
def t_SYMBOL(t):
    r'[a-z][A-Za-z0-9_]+'
    return t


# Track line numbers; returning nothing discards the newline characters.
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)


# A string containing ignored characters (spaces and tabs).
t_ignore = ' \t'


# Comments run from ";" to the end of the line.  Returning nothing makes
# the lexer discard the matched text.
def t_COMMENT(t):
    r';.*'


# Error handling rule: report the offending character and skip past it so
# that lexing can continue.
def t_error(t):
    print("!!! Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)


# Build the lexer from the rules defined in this module.
lexer = lex.lex()
