summary refs log tree commit diff
path: root/nandgame/assembler/lexer.py
diff options
context:
space:
mode:
Diffstat (limited to 'nandgame/assembler/lexer.py')
-rw-r--r-- nandgame/assembler/lexer.py 67
1 files changed, 67 insertions, 0 deletions
diff --git a/nandgame/assembler/lexer.py b/nandgame/assembler/lexer.py
new file mode 100644
index 0000000..eb035f2
--- /dev/null
+++ b/nandgame/assembler/lexer.py
@@ -0,0 +1,67 @@
+
+import ply.lex as lex
+
+# List of token names. This is always required: PLY reads the module-level
+# `tokens` sequence to learn which token types the lexer may emit. Each name
+# here has a matching `t_<NAME>` rule further down in this module.
+tokens = (
+ 'OP',
+ 'JUMP',
+ 'COMMA',
+ 'COLON',
+ 'SYMBOL',
+ 'NUMBER',
+ 'HEXNUMBER',
+ 'DOT',
+ 'REG'
+)
+
+# Regular expression rules for simple tokens: single punctuation characters
+# that need no value conversion, so they are given as plain pattern strings
+# rather than as rule functions.
+t_COMMA = r','
+t_COLON = r':'
+t_DOT = r'\.'
+
+# Instruction mnemonics. The \b anchors keep a mnemonic from being split off
+# the front of a longer identifier: this rule is defined before t_SYMBOL and
+# so takes priority, and without the anchors "address" would lex as OP "add"
+# followed by the leftover "ress".
+def t_OP(t):
+ r"\b(?:mov|and|dec|hlt|add|sub|inc)\b"
+ return t
+
+# Register operands: A, AD, AM, ADM, D, DM, M, or a lone "_".
+# The \b anchors stop a register name from matching inside a longer word.
+def t_REG(t):
+ r"\b(AD?M?|DM?|M|_)\b"
+ return t
+
+# Jump mnemonics. The \b anchors keep a mnemonic from being split off the
+# front of a longer identifier: this rule is defined before t_SYMBOL and so
+# takes priority, and without the anchors a label such as "jmp_table" would
+# lex as JUMP "jmp" followed by the leftover "_table".
+def t_JUMP(t):
+ r"\b(?:jmp|jlt|jgt|jle|jge|jeq|jne)\b"
+ return t
+
+# Hex literals must be defined before decimal ones: PLY tries function rules
+# in definition order, and t_NUMBER would otherwise consume the "#0" of
+# "#0x1F", leaving "x1F" to be lexed as something else entirely.
+def t_HEXNUMBER(t):
+ r'\#0x[0-9a-fA-F]+'
+ # int() with base 16 accepts the "0x" prefix, so only "#" is stripped.
+ t.value = int(t.value[1:], 16)
+ return t
+
+# Decimal immediate: "#" followed by digits, converted to an int.
+def t_NUMBER(t):
+ r'\#\d+'
+ # Drop the leading "#" before converting the decimal digits.
+ t.value = int(t.value[1:])
+ return t
+
+# Symbols (labels and other names): a lowercase letter followed by any number
+# of letters, digits or underscores. The tail is optional so that one-letter
+# names such as "x" are accepted instead of being reported as illegal
+# characters.
+def t_SYMBOL(t):
+ r'[a-z][A-Za-z0-9_]*'
+ return t
+
+# Line tracking: swallow each run of newlines and advance the lexer's line
+# counter by the number of newlines in the run. No token is produced.
+def t_newline(t):
+ r'\n+'
+ newline_count = len(t.value)
+ t.lexer.lineno = t.lexer.lineno + newline_count
+
+# A string containing ignored characters (spaces and tabs); the lexer skips
+# these between tokens without producing anything.
+t_ignore = ' \t'
+# Disabled string-rule form for discarding comments; the t_COMMENT function
+# below does that job instead.
+#t_ignore_COMMENT = r';.*'
+
+# Comments run from ";" to the end of the line ("." does not match the
+# newline). Returning None tells the lexer to discard the match.
+def t_COMMENT(t):
+ r';.*'
+ return None
+
+# Error handling rule: report the first character that no rule could match,
+# then skip past that single character so lexing can continue.
+def t_error(t):
+ bad_char = t.value[0]
+ print("!!! Illegal character '%s'" % bad_char)
+ t.lexer.skip(1)
+
+# Build the lexer from the token rules defined in this module. This runs at
+# import time, so importing the module yields a ready-to-use `lexer` object.
+lexer = lex.lex()