summaryrefslogtreecommitdiff
path: root/nandgame/assembler
diff options
context:
space:
mode:
author uvok 2026-01-12 19:16:17 +0100
committer uvok 2026-01-12 19:16:17 +0100
commit 7e2ecc6e1f9e49439696adc72dcbefd699168e44 (patch)
tree a72a262bc8742137258237f19d72933d6d4cf0dd /nandgame/assembler
parent 9b402d2bd6eb5d5d37e892d0066b2d2990341889 (diff)
Add first version of assembler
Diffstat (limited to 'nandgame/assembler')
-rw-r--r-- nandgame/assembler/lexer.py 67
-rw-r--r-- nandgame/assembler/parser.py 115
-rw-r--r-- nandgame/assembler/tlexer.py 14
-rw-r--r-- nandgame/assembler/tparser.py 8
4 files changed, 204 insertions, 0 deletions
diff --git a/nandgame/assembler/lexer.py b/nandgame/assembler/lexer.py
new file mode 100644
index 0000000..eb035f2
--- /dev/null
+++ b/nandgame/assembler/lexer.py
@@ -0,0 +1,67 @@
+
+import ply.lex as lex
+
+# Token names produced by this lexer; PLY requires this tuple and parser.py imports it.
+tokens = (
+ 'OP',  # instruction mnemonic (t_OP)
+ 'JUMP',  # jump mnemonic (t_JUMP)
+ 'COMMA',
+ 'COLON',
+ 'SYMBOL',  # identifier starting with a lower-case letter (t_SYMBOL)
+ 'NUMBER',  # decimal literal written as #<digits>
+ 'HEXNUMBER',  # hexadecimal literal written as #0x<hex digits>
+ 'DOT',
+ 'REG'  # register name (t_REG)
+)
+
+# Single-character punctuation tokens, given as plain regex strings
+t_COMMA = r','
+t_COLON = r':'
+t_DOT = r'\.'
+
+def t_OP(t):  # Opcode mnemonic, matched only as a whole word (cf. t_REG).
+    r"\b(mov|and|dec|hlt|add|sub|inc)\b"  # without \b, "address" lexes as OP "add" + "ress"
+    return t
+
+def t_REG(t):  # register operand: A, AD, AM, ADM, D, DM, M or "_", matched as a whole word
+ r"\b(AD?M?|DM?|M|_)\b"
+ return t
+
+def t_JUMP(t):  # Jump mnemonic, matched only as a whole word (cf. t_REG).
+    r"\b(jmp|jlt|jgt|jle|jge|jeq|jne)\b"  # without \b, "jmptable" lexes as JUMP "jmp" + "table"
+    return t
+
+def t_HEXNUMBER(t):  # Must precede t_NUMBER: PLY tries function rules in definition
+    r'\#0x[0-9a-fA-F]+'  # order, and r'\#\d+' would otherwise consume the "#0" of "#0x1F".
+    t.value = int(t.value[1:], 16)  # drop '#'; int(..., 16) accepts the "0x" prefix
+    return t
+
+def t_NUMBER(t):  # Decimal literal written as #<digits>.
+    r'\#\d+'
+    t.value = int(t.value[1:])  # drop the leading '#'
+    return t
+
+def t_SYMBOL(t):  # Identifier: a lower-case letter followed by zero or more [A-Za-z0-9_].
+    r'[a-z][A-Za-z0-9_]*'  # '*' rather than '+', so one-letter names such as "x" are accepted
+    return t
+
+# Keep lexer.lineno in step with the input; nothing is returned for newlines.
+def t_newline(t):
+    r'\n+'
+    t.lexer.lineno += t.value.count('\n')  # the match consists solely of newlines
+
+# A string containing ignored characters (spaces and tabs)
+t_ignore = ' \t'
+#t_ignore_COMMENT = r';.*'
+
+def t_COMMENT(t):  # ';' starts a comment that runs to the end of the line
+    r';.*'
+    return None  # nothing is returned for a comment
+
+def t_error(t):  # Lexer error rule: report the offending character, then move past it.
+    bad_char = t.value[0]
+    print("!!! Illegal character '%s'" % bad_char)
+    t.lexer.skip(1)
+
+# Build the lexer
+lexer = lex.lex()
diff --git a/nandgame/assembler/parser.py b/nandgame/assembler/parser.py
new file mode 100644
index 0000000..b2cc3f9
--- /dev/null
+++ b/nandgame/assembler/parser.py
@@ -0,0 +1,115 @@
+import sys
+
+import ply.yacc as yacc
+from ply.lex import LexToken
+
+# Get the token map from the lexer. This is required.
+from lexer import tokens
+
+# Error rule for syntax errors
+
+def p_error(p: LexToken):  # p is falsy when the input ended unexpectedly
+    if not p:
+        print("Unexpected end of file.")
+        return
+    print(f"Unexpected {p.value} on line {p.lineno}")
+
+def p_program(p):  # Whole program: a (possibly empty) list of instructions.
+    '''program : instruction_list
+               | empty'''
+    p[0] = p[1] or []  # was never set, so parse() returned None; 'empty' yields None -> []
+
+def p_empty(p):  # empty production; p[0] is left unset
+    '''empty :'''
+    return None
+
+def p_inss(p):
+    '''instruction_list : instruction_list instruction
+                        | instruction
+    '''
+
+    # Build the instruction list left to right.  len(p) includes the result
+    # slot p[0], so anything longer than 2 is the recursive alternative.
+    if len(p) > 2:
+        earlier, latest = p[1], p[2]
+        # Concatenate into a new list; the list held in p[1] is not mutated.
+        p[0] = earlier + [latest]
+    else:
+        # Single instruction: start a fresh one-element list.
+        p[0] = [p[1]]
+
+def p_inss2(p):
+    '''instruction_list2 : instruction instruction_list2
+                         | instruction
+    '''
+    # Right-recursive counterpart of instruction_list.  NOTE(review): no other
+    # rule in this grammar refers to instruction_list2 - confirm it is wanted.
+    has_tail = len(p) > 2
+    p[0] = [p[1]] + p[2] if has_tail else [p[1]]
+
+    # Debug trace of what was matched.
+    print(f"INSes2: {len(p)}")
+    print(f" {p[1]}")
+    if has_tail:
+        print(f" {p[2]}")
+
+def p_instruction(p):
+    '''instruction : noarg
+                   | onearg
+                   | twoarg
+                   | jumpdest
+    '''
+    # NOTE: the invalid_arg alternative is currently not part of this rule.
+    matched = p[1]
+    print(f"INS: {matched}")  # debug trace
+    p[0] = matched
+
+def p_jumpdest(p):  # label definition (SYMBOL followed by ':'); the result is a 1-tuple holding the name
+ '''jumpdest : SYMBOL COLON'''
+ p[0] = (p[1], )
+
+def p_twoarg(p):  # result: (opcode, jump, destination, first operand, second operand)
+    '''twoarg : op REG COMMA argument COMMA argument'''
+    opcode, jump = p[1]  # p_op always produces an (opcode, jump) pair
+    p[0] = (opcode, jump, p[2], p[4], p[6])
+
+def p_argument(p):  # An operand: numeric literal, register name or symbol.
+    '''argument : number
+                | REG
+                | SYMBOL
+    '''
+    p[0] = p[1]  # was 'pass', which left every operand as None in the instruction tuples
+
+def p_onearg(p):  # result: (opcode, jump, destination, operand, None)
+    '''onearg : op REG COMMA argument'''
+    opcode, jump = p[1]  # p_op always produces an (opcode, jump) pair
+    p[0] = (opcode, jump, p[2], p[4], None)
+
+def p_noarg(p):  # bare opcode: destination and both operands are None
+    '''noarg : op'''
+    opcode, jump = p[1]  # p_op always produces an (opcode, jump) pair
+    p[0] = (opcode, jump, None, None, None)
+
+def p_invalid_arg(p):  # NOTE: not referenced from p_instruction at present
+    '''invalid_arg : op argument'''
+    # An operand directly after the opcode means the destination was left out.
+    print(f"Invalid opcode, destination missing: {p[1:]}")
+    sys.exit(1)
+
+def p_op(p):
+    '''op : OP DOT JUMP
+          | OP
+    '''
+    # The result is always an (opcode, jump) pair; jump is None when the
+    # opcode was written without a ".JUMP" suffix.
+    jump = p[3] if len(p) > 2 else None
+    p[0] = (p[1], jump)
+
+
+def p_number(p):  # numeric literal; t_NUMBER / t_HEXNUMBER already converted the value to int
+ '''number : NUMBER
+ | HEXNUMBER
+ '''
+ p[0] = p[1]  # pass the value through unchanged
+
+parser = yacc.yacc()
diff --git a/nandgame/assembler/tlexer.py b/nandgame/assembler/tlexer.py
new file mode 100644
index 0000000..6203b1a
--- /dev/null
+++ b/nandgame/assembler/tlexer.py
@@ -0,0 +1,14 @@
+import sys
+
+from lexer import lexer
+
+# Read the file named on the command line as raw bytes and decode it
+# explicitly; anything outside ASCII raises UnicodeDecodeError.
+with open(sys.argv[1], 'rb') as f:
+    source_text = f.read().decode('ascii')
+    lexer.input(source_text)
+
+# Print every token; lexer.token() returns None once the input is exhausted,
+# which is the sentinel that ends the iteration.
+for tok in iter(lexer.token, None):
+    print(tok)
diff --git a/nandgame/assembler/tparser.py b/nandgame/assembler/tparser.py
new file mode 100644
index 0000000..c5b2e9f
--- /dev/null
+++ b/nandgame/assembler/tparser.py
@@ -0,0 +1,8 @@
+import sys
+
+from parser import parser
+
+with open(sys.argv[1], 'rb') as f:
+    source_text = f.read().decode('ascii')  # ASCII only; other bytes raise UnicodeDecodeError
+    # tracking=True is handed to PLY's parse(); see the PLY yacc documentation.
+    parser.parse(source_text, tracking=True)