summary | refs | log | tree | commit | diff
path: root/nandgame/assembler/py_nand_ass
diff options
context:
space:
mode:
Diffstat (limited to 'nandgame/assembler/py_nand_ass')
-rw-r--r--  nandgame/assembler/py_nand_ass/__init__.py  0
-rw-r--r--  nandgame/assembler/py_nand_ass/assembler.py  164
-rwxr-xr-x  nandgame/assembler/py_nand_ass/disas.py  201
-rwxr-xr-x  nandgame/assembler/py_nand_ass/disas_test.py  198
-rwxr-xr-x  nandgame/assembler/py_nand_ass/lexer.py  91
-rwxr-xr-x  nandgame/assembler/py_nand_ass/parser.py  184
-rw-r--r--  nandgame/assembler/py_nand_ass/parser_types.py  64
-rwxr-xr-x  nandgame/assembler/py_nand_ass/simple_assembler.py  324
8 files changed, 1226 insertions, 0 deletions
diff --git a/nandgame/assembler/py_nand_ass/__init__.py b/nandgame/assembler/py_nand_ass/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/nandgame/assembler/py_nand_ass/__init__.py
diff --git a/nandgame/assembler/py_nand_ass/assembler.py b/nandgame/assembler/py_nand_ass/assembler.py
new file mode 100644
index 0000000..ee8baf0
--- /dev/null
+++ b/nandgame/assembler/py_nand_ass/assembler.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+
+from dataclasses import dataclass
+import sys
+from typing import Callable, Iterable
+
+from . import parser_types as pt
+from .simple_assembler import encode_instruction
+
+
+@dataclass
+class MnemonicInfo:
+    """Static description of one assembler mnemonic, used to validate instructions."""
+
+    # Mnemonic text as written in assembly source (e.g. "add").
+    opcode: str
+    # Operand count the mnemonic expects; compared against Instruction.num_args.
+    num_args: int
+    # Whether a jump suffix may be attached to this mnemonic.
+    supports_jmp: bool
+
+
+class Program:
+    """Label table plus the machine code assembled so far.
+
+    ``labels`` maps label names to instruction addresses, ``instructions``
+    holds the encoded words (2 bytes each, little-endian) and ``pc`` is the
+    address counter maintained by the assembler passes.
+    """
+
+    def __init__(self):
+        self.labels: dict[str, int] = {}
+        self.instructions: bytes = b""
+        self.pc: int = 0
+
+    def encode(self, ins: pt.Instruction):
+        """Encode *ins* and append the resulting 16-bit word to ``instructions``."""
+        first_operand = self._resolve(ins, lambda: ins.arg1)
+        second_operand = self._resolve(ins, lambda: ins.arg2)
+        dest_name = ins.dest.name if ins.dest else ""
+        jump = ins.jumptarget or ""
+
+        word = encode_instruction(
+            ins.opcode, dest_name, first_operand, second_operand, jump
+        )
+        self.instructions += word.to_bytes(length=2, byteorder="little")
+
+    def _resolve(
+        self,
+        ins: pt.Instruction,
+        get_prop: Callable[[], pt.Symbol | pt.Register | pt.Immediate | str | None],
+    ):
+        """Turn the operand returned by *get_prop* into a plain value.
+
+        Symbols become their label address, registers their name and
+        immediates their integer value; anything else is passed through.
+
+        Raises:
+            ValueError: the operand is a symbol with no matching label.
+        """
+        operand = get_prop()
+        match operand:
+            case pt.Symbol():
+                address = self.labels.get(operand.name)
+                if address is None:
+                    raise ValueError(
+                        f"Line {ins.lineno}: Label {operand.name} not defined"
+                    )
+                return address
+            case pt.Register():
+                return operand.name
+            case pt.Immediate():
+                return operand.value
+        return operand
+
+    def write_to_file(self, filename: str) -> None:
+        """Write the assembled bytes to *filename* and report it on stdout."""
+        with open(filename, "wb") as sink:
+            sink.write(self.instructions)
+        print(f"Output written to {filename}")
+
+
+# Validation table: mnemonic -> (mnemonic, expected operand count, jump suffix allowed).
+# The operand count includes the destination register, matching Instruction.num_args.
+opcode_infos: dict[str, MnemonicInfo] = {
+    "and": MnemonicInfo("and", 3, True),
+    "or": MnemonicInfo("or", 3, True),
+    "xor": MnemonicInfo("xor", 3, True),
+    "not": MnemonicInfo("not", 2, True),
+    "mov": MnemonicInfo("mov", 2, True),
+    "add": MnemonicInfo("add", 3, True),
+    "inc": MnemonicInfo("inc", 2, True),
+    "sub": MnemonicInfo("sub", 3, True),
+    "dec": MnemonicInfo("dec", 2, True),
+    "cmp": MnemonicInfo("cmp", 2, True),
+    "neg": MnemonicInfo("neg", 2, True),
+    "hlt": MnemonicInfo("hlt", 0, False),
+    "nop": MnemonicInfo("nop", 0, False),
+}
+
+
+def get_op_info(instruction: pt.Instruction) -> MnemonicInfo | None:
+    """Return the table entry for the instruction's opcode, or None if unknown."""
+    mnemonic = instruction.opcode
+    return opcode_infos.get(mnemonic)
+
+
+def _error_at(ins: pt.Instruction, message: str) -> pt.ErrorInstruction:
+    """Build an error record carrying the line number and opcode of *ins*."""
+    return pt.ErrorInstruction(
+        lineno=ins.lineno,
+        opcode=ins.opcode,
+        error_message=message,
+    )
+
+
+def check_instructions(
+    instructions: Iterable[pt.AsmLine],
+) -> Iterable[pt.ErrorInstruction]:
+    """Yield an error record for every problem found in *instructions*.
+
+    Errors already produced by the parser are passed through unchanged.
+    Lines that are not instructions are skipped. One instruction may yield
+    several errors.
+    """
+    for line in instructions:
+        if isinstance(line, pt.ErrorInstruction):
+            # Parser-generated error: forward as-is.
+            yield line
+            continue
+        if not isinstance(line, pt.Instruction):
+            continue
+
+        sources = (line.arg1, line.arg2)
+        both_present = all(src is not None for src in sources)
+        if both_present and not any(isinstance(src, pt.Register) for src in sources):
+            yield _error_at(line, "At least one argument must be a register.")
+
+        info = get_op_info(line)
+        if info is None:
+            # Nothing further can be checked without a table entry.
+            yield _error_at(line, "Unknown instruction")
+            continue
+
+        if info.num_args != line.num_args:
+            yield _error_at(
+                line, f"Expected {info.num_args} args, got {line.num_args}."
+            )
+
+        if line.jumptarget and not info.supports_jmp:
+            yield _error_at(line, "OPcode got a jump, but it's not supported here.")
+
+
+def assemble(instructions: Iterable[pt.AsmLine]) -> Program:
+    """Assemble parsed lines into a Program using two passes.
+
+    Pass one records the address of every label; pass two encodes each
+    instruction, resolving label references (forward ones included).
+
+    A redefined label keeps its first address and a warning is written to
+    stderr. Lines that are neither labels nor instructions are ignored.
+
+    Raises:
+        ValueError: propagated from encoding, e.g. for an undefined label.
+    """
+    # The input is walked twice; a one-shot iterable such as a generator
+    # would be exhausted after the first pass and silently produce an empty
+    # program, so take a snapshot first.
+    lines = list(instructions)
+    prog = Program()
+
+    prog.pc = 0
+    # first pass: populate symbols
+    for ins in lines:
+        match ins:
+            case pt.JumpTarget():
+                lblname = ins.label.name
+                if lblname in prog.labels:
+                    print(
+                        f"WARNING: Label {lblname} redefined on line {ins.lineno}. Using previous definition.",
+                        file=sys.stderr,
+                    )
+                else:
+                    prog.labels[lblname] = prog.pc
+            case pt.Instruction():
+                prog.pc += 1
+            case _:
+                pass
+
+    prog.pc = 0
+    # second pass: assemble with resolve
+    for ins in lines:
+        match ins:
+            case pt.Instruction():
+                prog.encode(ins)
+                prog.pc += 1
+            case _:
+                pass
+    return prog
diff --git a/nandgame/assembler/py_nand_ass/disas.py b/nandgame/assembler/py_nand_ass/disas.py
new file mode 100755
index 0000000..116aea4
--- /dev/null
+++ b/nandgame/assembler/py_nand_ass/disas.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python3
+
+"""
+Disassembler for nandgame.
+
+Using my own flavor of assembly language.
+I don't like the "C-style" one nandgame introduces.
+"""
+
+import sys
+
+# Operand text used when the X input of the ALU is forced to zero.
+ZERO = "#0"
+# Destination text used when no destination bit is set.
+DEST_NONE = "_"
+# Jump text used when no jump bit is set.
+JUMP_NONE = ""
+# NOTE(review): not referenced anywhere in the visible code.
+JUMPS_IF_NZERO = ["jgt", "jlt", "jne", "jmp"]
+# Jump conditions that fixup_ins treats as taken when the ALU result is zero.
+JUMPS_IF_ZERO = ["jge", "jle", "jeq", "jmp"]
+
+# NOTE(review): not referenced in the visible part of this module.
+ENDIANNESS = "little"
+
+
+def decode_jump(ins: int) -> str:
+    """Return the jump mnemonic held in bits 0..2 of *ins*.
+
+    Bit 2 is "less than", bit 1 "equal" and bit 0 "greater than"; no bit set
+    yields JUMP_NONE and all three set yields "jmp".
+    """
+    # Indexed by the raw 3-bit field: (lt << 2) | (eq << 1) | gt.
+    by_condition_bits = (
+        JUMP_NONE,
+        "jgt",
+        "jeq",
+        "jge",
+        "jlt",
+        "jne",
+        "jle",
+        "jmp",
+    )
+    return by_condition_bits[ins & 0x7]
+
+
+def decode_ins(ins: int) -> tuple[str, bool]:
+    """Decode the ALU operation of *ins*.
+
+    The 3-bit opcode is built from bit 10 (high bit) and bits 9..8.
+
+    Returns:
+        (mnemonic, two_op) where ``two_op`` tells whether the operation
+        takes two source operands.
+    """
+    operations = (
+        ("and", True),
+        ("or", True),
+        ("xor", True),
+        ("not", False),
+        ("add", True),
+        ("inc", False),
+        ("sub", True),
+        ("dec", False),
+    )
+    low_bits = (ins >> 8) & 0x03
+    high_bit = (ins >> 10) & 0x01
+    return operations[(high_bit << 2) | low_bits]
+
+
+def decode_arg1(ins: int) -> str:
+    """Return the first (X) operand of *ins*.
+
+    X is D by default. Bit 7 (zx) forces it to ZERO; otherwise bit 6 (sw)
+    swaps in the other input, which is M when bit 12 is set and A when not.
+    """
+    if ins & (1 << 7):
+        return ZERO
+    if not ins & (1 << 6):
+        return "D"
+    return "M" if ins & (1 << 12) else "A"
+
+
+def decode_arg2(ins: int) -> str:
+    """Return the second (Y) operand of *ins*.
+
+    Y is A, or M when bit 12 is set, unless bit 6 (sw) swaps D into this
+    position. Bit 7 (zx) is ignored here because it zeroes X only.
+    """
+    swapped = bool(ins & (1 << 6))
+    if swapped:
+        return "D"
+    reads_memory = bool(ins & (1 << 12))
+    return "M" if reads_memory else "A"
+
+
+def decode_dest(ins: int) -> str:
+    """Return the destination registers of *ins* as text.
+
+    Bits 5, 4 and 3 select A, D and M; the letters are concatenated in that
+    order. DEST_NONE is returned when none of the bits is set.
+    """
+    selected = "".join(
+        letter
+        for letter, bit in (("A", 5), ("D", 4), ("M", 3))
+        if ins & (1 << bit)
+    )
+    return selected or DEST_NONE
+
+
+def decode_instruction(ins: int) -> list[str]:
+    """Decode one 16-bit word without any simplification.
+
+    Returns:
+        A five-element list ``[mnemonic, destination, X, Y, jump]``. A word
+        with bit 15 clear is a load-immediate and decodes to
+        ``mov A, #<value>``.
+    """
+    if ins & 0x8000 == 0:
+        return ["mov", "A", f"#{ins}", "", ""]
+
+    mnemonic, takes_two = decode_ins(ins)
+    second = decode_arg2(ins) if takes_two else ""
+    return [mnemonic, decode_dest(ins), decode_arg1(ins), second, decode_jump(ins)]
+
+
+def fixup_ins(ins: int) -> list[str]:
+    """Decode *ins* and rewrite common encodings as friendlier pseudo-ops.
+
+    Rewrites applied (same five-element layout as decode_instruction):
+
+    * ``sub`` from zero            -> ``neg``
+    * ``and`` with zero            -> ``mov dest, #0``, or a bare
+      ``jmp``/``nop`` when there is no destination
+    * ``add``/``or``/``xor`` zero  -> ``mov dest, Y`` (``nop`` when it has
+      neither destination nor jump)
+    * ``not`` of zero, no dest     -> ``jmp`` or ``nop`` depending on whether
+      the jump condition holds for an all-ones result
+    * ``sub`` without destination  -> ``cmp``
+
+    Everything else is returned as decoded.
+    """
+    mnemonic, dest, op1, op2, jumpdest = decode_instruction(ins)
+    discards_result = dest == DEST_NONE
+
+    if op1 != ZERO:
+        if discards_result and mnemonic == "sub":
+            return ["cmp", "", op1, op2, jumpdest]
+        return [mnemonic, dest, op1, op2, jumpdest]
+
+    # From here on the first operand is the constant zero.
+    if mnemonic == "sub":
+        return ["neg", dest, op2, "", jumpdest]
+
+    if mnemonic == "and":
+        # The result is always zero, so only "taken on zero" jumps fire.
+        jump_taken = jumpdest in JUMPS_IF_ZERO
+        if discards_result:
+            if jump_taken:
+                return ["jmp", "", "", "", ""]
+            return ["nop", "", "", "", ""]
+        return ["mov", dest, ZERO, "", "jmp" if jump_taken else ""]
+
+    if mnemonic in ("add", "or", "xor"):
+        # The result equals the second operand.
+        if discards_result and jumpdest == JUMP_NONE:
+            return ["nop", "", "", "", ""]
+        return ["mov", dest, op2, "", jumpdest]
+
+    if mnemonic == "not" and discards_result:
+        # The result is all ones (negative as a signed value), so
+        # equal/greater conditions never fire.
+        if jumpdest in ("jeq", "jgt", "jge", JUMP_NONE):
+            return ["nop", "", "", "", ""]
+        return ["jmp", "", "", "", ""]
+
+    return [mnemonic, dest, op1, op2, jumpdest]
+
+
+def print_decoded(ins: int, simplify: bool) -> str:
+    """Format one 16-bit word as a line of assembly text.
+
+    Despite the name, nothing is printed; the text is returned.
+
+    Args:
+        ins: the instruction word.
+        simplify: when true, apply the fixup_ins rewrites and show words with
+            bit 15 set and bit 14 clear as ``hlt``.
+
+    Returns:
+        ``mnemonic[.jump]`` padded to 9 columns, followed by the destination
+        and the operands.
+    """
+    # Bit 15 set with bit 14 clear is not a regular instruction. Emit the
+    # mnemonic the assembler accepts ("hlt") so the output can be re-assembled.
+    if simplify and ins & 0xC000 == 0x8000:
+        return "hlt"
+
+    if simplify:
+        (mnemonic, dest, op1, op2, jumpdest) = fixup_ins(ins)
+    else:
+        (mnemonic, dest, op1, op2, jumpdest) = decode_instruction(ins)
+
+    jumpdest_str = f".{jumpdest}" if jumpdest else ""
+    opcode_str = f"{mnemonic}{jumpdest_str}"
+    # An empty destination is replaced by blanks to keep the columns aligned.
+    dest_str = f"{dest}, " if dest else 7 * " "
+    op2_str = ", " if op2 else ""
+    op1_str = f"{op1}{op2_str}"
+    return f"{opcode_str:<9}{dest_str:<6}{op1_str:<4}{op2}"
diff --git a/nandgame/assembler/py_nand_ass/disas_test.py b/nandgame/assembler/py_nand_ass/disas_test.py
new file mode 100755
index 0000000..ed4430c
--- /dev/null
+++ b/nandgame/assembler/py_nand_ass/disas_test.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python3
+
+"""
+Test bench for the disassembler.
+"""
+
+import unittest
+
+from .disas import decode_instruction as di
+
+
+def make_instruction(ins):
+    """Place the 5-bit ALU selector *ins* at bits 6..10 and set bit 15."""
+    return (ins << 6) | 0x8000
+
+
+def dbglog(message: str):
+    """Write *message* to stdout on a fresh line, without a trailing newline."""
+    print(f"\n{message}", end="")
+
+
+class DisasUnitTest(unittest.TestCase):
+    """Checks decode_instruction against the nandgame ALU truth table.
+
+    Each case lists the selector bits in the order ``u | op1 | op0 | zx | sw``
+    (bits 10..6 of the instruction word). Expectations use the raw decoder
+    output: a zeroed X operand is shown as "#0" and bitwise inversion is
+    "not"; the "neg" pseudo-op only appears after simplification.
+    """
+
+    def _check(self, selector: int, mnemonic: str, x: str, y: str):
+        """Decode *selector* and compare mnemonic and both operands."""
+        (decoded, _, X, Y, _) = di(make_instruction(selector))
+        dbglog(f" {decoded} {X}, {Y}")
+        self.assertEqual(decoded, mnemonic)
+        self.assertEqual(X, x)
+        self.assertEqual(Y, y)
+
+    # D+A | 1 | 0 | 0 | 0 | 0
+    def test_add(self):
+        self._check(0b10000, "add", "D", "A")
+
+    # D-A | 1 | 1 | 0 | 0 | 0
+    def test_sub1(self):
+        self._check(0b11000, "sub", "D", "A")
+
+    # A-D | 1 | 1 | 0 | 0 | 1
+    def test_sub2(self):
+        self._check(0b11001, "sub", "A", "D")
+
+    # D+1 | 1 | 0 | 1 | 0 | 0
+    def test_incD(self):
+        self._check(0b10100, "inc", "D", "")
+
+    # A+1 | 1 | 0 | 1 | 0 | 1
+    def test_incA(self):
+        self._check(0b10101, "inc", "A", "")
+
+    # D-1 | 1 | 1 | 1 | 0 | 0
+    def test_decD(self):
+        self._check(0b11100, "dec", "D", "")
+
+    # A-1 | 1 | 1 | 1 | 0 | 1
+    def test_decA(self):
+        self._check(0b11101, "dec", "A", "")
+
+    # -D | 1 | 1 | 0 | 1 | 1
+    def test_minusD(self):
+        self._check(0b11011, "sub", "#0", "D")
+
+    # -A | 1 | 1 | 0 | 1 | 0
+    def test_minusA(self):
+        self._check(0b11010, "sub", "#0", "A")
+
+    # -1 | 1 | 1 | 1 | 1 | 0
+    def test_minusOne(self):
+        self._check(0b11110, "dec", "#0", "")
+
+    # 1 | 1 | 0 | 1 | 1 | 0
+    def test_plusOne(self):
+        self._check(0b10110, "inc", "#0", "")
+
+    # D | 1 | 0 | 0 | 1 | 1
+    def test_D(self):
+        self._check(0b10011, "add", "#0", "D")
+
+    # A | 1 | 0 | 0 | 1 | 0
+    def test_A(self):
+        self._check(0b10010, "add", "#0", "A")
+
+    # D&A | 0 | 0 | 0 | 0 | 0
+    def test_DandA(self):
+        self._check(0b00000, "and", "D", "A")
+
+    # D|A | 0 | 0 | 1 | 0 | 0
+    def test_DorA(self):
+        self._check(0b00100, "or", "D", "A")
+
+    # ~D | 0 | 1 | 1 | 0 | 0
+    def test_negD(self):
+        self._check(0b01100, "not", "D", "")
+
+    # ~A | 0 | 1 | 1 | 0 | 1
+    def test_negA(self):
+        self._check(0b01101, "not", "A", "")
+
+
+# Run the test cases when this module is executed as a script.
+if __name__ == "__main__":
+    unittest.main()
diff --git a/nandgame/assembler/py_nand_ass/lexer.py b/nandgame/assembler/py_nand_ass/lexer.py
new file mode 100755
index 0000000..e55ab05
--- /dev/null
+++ b/nandgame/assembler/py_nand_ass/lexer.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+
+import ply.lex as lex
+
+# Token names; PLY requires this list and the parser imports it.
+tokens = (
+    "OP",
+    "JUMP",
+    "COMMA",
+    "COLON",
+    "SYMBOL",
+    "NUMBER",
+    "HEXNUMBER",
+    "DOT",
+    "REG",
+    "NL",
+)
+
+# Single-character tokens, given as plain regular-expression strings.
+t_COMMA = r","
+t_COLON = r":"
+t_DOT = r"\."
+
+
+# Instruction mnemonics. The word boundaries stop a mnemonic from matching as
+# the prefix of a longer identifier: without them a label such as "orange"
+# would be split into OP("or") followed by SYMBOL("ange"), because this rule
+# is tried before t_SYMBOL.
+def t_OP(t):
+    r"\b(and|or|xor|not|mov|add|inc|sub|dec|cmp|neg|hlt|nop)\b"
+    return t
+
+
+# Register names: A, D, M, the combinations AD, AM, DM, ADM, or "_".
+# The docstring is the token's regular expression and must stay as it is.
+def t_REG(t):
+    r"\b(AD?M?|DM?|M|_)\b"
+    return t
+
+
+# Jump conditions written after the dot in "op.jxx". Word boundaries keep the
+# rule from matching the prefix of a longer identifier such as "jmptable",
+# which would otherwise never reach t_SYMBOL.
+def t_JUMP(t):
+    r"\b(jmp|jlt|jgt|jle|jge|jeq|jne)\b"
+    return t
+
+
+# Hexadecimal immediate, e.g. "#0x1F". This rule must be defined before
+# t_NUMBER: PLY tries function rules in definition order, and the decimal
+# pattern would otherwise consume the "#0" of "#0x1F" and leave "x1F" behind.
+def t_HEXNUMBER(t):
+    r"\#0x[0-9a-fA-F]+"
+    # int() with base 16 accepts the "0x" prefix, so only "#" is removed.
+    t.value = int(t.value[1:], 16)
+    return t
+
+
+# Decimal immediate, e.g. "#42"; the token value becomes the integer.
+def t_NUMBER(t):
+    r"\#\d+"
+    t.value = int(t.value[1:])
+    return t
+
+
+# Label / symbol names: a lowercase letter followed by letters, digits or "_".
+# NOTE(review): the "+" quantifier requires at least two characters, so a
+# one-letter name is not a SYMBOL - confirm whether that is intended.
+def t_SYMBOL(t):
+    r"[a-z][A-Za-z0-9_]+"
+    return t
+
+
+# Newlines are returned as NL tokens and also advance the lexer's line
+# counter by the number of newline characters matched.
+def t_NL(t):
+    r"\n+"
+    newline_count = len(t.value)
+    t.lexer.lineno = t.lexer.lineno + newline_count
+    return t
+
+
+# Characters skipped between tokens: space and tab.
+t_ignore = " \t"
+# Comments are discarded by the t_COMMENT rule rather than by this shorthand:
+# t_ignore_COMMENT = r';.*'
+
+
+# A comment runs from ";" to the end of the line. Returning None makes the
+# lexer drop the match without producing a token.
+def t_COMMENT(t):
+    r";.*"
+    return None
+
+
+# Called for input that matches no rule: report the offending character on
+# stdout and resume lexing one character later.
+def t_error(t):
+    offending = t.value[0]
+    print("!!! Illegal character '%s'" % offending)
+    t.lexer.skip(1)
+
+
+# End of input: feed one extra newline to the lexer the first time this is
+# reached, so a final line without a trailing newline still ends in NL. The
+# second time, report the real end of input.
+def t_eof(t):
+    lexer = t.lexer
+    if lexer.newline_added:
+        return None
+    lexer.input("\n")
+    lexer.newline_added = True
+    return lexer.token()
+
+
+# Build the module-level lexer from the rules above.
+lexer = lex.lex()
+# Flag read and set by t_eof; it is never reset in the code visible here.
+lexer.newline_added = False
diff --git a/nandgame/assembler/py_nand_ass/parser.py b/nandgame/assembler/py_nand_ass/parser.py
new file mode 100755
index 0000000..7cc4930
--- /dev/null
+++ b/nandgame/assembler/py_nand_ass/parser.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+
+import sys
+from typing import Any, MutableSequence, Tuple
+
+import ply.yacc as yacc
+from ply.lex import LexToken
+
+# Get the token map from the lexer. This is required.
+from .lexer import tokens
+
+from . import parser_types as pt
+
+
+# Annotation used for the production object PLY passes to every p_* function:
+# index 0 receives the result, indices 1.. hold the matched symbols.
+P = MutableSequence[Any]
+
+
+# Start rule: the result is the list of parsed lines, or None for empty input.
+# The docstring is the grammar rule read by PLY.
+def p_program(p: P) -> None:
+    """program : instruction_list
+               | empty
+    """
+    p[0] = p[1]
+
+
+# Empty production; p[0] is left unset.
+def p_empty(p: P) -> None:
+    """empty :"""
+    pass
+
+
+# Collect the lines into a list: a single line starts it, each further line
+# is added to a copy of the list built so far.
+def p_instructions(p: P) -> None:
+    """instruction_list : instruction_list line
+                        | line
+    """
+    is_first_line = len(p) == 2
+    p[0] = [p[1]] if is_first_line else p[1] + [p[2]]
+
+
+# # try right-recursive?
+# def p_instructions2(p: P) -> None:
+# """instruction_list2 : line instruction_list2
+# | line
+# """
+# if len(p) == 2:
+# p[0] = [p[1]]
+# else:
+# p[0] = [p[1]] + p[2]
+
+# print(f"INSes2: {len(p)}")
+# print(f" {p[1]}")
+# if len(p) > 2:
+# print(f" {p[2]}")
+# pass
+
+
+# One source line. A bare newline produces no value; an instruction or label
+# followed by a newline produces that item, which is also echoed to stdout.
+def p_line(p: P) -> None:
+    """line : instruction NL
+            | jumpdest NL
+            | NL
+    """
+    # | instruction error NL
+    # | jumpdest error NL
+    symbol_count = len(p)
+    if symbol_count == 2:
+        return
+
+    p[0] = p[1]
+    if symbol_count == 3:
+        print(f"Item: {p[0]}")
+        return
+
+    # Only reachable if the error-handling alternatives above are enabled.
+    assert False
+
+
+# An instruction is either a well-formed one or the one-operand error form;
+# the value of the matched alternative is passed through.
+def p_instruction(p: P) -> None:
+    """instruction : valid_instruction
+                   | one_arg_invalid
+    """
+    p[0] = p[1]
+
+
+# Wrap the 5-tuple (opcode, jump, dest, arg1, arg2) built by the operand rules
+# in an Instruction.
+# NOTE(review): p.lineno(1) refers to a nonterminal here; PLY only fills that
+# in when parsing with tracking enabled - confirm at the parse() call site.
+def p_valid_instruction(p: P) -> None:
+    """valid_instruction : no_args
+                         | two_arg
+                         | three_arg
+    """
+    tp: Tuple[Any, Any, Any, Any, Any] = p[1]
+    p[0] = pt.Instruction(p.lineno(1), *tp)
+
+
+# A label definition: "name:" becomes a JumpTarget.
+# NOTE(review): p.lineno(1) refers to the nonterminal "symbol"; confirm that
+# line tracking is enabled where parse() is called.
+def p_jumpdest(p: P) -> None:
+    """jumpdest : symbol COLON"""
+    p[0] = pt.JumpTarget(lineno=p.lineno(1), label=p[1])
+
+
+# Instruction without operands: the (opcode, jump) pair is padded with None
+# for dest, arg1 and arg2.
+def p_no_arg(p: P) -> None:
+    """no_args : opcode"""
+    mnemonic, jump = p[1]
+    p[0] = (mnemonic, jump, None, None, None)
+
+
+# An opcode followed by exactly one operand is never valid; it is turned into
+# an ErrorInstruction so the problem can be reported with its line.
+def p_onearg_invalid(p: P) -> None:
+    """one_arg_invalid : opcode argument"""
+    # p[1] is the (mnemonic, jump) pair built by the opcode rules. The error
+    # record wants the mnemonic, i.e. element 0; element 1 is the jump suffix
+    # and is None when no suffix was written.
+    op: tuple[str, str | None] = p[1]
+    p[0] = pt.ErrorInstruction(
+        p.lineno(1), op[0], "No opcode supports only one argument."
+    )
+
+
+# "op DEST, ARG": the second source operand stays None.
+def p_two_arg(p: P) -> None:
+    """two_arg : opcode register COMMA argument"""
+    mnemonic, jump = p[1]
+    dest, source = p[2], p[4]
+    p[0] = (mnemonic, jump, dest, source, None)
+
+
+# "op DEST, ARG1, ARG2": destination register plus two source operands.
+def p_three_arg(p: P) -> None:
+    """three_arg : opcode register COMMA argument COMMA argument"""
+    mnemonic, jump = p[1]
+    dest, first, second = p[2], p[4], p[6]
+    p[0] = (mnemonic, jump, dest, first, second)
+
+
+# A source operand may be a number, a register or a symbol. Which
+# combinations are allowed is checked one level up, not in the grammar.
+def p_argument(p: P) -> None:
+    """argument : number
+                | register
+                | symbol
+    """
+    p[0] = p[1]
+
+
+# Wrap the SYMBOL token text in a Symbol.
+def p_symbol(p: P) -> None:
+    """symbol : SYMBOL"""
+    p[0] = pt.Symbol(p[1])
+
+
+# Wrap the REG token text in a Register.
+def p_register(p: P) -> None:
+    """register : REG"""
+    p[0] = pt.Register(p[1])
+
+
+# An opcode with or without a jump suffix; either way the value is the
+# (mnemonic, jump) pair built by the sub-rule.
+def p_opcode(p: P) -> None:
+    """opcode : opcode_jmp
+              | opcode_njmp
+    """
+    p[0] = p[1]
+
+
+# "op.jxx": pair of mnemonic and jump condition.
+def p_opcode_jmp(p: P) -> None:
+    """opcode_jmp : OP DOT JUMP"""
+    p[0] = (p[1], p[3])
+
+
+# Plain "op": the jump slot of the pair is None.
+def p_opcode_njmp(p: P) -> None:
+    """opcode_njmp : OP"""
+    p[0] = (p[1], None)
+
+
+# Decimal or hexadecimal literal; the lexer has already converted the token
+# value to an int, which is wrapped in an Immediate here.
+def p_number(p: P) -> None:
+    """number : NUMBER
+              | HEXNUMBER
+    """
+    p[0] = pt.Immediate(p[1])
+
+
+def p_error(p: LexToken | None) -> LexToken | None:
+    """Report a syntax error on stderr and resynchronise at the next newline.
+
+    At end of input only the warning is written. Otherwise tokens are
+    discarded up to and including the next NL (or end of input), the parser's
+    error state is cleared, and the last token read is returned.
+    """
+    if not p:
+        print("WARNING: Unexpected end of file.", file=sys.stderr)
+        return None
+
+    print(
+        f"WARNING: Unexpected {repr(p.value)} on line {p.lineno}", file=sys.stderr
+    )
+
+    # Skip the remainder of the offending line.
+    tok: LexToken = parser.token()
+    while tok and tok.type != "NL":
+        tok = parser.token()
+    parser.errok()
+    return tok
+
+
+# Build the parser from the p_* rules above; p_error refers to this object.
+parser: yacc.LRParser = yacc.yacc()
diff --git a/nandgame/assembler/py_nand_ass/parser_types.py b/nandgame/assembler/py_nand_ass/parser_types.py
new file mode 100644
index 0000000..3dd44b9
--- /dev/null
+++ b/nandgame/assembler/py_nand_ass/parser_types.py
@@ -0,0 +1,64 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class Symbol:
+    """Define a symbol"""
+
+    # Symbol text as written in the source; used as the key for label lookup.
+    name: str
+
+
+@dataclass
+class Immediate:
+    """An immediate value loaded in a register."""
+
+    # Numeric value of the literal.
+    value: int
+
+
+@dataclass
+class Address:
+    """An address (used in 2nd stage assembler)."""
+
+    # Numeric address.
+    value: int
+
+
+@dataclass
+class Register:
+    """Define a source or dest register"""
+
+    # Register text as written in the source, e.g. "A", "DM" or "_".
+    name: str
+
+
+@dataclass
+class AsmLine:
+    """Base class for everything the parser produces for one source line."""
+
+    # Source line number reported by the parser.
+    lineno: int
+
+
+@dataclass
+class Instruction(AsmLine):
+    """A parsed instruction: mnemonic, optional jump suffix and up to three operands."""
+
+    # Mnemonic text, e.g. "add".
+    opcode: str
+    # Jump suffix such as "jgt"; the parser supplies None when there is none.
+    jumptarget: str | None
+    # Destination register; None for instructions written without operands.
+    dest: Register | None
+    # First and second source operands; None when absent.
+    arg1: Symbol | Register | Immediate | None
+    arg2: Symbol | Register | Immediate | None
+
+    @property
+    def num_args(self) -> int:
+        """Number of operands written, the destination included.
+
+        Counting stops at the first empty slot in the order dest, arg1, arg2.
+        """
+        if self.dest is None:
+            return 0
+        if self.arg1 is None:
+            return 1
+        if self.arg2 is None:
+            return 2
+        return 3
+
+
+@dataclass
+class ErrorInstruction(AsmLine):
+    """A line that could not be accepted, with the reason."""
+
+    # Mnemonic of the offending instruction.
+    opcode: str
+    # Human-readable description of the problem.
+    error_message: str
+
+
+@dataclass
+class JumpTarget(AsmLine):
+    """A label definition ("name:") marking the address of the next instruction."""
+
+    # The symbol being defined.
+    label: Symbol
diff --git a/nandgame/assembler/py_nand_ass/simple_assembler.py b/nandgame/assembler/py_nand_ass/simple_assembler.py
new file mode 100755
index 0000000..d684635
--- /dev/null
+++ b/nandgame/assembler/py_nand_ass/simple_assembler.py
@@ -0,0 +1,324 @@
+#!/usr/bin/env python3
+
+###
+# LLM generated
+###
+
+"""
+Assembler for nandgame, matching the custom disassembler above.
+
+Syntax (as produced by print_decoded):
+
+ mnemonic[.jump] DEST, OP1[, OP2]
+
+Examples:
+
+ mov A, #123
+ add.jgt D, D, A
+ sub _, D, M
+ not D, D
+ inc.jeq D, D
+ and _, D, M
+ xor M, D, M
+
+DEST:
+ A, D, M, any combination like AD, AM, DM, ADM, or "_" for no destination.
+
+OP1 / OP2:
+ D, A, M, #0 (for OP1 only), or #<number> for mov A,#imm (A-instruction).
+
+Jumps:
+ jlt, jle, jeq, jne, jgt, jge, jmp, or none.
+"""
+
+import sys
+from typing import Union
+
+from . import parser_types as pt
+
+# Operand text for a zeroed X input, destination text for "no destination",
+# jump text for "no jump", and the byte order used when writing words.
+ZERO = "#0"
+DEST_NONE = "_"
+JUMP_NONE = ""
+ENDIANNESS = "little"
+
+# mapping from mnemonic to (opcode, two_op)
+# opcode is the 3-bit ALU selector; two_op tells whether a second source
+# operand is required.
+MNEMONICS = {
+    "and": (0b000, True),
+    "or": (0b001, True),
+    "xor": (0b010, True),
+    "not": (0b011, False),
+    "add": (0b100, True),
+    "inc": (0b101, False),
+    "sub": (0b110, True),
+    "dec": (0b111, False),
+}
+
+# jump mnemonic -> bits 0..2
+# (bit 2 = less than, bit 1 = equal, bit 0 = greater than)
+JUMP_ENCODE = {
+    "": 0b000,
+    "jgt": 0b001,
+    "jeq": 0b010,
+    "jge": 0b011,
+    "jlt": 0b100,
+    "jne": 0b101,
+    "jle": 0b110,
+    "jmp": 0b111,
+}
+
+
+def encode_dest(dest: str) -> int:
+    """
+    Encode a destination such as "A", "DM" or "ADM" into instruction bits.
+
+    A, D and M map to bits 5, 4 and 3. DEST_NONE encodes as 0. Characters
+    other than A, D and M are ignored.
+    """
+    target = dest.strip()
+    if target == DEST_NONE:
+        return 0
+
+    mask = 0
+    for letter, position in (("A", 5), ("D", 4), ("M", 3)):
+        if letter in target:
+            mask |= 1 << position
+    return mask
+
+
+def encode_jump(jump: str) -> int:
+    """Return the 3-bit jump field for *jump* ("" means no jump).
+
+    Raises:
+        ValueError: *jump* is not a known jump condition.
+    """
+    jump = jump.strip()
+    field = JUMP_ENCODE.get(jump)
+    if field is None:
+        raise ValueError(f"Unknown jump condition: {jump}")
+    return field
+
+
+def encode_args_two_op(op1: str, op2: str) -> int:
+    """
+    Encode the operand pair of a two-operand instruction.
+
+    Returns the combination of the use_mem (bit 12), zx (bit 7) and sw
+    (bit 6) flags that selects *op1* as X and *op2* as Y.
+
+    op1 in {D, A, M, #0}
+    op2 in {D, A, M}
+
+    Raises:
+        ValueError: the pair cannot be expressed, e.g. two identical registers.
+    """
+    first = op1.strip()
+    second = op2.strip()
+
+    use_mem = 1 << 12
+    zx = 1 << 7
+    sw = 1 << 6
+
+    # Every operand pair the hardware can produce, with its flag bits.
+    encodings = {
+        ("D", "A"): 0,
+        ("D", "M"): use_mem,
+        ("A", "D"): sw,
+        ("M", "D"): use_mem | sw,
+        (ZERO, "A"): zx,
+        (ZERO, "M"): zx | use_mem,
+        (ZERO, "D"): zx | sw,
+    }
+
+    bits = encodings.get((first, second))
+    if bits is None:
+        raise ValueError(
+            f"Unsupported operand combination for two-op: {first}, {second}"
+        )
+    return bits
+
+
+def encode_args_one_op(op1: str) -> int:
+    """
+    Encode the operand of a one-operand instruction.
+
+    Canonical flag choices:
+
+        D  -> no flags
+        A  -> sw
+        M  -> sw + use_mem
+        #0 -> zx
+
+    Raises:
+        ValueError: *op1* is none of the above.
+    """
+    operand = op1.strip()
+    sw = 1 << 6
+    zx = 1 << 7
+    use_mem = 1 << 12
+
+    flags_for = {
+        ZERO: zx,
+        "D": 0,
+        "A": sw,
+        "M": sw | use_mem,
+    }
+    if operand not in flags_for:
+        raise ValueError(f"Unsupported operand for one-op: {operand}")
+    return flags_for[operand]
+
+# Operand as accepted by encode_instruction: register/operand text, an
+# integer immediate, or None when the operand is absent.
+# Earlier variant, kept for reference:
+#Arg = Union[str, int, pt.Address, pt.Immediate, pt.Register]
+Arg = Union[str, int, None]
+
+def encode_instruction(
+    mnemonic: str, dest: str, op1: Arg, op2: Arg, jump: str
+) -> int:
+    """
+    Encode a single instruction into a 16-bit integer.
+
+    Besides the eight ALU mnemonics, these pseudo-instructions are accepted
+    (the ones the disassembler's simplified output uses):
+
+        hlt            -> fixed word 0xBFFF
+        nop            -> and _, #0, A
+        mov A, #imm    -> load-immediate word (0 <= imm <= 32767)
+        mov DEST, X    -> add DEST, #0, X     (X is D, A or M)
+        neg DEST, X    -> sub DEST, #0, X
+        cmp X, Y       -> sub _, X, Y         (X is passed in ``dest``)
+
+    Integer operands are taken as immediates; None means "absent".
+
+    Raises:
+        ValueError: unknown mnemonic, unsupported operands or jump, or an
+            immediate outside the allowed range.
+    """
+    mnemonic = mnemonic.strip()
+
+    if mnemonic == "hlt":
+        return 0xFFFF & ~0x4000
+
+    def as_text(operand) -> str:
+        # Normalise an operand to its assembly text.
+        if operand is None:
+            return ""
+        if isinstance(operand, int):
+            return f"#{operand}"
+        return str(operand).strip()
+
+    dest = dest.strip()
+    op1 = as_text(op1)
+    op2 = as_text(op2)
+    jump = jump.strip()
+
+    if mnemonic == "mov":
+        # A-instruction: mov A, #imm
+        if dest == "A" and op1.startswith("#") and not op2 and not jump:
+            imm_str = op1[1:]
+            base = 16 if imm_str.startswith(("0x", "0X")) else 10
+            value = int(imm_str, base)
+            if not (0 <= value < 0x8000):
+                raise ValueError(f"Immediate out of range (0..32767): {value}")
+            return value & 0x7FFF
+        # Register copy: adding zero leaves the source value unchanged.
+        if op1 in ("D", "A", "M") and not op2:
+            mnemonic, op1, op2 = "add", ZERO, op1
+        else:
+            raise ValueError("Invalid args to mov.")
+    elif mnemonic == "neg":
+        if not op1 or op2:
+            raise ValueError("neg requires exactly one operand")
+        # Negation is subtraction from zero.
+        mnemonic, op1, op2 = "sub", ZERO, op1
+    elif mnemonic == "cmp":
+        if not dest or not op1 or op2:
+            raise ValueError("cmp requires exactly two operands")
+        # A comparison is a subtraction whose result is discarded; the first
+        # operand arrives in the destination slot.
+        mnemonic, dest, op1, op2 = "sub", DEST_NONE, dest, op1
+    elif mnemonic == "nop":
+        mnemonic = "and"
+        dest = "_"
+        op1 = "#0"
+        op2 = "A"
+        jump = ""
+    elif mnemonic not in MNEMONICS:
+        raise ValueError(f"Unknown mnemonic: {mnemonic}")
+
+    # C-instruction
+    opcode, two_op = MNEMONICS[mnemonic]
+
+    # bit 14, 15 = 1
+    ins = 0xC000
+
+    # opcode bits: low 2 bits in 8..9, high bit in 10 (ar_n_log)
+    ins |= (opcode & 0b11) << 8
+    ins |= ((opcode >> 2) & 0b1) << 10
+
+    ins |= encode_dest(dest)
+    ins |= encode_jump(jump)
+
+    # arg bits
+    if two_op:
+        if not op2:
+            raise ValueError(f"Two-op instruction {mnemonic} requires two operands")
+        ins |= encode_args_two_op(op1, op2)
+    else:
+        if not op1:
+            raise ValueError(f"One-op instruction {mnemonic} requires one operand")
+        ins |= encode_args_one_op(op1)
+
+    return ins
+
+
+def parse_line(line: str):
+    """
+    Split one line of assembly text into (mnemonic, dest, op1, op2, jump).
+
+    Everything from the first ';' onwards is a comment. Missing operands are
+    returned as "", a missing destination as DEST_NONE. Returns None when
+    nothing but whitespace or a comment is left.
+
+    Raises:
+        ValueError: more than three operands were given.
+    """
+    # '#' is the immediate prefix, so only ';' starts a comment.
+    code = line.partition(";")[0].strip()
+    if not code:
+        return None
+
+    # first word: mnemonic[.jump]; the remainder is the operand list
+    opcode_part, *tail = code.split(None, 1)
+    rest = tail[0] if tail else ""
+    mnemonic, _, jump = opcode_part.partition(".")
+
+    # operands: dest, op1[, op2] - empty entries are dropped
+    ops = [piece for piece in (part.strip() for part in rest.split(",")) if piece]
+    if len(ops) > 3:
+        raise ValueError(f"Too many operands: {rest}")
+    dest, op1, op2 = (ops + ["", "", ""])[:3]
+
+    return mnemonic, dest or DEST_NONE, op1, op2, jump
+
+
+def assemble_file(in_filename: str, out_filename: str):
+    """Assemble the ASCII source *in_filename* into the binary *out_filename*.
+
+    Each instruction is written as a 2-byte word in ENDIANNESS order; blank
+    and comment-only lines are skipped.
+
+    Raises:
+        SystemExit: a line failed to parse or encode; the message has the
+            form ``<file>:<line>: <reason>``.
+    """
+    with open(in_filename, "r", encoding="ascii") as source, open(
+        out_filename, "wb"
+    ) as sink:
+        for lineno, text in enumerate(source, start=1):
+            try:
+                fields = parse_line(text)
+                if fields is None:
+                    continue
+                word = encode_instruction(*fields)
+                sink.write(word.to_bytes(2, byteorder=ENDIANNESS))
+            except Exception as e:
+                raise SystemExit(f"{in_filename}:{lineno}: {e}") from e