Restructure assembler as package

author: uvok 2026-01-14 20:44:16 +0100
committer: uvok 2026-01-14 20:44:16 +0100
commit: 1561eff8780dc15dc5ea46d7225cc49a46f709ca (patch)
tree: 130d44ef295ff2113fc56c592a78780035449dff /nandgame/assembler/py_nand_ass
parent: 281414ea9b42e213b85b95b7072b73d1f1e3f240 (diff)
8 files changed, 1226 insertions, 0 deletions
diff --git a/nandgame/assembler/py_nand_ass/__init__.py b/nandgame/assembler/py_nand_ass/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/nandgame/assembler/py_nand_ass/__init__.py
diff --git a/nandgame/assembler/py_nand_ass/assembler.py b/nandgame/assembler/py_nand_ass/assembler.py
new file mode 100644
index 0000000..ee8baf0
--- /dev/null
+++ b/nandgame/assembler/py_nand_ass/assembler.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+
+from dataclasses import dataclass
+import sys
+from typing import Callable, Iterable
+
+from . import parser_types as pt
+from .simple_assembler import encode_instruction
+
+
+@dataclass
+class MnemonicInfo:  # static description of one mnemonic, consulted by check_instructions
+    opcode: str  # mnemonic text; equals the entry's key in opcode_infos
+    num_args: int  # expected Instruction.num_args (the destination counts as one)
+    supports_jmp: bool  # False makes check_instructions reject a jump suffix
+
+
+class Program:
+    """Label table, encoded instruction bytes and program counter of one assembly run."""
+    def __init__(self):
+        self.labels: dict[str, int] = {}  # label name -> pc value recorded by assemble()
+        self.instructions: bytes = b""  # encoded words, two bytes each
+        self.pc: int = 0
+
+    def encode(self, ins: pt.Instruction):
+        arg1 = self._resolve(ins, lambda: ins.arg1)  # raises ValueError on unknown label
+        arg2 = self._resolve(ins, lambda: ins.arg2)
+
+        op = encode_instruction(
+            ins.opcode,
+            ins.dest.name if ins.dest else "",  # no destination is passed as ""
+            arg1,
+            arg2,
+            (ins.jumptarget or ""),  # a missing jump suffix is passed as ""
+        )
+        self.instructions += op.to_bytes(length=2, byteorder="little")
+
+    def _resolve(
+        self,
+        ins: pt.Instruction,  # only used for the line number in the error message
+        get_prop: Callable[[], pt.Symbol | pt.Register | pt.Immediate | str | None],
+    ):
+        arg = get_prop()
+        if isinstance(arg, pt.Symbol):  # label reference -> value stored in self.labels
+            ret = self.labels.get(arg.name, None)
+            if ret is None:
+                raise ValueError(f"Line {ins.lineno}: Label {arg.name} not defined")
+        elif isinstance(arg, pt.Register):  # register -> its name string
+            ret = arg.name
+        elif isinstance(arg, pt.Immediate):  # immediate -> its int value
+            ret = arg.value
+        else:  # plain str or None is handed through unchanged
+            ret = arg
+
+        return ret
+
+    def write_to_file(self, filename: str) -> None:
+        with open(filename, "wb") as outfile:  # raw binary dump of the encoded words
+            _ = outfile.write(self.instructions)
+        print(f"Output written to {filename}")
+
+
+opcode_infos: dict[str, MnemonicInfo] = {
+    "and": MnemonicInfo("and", 3, True),
+    "or": MnemonicInfo("or", 3, True),
+    "xor": MnemonicInfo("xor", 3, True),
+    "not": MnemonicInfo("not", 2, True),
+    "mov": MnemonicInfo("mov", 2, True),
+    "add": MnemonicInfo("add", 3, True),
+    "inc": MnemonicInfo("inc", 2, True),
+    "sub": MnemonicInfo("sub", 3, True),
+    "dec": MnemonicInfo("dec", 2, True),
+    "cmp": MnemonicInfo("cmp", 2, True),
+    "neg": MnemonicInfo("neg", 2, True),
+    "hlt": MnemonicInfo("hlt", 0, False),
+    "nop": MnemonicInfo("nop", 0, False),
+}
+
+
+def get_op_info(instruction: pt.Instruction) -> MnemonicInfo | None:
+    """Look up the MnemonicInfo for an instruction's opcode; None if it is unknown."""
+    return opcode_infos.get(instruction.opcode)
+
+
+def check_instructions(
+    instructions: Iterable[pt.AsmLine],
+) -> Iterable[pt.ErrorInstruction]:
+    """Given an iterable of assembler lines, check for errors."""
+    for ins in instructions:
+        # If instruction already is an error generated by the parser, just return that.
+        if isinstance(ins, pt.ErrorInstruction):
+            yield ins
+            continue
+
+        if not isinstance(ins, pt.Instruction):
+            continue
+
+        if (
+            ins.arg1 is not None
+            and ins.arg2 is not None
+            and not isinstance(ins.arg1, pt.Register)
+            and not isinstance(ins.arg2, pt.Register)
+        ):
+            yield pt.ErrorInstruction(
+                lineno=ins.lineno,
+                opcode=ins.opcode,
+                error_message="At least one argument must be a register.",
+            )
+
+        opinfo = get_op_info(ins)
+        if opinfo is None:
+            yield pt.ErrorInstruction(
+                lineno=ins.lineno,
+                opcode=ins.opcode,
+                error_message="Unknown instruction",
+            )
+            continue
+
+        if opinfo.num_args != ins.num_args:
+            yield pt.ErrorInstruction(
+                lineno=ins.lineno,
+                opcode=ins.opcode,
+                error_message=f"Expected {opinfo.num_args} args, got {ins.num_args}.",
+            )
+
+        if not opinfo.supports_jmp and ins.jumptarget:
+            yield pt.ErrorInstruction(
+                lineno=ins.lineno,
+                opcode=ins.opcode,
+                error_message="OPcode got a jump, but it's not supported here.",
+            )
+
+
+def assemble(instructions: Iterable[pt.AsmLine]) -> Program:
+    """Two-pass assembly: collect label addresses, then encode every instruction."""
+    lines = list(instructions)  # both passes iterate; a generator would be spent
+    prog = Program()  # starts with pc == 0
+    # first pass: populate symbols
+    for ins in lines:
+        match ins:
+            case pt.JumpTarget():
+                lblname = ins.label.name
+                if lblname in prog.labels:
+                    print(
+                        f"WARNING: Label {lblname} redefined on line {ins.lineno}. Using previous definition.",
+                        file=sys.stderr,
+                    )
+                else:
+                    prog.labels[lblname] = prog.pc
+            case pt.Instruction():
+                prog.pc += 1
+            case _:
+                pass
+
+    prog.pc = 0
+    # second pass: assemble with resolve
+    for ins in lines:
+        match ins:
+            case pt.Instruction():
+                prog.encode(ins)
+                prog.pc += 1
+            case _:
+                pass
+    return prog
diff --git a/nandgame/assembler/py_nand_ass/disas.py b/nandgame/assembler/py_nand_ass/disas.py
new file mode 100755
index 0000000..116aea4
--- /dev/null
+++ b/nandgame/assembler/py_nand_ass/disas.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python3
+
+"""
+Disassembler for nandgame.
+
+Using my own flavor of assembly language.
+I don't like the "C-style" one nandgame introduces.
+"""
+
+import sys
+
+ZERO = "#0"
+DEST_NONE = "_"
+JUMP_NONE = ""
+JUMPS_IF_NZERO = ["jgt", "jlt", "jne", "jmp"]
+JUMPS_IF_ZERO = ["jge", "jle", "jeq", "jmp"]
+
+ENDIANNESS = "little"
+
+
+def decode_jump(ins: int) -> str:
+    if (ins & 0x7) == 0:
+        return JUMP_NONE
+
+    if (ins & 0x7) == 0x7:
+        return "jmp"
+
+    jl = (ins & (1 << 2)) != 0
+    je = (ins & (1 << 1)) != 0
+    jg = (ins & (1 << 0)) != 0
+
+    # implied: and not jg
+    if jl and je:
+        return "jle"
+    # implied: and not je
+    if jl and jg:
+        return "jne"
+    # implied: and not je
+    if je and jg:
+        return "jge"
+
+    # implied: only one flag is 1
+    if jl:
+        return "jlt"
+    if je:
+        return "jeq"
+    if jg:
+        return "jgt"
+
+    return "<unknown>"
+
+
+# return op, and whether it's a one-op or two-op
+def decode_ins(ins: int) -> tuple[str, bool]:
+    opcode = (ins >> 8) & 0x03
+    ar_n_log = (ins & (1 << 10)) != 0
+    opcode |= ar_n_log << 2
+
+    if opcode == 0b000:
+        return "and", True
+    if opcode == 0b001:
+        return "or", True
+    if opcode == 0b010:
+        return "xor", True
+    if opcode == 0b011:
+        return "not", False
+    if opcode == 0b100:
+        return "add", True
+    if opcode == 0b101:
+        return "inc", False
+    if opcode == 0b110:
+        return "sub", True
+    if opcode == 0b111:
+        return "dec", False
+
+    return "<?>", False
+
+
+# normally, X = arg1 = D
+def decode_arg1(ins: int) -> str:
+    use_mem = (ins & (1 << 12)) != 0
+    zx = (ins & (1 << 7)) != 0
+    sw = (ins & (1 << 6)) != 0
+
+    if zx:
+        return ZERO
+    if not sw:
+        return "D"
+    return "M" if use_mem else "A"
+
+
+# normally, Y = arg2 = A
+def decode_arg2(ins: int) -> str:
+    use_mem = (ins & (1 << 12)) != 0
+
+    # don't care, only X is zeroed
+    # zx = (ins & (1 << 7)) != 0
+    sw = (ins & (1 << 6)) != 0
+
+    if sw:
+        return "D"
+    return "M" if use_mem else "A"
+
+
+def decode_dest(ins: int) -> str:
+    dA = (ins & (1 << 5)) != 0
+    dD = (ins & (1 << 4)) != 0
+    dM = (ins & (1 << 3)) != 0
+    dest = ""
+    if dA:
+        dest += "A"
+    if dD:
+        dest += "D"
+    if dM:
+        dest += "M"
+
+    return dest if dest else DEST_NONE
+
+
+def decode_instruction(ins: int) -> list[str]:
+    """
+    Will return a 5 element list/tuple/whatever
+    mnemonic, destination, X, Y, jumpdest
+    """
+    if ins & 0x8000 == 0:
+        # mov? ldr? ldi? aaaaaaaaaaa....
+        return ["mov", "A", f"#{ins}", "", ""]
+    else:
+        mnemonic, two_op = decode_ins(ins)
+        dest = decode_dest(ins)
+        op1 = decode_arg1(ins)
+        op2 = decode_arg2(ins) if two_op else ""
+        jumpdest = decode_jump(ins)
+        return [mnemonic, dest, op1, op2, jumpdest]
+
+
+def fixup_ins(ins: int) -> list[str]:
+    """Decode ``ins`` and rewrite trivial encodings as neg/mov/nop/jmp/cmp."""
+    (mnemonic, dest, op1, op2, jumpdest) = decode_instruction(ins)
+    if op1 == ZERO:
+        # #0 - something is arithmetic negation
+        if mnemonic == "sub":
+            return ["neg", dest, op2, "", jumpdest]
+
+        # 0 AND something = 0
+        if mnemonic == "and":
+            # if no dest, only jump matters
+            if dest == DEST_NONE:
+                # jump always or jump-if-zero --> always jump
+                if jumpdest in JUMPS_IF_ZERO:
+                    return ["jmp", "", "", "", ""]
+                # every other condition (jlt, jgt, jne, none) never fires on a zero result
+                else:
+                    return ["nop", "", "", "", ""]
+            else:
+                if jumpdest in JUMPS_IF_ZERO:
+                    newjmp = "jmp"
+                else:
+                    newjmp = ""
+                return ["mov", dest, ZERO, "", newjmp]
+
+        # 0 +/|/^ something = something
+        if mnemonic in ["add", "or", "xor"]:
+            if dest == DEST_NONE and jumpdest == JUMP_NONE:
+                return ["nop", "", "", "", ""]
+            else:
+                return ["mov", dest, op2, "", jumpdest]
+
+        # NOT #0 is all ones (0xFFFF), a constant non-zero result,
+        # so the jump outcome is fixed just like in the AND case above.
+        if mnemonic == "not":
+            # if no dest, only jump matters
+            if dest == DEST_NONE:
+                # 0xFFFF is -1 when read as signed, so only jlt/jne/jle/jmp are taken.
+                if jumpdest in ["jeq", "jgt", "jge", JUMP_NONE]:
+                    return ["nop", "", "", "", ""]
+                else:
+                    return ["jmp", "", "", "", ""]
+    elif dest == DEST_NONE:
+        if mnemonic == "sub":
+            return ["cmp", "", op1, op2, jumpdest]
+
+    return [mnemonic, dest, op1, op2, jumpdest]
+
+
+def print_decoded(ins: int, simplify: bool) -> str:
+    # illegal instruction
+    if ins & 0xC000 == 0x8000 and simplify:
+        return "halt"
+
+    if simplify:
+        (mnemonic, dest, op1, op2, jumpdest) = fixup_ins(ins)
+    else:
+        (mnemonic, dest, op1, op2, jumpdest) = decode_instruction(ins)
+
+    jumpdest_str = f".{jumpdest}" if jumpdest else ""
+    opcode_str = f"{mnemonic}{jumpdest_str}"
+    dest_str = f"{dest}, " if dest else 7 * " "
+    op2_str = ", " if op2 else ""
+    op1_str = f"{op1}{op2_str}"
+    return f"{opcode_str:<9}{dest_str:<6}{op1_str:<4}{op2}"
diff --git a/nandgame/assembler/py_nand_ass/disas_test.py b/nandgame/assembler/py_nand_ass/disas_test.py
new file mode 100755
index 0000000..ed4430c
--- /dev/null
+++ b/nandgame/assembler/py_nand_ass/disas_test.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python3
+
+"""
+Test bench for disassembler.
+"""
+
+import unittest
+
+from .disas import decode_instruction as di
+
+
+def make_instruction(ins):
+    ins <<= 6
+    ins |= 0x8000
+    return ins
+
+
+def dbglog(message: str):
+    print("\n", end="")
+    print(message, end="")
+
+
+class DisasUnitTest(unittest.TestCase):
+    # from nandgame
+    # opcode | u | op | op0 | zx | sw
+    # D+A    | 1 | 0  | 0   | 0  | 0
+    def test_add(self):
+        ins = 0b10000
+        ins = make_instruction(ins)
+        (mnemonic, _, X, Y, _) = di(ins)
+        dbglog(f"    {mnemonic} {X}, {Y}")
+        self.assertEqual(mnemonic, "add")
+        self.assertEqual(X, "D")
+        self.assertEqual(Y, "A")
+
+    # D-A    | 1 | 1  | 0   | 0  | 0
+    def test_sub1(self):
+        ins = 0b11000
+        ins = make_instruction(ins)
+        (mnemonic, _, X, Y, _) = di(ins)
+        dbglog(f"    {mnemonic} {X}, {Y}")
+        self.assertEqual(mnemonic, "sub")
+        self.assertEqual(X, "D")
+        self.assertEqual(Y, "A")
+
+    # A-D    | 1 | 1  | 0   | 0  | 1
+    def test_sub2(self):
+        ins = 0b11001
+        ins = make_instruction(ins)
+        (mnemonic, _, X, Y, _) = di(ins)
+        dbglog(f"    {mnemonic} {X}, {Y}")
+        self.assertEqual(mnemonic, "sub")
+        self.assertEqual(X, "A")
+        self.assertEqual(Y, "D")
+
+    # D+1    | 1 | 0  | 1   | 0  | 0
+    def test_incD(self):
+        ins = 0b10100
+        ins = make_instruction(ins)
+        (mnemonic, _, X, Y, _) = di(ins)
+        dbglog(f"    {mnemonic} {X}, {Y}")
+        self.assertEqual(mnemonic, "inc")
+        self.assertEqual(X, "D")
+        self.assertEqual(Y, "")
+
+    # A+1    | 1 | 0  | 1   | 0  | 1
+    def test_incA(self):
+        ins = 0b10101
+        ins = make_instruction(ins)
+        (mnemonic, _, X, Y, _) = di(ins)
+        dbglog(f"    {mnemonic} {X}, {Y}")
+        self.assertEqual(mnemonic, "inc")
+        self.assertEqual(X, "A")
+        self.assertEqual(Y, "")
+
+    # D-1    | 1 | 1  | 1   | 0  | 0
+    def test_decD(self):
+        ins = 0b11100
+        ins = make_instruction(ins)
+        (mnemonic, _, X, Y, _) = di(ins)
+        dbglog(f"    {mnemonic} {X}, {Y}")
+        self.assertEqual(mnemonic, "dec")
+        self.assertEqual(X, "D")
+        self.assertEqual(Y, "")
+
+    # A-1    | 1 | 1  | 1   | 0  | 1
+    def test_decA(self):
+        ins = 0b11101
+        ins = make_instruction(ins)
+        (mnemonic, _, X, Y, _) = di(ins)
+        dbglog(f"    {mnemonic} {X}, {Y}")
+        self.assertEqual(mnemonic, "dec")
+        self.assertEqual(X, "A")
+        self.assertEqual(Y, "")
+
+    # -D     | 1 | 1  | 0   | 1  | 1
+    def test_minusD(self):
+        """-D decodes as ``sub #0, D``: the zeroed X operand is rendered as #0."""
+        ins = make_instruction(0b11011)
+        (mnemonic, _, X, Y, _) = di(ins)
+        dbglog(f"    {mnemonic} {X}, {Y}")
+        self.assertEqual(mnemonic, "sub")
+        self.assertEqual(X, "#0")
+        self.assertEqual(Y, "D")
+
+    # -A     | 1 | 1  | 0   | 1  | 0
+    def test_minusA(self):
+        """-A decodes as ``sub #0, A``: the zeroed X operand is rendered as #0."""
+        ins = make_instruction(0b11010)
+        (mnemonic, _, X, Y, _) = di(ins)
+        dbglog(f"    {mnemonic} {X}, {Y}")
+        self.assertEqual(mnemonic, "sub")
+        self.assertEqual(X, "#0")
+        self.assertEqual(Y, "A")
+
+    # -1     | 1 | 1  | 1   | 1  | 0
+    def test_minusOne(self):
+        """The constant -1 decodes as ``dec #0`` (one operand, rendered as #0)."""
+        ins = make_instruction(0b11110)
+        (mnemonic, _, X, Y, _) = di(ins)
+        dbglog(f"    {mnemonic} {X}, {Y}")
+        self.assertEqual(mnemonic, "dec")
+        self.assertEqual(X, "#0")
+        self.assertEqual(Y, "")
+
+    # 1      | 1 | 0  | 1   | 1  | 0
+    def test_plusOne(self):
+        """The constant 1 decodes as ``inc #0`` (one operand, rendered as #0)."""
+        ins = make_instruction(0b10110)
+        (mnemonic, _, X, Y, _) = di(ins)
+        dbglog(f"    {mnemonic} {X}, {Y}")
+        self.assertEqual(mnemonic, "inc")
+        self.assertEqual(X, "#0")
+        self.assertEqual(Y, "")
+
+    # D      | 1 | 0  | 0   | 1  | 1
+    def test_D(self):
+        """Plain D decodes as ``add #0, D``: the zeroed X operand is rendered as #0."""
+        ins = make_instruction(0b10011)
+        (mnemonic, _, X, Y, _) = di(ins)
+        dbglog(f"    {mnemonic} {X}, {Y}")
+        self.assertEqual(mnemonic, "add")
+        self.assertEqual(X, "#0")
+        self.assertEqual(Y, "D")
+
+    # A      | 1 | 0  | 0   | 1  | 0
+    def test_A(self):
+        """Plain A decodes as ``add #0, A``: the zeroed X operand is rendered as #0."""
+        ins = make_instruction(0b10010)
+        (mnemonic, _, X, Y, _) = di(ins)
+        dbglog(f"    {mnemonic} {X}, {Y}")
+        self.assertEqual(mnemonic, "add")
+        self.assertEqual(X, "#0")
+        self.assertEqual(Y, "A")
+
+    # D&A    | 0 | 0  | 0   | 0  | 0
+    def test_DandA(self):
+        ins = 0b00000
+        ins = make_instruction(ins)
+        (mnemonic, _, X, Y, _) = di(ins)
+        dbglog(f"    {mnemonic} {X}, {Y}")
+        self.assertEqual(mnemonic, "and")
+        self.assertEqual(X, "D")
+        self.assertEqual(Y, "A")
+
+    # D|A    | 0 | 0  | 1   | 0  | 0
+    def test_DorA(self):
+        ins = 0b00100
+        ins = make_instruction(ins)
+        (mnemonic, _, X, Y, _) = di(ins)
+        dbglog(f"    {mnemonic} {X}, {Y}")
+        self.assertEqual(mnemonic, "or")
+        self.assertEqual(X, "D")
+        self.assertEqual(Y, "A")
+
+    # ~D     | 0 | 1  | 1   | 0  | 0
+    def test_negD(self):
+        """~D is the bitwise complement, which the disassembler names ``not``."""
+        ins = make_instruction(0b01100)
+        (mnemonic, _, X, Y, _) = di(ins)
+        dbglog(f"    {mnemonic} {X}, {Y}")
+        self.assertEqual(mnemonic, "not")
+        self.assertEqual(X, "D")
+        self.assertEqual(Y, "")
+
+    # ~A     | 0 | 1  | 1   | 0  | 1
+    def test_negA(self):
+        """~A is the bitwise complement, which the disassembler names ``not``."""
+        ins = make_instruction(0b01101)
+        (mnemonic, _, X, Y, _) = di(ins)
+        dbglog(f"    {mnemonic} {X}, {Y}")
+        self.assertEqual(mnemonic, "not")
+        self.assertEqual(X, "A")
+        self.assertEqual(Y, "")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/nandgame/assembler/py_nand_ass/lexer.py b/nandgame/assembler/py_nand_ass/lexer.py
new file mode 100755
index 0000000..e55ab05
--- /dev/null
+++ b/nandgame/assembler/py_nand_ass/lexer.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+
+import ply.lex as lex
+
+# List of token names.   This is always required
+tokens = (
+    "OP",
+    "JUMP",
+    "COMMA",
+    "COLON",
+    "SYMBOL",
+    "NUMBER",
+    "HEXNUMBER",
+    "DOT",
+    "REG",
+    "NL",
+)
+
+# Regular expression rules for simple tokens
+t_COMMA = r","
+t_COLON = r":"
+t_DOT = r"\."
+
+
+def t_OP(t):
+    r"\b(and|or|xor|not|mov|add|inc|sub|dec|cmp|neg|hlt|nop)\b"
+    return t  # \b stops a label such as "android" from lexing as OP "and" + SYMBOL
+
+
+def t_REG(t):
+    r"\b(AD?M?|DM?|M|_)\b"
+    return t
+
+
+def t_JUMP(t):
+    r"\b(jmp|jlt|jgt|jle|jge|jeq|jne)\b"
+    return t  # \b stops a label such as "jmptable" from lexing as JUMP + SYMBOL
+
+
+def t_HEXNUMBER(t):
+    r"\#0x[0-9a-fA-F]+"
+    t.value = int(t.value[1:], 16)
+    return t  # must precede t_NUMBER: PLY tries function rules in definition order
+
+
+def t_NUMBER(t):
+    r"\#\d+"
+    t.value = int(t.value[1:])
+    return t  # decimal immediate; "#0x.." never gets here because the hex rule is first
+
+
+def t_SYMBOL(t):
+    r"[a-z][A-Za-z0-9_]+"
+    return t  # NOTE(review): pattern needs >= 2 characters, so a one-letter label does not match - confirm intent
+
+
+# Define a rule so we can track line numbers
+def t_NL(t):
+    r"\n+"
+    t.lexer.lineno += len(t.value)
+    return t
+
+
+# A string containing ignored characters (spaces and tabs)
+t_ignore = " \t"
+# t_ignore_COMMENT = r';.*'
+
+
+def t_COMMENT(t):
+    r";.*"
+    pass
+
+
+# Error handling rule
+def t_error(t):
+    print("!!! Illegal character '%s'" % t.value[0])
+    t.lexer.skip(1)
+
+
+# EOF handling rule
+def t_eof(t):
+    if not t.lexer.newline_added:
+        t.lexer.input("\n")
+        t.lexer.newline_added = True
+        return t.lexer.token()
+    return None
+
+
+# Build the lexer
+lexer = lex.lex()
+lexer.newline_added = False
diff --git a/nandgame/assembler/py_nand_ass/parser.py b/nandgame/assembler/py_nand_ass/parser.py
new file mode 100755
index 0000000..7cc4930
--- /dev/null
+++ b/nandgame/assembler/py_nand_ass/parser.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+
+import sys
+from typing import Any, MutableSequence, Tuple
+
+import ply.yacc as yacc
+from ply.lex import LexToken
+
+# Get the token map from the lexer.  This is required.
+from .lexer import tokens
+
+from . import parser_types as pt
+
+
+P = MutableSequence[Any]
+
+
+def p_program(p: P) -> None:
+    """program : instruction_list
+               | empty
+    """
+    p[0] = p[1]
+
+
+def p_empty(p: P) -> None:
+    """empty :"""
+    pass
+
+
+def p_instructions(p: P) -> None:
+    """instruction_list : instruction_list line
+                        | line
+    """
+    if len(p) == 2:
+        p[0] = [p[1]]
+    else:
+        p[0] = p[1] + [p[2]]
+    pass
+
+
+# # try right-recursive?
+# def p_instructions2(p: P) -> None:
+#     """instruction_list2 : line instruction_list2
+#                          | line
+#     """
+#     if len(p) == 2:
+#         p[0] = [p[1]]
+#     else:
+#         p[0] = [p[1]] + p[2]
+
+#     print(f"INSes2: {len(p)}")
+#     print(f"    {p[1]}")
+#     if len(p) > 2:
+#         print(f"    {p[2]}")
+#     pass
+
+
+def p_line(p: P) -> None:
+    """line : instruction NL
+            | jumpdest NL
+            | NL
+    """
+    # | instruction error NL
+    # | jumpdest error NL
+    if len(p) == 2:
+        pass
+    elif len(p) == 3:
+        p[0] = p[1]
+        print(f"Item: {p[0]}")
+    # if error handling
+    else:
+        p[0] = p[1]
+        assert False
+
+
+def p_instruction(p: P) -> None:
+    """instruction : valid_instruction
+    | one_arg_invalid
+    """
+    p[0] = p[1]
+
+
+def p_valid_instruction(p: P) -> None:
+    """valid_instruction : no_args
+                         | two_arg
+                         | three_arg
+    """
+    tp: Tuple[Any, Any, Any, Any, Any] = p[1]
+    p[0] = pt.Instruction(p.lineno(1), *tp)
+
+
+def p_jumpdest(p: P) -> None:
+    """jumpdest : symbol COLON"""
+    p[0] = pt.JumpTarget(lineno=p.lineno(1), label=p[1])
+
+
+def p_no_arg(p: P) -> None:
+    """no_args : opcode"""
+    p[0] = (*p[1], None, None, None)
+    pass
+
+
+def p_onearg_invalid(p: P) -> None:
+    """one_arg_invalid : opcode argument"""
+    # p[1] is the (mnemonic, jump) pair built by the opcode rules; report the mnemonic.
+    mnemonic, _jump = p[1]
+    p[0] = pt.ErrorInstruction(
+        p.lineno(1), mnemonic, "No opcode only supports one argument."
+    )
+
+
+def p_two_arg(p: P) -> None:
+    """two_arg : opcode register COMMA argument"""
+    p[0] = (*p[1], p[2], p[4], None)
+    pass
+
+
+def p_three_arg(p: P) -> None:
+    """three_arg : opcode register COMMA argument COMMA argument"""
+    p[0] = (*p[1], p[2], p[4], p[6])
+    pass
+
+
+# checks which combinations are allowed is done one level up
+def p_argument(p: P) -> None:
+    """argument : number
+                | register
+                | symbol
+    """
+    p[0] = p[1]
+
+
+def p_symbol(p: P) -> None:
+    """symbol : SYMBOL"""
+    p[0] = pt.Symbol(p[1])
+
+
+def p_register(p: P) -> None:
+    """register : REG"""
+    p[0] = pt.Register(p[1])
+
+
+def p_opcode(p: P) -> None:
+    """opcode : opcode_jmp
+              | opcode_njmp
+    """
+    p[0] = p[1]
+
+
+def p_opcode_jmp(p: P) -> None:
+    """opcode_jmp : OP DOT JUMP"""
+    p[0] = (p[1], p[3])
+
+
+def p_opcode_njmp(p: P) -> None:
+    """opcode_njmp : OP"""
+    p[0] = (p[1], None)
+
+
+def p_number(p: P) -> None:
+    """number : NUMBER
+              | HEXNUMBER
+    """
+    p[0] = pt.Immediate(p[1])
+
+
+def p_error(p: LexToken | None) -> LexToken|None:
+    if p:
+        print(
+            f"WARNING: Unexpected {repr(p.value)} on line {p.lineno}", file=sys.stderr
+        )
+    else:
+        print("WARNING: Unexpected end of file.", file=sys.stderr)
+        return
+
+    while True:
+        tok: LexToken = parser.token()
+        if not tok or tok.type == 'NL':
+            break
+    parser.errok()
+    return tok
+
+
+parser: yacc.LRParser = yacc.yacc()
diff --git a/nandgame/assembler/py_nand_ass/parser_types.py b/nandgame/assembler/py_nand_ass/parser_types.py
new file mode 100644
index 0000000..3dd44b9
--- /dev/null
+++ b/nandgame/assembler/py_nand_ass/parser_types.py
@@ -0,0 +1,64 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class Symbol:
+    """Define a symbol"""
+
+    name: str
+
+
+@dataclass
+class Immediate:
+    """An immediate value loaded in a register."""
+
+    value: int
+
+
+@dataclass
+class Address:
+    """An address (used in 2nd stage assembler)."""
+
+    value: int
+
+
+@dataclass
+class Register:
+    """Define a source or dest register"""
+
+    name: str
+
+
+@dataclass
+class AsmLine:
+    lineno: int
+
+
+@dataclass
+class Instruction(AsmLine):  # one parsed instruction; field order matches the parser's tuple
+    opcode: str
+    jumptarget: str | None  # None when the mnemonic carries no jump suffix
+    dest: Register | None
+    arg1: Symbol | Register | Immediate | None
+    arg2: Symbol | Register | Immediate | None
+
+    @property
+    def num_args(self) -> int:  # operands present, counting dest as the first one
+        if self.dest is None:
+            return 0
+        if self.arg1 is None:
+            return 1
+        if self.arg2 is None:
+            return 2
+        return 3
+
+
+@dataclass
+class ErrorInstruction(AsmLine):
+    opcode: str
+    error_message: str
+
+
+@dataclass
+class JumpTarget(AsmLine):
+    label: Symbol
diff --git a/nandgame/assembler/py_nand_ass/simple_assembler.py b/nandgame/assembler/py_nand_ass/simple_assembler.py
new file mode 100755
index 0000000..d684635
--- /dev/null
+++ b/nandgame/assembler/py_nand_ass/simple_assembler.py
@@ -0,0 +1,324 @@
+#!/usr/bin/env python3
+
+###
+# LLM generated
+###
+
+"""
+Assembler for nandgame, matching the custom disassembler above.
+
+Syntax (as produced by print_decoded):
+
+    mnemonic[.jump]  DEST, OP1[, OP2]
+
+Examples:
+
+    mov        A, #123
+    add.jgt    D, D, A
+    sub        _, D, M
+    not        D, D
+    inc.jeq    D, D
+    and        _, D, M
+    xor        M, D, M
+
+DEST:
+    A, D, M, any combination like AD, AM, DM, ADM, or "_" for no destination.
+
+OP1 / OP2:
+    D, A, M, #0 (for OP1 only), or #<number> for mov A,#imm (A-instruction).
+
+Jumps:
+    jlt, jle, jeq, jne, jgt, jge, jmp, or none.
+"""
+
+import sys
+from typing import Union
+
+from . import parser_types as pt
+
+ZERO = "#0"
+DEST_NONE = "_"
+JUMP_NONE = ""
+ENDIANNESS = "little"
+
+# mapping from mnemonic to (opcode, two_op)
+MNEMONICS = {
+    "and": (0b000, True),
+    "or": (0b001, True),
+    "xor": (0b010, True),
+    "not": (0b011, False),
+    "add": (0b100, True),
+    "inc": (0b101, False),
+    "sub": (0b110, True),
+    "dec": (0b111, False),
+}
+
+# jump mnemonic -> bits 0..2
+JUMP_ENCODE = {
+    "": 0b000,
+    "jgt": 0b001,
+    "jeq": 0b010,
+    "jge": 0b011,
+    "jlt": 0b100,
+    "jne": 0b101,
+    "jle": 0b110,
+    "jmp": 0b111,
+}
+
+
+def encode_dest(dest: str) -> int:
+    """
+    dest is something like "A", "D", "M", "AD", "ADM", or "_" for none.
+    Returns bits for A,D,M in positions 5,4,3.
+    """
+    dest = dest.strip()
+    if dest == DEST_NONE:
+        return 0
+
+    bits = 0
+    if "A" in dest:
+        bits |= 1 << 5
+    if "D" in dest:
+        bits |= 1 << 4
+    if "M" in dest:
+        bits |= 1 << 3
+    return bits
+
+
+def encode_jump(jump: str) -> int:
+    jump = jump.strip()
+    if jump not in JUMP_ENCODE:
+        raise ValueError(f"Unknown jump condition: {jump}")
+    return JUMP_ENCODE[jump]
+
+
+def encode_args_two_op(op1: str, op2: str) -> int:
+    """
+    For two-operand instructions, find zx, sw, use_mem bits that reproduce
+    the given op1/op2 under decode_arg1/decode_arg2.
+
+    op1 in {D, A, M, #0}
+    op2 in {D, A, M}
+    """
+    op1 = op1.strip()
+    op2 = op2.strip()
+
+    # brute-force all combinations of zx, sw, use_mem and pick the one that matches
+    for zx in (0, 1):
+        for sw in (0, 1):
+            for use_mem in (0, 1):
+                # simulate decode_arg1/2
+                if zx:
+                    dec_op1 = ZERO
+                else:
+                    if not sw:
+                        dec_op1 = "D"
+                    else:
+                        dec_op1 = "M" if use_mem else "A"
+
+                if sw:
+                    dec_op2 = "D"
+                else:
+                    dec_op2 = "M" if use_mem else "A"
+
+                if dec_op1 == op1 and dec_op2 == op2:
+                    bits = 0
+                    if use_mem:
+                        bits |= 1 << 12
+                    if zx:
+                        bits |= 1 << 7
+                    if sw:
+                        bits |= 1 << 6
+                    return bits
+
+    raise ValueError(f"Unsupported operand combination for two-op: {op1}, {op2}")
+
+
+def encode_args_one_op(op1: str) -> int:
+    """
+    For one-operand instructions, only decode_arg1 matters.
+    We choose canonical encodings:
+
+        D  -> zx=0, sw=0
+        A  -> zx=0, sw=1, use_mem=0
+        M  -> zx=0, sw=1, use_mem=1
+        #0 -> zx=1
+    """
+    op1 = op1.strip()
+    bits = 0
+
+    if op1 == ZERO:
+        bits |= 1 << 7  # zx
+        # sw/use_mem don't matter for arg1 when zx=1, but keep them 0
+        return bits
+
+    if op1 == "D":
+        # zx=0, sw=0, use_mem=0
+        return bits
+
+    if op1 == "A":
+        bits |= 1 << 6  # sw=1
+        # use_mem=0
+        return bits
+
+    if op1 == "M":
+        bits |= 1 << 6  # sw=1
+        bits |= 1 << 12  # use_mem=1
+        return bits
+
+    raise ValueError(f"Unsupported operand for one-op: {op1}")
+
+#Arg = Union[str, int, pt.Address, pt.Immediate, pt.Register]
+Arg = Union[str, int, None]
+
+def encode_instruction(
+    mnemonic: str, dest: str, op1: Arg, op2: Arg, jump: str
+) -> int:
+    """
+    Encode a single instruction into a 16-bit integer.
+
+    Operands may be text ("D", "#5"), ints (rendered as "#<n>") or None.
+    Besides the MNEMONICS table this accepts ``hlt``, ``nop``, ``mov A, #imm``
+    and the aliases the disassembler emits: ``neg dest, X`` (sub dest, #0, X)
+    and ``cmp X, Y`` (sub _, X, Y). Raises ValueError for anything else.
+    """
+
+    def as_text(arg: Arg) -> str:
+        # None -> "", int -> "#<n>", everything else -> stripped str
+        if arg is None:
+            return ""
+        return f"#{arg}" if isinstance(arg, int) else str(arg).strip()
+
+    mnemonic = mnemonic.strip()
+    if mnemonic == "hlt":
+        return 0xFFFF & ~0x4000
+
+    dest = dest.strip()
+    op1, op2 = as_text(op1), as_text(op2)
+    jump = jump.strip()
+
+    # A-instruction: mov A, #imm
+    if mnemonic == "mov":
+        if dest == "A" and op1.startswith("#") and not op2 and not jump:
+            imm_str = op1[1:]
+            if imm_str.startswith("0x") or imm_str.startswith("0X"):
+                value = int(imm_str, 16)
+            else:
+                value = int(imm_str, 10)
+            if not (0 <= value < 0x8000):
+                raise ValueError(f"Immediate out of range (0..32767): {value}")
+            return value & 0x7FFF
+        else:
+            raise ValueError("Invalid args to mov.")
+
+    # C-instruction: rewrite aliases onto real ALU operations first
+    if mnemonic == "nop":
+        mnemonic, dest, op1, op2, jump = "and", DEST_NONE, ZERO, "A", ""
+    elif mnemonic == "neg":
+        # neg dest, X  ==  sub dest, #0, X
+        mnemonic, op1, op2 = "sub", ZERO, op1
+    elif mnemonic == "cmp":
+        # cmp X, Y  ==  sub _, X, Y; the two-operand form delivers X in the dest slot
+        if not op2:
+            op1, op2 = dest, op1
+        mnemonic, dest = "sub", DEST_NONE
+    elif mnemonic not in MNEMONICS:
+        raise ValueError(f"Unknown mnemonic: {mnemonic}")
+
+    opcode, two_op = MNEMONICS[mnemonic]
+
+    # bit 14, 15 = 1
+    ins = 0xC000
+
+    # opcode bits: low 2 bits in 8..9, high bit in 10 (ar_n_log)
+    low2 = opcode & 0b11
+    high1 = (opcode >> 2) & 0b1
+    ins |= low2 << 8
+    if high1:
+        ins |= 1 << 10
+
+    # dest bits
+    ins |= encode_dest(dest)
+
+    # jump bits
+    ins |= encode_jump(jump)
+
+    # arg bits
+    if two_op:
+        if not op2:
+            raise ValueError(f"Two-op instruction {mnemonic} requires two operands")
+        ins |= encode_args_two_op(op1, op2)
+    else:
+        if not op1:
+            raise ValueError(f"One-op instruction {mnemonic} requires one operand")
+        ins |= encode_args_one_op(op1)
+
+    return ins
+
+
+def parse_line(line: str):
+    """
+    Parse a single assembly line into (mnemonic, dest, op1, op2, jump), or return
+    None for a blank/comment-only line; raises ValueError on more than three operands.
+    """
+    # strip comments starting with ';' ('#' is not a comment marker: it prefixes immediates)
+    for sep in ";":
+        idx = line.find(sep)
+        if idx != -1:
+            line = line[:idx]
+    line = line.strip()
+    if not line:
+        return None
+
+    # first token: mnemonic[.jump]
+    parts = line.split(None, 1)
+    if not parts:
+        return None
+    opcode_part = parts[0]
+    rest = parts[1] if len(parts) > 1 else ""
+
+    if "." in opcode_part:
+        mnemonic, jump = opcode_part.split(".", 1)
+    else:
+        mnemonic, jump = opcode_part, ""
+
+    # operands: dest, op1[, op2]
+    dest = ""
+    op1 = ""
+    op2 = ""
+
+    if rest:
+        ops = [o.strip() for o in rest.split(",")]
+        ops = [o for o in ops if o]  # remove empty
+        if len(ops) >= 1:
+            dest = ops[0]
+        if len(ops) >= 2:
+            op1 = ops[1]
+        if len(ops) >= 3:
+            op2 = ops[2]
+        if len(ops) > 3:
+            raise ValueError(f"Too many operands: {rest}")
+
+    # normalize no-dest
+    if dest == "":
+        dest = DEST_NONE
+
+    return mnemonic, dest, op1, op2, jump
+
+
+def assemble_file(in_filename: str, out_filename: str):
+    with open(in_filename, "r", encoding="ascii") as fin, open(
+        out_filename, "wb"
+    ) as fout:
+        lineno = 0
+        for line in fin:
+            lineno += 1
+            try:
+                parsed = parse_line(line)
+                if parsed is None:
+                    continue
+                mnemonic, dest, op1, op2, jump = parsed
+                ins = encode_instruction(mnemonic, dest, op1, op2, jump)
+                fout.write(ins.to_bytes(2, byteorder=ENDIANNESS))
+            except Exception as e:
+                raise SystemExit(f"{in_filename}:{lineno}: {e}") from e
author	uvok	2026-01-14 20:44:16 +0100
committer	uvok	2026-01-14 20:44:16 +0100
commit	1561eff8780dc15dc5ea46d7225cc49a46f709ca (patch)
tree	130d44ef295ff2113fc56c592a78780035449dff /nandgame/assembler/py_nand_ass
parent	281414ea9b42e213b85b95b7072b73d1f1e3f240 (diff)