#!/usr/bin/env python3
# PLY (yacc) grammar for the assembler source format.
#
# NOTE: the docstrings of the ``p_*`` functions below are NOT documentation;
# ply reads them as the grammar productions.  Keep explanatory text in ``#``
# comments so the grammar is not altered by accident.

import sys
from typing import Any, MutableSequence, Tuple

import ply.yacc as yacc
from ply.lex import LexToken

# Get the token map from the lexer. This is required.
from .lexer import tokens
from . import parser_types as pt

# Type used for the production object ply hands to every rule function:
# p[0] is the rule's result, p[1:] are the values of the matched symbols.
P = MutableSequence[Any]


# Start rule (first rule in the file): a program is a list of lines or empty.
# For the empty alternative p[1] is None, so the result is None as well.
def p_program(p: P) -> None:
    """program : instruction_list
               | empty
    """
    p[0] = p[1]


# Empty production; leaves p[0] untouched.
def p_empty(p: P) -> None:
    """empty :"""
    pass


# Left-recursive list of lines; builds a Python list in source order.
# A blank line contributes whatever p_line produced for it (nothing is
# assigned there, so the entry is left unset).
def p_instructions(p: P) -> None:
    """instruction_list : instruction_list line
                        | line
    """
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1] + [p[2]]


# # try right-recursive?
# def p_instructions2(p: P) -> None:
#     """instruction_list2 : line instruction_list2
#                          | line
#     """
#     if len(p) == 2:
#         p[0] = [p[1]]
#     else:
#         p[0] = [p[1]] + p[2]
#     print(f"INSes2: {len(p)}")
#     print(f"  {p[1]}")
#     if len(p) > 2:
#         print(f"  {p[2]}")
#     pass


# One source line: an instruction or a jump target followed by NL, or a
# bare NL (blank line).
def p_line(p: P) -> None:
    """line : instruction NL
            | jumpdest NL
            | NL
    """
    #       | instruction error NL
    #       | jumpdest error NL
    if len(p) == 2:
        # Bare NL: nothing to report, p[0] is deliberately not assigned.
        return

    p[0] = p[1]
    if len(p) == 3:
        # NOTE(review): this prints every parsed item to stdout; it looks
        # like leftover debug output -- confirm before removing.
        print(f"Item: {p[0]}")
    else:
        # Only reachable if the commented-out error productions above are
        # re-enabled ("if error handling").
        assert False


# An instruction is either well-formed or the (invalid) one-argument form.
def p_instruction(p: P) -> None:
    """instruction : valid_instruction
                   | one_arg_invalid
    """
    p[0] = p[1]


# Wrap the 5-tuple built by no_args/two_arg/three_arg, i.e.
# (op, jump, arg1, arg2, arg3), into a pt.Instruction tagged with the line
# number of the first symbol.
def p_valid_instruction(p: P) -> None:
    """valid_instruction : no_args
                         | two_arg
                         | three_arg
    """
    tp: Tuple[Any, Any, Any, Any, Any] = p[1]
    p[0] = pt.Instruction(p.lineno(1), *tp)


# "label:" -> pt.JumpTarget carrying the line number and the label symbol.
def p_jumpdest(p: P) -> None:
    """jumpdest : symbol COLON"""
    p[0] = pt.JumpTarget(lineno=p.lineno(1), label=p[1])


# Opcode without operands: pad the (op, jump) pair with three None slots.
def p_no_arg(p: P) -> None:
    """no_args : opcode"""
    p[0] = (*p[1], None, None, None)


# Opcode followed by exactly one operand; recorded as pt.ErrorInstruction
# instead of raising a syntax error.
def p_onearg_invalid(p: P) -> None:
    """one_arg_invalid : opcode argument"""
    # The opcode rules produce (OP, JUMP) or (OP, None).
    op: tuple[str, str | None] = p[1]
    # NOTE(review): op[1] is the jump suffix (None for a plain opcode), not
    # the opcode name op[0] -- confirm against pt.ErrorInstruction which one
    # is intended here.
    p[0] = pt.ErrorInstruction(
        p.lineno(1), op[1], "No opcode only supports one argument."
    )


# "op reg, arg": third operand slot stays None.
def p_two_arg(p: P) -> None:
    """two_arg : opcode register COMMA argument"""
    p[0] = (*p[1], p[2], p[4], None)


# "op reg, arg, arg"
def p_three_arg(p: P) -> None:
    """three_arg : opcode register COMMA argument COMMA argument"""
    p[0] = (*p[1], p[2], p[4], p[6])


# Checking which argument combinations are allowed is done one level up;
# here any immediate, register or symbol is accepted.
def p_argument(p: P) -> None:
    """argument : number
                | register
                | symbol
    """
    p[0] = p[1]


def p_symbol(p: P) -> None:
    """symbol : SYMBOL"""
    p[0] = pt.Symbol(p[1])


def p_register(p: P) -> None:
    """register : REG"""
    p[0] = pt.Register(p[1])


# Both opcode forms yield an (OP, JUMP-or-None) pair.
def p_opcode(p: P) -> None:
    """opcode : opcode_jmp
              | opcode_njmp
    """
    p[0] = p[1]


# "OP.JUMP" -> (OP, JUMP)
def p_opcode_jmp(p: P) -> None:
    """opcode_jmp : OP DOT JUMP"""
    p[0] = (p[1], p[3])


# "OP" -> (OP, None)
def p_opcode_njmp(p: P) -> None:
    """opcode_njmp : OP"""
    p[0] = (p[1], None)


# "#123" / "#0x7b": the SHARP token is dropped, the number token value is
# wrapped in pt.Immediate.
def p_number(p: P) -> None:
    """number : SHARP NUMBER
              | SHARP HEXNUMBER
    """
    p[0] = pt.Immediate(p[2])


def p_error(p: LexToken | None) -> LexToken | None:
    """Report a syntax error and resynchronise at the next NL token.

    ply calls this with the offending token, or with None when the input
    ended unexpectedly.  A warning is written to stderr in both cases.

    For a real token the remaining tokens are discarded up to (and
    including reading) the next NL or the end of input, the parser's error
    state is cleared with ``errok()``, and that NL token (or None at end of
    input) is returned so ply uses it as the next lookahead.  At end of
    file no recovery is attempted and None is returned.
    """
    if p:
        print(
            f"WARNING: Unexpected {repr(p.value)} on line {p.lineno}", file=sys.stderr
        )
    else:
        print("WARNING: Unexpected end of file.", file=sys.stderr)
        return

    # Panic-mode recovery: skip the rest of the broken line.
    while True:
        tok: LexToken | None = __parser.token()
        if not tok or tok.type == "NL":
            break
    __parser.errok()

    return tok


# Built at import time; ply collects the p_* functions defined above.
__parser: yacc.LRParser = yacc.yacc()


def parse_text(text: str) -> list[pt.AsmLine]:
    """Parse assembler source text into a list of parsed lines.

    Line tracking is enabled so the rule functions can query ``p.lineno()``.

    The parser yields None when the start rule produces no value (an empty
    program matches ``program : empty``) or when parsing is abandoned; that
    is normalised to an empty list here so the result always matches the
    declared return type.
    """
    result = __parser.parse(text, tracking=True)
    return [] if result is None else result


def parse_file(filename: str) -> list[pt.AsmLine]:
    """Read *filename* as ASCII and parse its contents with parse_text().

    The file is read in binary mode and decoded explicitly, so bytes
    outside ASCII raise UnicodeDecodeError and line endings reach the lexer
    unchanged.
    """
    with open(filename, "rb") as infile:
        file_content_bin = infile.read()
    file_content_txt = file_content_bin.decode("ascii")
    return parse_text(file_content_txt)