#!/usr/bin/env python3

import ply.lex as lex

# List of token names. This is always required.
tokens = (
    "OP",
    "JUMP",
    "COMMA",
    "COLON",
    "SYMBOL",
    "NUMBER",
    "HEXNUMBER",
    "DOT",
    "REG",
    "NL",
)

# Regular expression rules for simple tokens
t_COMMA = r","
t_COLON = r":"
t_DOT = r"\."


# Word boundaries (\b) keep mnemonics from matching inside longer
# identifiers (e.g. "movement" must lex as a SYMBOL, not OP + SYMBOL).
def t_OP(t):
    r"\b(and|or|xor|not|mov|add|inc|sub|dec|cmp|neg|hlt|nop)\b"
    return t


def t_REG(t):
    r"\b(AD?M?|DM?|M|_)\b"
    return t


def t_JUMP(t):
    r"\b(jmp|jlt|jgt|jle|jge|jeq|jne)\b"
    return t


# HEXNUMBER must be defined before NUMBER: PLY tries function rules in
# definition order, so r"\#\d+" would otherwise consume the "#0" of "#0x1F"
# and leave "x1F" behind as garbage.
def t_HEXNUMBER(t):
    r"\#0x[0-9a-fA-F]+"
    t.value = int(t.value[1:], 16)
    return t


def t_NUMBER(t):
    r"\#\d+"
    t.value = int(t.value[1:])
    return t


def t_SYMBOL(t):
    r"[a-z][A-Za-z0-9_]+"
    return t


# Define a rule so we can track line numbers
def t_NL(t):
    r"\n+"
    t.lexer.lineno += len(t.value)
    return t


# A string containing ignored characters (spaces and tabs)
t_ignore = " \t"


# Discard comments (";" to end of line); returning nothing drops the match.
def t_COMMENT(t):
    r";.*"
    pass


# Error handling rule
def t_error(t):
    print("!!! Illegal character '%s' on line %d" % (t.value[0], t.lexer.lineno))
    t.lexer.skip(1)


# EOF handling rule: if the source did not end with a newline, feed one extra
# "\n" so the final statement still emits an NL token; the flag prevents an
# infinite loop when EOF is reached a second time.
def t_eof(t):
    if not t.lexer.newline_added:
        t.lexer.input("\n")
        t.lexer.newline_added = True
        return t.lexer.token()
    return None


# Build the lexer
lexer = lex.lex()
lexer.newline_added = False
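
# Minimal usage sketch (an assumption, not part of the original module): run
# the lexer over a sample line of this assembly dialect and print each token.
# The sample program is hypothetical; only the token rules above come from
# the source. PLY lexers are iterable, yielding tokens until EOF.
if __name__ == "__main__":
    lexer.input("loop: mov A, #0x1F ; load mask\n    jmp loop")
    for tok in lexer:
        # Each LexToken carries its type, converted value, and line number.
        print(tok.type, tok.value, tok.lineno)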