#!/usr/bin/env python3
"""PLY lexer for a small assembly-like language.

Token classes: opcodes (OP), jump mnemonics (JUMP), registers (REG),
decimal/hex immediates (NUMBER / HEXNUMBER, both prefixed with '#'),
lowercase symbols/labels (SYMBOL), punctuation (COMMA, COLON, DOT) and
newlines (NL).  Comments start with ';' and run to end of line.
"""

import ply.lex as lex

# List of token names.  This is always required by PLY.
tokens = (
    'OP', 'JUMP', 'COMMA', 'COLON', 'SYMBOL',
    'NUMBER', 'HEXNUMBER', 'DOT', 'REG', 'NL',
)

# Regular expression rules for simple (single-character) tokens.
t_COMMA = r','
t_COLON = r':'
t_DOT = r'\.'


def t_OP(t):
    # \b guards keep an opcode from matching inside a longer identifier
    # (e.g. "android" must not lex as OP("and") + SYMBOL("roid")).
    r"\b(mov|and|dec|hlt|add|sub|inc)\b"
    return t


def t_REG(t):
    # Registers: A, D, M and their combinations (AD, AM, ADM, DM), plus '_'.
    r"\b(AD?M?|DM?|M|_)\b"
    return t


def t_JUMP(t):
    # Same word-boundary reasoning as t_OP.
    r"\b(jmp|jlt|jgt|jle|jge|jeq|jne)\b"
    return t


# NOTE: t_HEXNUMBER must be defined BEFORE t_NUMBER.  PLY tries function
# rules in definition order, and r'\#\d+' would otherwise consume the
# leading "#0" of "#0xFF", mis-lexing the remainder as a symbol.
def t_HEXNUMBER(t):
    r'\#0x[0-9a-fA-F]+'
    # Strip the leading '#'; int(..., 16) accepts the '0x' prefix.
    t.value = int(t.value[1:], 16)
    return t


def t_NUMBER(t):
    r'\#\d+'
    t.value = int(t.value[1:])  # strip the leading '#'
    return t


def t_SYMBOL(t):
    # '*' (not '+') so single-letter symbols such as "x" are accepted.
    r'[a-z][A-Za-z0-9_]*'
    return t


# Track line numbers; a run of consecutive newlines is one NL token.
def t_NL(t):
    r'\n+'
    t.lexer.lineno += len(t.value)
    return t


# A string containing ignored characters (spaces and tabs).
t_ignore = ' \t'


def t_COMMENT(t):
    r';.*'
    # Discard comments: returning None drops the match entirely.
    pass


# Error handling rule: report the offending character and resynchronize.
def t_error(t):
    print("!!! Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)


# EOF handling rule: inject a single trailing "\n" so a final statement
# without a newline still produces an NL token, then terminate for real.
# NOTE(review): the newline_added flag lives on the lexer instance, so it
# stays set across a second lexer.input(...) call — confirm callers only
# lex one source per lexer, or reset the flag between inputs.
def t_eof(t):
    if not t.lexer.newline_added:
        t.lexer.input("\n")
        t.lexer.newline_added = True
        return t.lexer.token()
    return None


# Build the lexer.
lexer = lex.lex()
lexer.newline_added = False