#!/usr/bin/env python3
"""
Bytecode VM with Java/C++-like syntax
Includes disassembler and debugger with improved type safety and error handling
"""

import struct
import sys
import os
from enum import IntEnum
from dataclasses import dataclass
from typing import List, Optional, Any, Dict, Tuple, Union
import re
import traceback
from collections import defaultdict

# ============================================================================
# TOKEN DEFINITIONS
# ============================================================================

class TokenType(IntEnum):
    """Kinds of token produced by the Lexer.

    The numeric values are grouped by category (keywords, literals,
    operators, delimiters, end of file) and are kept stable.
    """

    # Keywords
    FUN = 0
    IF = 1
    ELSE = 2
    WHILE = 3
    FOR = 4
    RETURN = 5
    UINT = 6
    INT = 7
    FLOAT = 8
    BOOL = 9
    CHAR = 10
    STR = 11
    TRUE = 12
    FALSE = 13
    U8 = 14
    U16 = 15
    U32 = 16
    I8 = 17
    I16 = 18
    I32 = 19

    # Identifiers and literals
    IDENTIFIER = 20
    INT_LITERAL = 21
    FLOAT_LITERAL = 22
    STRING_LITERAL = 23
    BOOL_LITERAL = 24

    # Operators
    PLUS = 30
    MINUS = 31
    STAR = 32
    SLASH = 33
    PERCENT = 34
    ASSIGN = 35
    PLUS_ASSIGN = 36
    MINUS_ASSIGN = 37
    STAR_ASSIGN = 38
    SLASH_ASSIGN = 39
    EQ = 40
    NEQ = 41
    LT = 42
    GT = 43
    LE = 44
    GE = 45
    AND = 46
    OR = 47
    NOT = 48
    INCREMENT = 49
    DECREMENT = 50

    # Delimiters
    LPAREN = 60
    RPAREN = 61
    LBRACE = 62
    RBRACE = 63
    SEMICOLON = 64
    COMMA = 65

    # End of file
    EOF = 99


@dataclass
class Token:
    """A single lexical token together with its source position.

    ``line`` and ``col`` are 1-based and refer to the first character of
    the token.  ``value`` is the lexeme for keywords, identifiers,
    operators and delimiters, the converted Python value for literals,
    and ``None`` for EOF.
    """

    type: TokenType
    value: Any
    line: int
    col: int
    filename: str = ""

    def __str__(self):
        return f"Token({self.type.name}, {repr(self.value)}, line {self.line}, col {self.col})"


# ============================================================================
# LEXER
# ============================================================================

class Lexer:
    """Character-at-a-time scanner that turns source text into Tokens.

    Call ``next_token()`` repeatedly; once the input is exhausted it keeps
    returning an EOF token.  All lexical errors are raised as
    ``SyntaxError`` with a ``filename:line:col:`` prefix.
    """

    # Reserved words and the token type each one maps to.
    _KEYWORDS = {
        'fun': TokenType.FUN,
        'if': TokenType.IF,
        'else': TokenType.ELSE,
        'while': TokenType.WHILE,
        'for': TokenType.FOR,
        'return': TokenType.RETURN,
        'uint': TokenType.UINT,
        'int': TokenType.INT,
        'float': TokenType.FLOAT,
        'bool': TokenType.BOOL,
        'char': TokenType.CHAR,
        'str': TokenType.STR,
        'true': TokenType.TRUE,
        'false': TokenType.FALSE,
        'u8': TokenType.U8,
        'u16': TokenType.U16,
        'u32': TokenType.U32,
        'i8': TokenType.I8,
        'i16': TokenType.I16,
        'i32': TokenType.I32,
    }

    # Characters that always form a complete token on their own.
    _SINGLE_CHAR_TOKENS = {
        '(': TokenType.LPAREN,
        ')': TokenType.RPAREN,
        '{': TokenType.LBRACE,
        '}': TokenType.RBRACE,
        ';': TokenType.SEMICOLON,
        ',': TokenType.COMMA,
        '%': TokenType.PERCENT,
    }

    # first char -> (type when it stands alone, {second char: combined type})
    _OPERATORS = {
        '+': (TokenType.PLUS, {'+': TokenType.INCREMENT, '=': TokenType.PLUS_ASSIGN}),
        '-': (TokenType.MINUS, {'-': TokenType.DECREMENT, '=': TokenType.MINUS_ASSIGN}),
        '*': (TokenType.STAR, {'=': TokenType.STAR_ASSIGN}),
        '/': (TokenType.SLASH, {'=': TokenType.SLASH_ASSIGN}),
        '=': (TokenType.ASSIGN, {'=': TokenType.EQ}),
        '!': (TokenType.NOT, {'=': TokenType.NEQ}),
        '<': (TokenType.LT, {'=': TokenType.LE}),
        '>': (TokenType.GT, {'=': TokenType.GE}),
    }

    # Characters that are only valid when doubled: char -> (type, error text).
    _DOUBLED_ONLY = {
        '&': (TokenType.AND, "Single '&' not supported, use '&&' for logical AND"),
        '|': (TokenType.OR, "Single '|' not supported, use '||' for logical OR"),
    }

    # Recognised backslash escapes inside string literals.
    _ESCAPES = {
        'n': '\n',
        't': '\t',
        'r': '\r',
        '0': '\0',
        '\\': '\\',
        '"': '"',
    }

    def __init__(self, source: str, filename: str = ""):
        """Create a lexer positioned on the first character of ``source``.

        Args:
            source: Complete program text to tokenize.
            filename: Name used as the prefix of error messages and stored
                on every token.
        """
        self.source = source
        self.filename = filename
        self.position = 0
        self.line = 1
        self.col = 1
        self.current_char = source[0] if source else None

        # Per-instance copy so callers that tweak `lexer.keywords` do not
        # affect other lexers.
        self.keywords = dict(self._KEYWORDS)

    def error(self, message: str):
        """Raise a lexer error with position information"""
        raise SyntaxError(f"{self.filename}:{self.line}:{self.col}: {message}")

    def advance(self):
        """Consume the current character and move to the next one.

        Line/column tracking is updated for every consumed character,
        including the last one, so that at end of input ``line``/``col``
        describe the position just past the final character.  Calling
        this at end of input is a no-op.
        """
        if self.current_char is None:
            return

        if self.current_char == '\n':
            self.line += 1
            self.col = 1
        else:
            self.col += 1

        self.position += 1
        if self.position < len(self.source):
            self.current_char = self.source[self.position]
        else:
            self.current_char = None

    def _peek(self) -> Optional[str]:
        """Return the character after the current one, or None at the end."""
        nxt = self.position + 1
        return self.source[nxt] if nxt < len(self.source) else None

    def _skip_block_comment(self):
        """Consume a ``/* ... */`` comment; the lexer must be on the ``/``."""
        self.advance()  # Skip /
        self.advance()  # Skip *
        while self.current_char is not None and not (
            self.current_char == '*' and self._peek() == '/'
        ):
            self.advance()
        if self.current_char is None:
            self.error("Unterminated multi-line comment")
        self.advance()  # Skip *
        self.advance()  # Skip /

    def skip_whitespace(self):
        """Skip whitespace and comments"""
        while self.current_char is not None:
            ch = self.current_char
            if ch in ' \t\r\n':
                self.advance()
            elif ch == '/' and self._peek() == '/':
                # Single line comment: stop on the newline (or end of
                # input); the newline itself is eaten by the next pass.
                while self.current_char is not None and self.current_char != '\n':
                    self.advance()
            elif ch == '/' and self._peek() == '*':
                self._skip_block_comment()
            else:
                # Includes a lone '/', which is the division operator.
                break

    def number(self) -> Token:
        """Parse a number (integer or float).

        A run of digits yields an INT_LITERAL; if it is immediately
        followed by ``.`` (with or without further digits) the result is
        a FLOAT_LITERAL.
        """
        start_line, start_col = self.line, self.col
        start = self.position

        while self.current_char is not None and self.current_char.isdigit():
            self.advance()

        if self.current_char != '.':
            text = self.source[start:self.position]
            return Token(TokenType.INT_LITERAL, int(text), start_line, start_col, self.filename)

        self.advance()  # Skip the decimal point
        while self.current_char is not None and self.current_char.isdigit():
            self.advance()

        text = self.source[start:self.position]
        return Token(TokenType.FLOAT_LITERAL, float(text), start_line, start_col, self.filename)

    def string(self) -> Token:
        """Parse a double-quoted string literal.

        Recognised escapes are translated; an unrecognised escape keeps
        both the backslash and the following character.

        Raises:
            SyntaxError: if the input ends before the closing quote,
                including directly after a backslash.
        """
        start_line, start_col = self.line, self.col
        self.advance()  # Skip opening quote

        pieces = []
        while self.current_char is not None and self.current_char != '"':
            if self.current_char == '\\':
                self.advance()
                if self.current_char is None:
                    # Input ended right after the backslash.
                    self.error("Unterminated string literal")
                escaped = self.current_char
                pieces.append(self._ESCAPES.get(escaped, '\\' + escaped))
            else:
                pieces.append(self.current_char)
            self.advance()

        if self.current_char != '"':
            self.error("Unterminated string literal")
        self.advance()  # Skip closing quote

        return Token(TokenType.STRING_LITERAL, ''.join(pieces), start_line, start_col, self.filename)

    def identifier(self) -> Token:
        """Parse an identifier or keyword.

        ``true`` and ``false`` are returned as BOOL_LITERAL tokens carrying
        the Python booleans; other keywords carry their own spelling.
        """
        start_line, start_col = self.line, self.col
        start = self.position

        while self.current_char is not None and (
            self.current_char.isalnum() or self.current_char == '_'
        ):
            self.advance()

        word = self.source[start:self.position]
        token_type = self.keywords.get(word, TokenType.IDENTIFIER)

        # Handle boolean literals
        if token_type == TokenType.TRUE:
            return Token(TokenType.BOOL_LITERAL, True, start_line, start_col, self.filename)
        if token_type == TokenType.FALSE:
            return Token(TokenType.BOOL_LITERAL, False, start_line, start_col, self.filename)

        return Token(token_type, word, start_line, start_col, self.filename)

    def next_token(self) -> Token:
        """Get the next token from the source.

        Returns:
            The next Token, or an EOF token once the input is exhausted.

        Raises:
            SyntaxError: on an unexpected character, a lone ``&`` or ``|``,
                or an unterminated comment or string.
        """
        self.skip_whitespace()

        ch = self.current_char
        if ch is None:
            return Token(TokenType.EOF, None, self.line, self.col, self.filename)

        start_line, start_col = self.line, self.col

        # Single character tokens
        if ch in self._SINGLE_CHAR_TOKENS:
            self.advance()
            return Token(self._SINGLE_CHAR_TOKENS[ch], ch, start_line, start_col, self.filename)

        # Operators that may be extended by one following character
        if ch in self._OPERATORS:
            alone_type, extensions = self._OPERATORS[ch]
            self.advance()
            follower = self.current_char
            if follower in extensions:
                self.advance()
                return Token(extensions[follower], ch + follower, start_line, start_col, self.filename)
            return Token(alone_type, ch, start_line, start_col, self.filename)

        # '&&' and '||' (the single-character forms are rejected)
        if ch in self._DOUBLED_ONLY:
            token_type, complaint = self._DOUBLED_ONLY[ch]
            self.advance()
            if self.current_char != ch:
                self.error(complaint)
            self.advance()
            return Token(token_type, ch + ch, start_line, start_col, self.filename)

        # Numbers
        if ch.isdigit():
            return self.number()

        # Strings
        if ch == '"':
            return self.string()

        # Identifiers
        if ch.isalpha() or ch == '_':
            return self.identifier()

        self.error(f"Unexpected character: '{ch}'")


# ============================================================================
# OPCODES
# ============================================================================
class Opcode(IntEnum): PUSH_CONST = 0x01 PUSH_INT = 0x02 PUSH_FLOAT = 0x03 PUSH_STR = 0x04 LOAD_LOCAL = 0x10 STORE_LOCAL = 0x11 ADD = 0x20 SUB = 0x21 MUL = 0x22 DIV = 0x23 MOD = 0x24 NEG = 0x25 BIT_AND = 0x26 BIT_OR = 0x27 BIT_XOR = 0x28 SHL = 0x29 SHR = 0x2A FADD = 0x30 FSUB = 0x31 FMUL = 0x32 FDIV = 0x33 FNEG = 0x34 CMP_EQ = 0x40 CMP_NEQ = 0x41 CMP_LT = 0x42 CMP_GT = 0x43 CMP_LE = 0x44 CMP_GE = 0x45 JMP = 0x50 JMP_IF = 0x51 JMP_IF_NOT = 0x52 CALL = 0x60 RET = 0x61 CONST_CAST = 0x70 TRUNC = 0x71 TO_FLOAT = 0x72 TO_INT = 0x73 DUP = 0x80 POP = 0x81 PRINT = 0x90 HALT = 0xA0 # Type codes class TypeCode(IntEnum): I8 = 0x01 U8 = 0x02 I16 = 0x03 U16 = 0x04 I32 = 0x05 U32 = 0x06 F32 = 0x07 BOOL = 0x08 CHAR = 0x09 STR = 0x0A # ============================================================================ # VALUE REPRESENTATION # ============================================================================ @dataclass class Value: """Runtime value container with type safety""" type_code: TypeCode data: Any def __post_init__(self): """Validate value data matches type""" self._validate_type() def _validate_type(self): """Validate that data matches the type code""" type_validators = { TypeCode.I8: lambda x: isinstance(x, int) and -128 <= x <= 127, TypeCode.U8: lambda x: isinstance(x, int) and 0 <= x <= 255, TypeCode.I16: lambda x: isinstance(x, int) and -32768 <= x <= 32767, TypeCode.U16: lambda x: isinstance(x, int) and 0 <= x <= 65535, TypeCode.I32: lambda x: isinstance(x, int), TypeCode.U32: lambda x: isinstance(x, int) and x >= 0, TypeCode.F32: lambda x: isinstance(x, float), TypeCode.BOOL: lambda x: isinstance(x, bool), TypeCode.CHAR: lambda x: isinstance(x, str) and len(x) == 1, TypeCode.STR: lambda x: isinstance(x, str), } validator = type_validators.get(self.type_code) if validator and not validator(self.data): raise TypeError(f"Value {self.data} is not valid for type {self.type_code.name}") def to_bool(self) -> bool: """Convert to boolean""" if self.type_code == 
TypeCode.BOOL: return self.data elif self.type_code in [TypeCode.I8, TypeCode.U8, TypeCode.I16, TypeCode.U16, TypeCode.I32, TypeCode.U32]: return bool(self.data) elif self.type_code == TypeCode.F32: return bool(self.data) elif self.type_code == TypeCode.CHAR: return bool(ord(self.data)) elif self.type_code == TypeCode.STR: return bool(self.data) return False def to_int(self) -> int: """Convert to integer""" if self.type_code in [TypeCode.I8, TypeCode.U8, TypeCode.I16, TypeCode.U16, TypeCode.I32, TypeCode.U32]: return int(self.data) elif self.type_code == TypeCode.F32: return int(self.data) elif self.type_code == TypeCode.BOOL: return 1 if self.data else 0 elif self.type_code == TypeCode.CHAR: return ord(self.data) return 0 def to_float(self) -> float: """Convert to float""" return float(self.data) def get_type_name(self) -> str: """Get human-readable type name""" return self.type_code.name def __repr__(self): return f"Value({self.type_code.name}, {repr(self.data)})" # ============================================================================ # DISASSEMBLER # ============================================================================ class Disassembler: def __init__(self, bytecode: bytes, filename: str = ""): self.bytecode = bytecode self.filename = filename self.ip = 0 self.constants = [] self.functions = [] def error(self, message: str): """Raise a disassembler error with position information""" raise ValueError(f"{self.filename}:0x{self.ip:08x}: {message}") def disassemble(self) -> str: """Disassemble entire .popclass file""" output = [] # Parse header magic = self.bytecode[self.ip:self.ip+4] if magic != b'POPC': self.error(f"Invalid .popclass file magic: {magic}") self.ip += 4 version_major = struct.unpack(' List[str]: """Disassemble a single function""" output = [] ip = func['code_offset'] end_ip = ip + func['code_size'] while ip < end_ip: # Save current IP for this instruction current_ip = ip try: opcode = self.bytecode[ip] ip += 1 # Get mnemonic mnemonic = 
self.get_opcode_mnemonic(opcode) # Handle operands based on opcode operands = [] if opcode == Opcode.PUSH_CONST: const_idx = struct.unpack('= len(self.constants): self.error(f"Constant index {const_idx} out of range") const_type, value, type_str = self.constants[const_idx] operands.append(f"const[{const_idx}] ; {type_str} {value}") elif opcode == Opcode.PUSH_INT: width = self.bytecode[ip] ip += 1 value = struct.unpack('= len(self.constants): self.error(f"Constant index {const_idx} out of range") const_type, value, type_str = self.constants[const_idx] operands.append(f"const[{const_idx}] ; \"{value}\"") elif opcode in [Opcode.LOAD_LOCAL, Opcode.STORE_LOCAL]: local_idx = struct.unpack('= len(self.functions): self.error(f"Function index {func_idx} out of range") operands.append(f"func[{func_idx}], {arg_count}") elif opcode == Opcode.RET: has_value = self.bytecode[ip] ip += 1 operands.append(f"{has_value}") else: # No operands for this opcode pass # Format the instruction operands_str = ', '.join(operands) if operands else '' output.append(f"0x{current_ip:08x}: {mnemonic:15} {operands_str}") except Exception as e: output.append(f"0x{current_ip:08x}: ERROR: {e}") break return output def get_opcode_mnemonic(self, opcode: int) -> str: """Convert opcode to mnemonic string""" try: return Opcode(opcode).name except ValueError: return f"UNKNOWN(0x{opcode:02x})" # ============================================================================ # DEBUGGER # ============================================================================ class Debugger: def __init__(self, vm): self.vm = vm self.breakpoints = set() self.step_mode = False self.last_command = None def print_status(self): """Print current VM status""" print(f"\nIP: 0x{self.vm.ip:08x} | Stack: {len(self.vm.current_frame.stack)} | " f"Locals: {len(self.vm.current_frame.locals)} | Frame: {len(self.vm.call_stack)}") # Show current instruction if self.vm.ip < len(self.vm.bytecode): opcode = self.vm.bytecode[self.vm.ip] try: 
print(f"Next: {Opcode(opcode).name}") except: print(f"Next: UNKNOWN(0x{opcode:02x})") def print_stack(self): """Print current stack contents""" if not self.vm.current_frame.stack: print("Stack: empty") return print("Stack (top to bottom):") for i, value in enumerate(reversed(self.vm.current_frame.stack)): print(f" [{len(self.vm.current_frame.stack)-i-1}]: {value}") def print_locals(self): """Print current local variables""" if not self.vm.current_frame.locals: print("Locals: none") return print("Local variables:") for i, value in enumerate(self.vm.current_frame.locals): print(f" [{i}]: {value}") def print_breakpoints(self): """Print all breakpoints""" if not self.breakpoints: print("No breakpoints set") return print("Breakpoints:") for bp in sorted(self.breakpoints): print(f" 0x{bp:08x}") def disassemble_around(self, ip: int, lines_before: int = 2, lines_after: int = 5): """Disassemble code around current IP""" dis = Disassembler(self.vm.bytecode) # Find current function current_func = None for func in dis.functions: if func['code_offset'] <= ip < func['code_offset'] + func['code_size']: current_func = func break if not current_func: print("Not in any function") return # Disassemble the function and find current instruction func_code = dis.disassemble_function(current_func) # Find current instruction in disassembly current_line = -1 for i, line in enumerate(func_code): if f"0x{ip:08x}:" in line: current_line = i break if current_line == -1: print("Could not find current instruction") return # Print surrounding lines start = max(0, current_line - lines_before) end = min(len(func_code), current_line + lines_after + 1) print(f"Disassembly around 0x{ip:08x}:") for i in range(start, end): marker = ">>> " if i == current_line else " " print(f"{marker}{func_code[i]}") def handle_command(self, command: str) -> bool: """Handle debugger command""" cmd_parts = command.strip().split() if not cmd_parts: return True cmd = cmd_parts[0].lower() if cmd in ['c', 'continue']: 
self.step_mode = False return False elif cmd in ['s', 'step']: self.step_mode = True return False elif cmd in ['n', 'next']: # Step over calls current_ip = self.vm.ip self.step_mode = True return False elif cmd in ['si', 'stepi']: # Single instruction self.execute_single_instruction() return True elif cmd in ['b', 'break']: if len(cmd_parts) > 1: try: if cmd_parts[1].startswith('0x'): bp = int(cmd_parts[1], 16) else: bp = int(cmd_parts[1]) self.breakpoints.add(bp) print(f"Breakpoint set at 0x{bp:08x}") except ValueError: print("Invalid breakpoint address") else: print("Usage: break
") elif cmd in ['db', 'delbreak']: if len(cmd_parts) > 1: try: if cmd_parts[1].startswith('0x'): bp = int(cmd_parts[1], 16) else: bp = int(cmd_parts[1]) if bp in self.breakpoints: self.breakpoints.remove(bp) print(f"Breakpoint removed at 0x{bp:08x}") else: print("Breakpoint not found") except ValueError: print("Invalid breakpoint address") else: self.breakpoints.clear() print("All breakpoints cleared") elif cmd in ['bl', 'breaklist']: self.print_breakpoints() elif cmd in ['st', 'stack']: self.print_stack() elif cmd in ['l', 'locals']: self.print_locals() elif cmd in ['d', 'disasm']: lines = 10 if len(cmd_parts) > 1: try: lines = int(cmd_parts[1]) except ValueError: pass self.disassemble_around(self.vm.ip, lines_after=lines) elif cmd in ['p', 'print']: if len(cmd_parts) > 1: # Try to evaluate expression (simple for now) expr = ' '.join(cmd_parts[1:]) if expr == 'stack': self.print_stack() elif expr == 'locals': self.print_locals() elif expr == 'ip': print(f"IP: 0x{self.vm.ip:08x}") else: print(f"Unknown expression: {expr}") else: self.print_status() elif cmd in ['h', 'help', '?']: self.print_help() elif cmd in ['q', 'quit']: print("Debugger exited") sys.exit(0) else: print(f"Unknown command: {cmd}") self.print_help() return True def execute_single_instruction(self): """Execute a single instruction""" if self.vm.halted: print("VM is halted") return old_ip = self.vm.ip try: self.vm.execute_instruction() print(f"Executed: 0x{old_ip:08x} -> 0x{self.vm.ip:08x}") self.disassemble_around(self.vm.ip, lines_before=0, lines_after=1) except Exception as e: print(f"Error executing instruction at 0x{old_ip:08x}: {e}") def print_help(self): """Print debugger help""" print("Debugger commands:") print(" c, continue - Continue execution") print(" s, step - Step into functions") print(" n, next - Step over functions") print(" si, stepi - Single instruction step") print(" b, break - Set breakpoint") print(" db, delbreak - Delete breakpoint (or all)") print(" bl, breaklist - List 
breakpoints") print(" st, stack - Show stack") print(" l, locals - Show local variables") print(" d, disasm [n] - Disassemble around IP") print(" p, print [expr]- Print status or expression") print(" h, help, ? - This help") print(" q, quit - Quit debugger") def run(self): """Run debugger main loop""" print("POP VM Debugger started") print("Type 'help' for commands") while not self.vm.halted: # Check breakpoints if self.vm.ip in self.breakpoints: print(f"\nBreakpoint hit at 0x{self.vm.ip:08x}") self.step_mode = True if self.step_mode: self.print_status() try: command = input("\ndbg> ") if self.handle_command(command): continue # Stay in step mode else: # Command said to continue pass except EOFError: print("\nExiting debugger") break except KeyboardInterrupt: print("\nInterrupted") self.step_mode = True continue # Execute instruction try: self.vm.execute_instruction() except Exception as e: print(f"Runtime error at IP 0x{self.vm.ip:08x}: {e}") break # ============================================================================ # VM IMPLEMENTATION # ============================================================================ class Frame: """Call frame with type safety""" def __init__(self, func_idx: int, return_ip: int, arg_values: List[Value]): self.func_idx = func_idx self.return_ip = return_ip self.locals = arg_values[:] # Copy arguments self.stack: List[Value] = [] def __repr__(self): return f"Frame(func={self.func_idx}, return_ip=0x{self.return_ip:08x}, locals={len(self.locals)}, stack={len(self.stack)})" class VM: def __init__(self, bytecode: bytes, filename: str = ""): self.bytecode = bytecode self.filename = filename self.ip = 0 self.constants: List[Value] = [] self.functions: List[Dict] = [] self.call_stack: List[Frame] = [] self.current_frame: Optional[Frame] = None self.halted = False self.load_bytecode() def error(self, message: str): """Raise a VM error with position information""" raise RuntimeError(f"{self.filename}:0x{self.ip:08x}: {message}") def 
load_bytecode(self): """Load bytecode into VM with validation""" self.ip = 0 # Check magic magic = self.bytecode[self.ip:self.ip+4] if magic != b'POPC': self.error(f"Invalid .popclass file magic: {magic}") self.ip += 4 # Version version_major = struct.unpack(' int: """Fetch one byte and advance IP""" if self.ip >= len(self.bytecode): self.error("Unexpected end of bytecode") b = self.bytecode[self.ip] self.ip += 1 return b def fetch_u16(self) -> int: """Fetch u16 and advance IP""" if self.ip + 2 > len(self.bytecode): self.error("Unexpected end of bytecode while reading u16") value = struct.unpack(' int: """Fetch u32 and advance IP""" if self.ip + 4 > len(self.bytecode): self.error("Unexpected end of bytecode while reading u32") value = struct.unpack(' int: """Fetch i32 and advance IP""" if self.ip + 4 > len(self.bytecode): self.error("Unexpected end of bytecode while reading i32") value = struct.unpack(' float: """Fetch f32 and advance IP""" if self.ip + 4 > len(self.bytecode): self.error("Unexpected end of bytecode while reading f32") value = struct.unpack(' Value: """Pop value from current frame's stack""" if not self.current_frame.stack: self.error("Stack underflow") return self.current_frame.stack.pop() def type_check_binary_op(self, a: Value, b: Value, op: str) -> Tuple[TypeCode, TypeCode]: """Check if types are compatible for binary operation""" # Allow numeric types to mix with some restrictions numeric_types = {TypeCode.I8, TypeCode.U8, TypeCode.I16, TypeCode.U16, TypeCode.I32, TypeCode.U32, TypeCode.F32} if a.type_code in numeric_types and b.type_code in numeric_types: # For mixed operations, promote to the wider type if a.type_code == TypeCode.F32 or b.type_code == TypeCode.F32: return (TypeCode.F32, TypeCode.F32) else: # Both are integers, use I32 as common type return (TypeCode.I32, TypeCode.I32) # For boolean operations if op in ['==', '!=', '<', '>', '<=', '>=']: if a.type_code == b.type_code: return (a.type_code, b.type_code) self.error(f"Type 
mismatch in {op}: {a.get_type_name()} and {b.get_type_name()}") def run(self, entry_func: int = 0, debug: bool = False): """Run VM starting from entry function""" if entry_func >= len(self.functions): self.error(f"Invalid entry function index: {entry_func}") # Set up initial frame func = self.functions[entry_func] self.current_frame = Frame(entry_func, -1, []) # Initialize locals for _ in range(func['local_count']): self.current_frame.locals.append(Value(TypeCode.I32, 0)) self.ip = func['code_offset'] self.call_stack = [self.current_frame] if debug: debugger = Debugger(self) debugger.run() else: while not self.halted and self.ip < len(self.bytecode): self.execute_instruction() def execute_instruction(self): """Execute one instruction with type safety""" opcode = self.fetch_byte() if opcode == Opcode.PUSH_CONST: idx = self.fetch_u32() if idx >= len(self.constants): self.error(f"Constant index {idx} out of range") self.push(self.constants[idx]) elif opcode == Opcode.PUSH_INT: width = self.fetch_byte() value = self.fetch_i32() if width == 8: type_code = TypeCode.I8 elif width == 16: type_code = TypeCode.I16 else: type_code = TypeCode.I32 self.push(Value(type_code, value)) elif opcode == Opcode.PUSH_FLOAT: value = self.fetch_f32() self.push(Value(TypeCode.F32, value)) elif opcode == Opcode.PUSH_STR: idx = self.fetch_u32() if idx >= len(self.constants): self.error(f"Constant index {idx} out of range") self.push(self.constants[idx]) elif opcode == Opcode.LOAD_LOCAL: idx = self.fetch_u16() if idx >= len(self.current_frame.locals): self.error(f"Local variable index {idx} out of range") self.push(self.current_frame.locals[idx]) elif opcode == Opcode.STORE_LOCAL: idx = self.fetch_u16() value = self.pop() if idx >= len(self.current_frame.locals): # Extend locals if needed self.current_frame.locals.extend([Value(TypeCode.I32, 0)] * (idx - len(self.current_frame.locals) + 1)) self.current_frame.locals[idx] = value elif opcode == Opcode.ADD: a = self.pop() b = self.pop() a_type, 
b_type = self.type_check_binary_op(a, b, '+') if a_type == TypeCode.F32: result = b.to_float() + a.to_float() self.push(Value(TypeCode.F32, result)) else: result = b.to_int() + a.to_int() self.push(Value(TypeCode.I32, result)) elif opcode == Opcode.SUB: a = self.pop() b = self.pop() a_type, b_type = self.type_check_binary_op(a, b, '-') if a_type == TypeCode.F32: result = b.to_float() - a.to_float() self.push(Value(TypeCode.F32, result)) else: result = b.to_int() - a.to_int() self.push(Value(TypeCode.I32, result)) elif opcode == Opcode.MUL: a = self.pop() b = self.pop() a_type, b_type = self.type_check_binary_op(a, b, '*') if a_type == TypeCode.F32: result = b.to_float() * a.to_float() self.push(Value(TypeCode.F32, result)) else: result = b.to_int() * a.to_int() self.push(Value(TypeCode.I32, result)) elif opcode == Opcode.DIV: a = self.pop() b = self.pop() a_type, b_type = self.type_check_binary_op(a, b, '/') if a.to_int() == 0 or a.to_float() == 0: self.error("Division by zero") if a_type == TypeCode.F32: result = b.to_float() / a.to_float() self.push(Value(TypeCode.F32, result)) else: result = b.to_int() // a.to_int() self.push(Value(TypeCode.I32, result)) elif opcode == Opcode.MOD: a = self.pop() b = self.pop() a_type, b_type = self.type_check_binary_op(a, b, '%') if a.to_int() == 0: self.error("Modulo by zero") result = b.to_int() % a.to_int() self.push(Value(TypeCode.I32, result)) elif opcode == Opcode.NEG: a = self.pop() if a.type_code == TypeCode.F32: result = -a.to_float() self.push(Value(TypeCode.F32, result)) else: result = -a.to_int() self.push(Value(TypeCode.I32, result)) elif opcode == Opcode.BIT_AND: a = self.pop() b = self.pop() result = b.to_int() & a.to_int() self.push(Value(TypeCode.I32, result)) elif opcode == Opcode.BIT_OR: a = self.pop() b = self.pop() result = b.to_int() | a.to_int() self.push(Value(TypeCode.I32, result)) elif opcode == Opcode.BIT_XOR: a = self.pop() b = self.pop() result = b.to_int() ^ a.to_int() self.push(Value(TypeCode.I32, 
result)) elif opcode == Opcode.FADD: a = self.pop() b = self.pop() result = b.to_float() + a.to_float() self.push(Value(TypeCode.F32, result)) elif opcode == Opcode.FSUB: a = self.pop() b = self.pop() result = b.to_float() - a.to_float() self.push(Value(TypeCode.F32, result)) elif opcode == Opcode.FMUL: a = self.pop() b = self.pop() result = b.to_float() * a.to_float() self.push(Value(TypeCode.F32, result)) elif opcode == Opcode.FDIV: a = self.pop() b = self.pop() if a.to_float() == 0: self.error("Division by zero") result = b.to_float() / a.to_float() self.push(Value(TypeCode.F32, result)) elif opcode == Opcode.FNEG: a = self.pop() result = -a.to_float() self.push(Value(TypeCode.F32, result)) elif opcode == Opcode.CMP_EQ: a = self.pop() b = self.pop() result = b.data == a.data self.push(Value(TypeCode.BOOL, result)) elif opcode == Opcode.CMP_NEQ: a = self.pop() b = self.pop() result = b.data != a.data self.push(Value(TypeCode.BOOL, result)) elif opcode == Opcode.CMP_LT: a = self.pop() b = self.pop() a_type, b_type = self.type_check_binary_op(a, b, '<') if a_type == TypeCode.F32: result = b.to_float() < a.to_float() else: result = b.to_int() < a.to_int() self.push(Value(TypeCode.BOOL, result)) elif opcode == Opcode.CMP_GT: a = self.pop() b = self.pop() a_type, b_type = self.type_check_binary_op(a, b, '>') if a_type == TypeCode.F32: result = b.to_float() > a.to_float() else: result = b.to_int() > a.to_int() self.push(Value(TypeCode.BOOL, result)) elif opcode == Opcode.CMP_LE: a = self.pop() b = self.pop() a_type, b_type = self.type_check_binary_op(a, b, '<=') if a_type == TypeCode.F32: result = b.to_float() <= a.to_float() else: result = b.to_int() <= a.to_int() self.push(Value(TypeCode.BOOL, result)) elif opcode == Opcode.CMP_GE: a = self.pop() b = self.pop() a_type, b_type = self.type_check_binary_op(a, b, '>=') if a_type == TypeCode.F32: result = b.to_float() >= a.to_float() else: result = b.to_int() >= a.to_int() self.push(Value(TypeCode.BOOL, result)) elif 
opcode == Opcode.JMP: offset = self.fetch_i32() self.ip += offset elif opcode == Opcode.JMP_IF: offset = self.fetch_i32() cond = self.pop() if cond.to_bool(): self.ip += offset elif opcode == Opcode.JMP_IF_NOT: offset = self.fetch_i32() cond = self.pop() if not cond.to_bool(): self.ip += offset elif opcode == Opcode.CALL: func_idx = self.fetch_u16() arg_count = self.fetch_byte() if func_idx >= len(self.functions): self.error(f"Function index {func_idx} out of range") # Pop arguments args = [] for _ in range(arg_count): args.insert(0, self.pop()) # Validate argument count func = self.functions[func_idx] if len(args) != func['arg_count']: self.error(f"Function expects {func['arg_count']} arguments, got {len(args)}") # Save return address return_ip = self.ip # Create new frame new_frame = Frame(func_idx, return_ip, args) # Initialize remaining locals for _ in range(func['local_count'] - arg_count): new_frame.locals.append(Value(TypeCode.I32, 0)) self.call_stack.append(new_frame) self.current_frame = new_frame self.ip = func['code_offset'] elif opcode == Opcode.RET: has_value = self.fetch_byte() return_value = None if has_value: return_value = self.pop() # Pop frame self.call_stack.pop() if not self.call_stack: # Returned from main self.halted = True if return_value: print(f"Program returned: {return_value.data}") return # Restore previous frame self.current_frame = self.call_stack[-1] self.ip = self.current_frame.return_ip # Push return value if any if return_value: self.push(return_value) elif opcode == Opcode.DUP: if not self.current_frame.stack: self.error("Cannot DUP from empty stack") value = self.current_frame.stack[-1] self.push(Value(value.type_code, value.data)) elif opcode == Opcode.POP: self.pop() elif opcode == Opcode.PRINT: value = self.pop() print(value.data) elif opcode == Opcode.HALT: self.halted = True else: self.error(f"Unknown opcode: 0x{opcode:02X}") # ============================================================================ # COMPILER WITH 
PROPER FOR LOOP SUPPORT # ============================================================================ class Compiler: def __init__(self, source: str, filename: str = ""): self.lexer = Lexer(source, filename) self.filename = filename self.current_token = self.lexer.next_token() self.constants: List[Tuple[TypeCode, Any]] = [] self.functions: List[Dict] = [] self.current_function: Optional[Dict] = None self.local_vars: Dict[str, int] = {} self.local_count = 0 self.loop_stack: List[Dict] = [] # For break/continue in loops def error(self, msg: str): """Raise a compilation error with position information""" raise SyntaxError(f"{self.filename}:{self.current_token.line}:{self.current_token.col}: {msg}") def expect(self, token_type: TokenType, context: str = ""): """Expect a specific token type, provide context for better error messages""" if self.current_token.type != token_type: context_str = f" in {context}" if context else "" self.error(f"Expected {token_type.name}{context_str}, but got {self.current_token.type.name}") self.current_token = self.lexer.next_token() def eat(self, token_type: TokenType): """Compatibility alias for expect""" self.expect(token_type) def add_constant(self, type_code: TypeCode, value: Any) -> int: """Add constant to pool and return index""" for i, (tc, v) in enumerate(self.constants): if tc == type_code and v == value: return i self.constants.append((type_code, value)) return len(self.constants) - 1 def emit(self, *bytes_data): """Emit bytes to current function's code""" for b in bytes_data: if isinstance(b, int): self.current_function['code'].append(b) elif isinstance(b, bytes): self.current_function['code'].extend(b) def compile(self) -> bytes: """Main compilation entry point""" try: while self.current_token.type != TokenType.EOF: if self.current_token.type == TokenType.FUN: self.compile_function() else: self.error("Expected function definition") return self.generate_bytecode() except Exception as e: # Add context to compilation errors if 
not isinstance(e, SyntaxError): self.error(str(e)) else: raise def compile_function(self): """Compile function definition""" self.expect(TokenType.FUN, "function definition") func_name = self.current_token.value self.expect(TokenType.IDENTIFIER, "function name") self.expect(TokenType.LPAREN, "function parameter list") # Parse parameters params = [] while self.current_token.type != TokenType.RPAREN: param_type = self.parse_type() param_name = self.current_token.value self.expect(TokenType.IDENTIFIER, "parameter name") params.append((param_type, param_name)) if self.current_token.type == TokenType.COMMA: self.eat(TokenType.COMMA) self.expect(TokenType.RPAREN, "function parameter list") # Set up function self.current_function = { 'name': func_name, 'arg_count': len(params), 'code': [], 'labels': {}, # For break/continue labels } # Set up locals self.local_vars = {} self.local_count = 0 # Parameters become first locals for _, param_name in params: self.local_vars[param_name] = self.local_count self.local_count += 1 # Parse body self.expect(TokenType.LBRACE, "function body") while self.current_token.type != TokenType.RBRACE: self.compile_statement() self.expect(TokenType.RBRACE, "function body") # Add implicit return if not present if not self.current_function['code'] or self.current_function['code'][-1] != Opcode.RET: self.emit(Opcode.RET, 0) self.current_function['local_count'] = self.local_count self.functions.append(self.current_function) self.current_function = None def parse_type(self) -> TypeCode: """Parse type specification with better error reporting""" type_map = { TokenType.U8: TypeCode.U8, TokenType.U16: TypeCode.U16, TokenType.U32: TypeCode.U32, TokenType.I8: TypeCode.I8, TokenType.I16: TypeCode.I16, TokenType.I32: TypeCode.I32, TokenType.FLOAT: TypeCode.F32, TokenType.BOOL: TypeCode.BOOL, TokenType.CHAR: TypeCode.CHAR, TokenType.STR: TypeCode.STR, TokenType.UINT: TypeCode.U32, TokenType.INT: TypeCode.I32, } if self.current_token.type in type_map: type_code 
= type_map[self.current_token.type] self.current_token = self.lexer.next_token() return type_code else: self.error(f"Expected type specification, got {self.current_token.type.name}") def compile_statement(self): """Compile a statement with better error context""" try: if self.current_token.type in [TokenType.U8, TokenType.U16, TokenType.U32, TokenType.I8, TokenType.I16, TokenType.I32, TokenType.FLOAT, TokenType.BOOL, TokenType.CHAR, TokenType.STR, TokenType.UINT, TokenType.INT]: self.compile_var_declaration() elif self.current_token.type == TokenType.IF: self.compile_if_statement() elif self.current_token.type == TokenType.WHILE: self.compile_while_statement() elif self.current_token.type == TokenType.FOR: self.compile_for_statement() elif self.current_token.type == TokenType.RETURN: self.compile_return_statement() elif self.current_token.type == TokenType.LBRACE: self.compile_block() elif self.current_token.type == TokenType.IDENTIFIER: self.compile_assignment_or_call() else: self.error(f"Unexpected statement starting with {self.current_token.type.name}") except Exception as e: # Add context to statement compilation errors if not isinstance(e, SyntaxError): self.error(f"Error in statement: {e}") else: raise def compile_block(self): """Compile a block of statements""" self.expect(TokenType.LBRACE, "block start") # Save current locals to restore after block old_locals = self.local_vars.copy() old_local_count = self.local_count while self.current_token.type != TokenType.RBRACE: self.compile_statement() self.expect(TokenType.RBRACE, "block end") # Restore locals (block scoping) self.local_vars = old_locals self.local_count = old_local_count def compile_var_declaration(self): """Compile variable declaration with type checking""" var_type = self.parse_type() var_name = self.current_token.value self.expect(TokenType.IDENTIFIER, "variable name") # Check for redeclaration if var_name in self.local_vars: self.error(f"Redeclaration of variable '{var_name}'") # Add to locals 
self.local_vars[var_name] = self.local_count self.local_count += 1 local_idx = self.local_vars[var_name] if self.current_token.type == TokenType.ASSIGN: self.eat(TokenType.ASSIGN) self.compile_expression() # TODO: Add type checking for assignment self.emit(Opcode.STORE_LOCAL, *struct.pack(' body_start: # Only if there's an increment offset = increment_start - (body_end + 5) self.emit(Opcode.JMP, *struct.pack(' bytes: """Generate final bytecode""" bytecode = bytearray() # Header bytecode.extend(b'POPC') # Magic for .popclass files bytecode.extend(struct.pack(' bool: """Compile source file to .popclass file with improved error reporting""" if not os.path.exists(source_file): print(f"Error: Source file '{source_file}' not found") return False if not output_file: output_file = os.path.splitext(source_file)[0] + '.popclass' try: with open(source_file, 'r', encoding='utf-8') as f: source = f.read() compiler = Compiler(source, source_file) bytecode = compiler.compile() with open(output_file, 'wb') as f: f.write(bytecode) print(f"Successfully compiled {source_file} to {output_file}") return True except SyntaxError as e: print(f"Compilation error in {e.filename}:{e.lineno}: {e.msg}") return False except Exception as e: print(f"Compilation error: {e}") traceback.print_exc() return False def disassemble_file(popclass_file: str) -> bool: """Disassemble .popclass file""" if not os.path.exists(popclass_file): print(f"Error: .popclass file '{popclass_file}' not found") return False try: with open(popclass_file, 'rb') as f: bytecode = f.read() disassembler = Disassembler(bytecode, popclass_file) disassembly = disassembler.disassemble() base_name = os.path.splitext(popclass_file)[0] disasm_file = base_name + '.popasm' # Save disassembly result with open(disasm_file, 'w', encoding='utf-8') as f: f.write(disassembly) print(disassembly) return True except Exception as e: print(f"Disassembly error: {e}") traceback.print_exc() return False def execute_popclass(popclass_file: str, debug: 
bool = False) -> bool: """Execute .popclass file""" if not os.path.exists(popclass_file): print(f"Error: .popclass file '{popclass_file}' not found") return False try: with open(popclass_file, 'rb') as f: bytecode = f.read() vm = VM(bytecode, popclass_file) vm.run(debug=debug) return True except Exception as e: print(f"Execution error: {e}") traceback.print_exc() return False def main(): if len(sys.argv) < 2: print("POP VM Tools - Enhanced with Type Safety and Better Error Messages") print("Usage:") print(" python interpreter.py compile [output_file]") print(" python interpreter.py disasm ") print(" python interpreter.py run ") print(" python interpreter.py debug ") print(" python interpreter.py (compiles and runs)") print("\nFeatures:") print(" - Improved type safety and error messages") print(" - C-style for loops with ++/-- operators") print(" - Better debugging information") return command = sys.argv[1] if command == 'compile': if len(sys.argv) < 3: print("Error: No source file specified") return source_file = sys.argv[2] output_file = sys.argv[3] if len(sys.argv) > 3 else None compile_source(source_file, output_file) elif command == 'disasm': if len(sys.argv) < 3: print("Error: No .popclass file specified") return popclass_file = sys.argv[2] disassemble_file(popclass_file) elif command == 'run': if len(sys.argv) < 3: print("Error: No .popclass file specified") return popclass_file = sys.argv[2] execute_popclass(popclass_file) elif command == 'debug': if len(sys.argv) < 3: print("Error: No .popclass file specified") return popclass_file = sys.argv[2] execute_popclass(popclass_file, debug=True) else: # Assume it's a source file - compile and run source_file = sys.argv[1] if not os.path.exists(source_file): print(f"Error: Source file '{source_file}' not found") return # Compile to temporary file temp_file = 'temp.popclass' if compile_source(source_file, temp_file): # Run the compiled file execute_popclass(temp_file) # Clean up temporary file try: 
os.remove(temp_file) except: pass if __name__ == '__main__': main()