// Source listing: C++, 878 lines, 26 KiB.
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <memory>
#include <optional>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <variant>
#include <vector>
|
|
|
|
#ifdef _WIN32
|
|
#include <intrin.h>
|
|
#else
|
|
#include <x86intrin.h>
|
|
#endif
|
|
|
|
// ============================================================================
|
|
// TYPE DEFINITIONS
|
|
// ============================================================================
|
|
|
|
// One-byte tag naming the runtime type a Value carries.
enum class TypeCode : uint8_t {
    I8   = 0x01,
    U8   = 0x02,
    I16  = 0x03,
    U16  = 0x04,
    I32  = 0x05,
    U32  = 0x06,
    F32  = 0x07,
    BOOL = 0x08,
    CHAR = 0x09,
    STR  = 0x0A
};
|
|
|
|
// One-byte instruction codes of the VM bytecode, grouped by the high nibble.
enum class Opcode : uint8_t {
    // 0x0_: push operations
    PUSH_CONST = 0x01,
    PUSH_INT   = 0x02,
    PUSH_FLOAT = 0x03,
    PUSH_STR   = 0x04,

    // 0x1_: local variable slots
    LOAD_LOCAL  = 0x10,
    STORE_LOCAL = 0x11,

    // 0x2_: arithmetic, bitwise and shift operations
    ADD     = 0x20,
    SUB     = 0x21,
    MUL     = 0x22,
    DIV     = 0x23,
    MOD     = 0x24,
    NEG     = 0x25,
    BIT_AND = 0x26,
    BIT_OR  = 0x27,
    BIT_XOR = 0x28,
    SHL     = 0x29,
    SHR     = 0x2A,

    // 0x3_: F-prefixed arithmetic
    FADD = 0x30,
    FSUB = 0x31,
    FMUL = 0x32,
    FDIV = 0x33,
    FNEG = 0x34,

    // 0x4_: comparisons
    CMP_EQ  = 0x40,
    CMP_NEQ = 0x41,
    CMP_LT  = 0x42,
    CMP_GT  = 0x43,
    CMP_LE  = 0x44,
    CMP_GE  = 0x45,

    // 0x5_: jumps
    JMP        = 0x50,
    JMP_IF     = 0x51,
    JMP_IF_NOT = 0x52,

    // 0x6_: calls
    CALL = 0x60,
    RET  = 0x61,

    // 0x7_: conversions
    CONST_CAST = 0x70,
    TRUNC      = 0x71,
    TO_FLOAT   = 0x72,
    TO_INT     = 0x73,

    // 0x8_: stack manipulation
    DUP = 0x80,
    POP = 0x81,

    // Output and termination
    PRINT = 0x90,
    HALT  = 0xA0
};
|
|
|
|
// ============================================================================
|
|
// VALUE REPRESENTATION WITH TYPE PUNNING FOR PERFORMANCE
|
|
// ============================================================================
|
|
|
|
// Overlapping storage for the scalar payload of a value.
// Each constructor initialises exactly one member; the default constructor
// zeroes the i32 member.
union ValueData {
    int32_t  i32;
    uint32_t u32;
    float    f32;
    bool     b;
    char     c;

    ValueData() : i32{0} {}

    explicit ValueData(int32_t signed_value) : i32{signed_value} {}
    explicit ValueData(uint32_t unsigned_value) : u32{unsigned_value} {}
    explicit ValueData(float float_value) : f32{float_value} {}
    explicit ValueData(bool flag) : b{flag} {}
    explicit ValueData(char character) : c{character} {}
};
|
|
|
|
class Value {
|
|
private:
|
|
TypeCode type_;
|
|
ValueData data_;
|
|
std::string str_data_; // Only for strings
|
|
|
|
public:
|
|
Value() : type_(TypeCode::I32), data_() {}
|
|
|
|
explicit Value(TypeCode type, int32_t value) : type_(type), data_(value) {}
|
|
explicit Value(TypeCode type, uint32_t value) : type_(type), data_(value) {}
|
|
explicit Value(TypeCode type, float value) : type_(type), data_(value) {}
|
|
explicit Value(TypeCode type, bool value) : type_(type), data_(value) {}
|
|
explicit Value(TypeCode type, char value) : type_(type), data_(value) {}
|
|
explicit Value(const std::string& value) : type_(TypeCode::STR), data_(), str_data_(value) {}
|
|
|
|
TypeCode type() const { return type_; }
|
|
|
|
int32_t as_i32() const {
|
|
switch (type_) {
|
|
case TypeCode::I8: case TypeCode::I16: case TypeCode::I32: return data_.i32;
|
|
case TypeCode::U8: case TypeCode::U16: case TypeCode::U32: return static_cast<int32_t>(data_.u32);
|
|
case TypeCode::F32: return static_cast<int32_t>(data_.f32);
|
|
case TypeCode::BOOL: return data_.b ? 1 : 0;
|
|
case TypeCode::CHAR: return static_cast<int32_t>(data_.c);
|
|
default: return 0;
|
|
}
|
|
}
|
|
|
|
uint32_t as_u32() const {
|
|
switch (type_) {
|
|
case TypeCode::I8: case TypeCode::I16: case TypeCode::I32: return static_cast<uint32_t>(data_.i32);
|
|
case TypeCode::U8: case TypeCode::U16: case TypeCode::U32: return data_.u32;
|
|
case TypeCode::F32: return static_cast<uint32_t>(data_.f32);
|
|
case TypeCode::BOOL: return data_.b ? 1 : 0;
|
|
case TypeCode::CHAR: return static_cast<uint32_t>(data_.c);
|
|
default: return 0;
|
|
}
|
|
}
|
|
|
|
float as_f32() const {
|
|
switch (type_) {
|
|
case TypeCode::I8: case TypeCode::I16: case TypeCode::I32: return static_cast<float>(data_.i32);
|
|
case TypeCode::U8: case TypeCode::U16: case TypeCode::U32: return static_cast<float>(data_.u32);
|
|
case TypeCode::F32: return data_.f32;
|
|
case TypeCode::BOOL: return data_.b ? 1.0f : 0.0f;
|
|
case TypeCode::CHAR: return static_cast<float>(data_.c);
|
|
default: return 0.0f;
|
|
}
|
|
}
|
|
|
|
bool as_bool() const {
|
|
switch (type_) {
|
|
case TypeCode::BOOL: return data_.b;
|
|
case TypeCode::I8: case TypeCode::I16: case TypeCode::I32: return data_.i32 != 0;
|
|
case TypeCode::U8: case TypeCode::U16: case TypeCode::U32: return data_.u32 != 0;
|
|
case TypeCode::F32: return data_.f32 != 0.0f;
|
|
case TypeCode::CHAR: return data_.c != '\0';
|
|
case TypeCode::STR: return !str_data_.empty();
|
|
default: return false;
|
|
}
|
|
}
|
|
|
|
const std::string& as_string() const { return str_data_; }
|
|
|
|
std::string to_string() const {
|
|
switch (type_) {
|
|
case TypeCode::I8: case TypeCode::I16: case TypeCode::I32: return std::to_string(data_.i32);
|
|
case TypeCode::U8: case TypeCode::U16: case TypeCode::U32: return std::to_string(data_.u32);
|
|
case TypeCode::F32: return std::to_string(data_.f32);
|
|
case TypeCode::BOOL: return data_.b ? "true" : "false";
|
|
case TypeCode::CHAR: return std::string(1, data_.c);
|
|
case TypeCode::STR: return str_data_;
|
|
default: return "unknown";
|
|
}
|
|
}
|
|
};
|
|
|
|
// ============================================================================
|
|
// JIT COMPILATION AND OPTIMIZATION
|
|
// ============================================================================
|
|
|
|
// Accumulates x86-64 machine-code bytes for a compiled block.
// The buffer is never actually executed: execute() only simulates a run.
class JITCompiler {
private:
    std::vector<uint8_t> native_code_;
    size_t code_offset_;  // number of bytes emitted so far (mirrors native_code_.size())

    // Emits `REX.W <opcode> imm64`, i.e. MOV r64, imm64.
    // The 32-bit immediate is sign-extended to 64 bits so that negative values
    // load as the same negative number, and the bytes are written
    // least-significant first because x86 immediates are little-endian
    // regardless of the host the compiler runs on.
    void emit_mov_r64_imm(uint8_t opcode, int32_t value) {
        emit_byte(0x48); // REX.W
        emit_byte(opcode);
        const uint64_t bits = static_cast<uint64_t>(static_cast<int64_t>(value));
        for (int shift = 0; shift < 64; shift += 8) {
            emit_byte(static_cast<uint8_t>(bits >> shift));
        }
    }

public:
    JITCompiler() : code_offset_(0) {}

    // Discards all emitted code.
    void reset() {
        native_code_.clear();
        code_offset_ = 0;
    }

    // Appends `size` raw bytes starting at `data` (size is in bytes, not in
    // elements of T).
    template<typename T>
    void emit_bytes(const T* data, size_t size) {
        const uint8_t* bytes = reinterpret_cast<const uint8_t*>(data);
        native_code_.insert(native_code_.end(), bytes, bytes + size);
        code_offset_ += size;
    }

    // Appends a single byte.
    void emit_byte(uint8_t b) {
        native_code_.push_back(b);
        code_offset_++;
    }

    // MOV RAX, imm64 (opcode 0xB8) with `value` sign-extended.
    void emit_mov_rax_imm(int32_t value) {
        emit_mov_r64_imm(0xB8, value);
    }

    // MOV RBX, imm64 (opcode 0xBB) with `value` sign-extended.
    void emit_mov_rbx_imm(int32_t value) {
        emit_mov_r64_imm(0xBB, value);
    }

    // ADD RAX, RBX
    void emit_add_rax_rbx() {
        emit_byte(0x48); // REX.W
        emit_byte(0x01); // ADD r/m64, r64
        emit_byte(0xD8); // ModRM: RAX, RBX
    }

    // RET
    void emit_ret() {
        emit_byte(0xC3);
    }

    const std::vector<uint8_t>& get_code() const { return native_code_; }

    // Simulated execution of the generated code.
    // Returns 0 without output when no code has been emitted; otherwise logs
    // the code size and returns the placeholder result 42.
    int32_t execute() {
        if (native_code_.empty()) return 0;

        // In a real implementation, we'd use mmap with PROT_EXEC.
        // For safety, execution is only simulated.
        std::cout << "[JIT] Executing optimized native code ("
                  << native_code_.size() << " bytes)" << std::endl;
        return 42; // Simulated result
    }
};
|
|
|
|
// ============================================================================
|
|
// HOT CODE DETECTOR AND OPTIMIZER
|
|
// ============================================================================
|
|
|
|
class HotCodeDetector {
|
|
private:
|
|
struct BlockInfo {
|
|
size_t execution_count;
|
|
size_t start_ip;
|
|
size_t end_ip;
|
|
std::vector<uint8_t> bytecode;
|
|
};
|
|
|
|
std::unordered_map<size_t, BlockInfo> hot_blocks_;
|
|
size_t threshold_;
|
|
|
|
public:
|
|
HotCodeDetector(size_t threshold = 1000) : threshold_(threshold) {}
|
|
|
|
void record_execution(size_t ip, const std::vector<uint8_t>& bytecode, size_t block_size) {
|
|
auto it = hot_blocks_.find(ip);
|
|
if (it == hot_blocks_.end()) {
|
|
BlockInfo info;
|
|
info.execution_count = 1;
|
|
info.start_ip = ip;
|
|
info.end_ip = ip + block_size;
|
|
info.bytecode.assign(bytecode.begin() + ip, bytecode.begin() + ip + block_size);
|
|
hot_blocks_[ip] = info;
|
|
}
|
|
else {
|
|
it->second.execution_count++;
|
|
}
|
|
}
|
|
|
|
bool is_hot(size_t ip) const {
|
|
auto it = hot_blocks_.find(ip);
|
|
return it != hot_blocks_.end() && it->second.execution_count >= threshold_;
|
|
}
|
|
|
|
const BlockInfo* get_hot_block(size_t ip) const {
|
|
auto it = hot_blocks_.find(ip);
|
|
return (it != hot_blocks_.end() && it->second.execution_count >= threshold_) ? &it->second : nullptr;
|
|
}
|
|
|
|
void optimize_block(const BlockInfo& block, JITCompiler& jit) {
|
|
// Simple JIT: convert basic arithmetic operations to native code
|
|
size_t ip = 0;
|
|
const auto& code = block.bytecode;
|
|
|
|
while (ip < code.size()) {
|
|
Opcode op = static_cast<Opcode>(code[ip++]);
|
|
|
|
switch (op) {
|
|
case Opcode::PUSH_INT: {
|
|
// Skip width and value
|
|
ip += 5;
|
|
break;
|
|
}
|
|
case Opcode::ADD: {
|
|
jit.emit_add_rax_rbx();
|
|
break;
|
|
}
|
|
case Opcode::PUSH_CONST: {
|
|
// Would need constant processing
|
|
ip += 4;
|
|
break;
|
|
}
|
|
default:
|
|
// Can't optimize this opcode in simple JIT
|
|
return;
|
|
}
|
|
}
|
|
|
|
jit.emit_ret();
|
|
}
|
|
};
|
|
|
|
// ============================================================================
|
|
// HIGH-PERFORMANCE VM WITH JIT
|
|
// ============================================================================
|
|
|
|
class OptimizedVM {
|
|
private:
|
|
std::vector<uint8_t> bytecode_;
|
|
std::vector<Value> constants_;
|
|
std::vector<std::vector<Value>> functions_;
|
|
|
|
// Execution state
|
|
size_t ip_;
|
|
std::vector<Value> stack_;
|
|
std::vector<std::vector<Value>> call_stack_;
|
|
std::vector<Value> locals_;
|
|
bool halted_;
|
|
|
|
// Optimization components
|
|
HotCodeDetector hot_detector_;
|
|
JITCompiler jit_compiler_;
|
|
std::unordered_map<size_t, std::function<Value()>> jit_cache_;
|
|
|
|
// Inline cache for method calls
|
|
struct InlineCacheEntry {
|
|
size_t target_func;
|
|
size_t call_count;
|
|
};
|
|
std::unordered_map<size_t, InlineCacheEntry> inline_cache_;
|
|
|
|
public:
|
|
OptimizedVM() : ip_(0), halted_(false), hot_detector_(100) {}
|
|
|
|
bool load_bytecode(const std::vector<uint8_t>& bytecode) {
|
|
bytecode_ = bytecode;
|
|
return parse_bytecode();
|
|
}
|
|
|
|
bool load_bytecode_from_file(const std::string& filename) {
|
|
std::ifstream file(filename, std::ios::binary);
|
|
if (!file) return false;
|
|
|
|
file.seekg(0, std::ios::end);
|
|
size_t size = file.tellg();
|
|
file.seekg(0, std::ios::beg);
|
|
|
|
bytecode_.resize(size);
|
|
file.read(reinterpret_cast<char*>(bytecode_.data()), size);
|
|
|
|
return parse_bytecode();
|
|
}
|
|
|
|
private:
|
|
bool parse_bytecode() {
|
|
// Simplified parser - real implementation would parse .popclass format
|
|
if (bytecode_.size() < 4 || std::string(bytecode_.begin(), bytecode_.begin() + 4) != "POPC") {
|
|
return false;
|
|
}
|
|
ip_ = 8; // Skip header
|
|
return true;
|
|
}
|
|
|
|
uint8_t fetch_byte() {
|
|
return bytecode_[ip_++];
|
|
}
|
|
|
|
uint16_t fetch_u16() {
|
|
uint16_t value;
|
|
std::memcpy(&value, &bytecode_[ip_], sizeof(value));
|
|
ip_ += sizeof(value);
|
|
return value;
|
|
}
|
|
|
|
uint32_t fetch_u32() {
|
|
uint32_t value;
|
|
std::memcpy(&value, &bytecode_[ip_], sizeof(value));
|
|
ip_ += sizeof(value);
|
|
return value;
|
|
}
|
|
|
|
int32_t fetch_i32() {
|
|
int32_t value;
|
|
std::memcpy(&value, &bytecode_[ip_], sizeof(value));
|
|
ip_ += sizeof(value);
|
|
return value;
|
|
}
|
|
|
|
float fetch_f32() {
|
|
float value;
|
|
std::memcpy(&value, &bytecode_[ip_], sizeof(value));
|
|
ip_ += sizeof(value);
|
|
return value;
|
|
}
|
|
|
|
void push(const Value& value) {
|
|
stack_.push_back(value);
|
|
}
|
|
|
|
Value pop() {
|
|
if (stack_.empty()) throw std::runtime_error("Stack underflow");
|
|
Value value = stack_.back();
|
|
stack_.pop_back();
|
|
return value;
|
|
}
|
|
|
|
Value& peek(size_t offset = 0) {
|
|
if (offset >= stack_.size()) throw std::runtime_error("Stack peek out of bounds");
|
|
return stack_[stack_.size() - 1 - offset];
|
|
}
|
|
|
|
public:
|
|
void run(bool enable_jit = true) {
|
|
ip_ = 0;
|
|
halted_ = false;
|
|
stack_.clear();
|
|
call_stack_.clear();
|
|
locals_.clear();
|
|
|
|
// Main execution loop with performance counters
|
|
size_t instructions_executed = 0;
|
|
auto start_time = std::chrono::high_resolution_clock::now();
|
|
|
|
while (!halted_ && ip_ < bytecode_.size()) {
|
|
// Check for JIT-optimized block
|
|
if (enable_jit) {
|
|
auto jit_it = jit_cache_.find(ip_);
|
|
if (jit_it != jit_cache_.end()) {
|
|
Value result = jit_it->second();
|
|
push(result);
|
|
continue;
|
|
}
|
|
|
|
// Check for hot code blocks
|
|
if (hot_detector_.is_hot(ip_)) {
|
|
const auto* hot_block = hot_detector_.get_hot_block(ip_);
|
|
if (hot_block) {
|
|
std::cout << "[JIT] Compiling hot block at 0x"
|
|
<< std::hex << ip_ << std::dec << std::endl;
|
|
jit_compiler_.reset();
|
|
hot_detector_.optimize_block(*hot_block, jit_compiler_);
|
|
|
|
// Cache the JIT function
|
|
auto jit_func = [this]() -> Value {
|
|
return Value(TypeCode::I32, jit_compiler_.execute());
|
|
};
|
|
jit_cache_[ip_] = jit_func;
|
|
|
|
// Execute JIT version
|
|
Value result = jit_func();
|
|
push(result);
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Record execution for hot code detection
|
|
hot_detector_.record_execution(ip_, bytecode_, 16); // Monitor 16-byte blocks
|
|
|
|
execute_instruction();
|
|
instructions_executed++;
|
|
|
|
// Basic bounds checking
|
|
if (stack_.size() > 1000000) {
|
|
throw std::runtime_error("Stack overflow protection");
|
|
}
|
|
}
|
|
|
|
auto end_time = std::chrono::high_resolution_clock::now();
|
|
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
|
|
|
|
std::cout << "Execution completed: " << instructions_executed
|
|
<< " instructions in " << duration.count() << " μs ("
|
|
<< (instructions_executed * 1000000.0 / duration.count())
|
|
<< " instructions/sec)" << std::endl;
|
|
}
|
|
|
|
private:
|
|
void execute_instruction() {
|
|
Opcode opcode = static_cast<Opcode>(fetch_byte());
|
|
|
|
switch (opcode) {
|
|
case Opcode::PUSH_INT: {
|
|
uint8_t width = fetch_byte();
|
|
int32_t value = fetch_i32();
|
|
push(Value(TypeCode::I32, value));
|
|
break;
|
|
}
|
|
|
|
case Opcode::PUSH_FLOAT: {
|
|
float value = fetch_f32();
|
|
push(Value(TypeCode::F32, value));
|
|
break;
|
|
}
|
|
|
|
case Opcode::PUSH_CONST: {
|
|
uint32_t const_idx = fetch_u32();
|
|
if (const_idx < constants_.size()) {
|
|
push(constants_[const_idx]);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case Opcode::LOAD_LOCAL: {
|
|
uint16_t local_idx = fetch_u16();
|
|
if (local_idx < locals_.size()) {
|
|
push(locals_[local_idx]);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case Opcode::STORE_LOCAL: {
|
|
uint16_t local_idx = fetch_u16();
|
|
Value value = pop();
|
|
if (local_idx >= locals_.size()) {
|
|
locals_.resize(local_idx + 1);
|
|
}
|
|
locals_[local_idx] = value;
|
|
break;
|
|
}
|
|
|
|
case Opcode::ADD: {
|
|
Value b = pop();
|
|
Value a = pop();
|
|
|
|
// Type-based dispatch for performance
|
|
if (a.type() == TypeCode::F32 || b.type() == TypeCode::F32) {
|
|
float result = a.as_f32() + b.as_f32();
|
|
push(Value(TypeCode::F32, result));
|
|
}
|
|
else {
|
|
int32_t result = a.as_i32() + b.as_i32();
|
|
push(Value(TypeCode::I32, result));
|
|
}
|
|
break;
|
|
}
|
|
|
|
case Opcode::SUB: {
|
|
Value b = pop();
|
|
Value a = pop();
|
|
|
|
if (a.type() == TypeCode::F32 || b.type() == TypeCode::F32) {
|
|
float result = a.as_f32() - b.as_f32();
|
|
push(Value(TypeCode::F32, result));
|
|
}
|
|
else {
|
|
int32_t result = a.as_i32() - b.as_i32();
|
|
push(Value(TypeCode::I32, result));
|
|
}
|
|
break;
|
|
}
|
|
|
|
case Opcode::MUL: {
|
|
Value b = pop();
|
|
Value a = pop();
|
|
|
|
if (a.type() == TypeCode::F32 || b.type() == TypeCode::F32) {
|
|
float result = a.as_f32() * b.as_f32();
|
|
push(Value(TypeCode::F32, result));
|
|
}
|
|
else {
|
|
int32_t result = a.as_i32() * b.as_i32();
|
|
push(Value(TypeCode::I32, result));
|
|
}
|
|
break;
|
|
}
|
|
|
|
case Opcode::DIV: {
|
|
Value b = pop();
|
|
Value a = pop();
|
|
|
|
if (b.as_f32() == 0.0f) {
|
|
throw std::runtime_error("Division by zero");
|
|
}
|
|
|
|
if (a.type() == TypeCode::F32 || b.type() == TypeCode::F32) {
|
|
float result = a.as_f32() / b.as_f32();
|
|
push(Value(TypeCode::F32, result));
|
|
}
|
|
else {
|
|
int32_t result = a.as_i32() / b.as_i32();
|
|
push(Value(TypeCode::I32, result));
|
|
}
|
|
break;
|
|
}
|
|
|
|
case Opcode::CMP_EQ: {
|
|
Value b = pop();
|
|
Value a = pop();
|
|
bool result = (a.as_i32() == b.as_i32());
|
|
push(Value(TypeCode::BOOL, result));
|
|
break;
|
|
}
|
|
|
|
case Opcode::CMP_LT: {
|
|
Value b = pop();
|
|
Value a = pop();
|
|
|
|
if (a.type() == TypeCode::F32 || b.type() == TypeCode::F32) {
|
|
bool result = a.as_f32() < b.as_f32();
|
|
push(Value(TypeCode::BOOL, result));
|
|
}
|
|
else {
|
|
bool result = a.as_i32() < b.as_i32();
|
|
push(Value(TypeCode::BOOL, result));
|
|
}
|
|
break;
|
|
}
|
|
|
|
case Opcode::JMP: {
|
|
int32_t offset = fetch_i32();
|
|
ip_ += offset;
|
|
break;
|
|
}
|
|
|
|
case Opcode::JMP_IF: {
|
|
int32_t offset = fetch_i32();
|
|
Value cond = pop();
|
|
if (cond.as_bool()) {
|
|
ip_ += offset;
|
|
}
|
|
break;
|
|
}
|
|
|
|
case Opcode::JMP_IF_NOT: {
|
|
int32_t offset = fetch_i32();
|
|
Value cond = pop();
|
|
if (!cond.as_bool()) {
|
|
ip_ += offset;
|
|
}
|
|
break;
|
|
}
|
|
|
|
case Opcode::CALL: {
|
|
uint16_t func_idx = fetch_u16();
|
|
uint8_t arg_count = fetch_byte();
|
|
|
|
// Prepare arguments
|
|
std::vector<Value> args;
|
|
for (int i = 0; i < arg_count; ++i) {
|
|
args.push_back(pop());
|
|
}
|
|
std::reverse(args.begin(), args.end());
|
|
|
|
// Save execution state
|
|
call_stack_.push_back(locals_);
|
|
call_stack_.push_back(std::vector<Value>{Value(TypeCode::I32, static_cast<int32_t>(ip_))});
|
|
|
|
// Set up new frame
|
|
locals_ = args;
|
|
ip_ = 0; // Simplified - real implementation would use function table
|
|
break;
|
|
}
|
|
|
|
case Opcode::RET: {
|
|
uint8_t has_value = fetch_byte();
|
|
Value return_value;
|
|
if (has_value) {
|
|
return_value = pop();
|
|
}
|
|
|
|
if (call_stack_.size() < 2) {
|
|
halted_ = true;
|
|
break;
|
|
}
|
|
|
|
// Restore execution state
|
|
std::vector<Value> return_ip_vec = call_stack_.back();
|
|
call_stack_.pop_back();
|
|
locals_ = call_stack_.back();
|
|
call_stack_.pop_back();
|
|
|
|
if (!return_ip_vec.empty()) {
|
|
ip_ = return_ip_vec[0].as_i32();
|
|
}
|
|
|
|
if (has_value) {
|
|
push(return_value);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case Opcode::DUP: {
|
|
if (!stack_.empty()) {
|
|
push(stack_.back());
|
|
}
|
|
break;
|
|
}
|
|
|
|
case Opcode::POP: {
|
|
if (!stack_.empty()) {
|
|
stack_.pop_back();
|
|
}
|
|
break;
|
|
}
|
|
|
|
case Opcode::PRINT: {
|
|
Value value = pop();
|
|
std::cout << value.to_string() << std::endl;
|
|
break;
|
|
}
|
|
|
|
case Opcode::HALT: {
|
|
halted_ = true;
|
|
break;
|
|
}
|
|
|
|
default:
|
|
throw std::runtime_error("Unknown opcode: " + std::to_string(static_cast<int>(opcode)));
|
|
}
|
|
}
|
|
};
|
|
|
|
// ============================================================================
|
|
// PERFORMANCE PROFILER
|
|
// ============================================================================
|
|
|
|
class Profiler {
|
|
private:
|
|
struct InstructionProfile {
|
|
size_t execution_count;
|
|
uint64_t total_cycles;
|
|
std::string name;
|
|
};
|
|
|
|
std::unordered_map<Opcode, InstructionProfile> profiles_;
|
|
uint64_t start_cycles_;
|
|
|
|
public:
|
|
Profiler() {
|
|
// Initialize profile names
|
|
profiles_[Opcode::ADD] = { 0, 0, "ADD" };
|
|
profiles_[Opcode::SUB] = { 0, 0, "SUB" };
|
|
profiles_[Opcode::MUL] = { 0, 0, "MUL" };
|
|
profiles_[Opcode::DIV] = { 0, 0, "DIV" };
|
|
profiles_[Opcode::CALL] = { 0, 0, "CALL" };
|
|
profiles_[Opcode::RET] = { 0, 0, "RET" };
|
|
// Add more as needed
|
|
}
|
|
|
|
void start_measurement() {
|
|
start_cycles_ = __rdtsc();
|
|
}
|
|
|
|
void record_instruction(Opcode op, size_t ip) {
|
|
uint64_t end_cycles = __rdtsc();
|
|
uint64_t cycles = end_cycles - start_cycles_;
|
|
|
|
auto& profile = profiles_[op];
|
|
profile.execution_count++;
|
|
profile.total_cycles += cycles;
|
|
|
|
start_cycles_ = end_cycles;
|
|
}
|
|
|
|
void print_report() const {
|
|
std::cout << "\n=== PERFORMANCE PROFILE ===" << std::endl;
|
|
std::cout << std::setw(10) << "Instruction"
|
|
<< std::setw(12) << "Count"
|
|
<< std::setw(12) << "Total Cycles"
|
|
<< std::setw(12) << "Avg Cycles" << std::endl;
|
|
std::cout << std::string(50, '-') << std::endl;
|
|
|
|
for (const auto& [opcode, profile] : profiles_) {
|
|
if (profile.execution_count > 0) {
|
|
double avg_cycles = static_cast<double>(profile.total_cycles) / profile.execution_count;
|
|
std::cout << std::setw(10) << profile.name
|
|
<< std::setw(12) << profile.execution_count
|
|
<< std::setw(12) << profile.total_cycles
|
|
<< std::setw(12) << std::fixed << std::setprecision(2) << avg_cycles << std::endl;
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
// ============================================================================
|
|
// MEMORY POOL FOR EFFICIENT VALUE ALLOCATION
|
|
// ============================================================================
|
|
|
|
class ValuePool {
|
|
private:
|
|
static const size_t POOL_SIZE = 4096;
|
|
std::vector<Value> pool_;
|
|
size_t current_index_;
|
|
|
|
public:
|
|
ValuePool() : current_index_(0) {
|
|
pool_.reserve(POOL_SIZE);
|
|
}
|
|
|
|
Value* allocate() {
|
|
if (current_index_ >= pool_.size()) {
|
|
pool_.emplace_back();
|
|
}
|
|
return &pool_[current_index_++];
|
|
}
|
|
|
|
void reset() {
|
|
current_index_ = 0;
|
|
}
|
|
|
|
size_t size() const { return current_index_; }
|
|
};
|
|
|
|
// ============================================================================
|
|
// MAIN DEMONSTRATION
|
|
// ============================================================================
|
|
|
|
int main() {
|
|
std::cout << "High-Performance POP VM with JIT Optimizations" << std::endl;
|
|
std::cout << "=============================================" << std::endl;
|
|
|
|
// Create a simple test program: calculate factorial(5)
|
|
std::vector<uint8_t> test_bytecode = {
|
|
// PUSH_INT 5 (factorial of 5)
|
|
0x02, 0x20, 0x05, 0x00, 0x00, 0x00,
|
|
|
|
// PUSH_INT 1 (accumulator)
|
|
0x02, 0x20, 0x01, 0x00, 0x00, 0x00,
|
|
|
|
// Label: loop_start
|
|
// DUP2 (duplicate n and acc)
|
|
0x80,
|
|
0x02, 0x20, 0x02, 0x00, 0x00, 0x00, // PUSH_INT 2
|
|
0x80, // DUP to get n again
|
|
0x42, // CMP_LT (n < 2)
|
|
|
|
// JMP_IF to end
|
|
0x52, 0x0A, 0x00, 0x00, 0x00, // Jump forward 10 bytes if true
|
|
|
|
// Multiply acc * n
|
|
0x22, // MUL
|
|
|
|
// Decrement n: PUSH_INT 1, SUB
|
|
0x02, 0x20, 0x01, 0x00, 0x00, 0x00,
|
|
0x21, // SUB
|
|
|
|
// Jump back to loop_start
|
|
0x50, 0xEC, 0xFF, 0xFF, 0xFF, // Jump back 20 bytes
|
|
|
|
// Label: end
|
|
// POP the remaining n, leaving acc on stack
|
|
0x81, // POP
|
|
|
|
// PRINT result
|
|
0x90,
|
|
|
|
// HALT
|
|
0xA0
|
|
};
|
|
|
|
OptimizedVM vm;
|
|
|
|
// Test interpreted execution
|
|
std::cout << "\n=== INTERPRETED EXECUTION ===" << std::endl;
|
|
vm.load_bytecode(test_bytecode);
|
|
vm.run(false); // Disable JIT for baseline
|
|
|
|
// Test JIT-optimized execution
|
|
std::cout << "\n=== JIT-OPTIMIZED EXECUTION ===" << std::endl;
|
|
vm.load_bytecode(test_bytecode);
|
|
vm.run(true); // Enable JIT
|
|
|
|
// Performance comparison with different optimization levels
|
|
std::cout << "\n=== PERFORMANCE COMPARISON ===" << std::endl;
|
|
|
|
const size_t ITERATIONS = 10000;
|
|
auto run_benchmark = [&](bool use_jit, const std::string& name) {
|
|
auto start = std::chrono::high_resolution_clock::now();
|
|
|
|
for (size_t i = 0; i < ITERATIONS; ++i) {
|
|
OptimizedVM bench_vm;
|
|
bench_vm.load_bytecode(test_bytecode);
|
|
bench_vm.run(use_jit);
|
|
}
|
|
|
|
auto end = std::chrono::high_resolution_clock::now();
|
|
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
|
|
|
|
std::cout << name << ": " << duration.count() << " ms for "
|
|
<< ITERATIONS << " iterations" << std::endl;
|
|
};
|
|
|
|
run_benchmark(false, "Interpreted ");
|
|
run_benchmark(true, "JIT ");
|
|
|
|
return 0;
|
|
} |