// Reconstructed from patch 53cdb95b6e3ad7f0a22e76f21a93f1e49ac604dd
// ("compiler start", Sky Johnson, Wed, 11 Jun 2025 17:12:33 -0500), which
// creates compiler/bytecode.go and compiler/compiler.go. Both files declare
// `package compiler`; compiler.go additionally imports "fmt". Restore the
// package clause and the import when splitting this text back into files.

// ---------------------------------------------------------------------------
// compiler/bytecode.go
// ---------------------------------------------------------------------------

// Opcode represents a single bytecode instruction. Operands, where present,
// are listed in brackets next to the opcode; each operand is encoded as a
// 16-bit little-endian value following the opcode byte (see EncodeInstruction).
type Opcode uint8

const (
	// Stack Operations
	OpLoadConst   Opcode = iota // Load constant onto stack [idx]
	OpLoadLocal                 // Load local variable [slot]
	OpStoreLocal                // Store top of stack to local [slot]
	OpLoadGlobal                // Load global variable [idx]
	OpStoreGlobal               // Store top of stack to global [idx]
	OpPop                       // Pop top value from stack
	OpDup                       // Duplicate top value on stack

	// Arithmetic Operations
	OpAdd // a + b
	OpSub // a - b
	OpMul // a * b
	OpDiv // a / b
	OpNeg // -a
	OpMod // a % b

	// Comparison Operations
	OpEq  // a == b
	OpNeq // a != b
	OpLt  // a < b
	OpLte // a <= b
	OpGt  // a > b
	OpGte // a >= b

	// Logical Operations
	OpNot // not a
	OpAnd // a and b
	OpOr  // a or b

	// Control Flow
	OpJump        // Unconditional jump [offset]
	OpJumpIfTrue  // Jump if top of stack is true [offset]
	OpJumpIfFalse // Jump if top of stack is false [offset]
	OpCall        // Call function [argCount]
	OpReturn      // Return from function
	OpReturnNil   // Return nil from function

	// Table Operations
	OpNewTable    // Create new empty table
	OpGetIndex    // table[key] -> value
	OpSetIndex    // table[key] = value
	OpGetField    // table.field -> value [fieldIdx]
	OpSetField    // table.field = value [fieldIdx]
	OpTableInsert // Insert value into table at next index

	// Struct Operations
	OpNewStruct   // Create new struct instance [structId]
	OpGetProperty // struct.field -> value [fieldIdx]
	OpSetProperty // struct.field = value [fieldIdx]
	OpCallMethod  // Call method on struct [methodIdx, argCount]

	// Function Operations
	OpClosure      // Create closure from function [funcIdx, upvalueCount]
	OpGetUpvalue   // Get upvalue [idx]
	OpSetUpvalue   // Set upvalue [idx]
	OpCloseUpvalue // Close upvalue (move to heap)

	// Array Operations
	OpNewArray    // Create new array with size [size]
	OpArrayAppend // Append value to array

	// Type Operations
	OpGetType // Get type of value on stack
	OpCast    // Cast value to type [typeId]

	// I/O Operations
	OpEcho // Echo value to output
	OpExit // Exit with code

	// Special Operations
	OpNoop     // No operation
	OpBreak    // Break from loop
	OpContinue // Continue loop iteration

	// Debug Operations
	OpDebugPrint // Debug print stack top
	OpDebugStack // Debug print entire stack
)

// Instruction represents a single bytecode instruction with operands.
type Instruction struct {
	Op       Opcode
	Operands []uint16 // Variable length operands
}

// Chunk represents a compiled chunk of bytecode.
type Chunk struct {
	Code      []uint8    // Raw bytecode stream
	Constants []Value    // Constant pool
	Lines     []int      // Line numbers for debugging (one entry per code byte)
	Functions []Function // Function definitions
	Structs   []Struct   // Struct definitions
}

// Value represents a runtime value in the VM.
type Value struct {
	Type ValueType
	Data any // Actual value data
}

// ValueType represents the type of a runtime value.
type ValueType uint8

const (
	ValueNil ValueType = iota
	ValueBool
	ValueNumber
	ValueString
	ValueTable
	ValueFunction
	ValueStruct
	ValueArray
	ValueUpvalue
)

// Function represents a compiled function.
type Function struct {
	Name       string  // Function name (empty for anonymous)
	Arity      int     // Number of parameters
	Variadic   bool    // Whether function accepts variable args
	LocalCount int     // Number of local variable slots
	UpvalCount int     // Number of upvalues
	Chunk      Chunk   // Function bytecode
	Defaults   []Value // Default parameter values
}

// Struct represents a compiled struct definition.
type Struct struct {
	Name    string            // Struct name
	Fields  []StructField     // Field definitions
	Methods map[string]uint16 // Method name -> function index
	ID      uint16            // Unique struct identifier
}

// StructField represents a field in a struct.
type StructField struct {
	Name   string    // Field name
	Type   ValueType // Field type
	Offset uint16    // Offset in struct layout
}

// Table represents a key-value table/map.
type Table struct {
	Array map[int]Value    // Array part (integer keys)
	Hash  map[string]Value // Hash part (string keys)
	Meta  *Table           // Metatable for operations
}

// Array represents a dynamic array.
type Array struct {
	Elements []Value // Array elements
	Count    int     // Current element count
	Capacity int     // Current capacity
}

// StructInstance represents an instance of a struct.
type StructInstance struct {
	StructID uint16           // Reference to struct definition
	Fields   map[string]Value // Field values
}

// Upvalue represents a captured variable.
type Upvalue struct {
	Location *Value // Pointer to actual value location
	Closed   Value  // Closed-over value (when moved to heap)
	IsClosed bool   // Whether upvalue has been closed
}

// Instruction encoding helpers

// EncodeInstruction encodes an instruction into bytecode: one opcode byte
// followed by every operand as two bytes, low byte first. The number of
// operands written is exactly len(operands); it is the caller's job to pass
// as many as GetOperandCount(op) reports so the stream stays decodable.
func EncodeInstruction(op Opcode, operands ...uint16) []uint8 {
	encoded := make([]uint8, 0, 1+2*len(operands))
	encoded = append(encoded, uint8(op))
	for _, operand := range operands {
		encoded = append(encoded, uint8(operand&0xFF), uint8(operand>>8))
	}
	return encoded
}

// DecodeInstruction decodes the instruction that starts at code[offset].
//
// It returns the opcode, its operands, and the offset of the next
// instruction. If offset lies outside code it returns (OpNoop, nil, offset).
// If the stream ends before all operands are available, only the complete
// operands are returned and the next offset stops after the last one read.
func DecodeInstruction(code []uint8, offset int) (Opcode, []uint16, int) {
	if offset < 0 || offset >= len(code) {
		return OpNoop, nil, offset
	}

	op := Opcode(code[offset])
	next := offset + 1

	// The operand count is a property of the opcode, not of the stream.
	count := GetOperandCount(op)
	operands := make([]uint16, 0, count)
	for i := 0; i < count && next+1 < len(code); i++ {
		operands = append(operands, uint16(code[next])|uint16(code[next+1])<<8)
		next += 2
	}

	return op, operands, next
}

// GetOperandCount returns the number of 16-bit operands that follow op in the
// bytecode stream. It must agree with the bracketed operand lists documented
// on the opcode constants; opcodes not listed here take no operands.
func GetOperandCount(op Opcode) int {
	switch op {
	case OpLoadConst, OpLoadLocal, OpStoreLocal, OpLoadGlobal, OpStoreGlobal:
		return 1
	case OpJump, OpJumpIfTrue, OpJumpIfFalse:
		return 1
	case OpCall, OpNewStruct, OpGetField, OpSetField, OpGetProperty, OpSetProperty:
		return 1
	case OpGetUpvalue, OpSetUpvalue:
		// Both are documented as taking an upvalue index [idx].
		return 1
	case OpNewArray, OpCast:
		return 1
	case OpCallMethod, OpClosure:
		return 2
	default:
		return 0
	}
}

// InstructionSize returns the encoded size of op in bytes: one opcode byte
// plus two bytes per operand.
func InstructionSize(op Opcode) int {
	return 1 + (GetOperandCount(op) * 2)
}

// ---------------------------------------------------------------------------
// compiler/compiler.go
// ---------------------------------------------------------------------------

// Constants for compiler limits
const (
	MaxLocals    = 256   // Maximum local variables per function
	MaxUpvalues  = 256   // Maximum upvalues per function
	MaxConstants = 65536 // Maximum constants per chunk
)

// maxJumpDistance is the largest jump magnitude that fits the 16-bit operand.
const maxJumpDistance = 0xFFFF

// CompilerState holds state during compilation.
type CompilerState struct {
	Chunk         *Chunk         // Current chunk being compiled
	Constants     map[string]int // Constant pool index mapping
	Functions     []Function     // Compiled functions
	Structs       []Struct       // Compiled structs
	Locals        []Local        // Local variable stack
	Upvalues      []UpvalueRef   // Upvalue definitions
	ScopeDepth    int            // Current scope nesting level
	FunctionType  FunctionType   // Type of function being compiled
	BreakJumps    []int          // Break jump addresses for loops
	ContinueJumps []int          // Continue jump addresses for loops
	LoopStart     int            // Start of current loop for continue (-1 outside loops)
	LoopDepth     int            // Current loop nesting depth

	loopStack []loopFrame // Saved state of enclosing loops, innermost last
}

// loopFrame records what EnterLoop must restore in ExitLoop so that nested
// loops neither patch nor discard jumps that belong to an enclosing loop.
type loopFrame struct {
	prevStart    int // LoopStart of the enclosing loop (-1 if none)
	breakBase    int // len(BreakJumps) when the loop was entered
	continueBase int // len(ContinueJumps) when the loop was entered
}

// Local represents a local variable during compilation.
type Local struct {
	Name       string // Variable name
	Depth      int    // Scope depth where declared (-1 while uninitialized)
	IsCaptured bool   // Whether variable is captured by closure
	Slot       int    // Stack slot index
}

// UpvalueRef represents an upvalue reference during compilation.
type UpvalueRef struct {
	Index   uint8 // Index in enclosing function's locals or upvalues
	IsLocal bool  // True if captures local, false if captures upvalue
}

// FunctionType represents the type of function being compiled.
type FunctionType uint8

const (
	FunctionTypeScript   FunctionType = iota // Top-level script
	FunctionTypeFunction                     // Regular function
	FunctionTypeMethod                       // Struct method
)

// CompileError represents a compilation error with location information.
type CompileError struct {
	Message string
	Line    int
	Column  int
}

// Error implements the error interface.
func (ce CompileError) Error() string {
	return fmt.Sprintf("Compile error at line %d, column %d: %s", ce.Line, ce.Column, ce.Message)
}

// NewCompilerState creates a new compiler state for compilation with an empty
// chunk, empty pools, and no active scope or loop.
func NewCompilerState(functionType FunctionType) *CompilerState {
	return &CompilerState{
		Chunk:         NewChunk(),
		Constants:     make(map[string]int),
		Functions:     make([]Function, 0),
		Structs:       make([]Struct, 0),
		Locals:        make([]Local, 0, MaxLocals),
		Upvalues:      make([]UpvalueRef, 0, MaxUpvalues),
		ScopeDepth:    0,
		FunctionType:  functionType,
		BreakJumps:    make([]int, 0),
		ContinueJumps: make([]int, 0),
		LoopStart:     -1,
		LoopDepth:     0,
	}
}

// NewChunk creates a new, empty bytecode chunk with pre-sized buffers.
func NewChunk() *Chunk {
	return &Chunk{
		Code:      make([]uint8, 0, 256),
		Constants: make([]Value, 0, 64),
		Lines:     make([]int, 0, 256),
		Functions: make([]Function, 0),
		Structs:   make([]Struct, 0),
	}
}

// Scope management methods

// BeginScope enters a new lexical scope.
func (cs *CompilerState) BeginScope() {
	cs.ScopeDepth++
}

// EndScope leaves the current lexical scope and discards every local declared
// deeper than the new scope depth. For each discarded local it emits
// OpCloseUpvalue if the local was captured by a closure, OpPop otherwise.
func (cs *CompilerState) EndScope() {
	cs.ScopeDepth--

	for n := len(cs.Locals); n > 0 && cs.Locals[n-1].Depth > cs.ScopeDepth; n = len(cs.Locals) {
		if cs.Locals[n-1].IsCaptured {
			cs.EmitByte(uint8(OpCloseUpvalue))
		} else {
			cs.EmitByte(uint8(OpPop))
		}
		cs.Locals = cs.Locals[:n-1]
	}
}

// Local variable management

// AddLocal declares a new local named name in the next free slot. The local
// starts out uninitialized (Depth == -1) until MarkInitialized is called.
// It returns a CompileError when MaxLocals locals already exist.
func (cs *CompilerState) AddLocal(name string) error {
	if len(cs.Locals) >= MaxLocals {
		return CompileError{
			Message: "too many local variables in function",
		}
	}

	cs.Locals = append(cs.Locals, Local{
		Name:       name,
		Depth:      -1, // Mark as uninitialized
		IsCaptured: false,
		Slot:       len(cs.Locals),
	})
	return nil
}

// MarkInitialized marks the most recently added local as initialized by
// assigning it the current scope depth. It does nothing if there are no locals.
func (cs *CompilerState) MarkInitialized() {
	if n := len(cs.Locals); n > 0 {
		cs.Locals[n-1].Depth = cs.ScopeDepth
	}
}

// ResolveLocal looks up name among the locals, innermost declaration first.
// It returns the local's index, -1 if no local has that name, or -2 if the
// innermost match is still uninitialized (used inside its own initializer).
func (cs *CompilerState) ResolveLocal(name string) int {
	for i := len(cs.Locals) - 1; i >= 0; i-- {
		if cs.Locals[i].Name != name {
			continue
		}
		if cs.Locals[i].Depth == -1 {
			// Variable used before initialization
			return -2
		}
		return i
	}
	return -1
}

// Upvalue management

// AddUpvalue registers an upvalue that captures slot index of the enclosing
// function (a local when isLocal is true, an upvalue otherwise) and returns
// its index. An identical existing upvalue is reused. It returns -1 when
// MaxUpvalues upvalues already exist.
func (cs *CompilerState) AddUpvalue(index uint8, isLocal bool) int {
	for i := range cs.Upvalues {
		if cs.Upvalues[i].Index == index && cs.Upvalues[i].IsLocal == isLocal {
			return i
		}
	}

	count := len(cs.Upvalues)
	if count >= MaxUpvalues {
		return -1 // Too many upvalues
	}

	cs.Upvalues = append(cs.Upvalues, UpvalueRef{Index: index, IsLocal: isLocal})
	return count
}

// Constant pool management

// AddConstant adds value to the current chunk's constant pool and returns its
// index. A value whose key (see valueKey) is already present is not added
// again; the existing index is returned. It returns -1 when the pool already
// holds MaxConstants entries.
func (cs *CompilerState) AddConstant(value Value) int {
	key := cs.valueKey(value)
	if index, exists := cs.Constants[key]; exists {
		return index
	}

	if len(cs.Chunk.Constants) >= MaxConstants {
		return -1 // Too many constants
	}

	index := len(cs.Chunk.Constants)
	cs.Chunk.Constants = append(cs.Chunk.Constants, value)
	cs.Constants[key] = index
	return index
}

// valueKey generates the deduplication key for value in the constant pool.
//
// Nil, bool, number (float64) and string constants are keyed by content. Any
// other value, including one whose Data does not hold the Go type implied by
// its Type, falls back to a key built from the value type, the dynamic Go
// type and the %p rendering of Data, so it never panics and values of
// different ValueTypes never share a key.
func (cs *CompilerState) valueKey(value Value) string {
	switch value.Type {
	case ValueNil:
		return "nil"
	case ValueBool:
		if b, ok := value.Data.(bool); ok {
			if b {
				return "bool:true"
			}
			return "bool:false"
		}
	case ValueNumber:
		if n, ok := value.Data.(float64); ok {
			return fmt.Sprintf("number:%g", n)
		}
	case ValueString:
		if s, ok := value.Data.(string); ok {
			return "string:" + s
		}
	}

	// Fallback keys start with a digit, so they cannot collide with the
	// "nil"/"bool:"/"number:"/"string:" keys above.
	return fmt.Sprintf("%d:%T:%p", value.Type, value.Data, value.Data)
}

// Bytecode emission methods

// EmitByte appends one byte to the current chunk. A matching entry of 0 is
// appended to Lines; the caller is expected to fill in the real line number.
func (cs *CompilerState) EmitByte(b uint8) {
	cs.Chunk.Code = append(cs.Chunk.Code, b)
	cs.Chunk.Lines = append(cs.Chunk.Lines, 0) // Line will be set by caller
}

// EmitBytes appends each of the given bytes via EmitByte.
func (cs *CompilerState) EmitBytes(bytes ...uint8) {
	for _, b := range bytes {
		cs.EmitByte(b)
	}
}

// EmitInstruction encodes op with its operands and appends the result.
func (cs *CompilerState) EmitInstruction(op Opcode, operands ...uint16) {
	cs.EmitBytes(EncodeInstruction(op, operands...)...)
}

// EmitJump emits the jump instruction op with a 0xFFFF placeholder operand
// and returns the offset of that operand, to be handed to PatchJump later.
func (cs *CompilerState) EmitJump(op Opcode) int {
	cs.EmitInstruction(op, 0xFFFF) // Placeholder, patched later
	return len(cs.Chunk.Code) - 2  // Offset of the jump operand
}

// PatchJump rewrites the jump operand at offset so the jump lands on the
// current end of the chunk. The operand is left untouched if offset does not
// address a two-byte operand inside the chunk or if the distance exceeds
// 65535 (which would need a long jump instruction).
func (cs *CompilerState) PatchJump(offset int) {
	cs.patchJumpTo(offset, len(cs.Chunk.Code))
}

// patchJumpTo writes the distance from the end of the two-byte operand at
// offset to target into that operand, low byte first, and reports whether it
// was written. Backward distances are stored as the low 16 bits of their
// two's-complement value.
//
// NOTE(review): the VM is not visible here. Backward jumps only work if it
// interprets the operand as signed or wraps modulo 65536. Confirm.
func (cs *CompilerState) patchJumpTo(offset, target int) bool {
	code := cs.Chunk.Code
	if offset < 0 || offset+1 >= len(code) || target < 0 {
		return false
	}

	distance := target - (offset + 2)
	if distance > maxJumpDistance || distance < -maxJumpDistance {
		return false
	}

	code[offset] = uint8(distance & 0xFF)
	code[offset+1] = uint8((distance >> 8) & 0xFF)
	return true
}

// Loop management

// EnterLoop starts a loop at the current end of the chunk. The enclosing
// loop's start and the number of pending break/continue jumps are saved so
// ExitLoop can restore them.
func (cs *CompilerState) EnterLoop() {
	cs.loopStack = append(cs.loopStack, loopFrame{
		prevStart:    cs.LoopStart,
		breakBase:    len(cs.BreakJumps),
		continueBase: len(cs.ContinueJumps),
	})
	cs.LoopStart = len(cs.Chunk.Code)
	cs.LoopDepth++
}

// ExitLoop finishes the innermost loop. Break jumps recorded since the
// matching EnterLoop are patched to the current end of the chunk, continue
// jumps recorded since then are patched back to the loop start, and the
// enclosing loop's state is restored. Jumps belonging to enclosing loops are
// left pending.
func (cs *CompilerState) ExitLoop() {
	frame := loopFrame{prevStart: -1}
	if n := len(cs.loopStack); n > 0 {
		frame = cs.loopStack[n-1]
		cs.loopStack = cs.loopStack[:n-1]
	}

	// Capture both targets before any state is reset below.
	loopStart := cs.LoopStart
	loopEnd := len(cs.Chunk.Code)

	breakBase := clampIndex(frame.breakBase, len(cs.BreakJumps))
	for _, jumpOffset := range cs.BreakJumps[breakBase:] {
		cs.patchJumpTo(jumpOffset, loopEnd)
	}
	cs.BreakJumps = cs.BreakJumps[:breakBase]

	continueBase := clampIndex(frame.continueBase, len(cs.ContinueJumps))
	for _, jumpOffset := range cs.ContinueJumps[continueBase:] {
		cs.patchJumpTo(jumpOffset, loopStart)
	}
	cs.ContinueJumps = cs.ContinueJumps[:continueBase]

	if cs.LoopDepth > 0 {
		cs.LoopDepth--
	}
	if cs.LoopDepth == 0 {
		cs.LoopStart = -1
	} else {
		cs.LoopStart = frame.prevStart
	}
}

// clampIndex limits i to the range [0, n].
func clampIndex(i, n int) int {
	switch {
	case i < 0:
		return 0
	case i > n:
		return n
	default:
		return i
	}
}

// EmitBreak emits a forward jump out of the current loop and records it for
// patching in ExitLoop. Like EmitContinue, it does nothing outside a loop, so
// no jump is left dangling for an unrelated loop to patch.
func (cs *CompilerState) EmitBreak() {
	if cs.LoopStart != -1 {
		jumpOffset := cs.EmitJump(OpJump)
		cs.BreakJumps = append(cs.BreakJumps, jumpOffset)
	}
}

// EmitContinue emits a jump back to the start of the current loop and records
// it for patching in ExitLoop. It does nothing outside a loop.
func (cs *CompilerState) EmitContinue() {
	if cs.LoopStart != -1 {
		jumpOffset := cs.EmitJump(OpJump)
		cs.ContinueJumps = append(cs.ContinueJumps, jumpOffset)
	}
}