package compiler

import (
	"fmt"
	"math"
)

// Compiler limits.
const (
	MaxLocals    = 256   // Maximum local variables per function
	MaxUpvalues  = 256   // Maximum upvalues per function
	MaxConstants = 65536 // Maximum constants per chunk
)

// CompilerState holds the mutable state used while compiling one function
// (or the top-level script) into a Chunk.
//
// BreakJumps, ContinueJumps and LoopStart always describe the innermost loop
// currently being compiled; EnterLoop and ExitLoop save and restore the
// enclosing loop's values.
type CompilerState struct {
	Chunk         *Chunk         // Current chunk being compiled
	Constants     map[string]int // Dedup key (see valueKey) -> constant pool index
	Functions     []Function     // Compiled functions
	Structs       []Struct       // Compiled structs
	Locals        []Local        // Local variable stack
	Upvalues      []UpvalueRef   // Upvalue definitions
	ScopeDepth    int            // Current scope nesting level
	FunctionType  FunctionType   // Type of function being compiled
	BreakJumps    []int          // Operand offsets of pending break jumps in the current loop
	ContinueJumps []int          // Operand offsets of pending continue jumps in the current loop
	LoopStart     int            // Code offset where the current loop starts; -1 outside loops
	LoopDepth     int            // Current loop nesting depth
	parent        *CompilerState // Parent compiler state for nested functions
	CurrentLine   int            // Source line recorded for every emitted byte

	enclosingLoops   []savedLoop // Loop contexts suspended by nested EnterLoop calls
	dedupedConstants int         // Number of AddConstant calls satisfied by an existing entry
}

// savedLoop is the loop context of an enclosing loop, parked while a nested
// loop is being compiled.
type savedLoop struct {
	start         int
	breakJumps    []int
	continueJumps []int
}

// Local represents a local variable during compilation.
type Local struct {
	Name       string // Variable name
	Depth      int    // Scope depth where declared; -1 while still uninitialized
	IsCaptured bool   // Whether the variable is captured by a closure
	Slot       int    // Stack slot index
}

// UpvalueRef represents an upvalue reference during compilation.
type UpvalueRef struct {
	Index   uint8 // Index in the enclosing function's locals or upvalues
	IsLocal bool  // True if it captures a local, false if it captures an upvalue
}

// FunctionType represents the type of function being compiled.
type FunctionType uint8

const (
	FunctionTypeScript   FunctionType = iota // Top-level script
	FunctionTypeFunction                     // Regular function
	FunctionTypeMethod                       // Struct method
)

// CompileError represents a compilation error with location information.
type CompileError struct {
	Message string
	Line    int
	Column  int
}

// Error implements the error interface.
func (ce CompileError) Error() string {
	return fmt.Sprintf("Compile error at line %d, column %d: %s", ce.Line, ce.Column, ce.Message)
}

// NewCompilerState creates a compiler state for a function of the given type,
// with a fresh chunk, empty pools and no active loop (LoopStart is -1).
func NewCompilerState(functionType FunctionType) *CompilerState {
	return &CompilerState{
		Chunk:         NewChunk(),
		Constants:     make(map[string]int),
		Functions:     make([]Function, 0),
		Structs:       make([]Struct, 0),
		Locals:        make([]Local, 0, MaxLocals),
		Upvalues:      make([]UpvalueRef, 0, MaxUpvalues),
		ScopeDepth:    0,
		FunctionType:  functionType,
		BreakJumps:    make([]int, 0),
		ContinueJumps: make([]int, 0),
		LoopStart:     -1,
		LoopDepth:     0,
		parent:        nil,
	}
}

// NewChunk creates an empty bytecode chunk with pre-sized buffers.
func NewChunk() *Chunk {
	return &Chunk{
		Code:      make([]uint8, 0, 256),
		Constants: make([]Value, 0, 64),
		Lines:     make([]int, 0, 256),
		Functions: make([]Function, 0),
		Structs:   make([]Struct, 0),
	}
}

// BeginScope enters a new lexical scope.
func (cs *CompilerState) BeginScope() {
	cs.ScopeDepth++
}

// EndScope leaves the current lexical scope and discards every local declared
// deeper than the new depth, emitting OpCloseUpvalue for captured locals and
// OpPop for the rest.
func (cs *CompilerState) EndScope() {
	cs.ScopeDepth--

	for len(cs.Locals) > 0 && cs.Locals[len(cs.Locals)-1].Depth > cs.ScopeDepth {
		local := cs.Locals[len(cs.Locals)-1]
		if local.IsCaptured {
			cs.EmitByte(uint8(OpCloseUpvalue))
		} else {
			cs.EmitByte(uint8(OpPop))
		}
		cs.Locals = cs.Locals[:len(cs.Locals)-1]
	}
}

// AddLocal declares a new, not yet initialized local (Depth -1) in the next
// free slot. It returns a CompileError carrying the current line when the
// function already has MaxLocals locals.
func (cs *CompilerState) AddLocal(name string) error {
	if len(cs.Locals) >= MaxLocals {
		return CompileError{
			Message: "too many local variables in function",
			Line:    cs.CurrentLine,
		}
	}

	cs.Locals = append(cs.Locals, Local{
		Name:       name,
		Depth:      -1, // Mark as uninitialized
		IsCaptured: false,
		Slot:       len(cs.Locals),
	})
	return nil
}

// MarkInitialized marks the most recently declared local as initialized by
// assigning it the current scope depth. It is a no-op when there are no locals.
func (cs *CompilerState) MarkInitialized() {
	if len(cs.Locals) > 0 {
		cs.Locals[len(cs.Locals)-1].Depth = cs.ScopeDepth
	}
}

// ResolveLocal looks a name up among the locals, innermost declaration first.
// It returns the local's index, -1 when the name is not a local, or -2 when
// the innermost match is still uninitialized (used before initialization).
func (cs *CompilerState) ResolveLocal(name string) int {
	for i := len(cs.Locals) - 1; i >= 0; i-- {
		local := &cs.Locals[i]
		if local.Name == name {
			if local.Depth == -1 {
				return -2 // Variable used before initialization
			}
			return i
		}
	}
	return -1
}

// AddUpvalue registers an upvalue and returns its index, reusing an existing
// entry with the same index/isLocal pair. It returns -1 when the function
// already has MaxUpvalues upvalues.
func (cs *CompilerState) AddUpvalue(index uint8, isLocal bool) int {
	upvalueCount := len(cs.Upvalues)

	for i := range upvalueCount {
		upvalue := &cs.Upvalues[i]
		if upvalue.Index == index && upvalue.IsLocal == isLocal {
			return i
		}
	}

	if upvalueCount >= MaxUpvalues {
		return -1 // Too many upvalues
	}

	cs.Upvalues = append(cs.Upvalues, UpvalueRef{
		Index:   index,
		IsLocal: isLocal,
	})
	return upvalueCount
}

// AddConstant returns the constant pool index for value, adding it to the
// chunk if no constant with the same dedup key exists yet. It returns -1 when
// the pool already holds MaxConstants entries.
func (cs *CompilerState) AddConstant(value Value) int {
	key := cs.valueKey(value)

	if index, exists := cs.Constants[key]; exists {
		cs.dedupedConstants++
		return index
	}

	if len(cs.Chunk.Constants) >= MaxConstants {
		return -1 // Too many constants
	}

	index := len(cs.Chunk.Constants)
	cs.Chunk.Constants = append(cs.Chunk.Constants, value)
	cs.Constants[key] = index
	return index
}

// valueKey builds the deduplication key for a constant. Two values must map to
// the same key only when they are interchangeable as constants.
func (cs *CompilerState) valueKey(value Value) string {
	switch value.Type {
	case ValueNil:
		return "nil"
	case ValueBool:
		if value.Data.(bool) {
			return "bool:true"
		}
		return "bool:false"
	case ValueNumber:
		// %g prints the shortest representation that round-trips, and renders
		// negative zero as "-0", so +0 and -0 get distinct keys.
		return fmt.Sprintf("number:%g", value.Data.(float64))
	case ValueString:
		// Key on the full content. A digest-only key would let two different
		// strings share one pool slot, and special spellings such as
		// "string:empty" would collide with real string literals.
		return "string:" + value.Data.(string)
	default:
		// For complex types, use the formatted address as a fallback.
		return fmt.Sprintf("%T:%p", value.Data, value.Data)
	}
}

// simpleHash computes a 32-bit multiplicative (x31) hash over the runes of s.
// valueKey no longer uses it; it is retained in case other code in the
// package calls it.
func (cs *CompilerState) simpleHash(s string) uint32 {
	var hash uint32
	for _, c := range s {
		hash = hash*31 + uint32(c)
	}
	return hash
}

// EmitByte appends one byte to the chunk and records the current source line
// for it, keeping Code and Lines the same length.
func (cs *CompilerState) EmitByte(b uint8) {
	cs.Chunk.Code = append(cs.Chunk.Code, b)
	cs.Chunk.Lines = append(cs.Chunk.Lines, cs.CurrentLine)
}

// EmitBytes appends each of the given bytes via EmitByte.
func (cs *CompilerState) EmitBytes(bytes ...uint8) {
	for _, b := range bytes {
		cs.EmitByte(b)
	}
}

// EmitInstruction encodes op with its operands via EncodeInstruction and
// appends the resulting bytes.
func (cs *CompilerState) EmitInstruction(op Opcode, operands ...uint16) {
	cs.EmitBytes(EncodeInstruction(op, operands...)...)
}

// EmitJump emits op followed by a two-byte 0xFFFF placeholder and returns the
// offset of the placeholder's first byte, for use with PatchJump.
func (cs *CompilerState) EmitJump(op Opcode) int {
	cs.EmitByte(uint8(op))
	cs.EmitByte(0xFF) // Placeholder
	cs.EmitByte(0xFF) // Placeholder
	return len(cs.Chunk.Code) - 2
}

// PatchJump overwrites the placeholder at offset (as returned by EmitJump)
// with the little-endian distance from the end of that operand to the current
// end of the code.
//
// If the distance does not fit in 16 bits the placeholder is left untouched;
// the signature offers no way to report this, so callers that can produce
// very large bodies should check the code size themselves.
func (cs *CompilerState) PatchJump(offset int) {
	jump := len(cs.Chunk.Code) - offset - 2

	if jump > math.MaxUint16 {
		// Jump distance too large - could implement long jumps here
		return
	}

	cs.Chunk.Code[offset] = uint8(jump & 0xFF)
	cs.Chunk.Code[offset+1] = uint8((jump >> 8) & 0xFF)
}

// EnterLoop starts a new loop at the current code position. The enclosing
// loop's start and pending break/continue jumps are saved so that they can be
// restored, still unpatched, by the matching ExitLoop.
func (cs *CompilerState) EnterLoop() {
	cs.enclosingLoops = append(cs.enclosingLoops, savedLoop{
		start:         cs.LoopStart,
		breakJumps:    cs.BreakJumps,
		continueJumps: cs.ContinueJumps,
	})

	cs.LoopStart = len(cs.Chunk.Code)
	cs.LoopDepth++

	// Fresh lists: the saved slices must not be shared with the nested loop.
	cs.BreakJumps = nil
	cs.ContinueJumps = nil
}

// ExitLoop finishes the innermost loop: pending break jumps are patched to
// the current code position, pending continue jumps are patched with their
// distance back to LoopStart, and the enclosing loop's context (if any) is
// restored.
func (cs *CompilerState) ExitLoop() {
	for _, jumpOffset := range cs.BreakJumps {
		cs.PatchJump(jumpOffset)
	}

	// Patch continues while LoopStart still refers to this loop.
	if cs.LoopStart != -1 {
		for _, jumpOffset := range cs.ContinueJumps {
			jump := jumpOffset - cs.LoopStart + 2
			if jump >= 0 && jump <= math.MaxUint16 {
				cs.Chunk.Code[jumpOffset] = uint8(jump & 0xFF)
				cs.Chunk.Code[jumpOffset+1] = uint8((jump >> 8) & 0xFF)
			}
		}
	}

	cs.LoopDepth--

	if n := len(cs.enclosingLoops); n > 0 {
		outer := cs.enclosingLoops[n-1]
		cs.enclosingLoops = cs.enclosingLoops[:n-1]
		cs.LoopStart = outer.start
		cs.BreakJumps = outer.breakJumps
		cs.ContinueJumps = outer.continueJumps
		return
	}

	// ExitLoop without a matching EnterLoop: fall back to "no active loop".
	cs.LoopStart = -1
	cs.BreakJumps = cs.BreakJumps[:0]
	cs.ContinueJumps = cs.ContinueJumps[:0]
}

// EmitBreak emits an OpJump with a placeholder operand and queues it to be
// patched by ExitLoop.
func (cs *CompilerState) EmitBreak() {
	jumpOffset := cs.EmitJump(OpJump)
	cs.BreakJumps = append(cs.BreakJumps, jumpOffset)
}

// EmitContinue emits a jump with a placeholder operand and queues it to be
// patched by ExitLoop. It does nothing when no loop is active.
//
// NOTE(review): ExitLoop patches this operand with the distance back to
// LoopStart, yet the opcode emitted is OpJump, which the disassembler in this
// file renders as a forward jump (OpLoopBack is rendered backward). Confirm
// how the VM interprets it.
func (cs *CompilerState) EmitContinue() {
	if cs.LoopStart != -1 {
		jumpOffset := cs.EmitJump(OpJump)
		cs.ContinueJumps = append(cs.ContinueJumps, jumpOffset)
	}
}

// EmitLoadConstant emits the shortest instruction that loads value: dedicated
// opcodes for nil, true, false, +0 and 1, and OpLoadConst with a pool index
// for everything else.
//
// If the constant pool is full nothing is emitted; the signature offers no
// way to report this.
func (cs *CompilerState) EmitLoadConstant(value Value) {
	switch value.Type {
	case ValueNil:
		cs.EmitInstruction(OpLoadNil)
		return
	case ValueBool:
		if value.Data.(bool) {
			cs.EmitInstruction(OpLoadTrue)
		} else {
			cs.EmitInstruction(OpLoadFalse)
		}
		return
	case ValueNumber:
		num := value.Data.(float64)
		// Negative zero compares equal to 0 but must keep its sign, so it
		// goes through the constant pool instead of OpLoadZero.
		if num == 0 && !math.Signbit(num) {
			cs.EmitInstruction(OpLoadZero)
			return
		}
		if num == 1 {
			cs.EmitInstruction(OpLoadOne)
			return
		}
	}

	index := cs.AddConstant(value)
	if index != -1 {
		cs.EmitInstruction(OpLoadConst, uint16(index))
	}
}

// EmitLoadLocal emits a load of the given local slot, using the operand-free
// opcodes for slots 0-2.
func (cs *CompilerState) EmitLoadLocal(slot int) {
	switch slot {
	case 0:
		cs.EmitInstruction(OpLoadLocal0)
	case 1:
		cs.EmitInstruction(OpLoadLocal1)
	case 2:
		cs.EmitInstruction(OpLoadLocal2)
	default:
		cs.EmitInstruction(OpLoadLocal, uint16(slot))
	}
}

// EmitStoreLocal emits a store to the given local slot, using the
// operand-free opcodes for slots 0-2.
func (cs *CompilerState) EmitStoreLocal(slot int) {
	switch slot {
	case 0:
		cs.EmitInstruction(OpStoreLocal0)
	case 1:
		cs.EmitInstruction(OpStoreLocal1)
	case 2:
		cs.EmitInstruction(OpStoreLocal2)
	default:
		cs.EmitInstruction(OpStoreLocal, uint16(slot))
	}
}

// GetLastInstruction returns the opcode and operands of the last instruction
// in the chunk, or (OpNoop, nil) when the chunk is empty.
//
// The code is walked forward from offset 0 by InstructionSize, because a
// trailing byte cannot be told apart from an operand byte when scanning
// backward. This costs time linear in the chunk size per call. Operands are
// decoded as little-endian 16-bit values; an operand that would run past the
// end of the code is reported as 0.
func (cs *CompilerState) GetLastInstruction() (Opcode, []uint16) {
	code := cs.Chunk.Code
	if len(code) == 0 {
		return OpNoop, nil
	}

	last := 0
	for i := 0; i < len(code); {
		last = i
		// Always advance by at least one byte so an unknown opcode cannot
		// stall the scan.
		i += max(InstructionSize(Opcode(code[i])), 1)
	}

	op := Opcode(code[last])
	operands := make([]uint16, GetOperandCount(op))
	for j := range operands {
		lo := last + 1 + 2*j
		if lo+1 < len(code) {
			operands[j] = uint16(code[lo]) | uint16(code[lo+1])<<8
		}
	}
	return op, operands
}

// ReplaceLastInstruction removes the last instruction (and its line entries)
// and emits op with operands in its place. It returns false, leaving the
// chunk untouched, when there is no instruction to replace.
func (cs *CompilerState) ReplaceLastInstruction(op Opcode, operands ...uint16) bool {
	if len(cs.Chunk.Code) == 0 {
		return false
	}

	lastOp, _ := cs.GetLastInstruction()
	lastSize := InstructionSize(lastOp)

	if lastSize < 1 || len(cs.Chunk.Code) < lastSize || len(cs.Chunk.Lines) < lastSize {
		return false
	}

	cs.Chunk.Code = cs.Chunk.Code[:len(cs.Chunk.Code)-lastSize]
	cs.Chunk.Lines = cs.Chunk.Lines[:len(cs.Chunk.Lines)-lastSize]

	cs.EmitInstruction(op, operands...)
	return true
}

// MarkUnreachable overwrites every byte in [start, end) with OpNoop. Ranges
// that fall outside the code are ignored.
func (cs *CompilerState) MarkUnreachable(start, end int) {
	if start >= 0 && end <= len(cs.Chunk.Code) {
		for i := start; i < end; i++ {
			cs.Chunk.Code[i] = uint8(OpNoop)
		}
	}
}

// OptimizationStats summarizes optimizations visible in a compiled chunk.
type OptimizationStats struct {
	ConstantsFolded    int
	InstructionsOpt    int
	DeadCodeEliminated int
	JumpsOptimized     int
	ConstantsDeduped   int
}

// GetOptimizationStats reports the number of specialized instructions and
// OpNoop instructions found in the chunk, and how many AddConstant calls were
// answered from an existing pool entry. ConstantsFolded and JumpsOptimized
// are not tracked here and stay zero.
func (cs *CompilerState) GetOptimizationStats() OptimizationStats {
	specialized := 0
	noops := 0

	for i := 0; i < len(cs.Chunk.Code); {
		op, _, next := DecodeInstruction(cs.Chunk.Code, i)
		if IsSpecializedInstruction(op) {
			specialized++
		}
		if op == OpNoop {
			noops++
		}
		if next <= i {
			break // Decoder made no progress; avoid looping forever.
		}
		i = next
	}

	return OptimizationStats{
		InstructionsOpt:    specialized,
		DeadCodeEliminated: noops,
		ConstantsDeduped:   cs.dedupedConstants,
	}
}

// SetLine sets the source line recorded for subsequently emitted bytes.
func (cs *CompilerState) SetLine(line int) {
	cs.CurrentLine = line
}

// PrintChunk writes a summary, a disassembly and the constant pool of the
// current chunk to standard output under the given name.
func (cs *CompilerState) PrintChunk(name string) {
	fmt.Printf("== %s ==\n", name)
	fmt.Printf("Constants: %d\n", len(cs.Chunk.Constants))
	fmt.Printf("Functions: %d\n", len(cs.Chunk.Functions))
	fmt.Printf("Structs: %d\n", len(cs.Chunk.Structs))
	fmt.Printf("Code size: %d bytes\n", len(cs.Chunk.Code))

	stats := cs.GetOptimizationStats()
	fmt.Printf("Optimizations: %d specialized, %d dead eliminated, %d constants deduped\n",
		stats.InstructionsOpt, stats.DeadCodeEliminated, stats.ConstantsDeduped)
	fmt.Println()

	for offset := 0; offset < len(cs.Chunk.Code); {
		offset = cs.disassembleInstruction(offset)
	}

	if len(cs.Chunk.Constants) > 0 {
		fmt.Println("\nConstants:")
		for i, constant := range cs.Chunk.Constants {
			fmt.Printf("%4d: ", i)
			cs.printValue(constant)
			fmt.Println()
		}
	}
}

// disassembleInstruction prints the instruction at offset and returns the
// offset of the next one. Opcodes without a dedicated case below are printed
// as single-byte instructions.
func (cs *CompilerState) disassembleInstruction(offset int) int {
	fmt.Printf("%04d ", offset)

	// Print the source line, or a continuation mark when it matches the
	// previous byte's line.
	switch {
	case offset > 0 && len(cs.Chunk.Lines) > offset && cs.Chunk.Lines[offset] == cs.Chunk.Lines[offset-1]:
		fmt.Print(" | ")
	case len(cs.Chunk.Lines) > offset:
		fmt.Printf("%4d ", cs.Chunk.Lines[offset])
	default:
		fmt.Print(" ? ")
	}

	if offset >= len(cs.Chunk.Code) {
		fmt.Println("END")
		return offset + 1
	}

	instruction := cs.Chunk.Code[offset]
	op := Opcode(instruction)

	if name, exists := opcodeNames[op]; exists {
		fmt.Printf("%-16s", name)
	} else {
		fmt.Printf("UNKNOWN_%02x ", instruction)
	}

	switch op {
	case OpLoadConst:
		return cs.constantInstruction(offset)
	case OpLoadLocal, OpStoreLocal, OpAddConst, OpSubConst, OpInc, OpDec:
		return cs.byteInstruction(offset)
	case OpJump, OpJumpIfTrue, OpJumpIfFalse:
		return cs.jumpInstruction(offset, 1)
	case OpLoopBack:
		return cs.jumpInstruction(offset, -1)
	case OpGetLocalField, OpSetLocalField, OpTestAndJump:
		return cs.doubleByteInstruction(offset)
	default:
		fmt.Println()
		return offset + 1
	}
}

// constantInstruction prints a one-operand instruction whose operand is a
// constant pool index, together with the referenced constant.
func (cs *CompilerState) constantInstruction(offset int) int {
	if offset+2 >= len(cs.Chunk.Code) {
		fmt.Println(" [incomplete]")
		return offset + 1
	}

	constant := uint16(cs.Chunk.Code[offset+1]) | (uint16(cs.Chunk.Code[offset+2]) << 8)
	fmt.Printf(" %4d '", constant)
	if int(constant) < len(cs.Chunk.Constants) {
		cs.printValue(cs.Chunk.Constants[constant])
	} else {
		fmt.Print("???")
	}
	fmt.Println("'")
	return offset + 3
}

// byteInstruction prints an instruction with a single 16-bit little-endian
// operand.
func (cs *CompilerState) byteInstruction(offset int) int {
	if offset+2 >= len(cs.Chunk.Code) {
		fmt.Println(" [incomplete]")
		return offset + 1
	}

	slot := uint16(cs.Chunk.Code[offset+1]) | (uint16(cs.Chunk.Code[offset+2]) << 8)
	fmt.Printf(" %4d\n", slot)
	return offset + 3
}

// doubleByteInstruction prints an instruction with two 16-bit little-endian
// operands.
func (cs *CompilerState) doubleByteInstruction(offset int) int {
	if offset+4 >= len(cs.Chunk.Code) {
		fmt.Println(" [incomplete]")
		return offset + 1
	}

	arg1 := uint16(cs.Chunk.Code[offset+1]) | (uint16(cs.Chunk.Code[offset+2]) << 8)
	arg2 := uint16(cs.Chunk.Code[offset+3]) | (uint16(cs.Chunk.Code[offset+4]) << 8)
	fmt.Printf(" %4d %4d\n", arg1, arg2)
	return offset + 5
}

// jumpInstruction prints a jump with its 16-bit distance and the resulting
// target offset; sign is +1 for forward jumps and -1 for backward jumps.
func (cs *CompilerState) jumpInstruction(offset int, sign int) int {
	if offset+2 >= len(cs.Chunk.Code) {
		fmt.Println(" [incomplete]")
		return offset + 1
	}

	jump := uint16(cs.Chunk.Code[offset+1]) | (uint16(cs.Chunk.Code[offset+2]) << 8)
	target := offset + 3 + sign*int(jump)
	fmt.Printf(" %4d -> %d\n", jump, target)
	return offset + 3
}

// printValue prints a constant in a short human-readable form. Strings longer
// than 50 bytes are cut to their first 47 bytes followed by "...".
func (cs *CompilerState) printValue(value Value) {
	switch value.Type {
	case ValueNil:
		fmt.Print("nil")
	case ValueBool:
		if value.Data.(bool) {
			fmt.Print("true")
		} else {
			fmt.Print("false")
		}
	case ValueNumber:
		fmt.Printf("%.6g", value.Data.(float64))
	case ValueString:
		str := value.Data.(string)
		if len(str) > 50 {
			fmt.Printf("\"%s...\"", str[:47])
		} else {
			fmt.Printf("\"%s\"", str)
		}
	default:
		fmt.Printf("<%s>", cs.valueTypeString(value.Type))
	}
}

// valueTypeString returns the display name of a non-primitive value type, or
// "unknown" for types it does not list.
func (cs *CompilerState) valueTypeString(vt ValueType) string {
	switch vt {
	case ValueTable:
		return "table"
	case ValueFunction:
		return "function"
	case ValueStruct:
		return "struct"
	case ValueArray:
		return "array"
	case ValueUpvalue:
		return "upvalue"
	default:
		return "unknown"
	}
}