compiler start
This commit is contained in:
parent
30e4b11a96
commit
53cdb95b6e
234
compiler/bytecode.go
Normal file
234
compiler/bytecode.go
Normal file
@ -0,0 +1,234 @@
|
|||||||
|
package compiler
|
||||||
|
|
||||||
|
// Opcode identifies one bytecode operation. It is encoded as a single byte
// in the instruction stream.
type Opcode uint8

// Opcode values are assigned sequentially by iota, so the declaration order
// below fixes each opcode's numeric encoding. Bracketed names list the
// operands the opcode is documented to carry.
const (
	// Stack operations.
	OpLoadConst   Opcode = iota // push a constant [idx]
	OpLoadLocal                 // push a local variable [slot]
	OpStoreLocal                // store the stack top into a local [slot]
	OpLoadGlobal                // push a global variable [idx]
	OpStoreGlobal               // store the stack top into a global [idx]
	OpPop                       // discard the stack top
	OpDup                       // duplicate the stack top

	// Arithmetic operations.
	OpAdd // a + b
	OpSub // a - b
	OpMul // a * b
	OpDiv // a / b
	OpNeg // -a
	OpMod // a % b

	// Comparison operations.
	OpEq  // a == b
	OpNeq // a != b
	OpLt  // a < b
	OpLte // a <= b
	OpGt  // a > b
	OpGte // a >= b

	// Logical operations.
	OpNot // not a
	OpAnd // a and b
	OpOr  // a or b

	// Control flow.
	OpJump        // unconditional jump [offset]
	OpJumpIfTrue  // jump when the stack top is true [offset]
	OpJumpIfFalse // jump when the stack top is false [offset]
	OpCall        // call a function [argCount]
	OpReturn      // return from the function
	OpReturnNil   // return nil from the function

	// Table operations.
	OpNewTable    // create a new empty table
	OpGetIndex    // table[key] -> value
	OpSetIndex    // table[key] = value
	OpGetField    // table.field -> value [fieldIdx]
	OpSetField    // table.field = value [fieldIdx]
	OpTableInsert // insert a value into a table at the next index

	// Struct operations.
	OpNewStruct   // create a struct instance [structId]
	OpGetProperty // struct.field -> value [fieldIdx]
	OpSetProperty // struct.field = value [fieldIdx]
	OpCallMethod  // call a method on a struct [methodIdx, argCount]

	// Function operations.
	OpClosure      // create a closure from a function [funcIdx, upvalueCount]
	OpGetUpvalue   // read an upvalue [idx]
	OpSetUpvalue   // write an upvalue [idx]
	OpCloseUpvalue // close an upvalue (move it to the heap)

	// Array operations.
	OpNewArray    // create a new array [size]
	OpArrayAppend // append a value to an array

	// Type operations.
	OpGetType // get the type of the value on the stack
	OpCast    // cast a value to a type [typeId]

	// I/O operations.
	OpEcho // echo a value to output
	OpExit // exit with a code

	// Special operations.
	OpNoop     // no operation
	OpBreak    // break out of a loop
	OpContinue // continue with the next loop iteration

	// Debug operations.
	OpDebugPrint // debug-print the stack top
	OpDebugStack // debug-print the entire stack
)
|
||||||
|
|
||||||
|
// Instruction represents a single bytecode instruction with operands
|
||||||
|
type Instruction struct {
|
||||||
|
Op Opcode
|
||||||
|
Operands []uint16 // Variable length operands
|
||||||
|
}
|
||||||
|
|
||||||
|
// Chunk represents a compiled chunk of bytecode
|
||||||
|
type Chunk struct {
|
||||||
|
Code []uint8 // Raw bytecode stream
|
||||||
|
Constants []Value // Constant pool
|
||||||
|
Lines []int // Line numbers for debugging
|
||||||
|
Functions []Function // Function definitions
|
||||||
|
Structs []Struct // Struct definitions
|
||||||
|
}
|
||||||
|
|
||||||
|
// Value represents a runtime value in the VM
|
||||||
|
type Value struct {
|
||||||
|
Type ValueType
|
||||||
|
Data any // Actual value data
|
||||||
|
}
|
||||||
|
|
||||||
|
// ValueType is the one-byte tag describing what kind of runtime value a
// Value holds.
type ValueType uint8

// Runtime value tags. They are numbered sequentially by iota, so the order
// of declaration determines each tag's numeric value.
const (
	// ValueNil tags the nil value.
	ValueNil ValueType = iota
	// ValueBool tags a boolean.
	ValueBool
	// ValueNumber tags a number.
	ValueNumber
	// ValueString tags a string.
	ValueString
	// ValueTable tags a table.
	ValueTable
	// ValueFunction tags a function.
	ValueFunction
	// ValueStruct tags a struct.
	ValueStruct
	// ValueArray tags an array.
	ValueArray
	// ValueUpvalue tags an upvalue.
	ValueUpvalue
)
|
||||||
|
|
||||||
|
// Function represents a compiled function
|
||||||
|
type Function struct {
|
||||||
|
Name string // Function name (empty for anonymous)
|
||||||
|
Arity int // Number of parameters
|
||||||
|
Variadic bool // Whether function accepts variable args
|
||||||
|
LocalCount int // Number of local variable slots
|
||||||
|
UpvalCount int // Number of upvalues
|
||||||
|
Chunk Chunk // Function bytecode
|
||||||
|
Defaults []Value // Default parameter values
|
||||||
|
}
|
||||||
|
|
||||||
|
// Struct represents a compiled struct definition
|
||||||
|
type Struct struct {
|
||||||
|
Name string // Struct name
|
||||||
|
Fields []StructField // Field definitions
|
||||||
|
Methods map[string]uint16 // Method name -> function index
|
||||||
|
ID uint16 // Unique struct identifier
|
||||||
|
}
|
||||||
|
|
||||||
|
// StructField represents a field in a struct
|
||||||
|
type StructField struct {
|
||||||
|
Name string // Field name
|
||||||
|
Type ValueType // Field type
|
||||||
|
Offset uint16 // Offset in struct layout
|
||||||
|
}
|
||||||
|
|
||||||
|
// Table represents a key-value table/map
|
||||||
|
type Table struct {
|
||||||
|
Array map[int]Value // Array part (integer keys)
|
||||||
|
Hash map[string]Value // Hash part (string keys)
|
||||||
|
Meta *Table // Metatable for operations
|
||||||
|
}
|
||||||
|
|
||||||
|
// Array represents a dynamic array
|
||||||
|
type Array struct {
|
||||||
|
Elements []Value // Array elements
|
||||||
|
Count int // Current element count
|
||||||
|
Capacity int // Current capacity
|
||||||
|
}
|
||||||
|
|
||||||
|
// StructInstance represents an instance of a struct
|
||||||
|
type StructInstance struct {
|
||||||
|
StructID uint16 // Reference to struct definition
|
||||||
|
Fields map[string]Value // Field values
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upvalue represents a captured variable
|
||||||
|
type Upvalue struct {
|
||||||
|
Location *Value // Pointer to actual value location
|
||||||
|
Closed Value // Closed-over value (when moved to heap)
|
||||||
|
IsClosed bool // Whether upvalue has been closed
|
||||||
|
}
|
||||||
|
|
||||||
|
// Instruction encoding helpers
|
||||||
|
|
||||||
|
// EncodeInstruction encodes an instruction into bytecode
|
||||||
|
func EncodeInstruction(op Opcode, operands ...uint16) []uint8 {
|
||||||
|
bytes := []uint8{uint8(op)}
|
||||||
|
for _, operand := range operands {
|
||||||
|
bytes = append(bytes, uint8(operand&0xFF), uint8(operand>>8))
|
||||||
|
}
|
||||||
|
return bytes
|
||||||
|
}
|
||||||
|
|
||||||
|
// DecodeInstruction decodes bytecode into instruction
|
||||||
|
func DecodeInstruction(code []uint8, offset int) (Opcode, []uint16, int) {
|
||||||
|
if offset >= len(code) {
|
||||||
|
return OpNoop, nil, offset
|
||||||
|
}
|
||||||
|
|
||||||
|
op := Opcode(code[offset])
|
||||||
|
operands := []uint16{}
|
||||||
|
nextOffset := offset + 1
|
||||||
|
|
||||||
|
// Decode operands based on instruction type
|
||||||
|
operandCount := GetOperandCount(op)
|
||||||
|
for range operandCount {
|
||||||
|
if nextOffset+1 >= len(code) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
operand := uint16(code[nextOffset]) | (uint16(code[nextOffset+1]) << 8)
|
||||||
|
operands = append(operands, operand)
|
||||||
|
nextOffset += 2
|
||||||
|
}
|
||||||
|
|
||||||
|
return op, operands, nextOffset
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetOperandCount returns the number of operands for an instruction
|
||||||
|
func GetOperandCount(op Opcode) int {
|
||||||
|
switch op {
|
||||||
|
case OpLoadConst, OpLoadLocal, OpStoreLocal, OpLoadGlobal, OpStoreGlobal:
|
||||||
|
return 1
|
||||||
|
case OpJump, OpJumpIfTrue, OpJumpIfFalse:
|
||||||
|
return 1
|
||||||
|
case OpCall, OpNewStruct, OpGetField, OpSetField, OpGetProperty, OpSetProperty:
|
||||||
|
return 1
|
||||||
|
case OpCallMethod:
|
||||||
|
return 2
|
||||||
|
case OpClosure:
|
||||||
|
return 2
|
||||||
|
case OpNewArray, OpCast:
|
||||||
|
return 1
|
||||||
|
default:
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Instruction size calculation
|
||||||
|
func InstructionSize(op Opcode) int {
|
||||||
|
return 1 + (GetOperandCount(op) * 2) // 1 byte opcode + 2 bytes per operand
|
||||||
|
}
|
290
compiler/compiler.go
Normal file
290
compiler/compiler.go
Normal file
@ -0,0 +1,290 @@
|
|||||||
|
package compiler
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
// Limits enforced by the compiler.
const (
	// MaxLocals is the maximum number of local variables per function.
	MaxLocals = 1 << 8
	// MaxUpvalues is the maximum number of upvalues per function.
	MaxUpvalues = 1 << 8
	// MaxConstants is the maximum number of constants per chunk.
	MaxConstants = 1 << 16
)
|
||||||
|
|
||||||
|
// CompilerState holds state during compilation
|
||||||
|
type CompilerState struct {
|
||||||
|
Chunk *Chunk // Current chunk being compiled
|
||||||
|
Constants map[string]int // Constant pool index mapping
|
||||||
|
Functions []Function // Compiled functions
|
||||||
|
Structs []Struct // Compiled structs
|
||||||
|
Locals []Local // Local variable stack
|
||||||
|
Upvalues []UpvalueRef // Upvalue definitions
|
||||||
|
ScopeDepth int // Current scope nesting level
|
||||||
|
FunctionType FunctionType // Type of function being compiled
|
||||||
|
BreakJumps []int // Break jump addresses for loops
|
||||||
|
ContinueJumps []int // Continue jump addresses for loops
|
||||||
|
LoopStart int // Start of current loop for continue
|
||||||
|
LoopDepth int // Current loop nesting depth
|
||||||
|
}
|
||||||
|
|
||||||
|
// Local is the compiler's record of one local variable.
type Local struct {
	// Name is the variable's name.
	Name string
	// Depth is the scope depth at which the variable was declared.
	Depth int
	// IsCaptured reports whether a closure captures the variable.
	IsCaptured bool
	// Slot is the variable's stack slot index.
	Slot int
}
|
||||||
|
|
||||||
|
// UpvalueRef is the compiler's record of one upvalue reference.
type UpvalueRef struct {
	// Index is the index into the enclosing function's locals or upvalues.
	Index uint8
	// IsLocal is true when a local is captured and false when an upvalue of
	// the enclosing function is captured.
	IsLocal bool
}
|
||||||
|
|
||||||
|
// FunctionType tells the compiler what kind of function body it is compiling.
type FunctionType uint8

// Kinds of function bodies, numbered sequentially by iota.
const (
	// FunctionTypeScript is the top-level script.
	FunctionTypeScript FunctionType = iota
	// FunctionTypeFunction is a regular function.
	FunctionTypeFunction
	// FunctionTypeMethod is a struct method.
	FunctionTypeMethod
)
|
||||||
|
|
||||||
|
// CompileError is a compilation error annotated with the source position it
// refers to.
type CompileError struct {
	// Message describes what went wrong.
	Message string
	// Line and Column give the location the error is reported at.
	Line, Column int
}
|
||||||
|
|
||||||
|
func (ce CompileError) Error() string {
|
||||||
|
return fmt.Sprintf("Compile error at line %d, column %d: %s", ce.Line, ce.Column, ce.Message)
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewCompilerState creates a new compiler state for compilation
|
||||||
|
func NewCompilerState(functionType FunctionType) *CompilerState {
|
||||||
|
return &CompilerState{
|
||||||
|
Chunk: NewChunk(),
|
||||||
|
Constants: make(map[string]int),
|
||||||
|
Functions: make([]Function, 0),
|
||||||
|
Structs: make([]Struct, 0),
|
||||||
|
Locals: make([]Local, 0, MaxLocals),
|
||||||
|
Upvalues: make([]UpvalueRef, 0, MaxUpvalues),
|
||||||
|
ScopeDepth: 0,
|
||||||
|
FunctionType: functionType,
|
||||||
|
BreakJumps: make([]int, 0),
|
||||||
|
ContinueJumps: make([]int, 0),
|
||||||
|
LoopStart: -1,
|
||||||
|
LoopDepth: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewChunk creates a new bytecode chunk
|
||||||
|
func NewChunk() *Chunk {
|
||||||
|
return &Chunk{
|
||||||
|
Code: make([]uint8, 0, 256),
|
||||||
|
Constants: make([]Value, 0, 64),
|
||||||
|
Lines: make([]int, 0, 256),
|
||||||
|
Functions: make([]Function, 0),
|
||||||
|
Structs: make([]Struct, 0),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scope management methods
|
||||||
|
func (cs *CompilerState) BeginScope() {
|
||||||
|
cs.ScopeDepth++
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cs *CompilerState) EndScope() {
|
||||||
|
cs.ScopeDepth--
|
||||||
|
|
||||||
|
// Remove locals that go out of scope
|
||||||
|
for len(cs.Locals) > 0 && cs.Locals[len(cs.Locals)-1].Depth > cs.ScopeDepth {
|
||||||
|
local := cs.Locals[len(cs.Locals)-1]
|
||||||
|
if local.IsCaptured {
|
||||||
|
// Emit close upvalue instruction
|
||||||
|
cs.EmitByte(uint8(OpCloseUpvalue))
|
||||||
|
} else {
|
||||||
|
// Emit pop instruction
|
||||||
|
cs.EmitByte(uint8(OpPop))
|
||||||
|
}
|
||||||
|
cs.Locals = cs.Locals[:len(cs.Locals)-1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Local variable management
|
||||||
|
func (cs *CompilerState) AddLocal(name string) error {
|
||||||
|
if len(cs.Locals) >= MaxLocals {
|
||||||
|
return CompileError{
|
||||||
|
Message: "too many local variables in function",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
local := Local{
|
||||||
|
Name: name,
|
||||||
|
Depth: -1, // Mark as uninitialized
|
||||||
|
IsCaptured: false,
|
||||||
|
Slot: len(cs.Locals),
|
||||||
|
}
|
||||||
|
|
||||||
|
cs.Locals = append(cs.Locals, local)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cs *CompilerState) MarkInitialized() {
|
||||||
|
if len(cs.Locals) > 0 {
|
||||||
|
cs.Locals[len(cs.Locals)-1].Depth = cs.ScopeDepth
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cs *CompilerState) ResolveLocal(name string) int {
|
||||||
|
for i := len(cs.Locals) - 1; i >= 0; i-- {
|
||||||
|
local := &cs.Locals[i]
|
||||||
|
if local.Name == name {
|
||||||
|
if local.Depth == -1 {
|
||||||
|
// Variable used before initialization
|
||||||
|
return -2
|
||||||
|
}
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upvalue management
|
||||||
|
func (cs *CompilerState) AddUpvalue(index uint8, isLocal bool) int {
|
||||||
|
upvalueCount := len(cs.Upvalues)
|
||||||
|
|
||||||
|
// Check if upvalue already exists
|
||||||
|
for i := range upvalueCount {
|
||||||
|
upvalue := &cs.Upvalues[i]
|
||||||
|
if upvalue.Index == index && upvalue.IsLocal == isLocal {
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if upvalueCount >= MaxUpvalues {
|
||||||
|
return -1 // Too many upvalues
|
||||||
|
}
|
||||||
|
|
||||||
|
cs.Upvalues = append(cs.Upvalues, UpvalueRef{
|
||||||
|
Index: index,
|
||||||
|
IsLocal: isLocal,
|
||||||
|
})
|
||||||
|
|
||||||
|
return upvalueCount
|
||||||
|
}
|
||||||
|
|
||||||
|
// Constant pool management
|
||||||
|
func (cs *CompilerState) AddConstant(value Value) int {
|
||||||
|
// Check if constant already exists to avoid duplicates
|
||||||
|
key := cs.valueKey(value)
|
||||||
|
if index, exists := cs.Constants[key]; exists {
|
||||||
|
return index
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(cs.Chunk.Constants) >= MaxConstants {
|
||||||
|
return -1 // Too many constants
|
||||||
|
}
|
||||||
|
|
||||||
|
index := len(cs.Chunk.Constants)
|
||||||
|
cs.Chunk.Constants = append(cs.Chunk.Constants, value)
|
||||||
|
cs.Constants[key] = index
|
||||||
|
return index
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate unique key for value in constant pool
|
||||||
|
func (cs *CompilerState) valueKey(value Value) string {
|
||||||
|
switch value.Type {
|
||||||
|
case ValueNil:
|
||||||
|
return "nil"
|
||||||
|
case ValueBool:
|
||||||
|
if value.Data.(bool) {
|
||||||
|
return "bool:true"
|
||||||
|
}
|
||||||
|
return "bool:false"
|
||||||
|
case ValueNumber:
|
||||||
|
return fmt.Sprintf("number:%g", value.Data.(float64))
|
||||||
|
case ValueString:
|
||||||
|
return fmt.Sprintf("string:%s", value.Data.(string))
|
||||||
|
default:
|
||||||
|
// For complex types, use memory address as fallback
|
||||||
|
return fmt.Sprintf("%T:%p", value.Data, value.Data)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bytecode emission methods
|
||||||
|
func (cs *CompilerState) EmitByte(byte uint8) {
|
||||||
|
cs.Chunk.Code = append(cs.Chunk.Code, byte)
|
||||||
|
cs.Chunk.Lines = append(cs.Chunk.Lines, 0) // Line will be set by caller
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cs *CompilerState) EmitBytes(bytes ...uint8) {
|
||||||
|
for _, b := range bytes {
|
||||||
|
cs.EmitByte(b)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cs *CompilerState) EmitInstruction(op Opcode, operands ...uint16) {
|
||||||
|
bytes := EncodeInstruction(op, operands...)
|
||||||
|
cs.EmitBytes(bytes...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cs *CompilerState) EmitJump(op Opcode) int {
|
||||||
|
cs.EmitByte(uint8(op))
|
||||||
|
cs.EmitByte(0xFF) // Placeholder
|
||||||
|
cs.EmitByte(0xFF) // Placeholder
|
||||||
|
return len(cs.Chunk.Code) - 2 // Return offset of jump address
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cs *CompilerState) PatchJump(offset int) {
|
||||||
|
// Calculate jump distance
|
||||||
|
jump := len(cs.Chunk.Code) - offset - 2
|
||||||
|
|
||||||
|
if jump > 65535 {
|
||||||
|
// Jump too large - would need long jump instruction
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
cs.Chunk.Code[offset] = uint8(jump & 0xFF)
|
||||||
|
cs.Chunk.Code[offset+1] = uint8((jump >> 8) & 0xFF)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Loop management
|
||||||
|
func (cs *CompilerState) EnterLoop() {
|
||||||
|
cs.LoopStart = len(cs.Chunk.Code)
|
||||||
|
cs.LoopDepth++
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cs *CompilerState) ExitLoop() {
|
||||||
|
cs.LoopDepth--
|
||||||
|
if cs.LoopDepth == 0 {
|
||||||
|
cs.LoopStart = -1
|
||||||
|
}
|
||||||
|
|
||||||
|
// Patch break jumps
|
||||||
|
for _, jumpOffset := range cs.BreakJumps {
|
||||||
|
cs.PatchJump(jumpOffset)
|
||||||
|
}
|
||||||
|
cs.BreakJumps = cs.BreakJumps[:0]
|
||||||
|
|
||||||
|
// Patch continue jumps
|
||||||
|
for _, jumpOffset := range cs.ContinueJumps {
|
||||||
|
jump := cs.LoopStart - jumpOffset - 2
|
||||||
|
if jump < 65535 {
|
||||||
|
cs.Chunk.Code[jumpOffset] = uint8(jump & 0xFF)
|
||||||
|
cs.Chunk.Code[jumpOffset+1] = uint8((jump >> 8) & 0xFF)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cs.ContinueJumps = cs.ContinueJumps[:0]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cs *CompilerState) EmitBreak() {
|
||||||
|
jumpOffset := cs.EmitJump(OpJump)
|
||||||
|
cs.BreakJumps = append(cs.BreakJumps, jumpOffset)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cs *CompilerState) EmitContinue() {
|
||||||
|
if cs.LoopStart != -1 {
|
||||||
|
jumpOffset := cs.EmitJump(OpJump)
|
||||||
|
cs.ContinueJumps = append(cs.ContinueJumps, jumpOffset)
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user