compiler start
This commit is contained in:
parent
30e4b11a96
commit
53cdb95b6e
234
compiler/bytecode.go
Normal file
234
compiler/bytecode.go
Normal file
@ -0,0 +1,234 @@
|
||||
package compiler
|
||||
|
||||
// Opcode identifies one bytecode instruction and is stored as a single byte.
type Opcode uint8

// Opcode values. They are numbered sequentially by iota, so the order of the
// entries below is significant. A bracketed list names the operands that
// follow the opcode in the instruction stream.
const (
	// Stack
	OpLoadConst   Opcode = iota // [idx] push a constant onto the stack
	OpLoadLocal                 // [slot] push a local variable
	OpStoreLocal                // [slot] store the top of the stack into a local
	OpLoadGlobal                // [idx] push a global variable
	OpStoreGlobal               // [idx] store the top of the stack into a global
	OpPop                       // pop the top value off the stack
	OpDup                       // duplicate the top value of the stack

	// Arithmetic
	OpAdd // a + b
	OpSub // a - b
	OpMul // a * b
	OpDiv // a / b
	OpNeg // -a
	OpMod // a % b

	// Comparison
	OpEq  // a == b
	OpNeq // a != b
	OpLt  // a < b
	OpLte // a <= b
	OpGt  // a > b
	OpGte // a >= b

	// Logic
	OpNot // not a
	OpAnd // a and b
	OpOr  // a or b

	// Control flow
	OpJump        // [offset] jump unconditionally
	OpJumpIfTrue  // [offset] jump when the top of the stack is true
	OpJumpIfFalse // [offset] jump when the top of the stack is false
	OpCall        // [argCount] call a function
	OpReturn      // return from the function
	OpReturnNil   // return nil from the function

	// Tables
	OpNewTable    // create a new, empty table
	OpGetIndex    // table[key] -> value
	OpSetIndex    // table[key] = value
	OpGetField    // [fieldIdx] table.field -> value
	OpSetField    // [fieldIdx] table.field = value
	OpTableInsert // insert a value into a table at the next index

	// Structs
	OpNewStruct   // [structId] create a new struct instance
	OpGetProperty // [fieldIdx] struct.field -> value
	OpSetProperty // [fieldIdx] struct.field = value
	OpCallMethod  // [methodIdx, argCount] call a method on a struct

	// Functions and closures
	OpClosure      // [funcIdx, upvalueCount] create a closure from a function
	OpGetUpvalue   // [idx] get an upvalue
	OpSetUpvalue   // [idx] set an upvalue
	OpCloseUpvalue // close an upvalue (move it to the heap)

	// Arrays
	OpNewArray    // [size] create a new array with the given size
	OpArrayAppend // append a value to an array

	// Types
	OpGetType // get the type of the value on the stack
	OpCast    // [typeId] cast a value to a type

	// I/O
	OpEcho // echo a value to the output
	OpExit // exit with a code

	// Special
	OpNoop     // do nothing
	OpBreak    // break out of a loop
	OpContinue // continue with the next loop iteration

	// Debugging
	OpDebugPrint // debug-print the top of the stack
	OpDebugStack // debug-print the entire stack
)
|
||||
|
||||
// Instruction represents a single bytecode instruction with operands
|
||||
type Instruction struct {
|
||||
Op Opcode
|
||||
Operands []uint16 // Variable length operands
|
||||
}
|
||||
|
||||
// Chunk represents a compiled chunk of bytecode
|
||||
type Chunk struct {
|
||||
Code []uint8 // Raw bytecode stream
|
||||
Constants []Value // Constant pool
|
||||
Lines []int // Line numbers for debugging
|
||||
Functions []Function // Function definitions
|
||||
Structs []Struct // Struct definitions
|
||||
}
|
||||
|
||||
// Value represents a runtime value in the VM
|
||||
type Value struct {
|
||||
Type ValueType
|
||||
Data any // Actual value data
|
||||
}
|
||||
|
||||
// ValueType is the type tag carried by a runtime Value.
type ValueType uint8

// Value type tags, numbered sequentially from zero by iota.
const (
	// ValueNil tags the nil value.
	ValueNil ValueType = iota
	// ValueBool tags a boolean.
	ValueBool
	// ValueNumber tags a number.
	ValueNumber
	// ValueString tags a string.
	ValueString
	// ValueTable tags a table.
	ValueTable
	// ValueFunction tags a function.
	ValueFunction
	// ValueStruct tags a struct.
	ValueStruct
	// ValueArray tags an array.
	ValueArray
	// ValueUpvalue tags an upvalue.
	ValueUpvalue
)
|
||||
|
||||
// Function represents a compiled function
|
||||
type Function struct {
|
||||
Name string // Function name (empty for anonymous)
|
||||
Arity int // Number of parameters
|
||||
Variadic bool // Whether function accepts variable args
|
||||
LocalCount int // Number of local variable slots
|
||||
UpvalCount int // Number of upvalues
|
||||
Chunk Chunk // Function bytecode
|
||||
Defaults []Value // Default parameter values
|
||||
}
|
||||
|
||||
// Struct represents a compiled struct definition
|
||||
type Struct struct {
|
||||
Name string // Struct name
|
||||
Fields []StructField // Field definitions
|
||||
Methods map[string]uint16 // Method name -> function index
|
||||
ID uint16 // Unique struct identifier
|
||||
}
|
||||
|
||||
// StructField represents a field in a struct
|
||||
type StructField struct {
|
||||
Name string // Field name
|
||||
Type ValueType // Field type
|
||||
Offset uint16 // Offset in struct layout
|
||||
}
|
||||
|
||||
// Table represents a key-value table/map
|
||||
type Table struct {
|
||||
Array map[int]Value // Array part (integer keys)
|
||||
Hash map[string]Value // Hash part (string keys)
|
||||
Meta *Table // Metatable for operations
|
||||
}
|
||||
|
||||
// Array represents a dynamic array
|
||||
type Array struct {
|
||||
Elements []Value // Array elements
|
||||
Count int // Current element count
|
||||
Capacity int // Current capacity
|
||||
}
|
||||
|
||||
// StructInstance represents an instance of a struct
|
||||
type StructInstance struct {
|
||||
StructID uint16 // Reference to struct definition
|
||||
Fields map[string]Value // Field values
|
||||
}
|
||||
|
||||
// Upvalue represents a captured variable
|
||||
type Upvalue struct {
|
||||
Location *Value // Pointer to actual value location
|
||||
Closed Value // Closed-over value (when moved to heap)
|
||||
IsClosed bool // Whether upvalue has been closed
|
||||
}
|
||||
|
||||
// Instruction encoding helpers
|
||||
|
||||
// EncodeInstruction encodes an instruction into bytecode
|
||||
func EncodeInstruction(op Opcode, operands ...uint16) []uint8 {
|
||||
bytes := []uint8{uint8(op)}
|
||||
for _, operand := range operands {
|
||||
bytes = append(bytes, uint8(operand&0xFF), uint8(operand>>8))
|
||||
}
|
||||
return bytes
|
||||
}
|
||||
|
||||
// DecodeInstruction decodes bytecode into instruction
|
||||
func DecodeInstruction(code []uint8, offset int) (Opcode, []uint16, int) {
|
||||
if offset >= len(code) {
|
||||
return OpNoop, nil, offset
|
||||
}
|
||||
|
||||
op := Opcode(code[offset])
|
||||
operands := []uint16{}
|
||||
nextOffset := offset + 1
|
||||
|
||||
// Decode operands based on instruction type
|
||||
operandCount := GetOperandCount(op)
|
||||
for range operandCount {
|
||||
if nextOffset+1 >= len(code) {
|
||||
break
|
||||
}
|
||||
operand := uint16(code[nextOffset]) | (uint16(code[nextOffset+1]) << 8)
|
||||
operands = append(operands, operand)
|
||||
nextOffset += 2
|
||||
}
|
||||
|
||||
return op, operands, nextOffset
|
||||
}
|
||||
|
||||
// GetOperandCount returns the number of operands for an instruction
|
||||
func GetOperandCount(op Opcode) int {
|
||||
switch op {
|
||||
case OpLoadConst, OpLoadLocal, OpStoreLocal, OpLoadGlobal, OpStoreGlobal:
|
||||
return 1
|
||||
case OpJump, OpJumpIfTrue, OpJumpIfFalse:
|
||||
return 1
|
||||
case OpCall, OpNewStruct, OpGetField, OpSetField, OpGetProperty, OpSetProperty:
|
||||
return 1
|
||||
case OpCallMethod:
|
||||
return 2
|
||||
case OpClosure:
|
||||
return 2
|
||||
case OpNewArray, OpCast:
|
||||
return 1
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// Instruction size calculation
|
||||
func InstructionSize(op Opcode) int {
|
||||
return 1 + (GetOperandCount(op) * 2) // 1 byte opcode + 2 bytes per operand
|
||||
}
|
290
compiler/compiler.go
Normal file
290
compiler/compiler.go
Normal file
@ -0,0 +1,290 @@
|
||||
package compiler
|
||||
|
||||
import "fmt"
|
||||
|
||||
// Limits enforced by the compiler.
const (
	// MaxLocals is the maximum number of local variables per function.
	MaxLocals = 1 << 8
	// MaxUpvalues is the maximum number of upvalues per function.
	MaxUpvalues = 1 << 8
	// MaxConstants is the maximum number of constants per chunk.
	MaxConstants = 1 << 16
)
|
||||
|
||||
// CompilerState holds state during compilation
|
||||
type CompilerState struct {
|
||||
Chunk *Chunk // Current chunk being compiled
|
||||
Constants map[string]int // Constant pool index mapping
|
||||
Functions []Function // Compiled functions
|
||||
Structs []Struct // Compiled structs
|
||||
Locals []Local // Local variable stack
|
||||
Upvalues []UpvalueRef // Upvalue definitions
|
||||
ScopeDepth int // Current scope nesting level
|
||||
FunctionType FunctionType // Type of function being compiled
|
||||
BreakJumps []int // Break jump addresses for loops
|
||||
ContinueJumps []int // Continue jump addresses for loops
|
||||
LoopStart int // Start of current loop for continue
|
||||
LoopDepth int // Current loop nesting depth
|
||||
}
|
||||
|
||||
// Local describes a local variable while its function is being compiled.
type Local struct {
	// Name is the variable name.
	Name string
	// Depth is the scope depth at which the variable was declared.
	Depth int
	// IsCaptured reports whether a closure captures the variable.
	IsCaptured bool
	// Slot is the variable's stack slot index.
	Slot int
}
|
||||
|
||||
// UpvalueRef describes an upvalue reference while compiling.
type UpvalueRef struct {
	// Index is the index into the enclosing function's locals or upvalues.
	Index uint8
	// IsLocal is true when a local is captured and false when an upvalue is.
	IsLocal bool
}
|
||||
|
||||
// FunctionType tells which kind of function is being compiled.
type FunctionType uint8

// Function kinds, numbered sequentially from zero by iota.
const (
	// FunctionTypeScript is the top-level script.
	FunctionTypeScript FunctionType = iota
	// FunctionTypeFunction is a regular function.
	FunctionTypeFunction
	// FunctionTypeMethod is a struct method.
	FunctionTypeMethod
)
|
||||
|
||||
// CompileError is a compilation error annotated with its source location.
type CompileError struct {
	// Message describes the error.
	Message string
	// Line is the line the error refers to.
	Line int
	// Column is the column the error refers to.
	Column int
}

// Error implements the error interface. It renders the line, the column and
// the message in a single string.
func (ce CompileError) Error() string {
	rendered := fmt.Sprintf(
		"Compile error at line %d, column %d: %s",
		ce.Line, ce.Column, ce.Message,
	)
	return rendered
}
|
||||
|
||||
// NewCompilerState creates a new compiler state for compilation
|
||||
func NewCompilerState(functionType FunctionType) *CompilerState {
|
||||
return &CompilerState{
|
||||
Chunk: NewChunk(),
|
||||
Constants: make(map[string]int),
|
||||
Functions: make([]Function, 0),
|
||||
Structs: make([]Struct, 0),
|
||||
Locals: make([]Local, 0, MaxLocals),
|
||||
Upvalues: make([]UpvalueRef, 0, MaxUpvalues),
|
||||
ScopeDepth: 0,
|
||||
FunctionType: functionType,
|
||||
BreakJumps: make([]int, 0),
|
||||
ContinueJumps: make([]int, 0),
|
||||
LoopStart: -1,
|
||||
LoopDepth: 0,
|
||||
}
|
||||
}
|
||||
|
||||
// NewChunk creates a new bytecode chunk
|
||||
func NewChunk() *Chunk {
|
||||
return &Chunk{
|
||||
Code: make([]uint8, 0, 256),
|
||||
Constants: make([]Value, 0, 64),
|
||||
Lines: make([]int, 0, 256),
|
||||
Functions: make([]Function, 0),
|
||||
Structs: make([]Struct, 0),
|
||||
}
|
||||
}
|
||||
|
||||
// Scope management methods
|
||||
func (cs *CompilerState) BeginScope() {
|
||||
cs.ScopeDepth++
|
||||
}
|
||||
|
||||
func (cs *CompilerState) EndScope() {
|
||||
cs.ScopeDepth--
|
||||
|
||||
// Remove locals that go out of scope
|
||||
for len(cs.Locals) > 0 && cs.Locals[len(cs.Locals)-1].Depth > cs.ScopeDepth {
|
||||
local := cs.Locals[len(cs.Locals)-1]
|
||||
if local.IsCaptured {
|
||||
// Emit close upvalue instruction
|
||||
cs.EmitByte(uint8(OpCloseUpvalue))
|
||||
} else {
|
||||
// Emit pop instruction
|
||||
cs.EmitByte(uint8(OpPop))
|
||||
}
|
||||
cs.Locals = cs.Locals[:len(cs.Locals)-1]
|
||||
}
|
||||
}
|
||||
|
||||
// Local variable management
|
||||
func (cs *CompilerState) AddLocal(name string) error {
|
||||
if len(cs.Locals) >= MaxLocals {
|
||||
return CompileError{
|
||||
Message: "too many local variables in function",
|
||||
}
|
||||
}
|
||||
|
||||
local := Local{
|
||||
Name: name,
|
||||
Depth: -1, // Mark as uninitialized
|
||||
IsCaptured: false,
|
||||
Slot: len(cs.Locals),
|
||||
}
|
||||
|
||||
cs.Locals = append(cs.Locals, local)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cs *CompilerState) MarkInitialized() {
|
||||
if len(cs.Locals) > 0 {
|
||||
cs.Locals[len(cs.Locals)-1].Depth = cs.ScopeDepth
|
||||
}
|
||||
}
|
||||
|
||||
func (cs *CompilerState) ResolveLocal(name string) int {
|
||||
for i := len(cs.Locals) - 1; i >= 0; i-- {
|
||||
local := &cs.Locals[i]
|
||||
if local.Name == name {
|
||||
if local.Depth == -1 {
|
||||
// Variable used before initialization
|
||||
return -2
|
||||
}
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// Upvalue management
|
||||
func (cs *CompilerState) AddUpvalue(index uint8, isLocal bool) int {
|
||||
upvalueCount := len(cs.Upvalues)
|
||||
|
||||
// Check if upvalue already exists
|
||||
for i := range upvalueCount {
|
||||
upvalue := &cs.Upvalues[i]
|
||||
if upvalue.Index == index && upvalue.IsLocal == isLocal {
|
||||
return i
|
||||
}
|
||||
}
|
||||
|
||||
if upvalueCount >= MaxUpvalues {
|
||||
return -1 // Too many upvalues
|
||||
}
|
||||
|
||||
cs.Upvalues = append(cs.Upvalues, UpvalueRef{
|
||||
Index: index,
|
||||
IsLocal: isLocal,
|
||||
})
|
||||
|
||||
return upvalueCount
|
||||
}
|
||||
|
||||
// Constant pool management
|
||||
func (cs *CompilerState) AddConstant(value Value) int {
|
||||
// Check if constant already exists to avoid duplicates
|
||||
key := cs.valueKey(value)
|
||||
if index, exists := cs.Constants[key]; exists {
|
||||
return index
|
||||
}
|
||||
|
||||
if len(cs.Chunk.Constants) >= MaxConstants {
|
||||
return -1 // Too many constants
|
||||
}
|
||||
|
||||
index := len(cs.Chunk.Constants)
|
||||
cs.Chunk.Constants = append(cs.Chunk.Constants, value)
|
||||
cs.Constants[key] = index
|
||||
return index
|
||||
}
|
||||
|
||||
// Generate unique key for value in constant pool
|
||||
func (cs *CompilerState) valueKey(value Value) string {
|
||||
switch value.Type {
|
||||
case ValueNil:
|
||||
return "nil"
|
||||
case ValueBool:
|
||||
if value.Data.(bool) {
|
||||
return "bool:true"
|
||||
}
|
||||
return "bool:false"
|
||||
case ValueNumber:
|
||||
return fmt.Sprintf("number:%g", value.Data.(float64))
|
||||
case ValueString:
|
||||
return fmt.Sprintf("string:%s", value.Data.(string))
|
||||
default:
|
||||
// For complex types, use memory address as fallback
|
||||
return fmt.Sprintf("%T:%p", value.Data, value.Data)
|
||||
}
|
||||
}
|
||||
|
||||
// Bytecode emission methods
|
||||
func (cs *CompilerState) EmitByte(byte uint8) {
|
||||
cs.Chunk.Code = append(cs.Chunk.Code, byte)
|
||||
cs.Chunk.Lines = append(cs.Chunk.Lines, 0) // Line will be set by caller
|
||||
}
|
||||
|
||||
func (cs *CompilerState) EmitBytes(bytes ...uint8) {
|
||||
for _, b := range bytes {
|
||||
cs.EmitByte(b)
|
||||
}
|
||||
}
|
||||
|
||||
func (cs *CompilerState) EmitInstruction(op Opcode, operands ...uint16) {
|
||||
bytes := EncodeInstruction(op, operands...)
|
||||
cs.EmitBytes(bytes...)
|
||||
}
|
||||
|
||||
func (cs *CompilerState) EmitJump(op Opcode) int {
|
||||
cs.EmitByte(uint8(op))
|
||||
cs.EmitByte(0xFF) // Placeholder
|
||||
cs.EmitByte(0xFF) // Placeholder
|
||||
return len(cs.Chunk.Code) - 2 // Return offset of jump address
|
||||
}
|
||||
|
||||
func (cs *CompilerState) PatchJump(offset int) {
|
||||
// Calculate jump distance
|
||||
jump := len(cs.Chunk.Code) - offset - 2
|
||||
|
||||
if jump > 65535 {
|
||||
// Jump too large - would need long jump instruction
|
||||
return
|
||||
}
|
||||
|
||||
cs.Chunk.Code[offset] = uint8(jump & 0xFF)
|
||||
cs.Chunk.Code[offset+1] = uint8((jump >> 8) & 0xFF)
|
||||
}
|
||||
|
||||
// Loop management
|
||||
func (cs *CompilerState) EnterLoop() {
|
||||
cs.LoopStart = len(cs.Chunk.Code)
|
||||
cs.LoopDepth++
|
||||
}
|
||||
|
||||
func (cs *CompilerState) ExitLoop() {
|
||||
cs.LoopDepth--
|
||||
if cs.LoopDepth == 0 {
|
||||
cs.LoopStart = -1
|
||||
}
|
||||
|
||||
// Patch break jumps
|
||||
for _, jumpOffset := range cs.BreakJumps {
|
||||
cs.PatchJump(jumpOffset)
|
||||
}
|
||||
cs.BreakJumps = cs.BreakJumps[:0]
|
||||
|
||||
// Patch continue jumps
|
||||
for _, jumpOffset := range cs.ContinueJumps {
|
||||
jump := cs.LoopStart - jumpOffset - 2
|
||||
if jump < 65535 {
|
||||
cs.Chunk.Code[jumpOffset] = uint8(jump & 0xFF)
|
||||
cs.Chunk.Code[jumpOffset+1] = uint8((jump >> 8) & 0xFF)
|
||||
}
|
||||
}
|
||||
cs.ContinueJumps = cs.ContinueJumps[:0]
|
||||
}
|
||||
|
||||
func (cs *CompilerState) EmitBreak() {
|
||||
jumpOffset := cs.EmitJump(OpJump)
|
||||
cs.BreakJumps = append(cs.BreakJumps, jumpOffset)
|
||||
}
|
||||
|
||||
func (cs *CompilerState) EmitContinue() {
|
||||
if cs.LoopStart != -1 {
|
||||
jumpOffset := cs.EmitJump(OpJump)
|
||||
cs.ContinueJumps = append(cs.ContinueJumps, jumpOffset)
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user