compiler start

This commit is contained in:
Sky Johnson 2025-06-11 17:12:33 -05:00
parent 30e4b11a96
commit 53cdb95b6e
2 changed files with 524 additions and 0 deletions

234
compiler/bytecode.go Normal file
View File

@ -0,0 +1,234 @@
package compiler
// Opcode identifies a single bytecode instruction. It is one byte wide and is
// the first byte of every instruction produced by EncodeInstruction.
type Opcode uint8
// Opcode values are assigned sequentially by iota starting at 0, so the order
// of this list defines the numeric encoding; inserting or reordering entries
// changes the value of every opcode that follows.
//
// A bracketed name in a trailing comment (for example [idx]) is an operand the
// instruction is documented to carry. Operands are encoded as 16-bit values
// after the opcode byte; GetOperandCount decides how many are actually decoded.
const (
// Stack Operations
OpLoadConst Opcode = iota // Load constant onto stack [idx]
OpLoadLocal // Load local variable [slot]
OpStoreLocal // Store top of stack to local [slot]
OpLoadGlobal // Load global variable [idx]
OpStoreGlobal // Store top of stack to global [idx]
OpPop // Pop top value from stack
OpDup // Duplicate top value on stack
// Arithmetic Operations
OpAdd // a + b
OpSub // a - b
OpMul // a * b
OpDiv // a / b
OpNeg // -a
OpMod // a % b
// Comparison Operations
OpEq // a == b
OpNeq // a != b
OpLt // a < b
OpLte // a <= b
OpGt // a > b
OpGte // a >= b
// Logical Operations
OpNot // not a
OpAnd // a and b
OpOr // a or b
// Control Flow
OpJump // Unconditional jump [offset]
OpJumpIfTrue // Jump if top of stack is true [offset]
OpJumpIfFalse // Jump if top of stack is false [offset]
OpCall // Call function [argCount]
OpReturn // Return from function
OpReturnNil // Return nil from function
// Table Operations
OpNewTable // Create new empty table
OpGetIndex // table[key] -> value
OpSetIndex // table[key] = value
OpGetField // table.field -> value [fieldIdx]
OpSetField // table.field = value [fieldIdx]
OpTableInsert // Insert value into table at next index
// Struct Operations
OpNewStruct // Create new struct instance [structId]
OpGetProperty // struct.field -> value [fieldIdx]
OpSetProperty // struct.field = value [fieldIdx]
OpCallMethod // Call method on struct [methodIdx, argCount]
// Function Operations
OpClosure // Create closure from function [funcIdx, upvalueCount]
OpGetUpvalue // Get upvalue [idx]
OpSetUpvalue // Set upvalue [idx]
OpCloseUpvalue // Close upvalue (move to heap)
// Array Operations
OpNewArray // Create new array with size [size]
OpArrayAppend // Append value to array
// Type Operations
OpGetType // Get type of value on stack
OpCast // Cast value to type [typeId]
// I/O Operations
OpEcho // Echo value to output
OpExit // Exit with code
// Special Operations
OpNoop // No operation
OpBreak // Break from loop
OpContinue // Continue loop iteration
// Debug Operations
OpDebugPrint // Debug print stack top
OpDebugStack // Debug print entire stack
)
// Instruction is the decoded (structured) form of one bytecode instruction:
// an opcode together with its 16-bit operands. How many operands an opcode
// carries is defined by GetOperandCount.
type Instruction struct {
Op Opcode // Operation to perform
Operands []uint16 // Variable length operands
}
// Chunk is one compiled unit of bytecode together with the tables that the
// bytecode refers to (constants, functions and struct definitions).
type Chunk struct {
Code []uint8 // Raw bytecode stream
Constants []Value // Constant pool
Lines []int // Line numbers for debugging; CompilerState.EmitByte appends one entry per emitted byte
Functions []Function // Function definitions
Structs []Struct // Struct definitions
}
// Value is a tagged runtime value in the VM: Type says which kind of value
// it is and Data carries the payload.
//
// The constant-pool key function in this package reads Data as bool for
// ValueBool, float64 for ValueNumber and string for ValueString; the payload
// types for the remaining kinds are not fixed by the code shown here.
type Value struct {
Type ValueType
Data any // Actual value data
}
// ValueType is the one-byte tag that identifies the kind of a runtime Value.
type ValueType uint8
// ValueType tags, numbered sequentially from 0 by iota. ValueNil is the zero
// value, so a zero-initialized Value is nil-typed.
const (
ValueNil ValueType = iota
ValueBool
ValueNumber
ValueString
ValueTable
ValueFunction
ValueStruct
ValueArray
ValueUpvalue
)
// Function describes a compiled function: its signature metadata, the sizes
// of its local and upvalue tables, and the chunk holding its bytecode.
// Note that Chunk is embedded by value, so copying a Function copies the
// Chunk struct (its slices still share backing storage).
type Function struct {
Name string // Function name (empty for anonymous)
Arity int // Number of parameters
Variadic bool // Whether function accepts variable args
LocalCount int // Number of local variable slots
UpvalCount int // Number of upvalues
Chunk Chunk // Function bytecode
Defaults []Value // Default parameter values
}
// Struct describes a compiled struct definition: its name, its fields, a
// lookup from method name to function index, and a numeric identifier.
type Struct struct {
Name string // Struct name
Fields []StructField // Field definitions
Methods map[string]uint16 // Method name -> function index
ID uint16 // Unique struct identifier
}
// StructField describes one field of a Struct definition.
type StructField struct {
Name string // Field name
Type ValueType // Field type
Offset uint16 // Offset in struct layout
}
// Table is a key-value container with separate storage for integer keys and
// string keys, plus an optional metatable. Both parts are Go maps, so a Table
// must have them allocated before values are written.
type Table struct {
Array map[int]Value // Array part (integer keys)
Hash map[string]Value // Hash part (string keys)
Meta *Table // Metatable for operations
}
// Array is a dynamic array of values. Count and Capacity are tracked as
// explicit fields alongside the Elements slice.
// NOTE(review): nothing in this file shows how Count/Capacity relate to
// len(Elements)/cap(Elements) — confirm the intended invariant with the VM.
type Array struct {
Elements []Value // Array elements
Count int // Current element count
Capacity int // Current capacity
}
// StructInstance is a runtime instance of a struct: the ID of its definition
// plus its field values stored by field name.
type StructInstance struct {
StructID uint16 // Reference to struct definition
Fields map[string]Value // Field values
}
// Upvalue is a variable captured by a closure. It carries both a pointer to
// the variable's location and an owned copy of the value, with IsClosed
// recording which of the two is current.
type Upvalue struct {
Location *Value // Pointer to actual value location
Closed Value // Closed-over value (when moved to heap)
IsClosed bool // Whether upvalue has been closed
}
// Instruction encoding helpers

// EncodeInstruction serializes op and its operands into a newly allocated
// byte slice. The first byte is the opcode; every operand follows as two
// bytes in little-endian order (low byte first, high byte second).
func EncodeInstruction(op Opcode, operands ...uint16) []uint8 {
	encoded := make([]uint8, 1, 1+2*len(operands))
	encoded[0] = uint8(op)
	for i := 0; i < len(operands); i++ {
		lo := uint8(operands[i] & 0xFF)
		hi := uint8(operands[i] >> 8)
		encoded = append(encoded, lo, hi)
	}
	return encoded
}
// DecodeInstruction reads one instruction from code, starting at offset.
//
// It returns the opcode, the decoded 16-bit little-endian operands, and the
// offset of the byte following the instruction. When offset is at or past the
// end of code it returns (OpNoop, nil, offset). If the stream ends before all
// operands expected by GetOperandCount are available, decoding stops early and
// only the operands that were fully present are returned.
func DecodeInstruction(code []uint8, offset int) (Opcode, []uint16, int) {
	if offset >= len(code) {
		return OpNoop, nil, offset
	}

	op := Opcode(code[offset])
	cursor := offset + 1
	expected := GetOperandCount(op)
	operands := make([]uint16, 0)

	for decoded := 0; decoded < expected; decoded++ {
		// Each operand needs two bytes: code[cursor] and code[cursor+1].
		if cursor+1 >= len(code) {
			break
		}
		lo, hi := uint16(code[cursor]), uint16(code[cursor+1])
		operands = append(operands, lo|hi<<8)
		cursor += 2
	}
	return op, operands, cursor
}
// GetOperandCount returns how many 16-bit operands follow op in the encoded
// bytecode stream. Opcodes not listed here take no operands.
//
// The counts follow the bracketed operand lists documented on the opcode
// constants. OpGetUpvalue and OpSetUpvalue are documented as carrying an
// [idx] operand, so they are counted as one-operand instructions; reporting 0
// for them would make the decoder interpret their operand bytes as opcodes.
func GetOperandCount(op Opcode) int {
	switch op {
	case OpLoadConst, OpLoadLocal, OpStoreLocal, OpLoadGlobal, OpStoreGlobal,
		OpJump, OpJumpIfTrue, OpJumpIfFalse,
		OpCall, OpNewStruct, OpGetField, OpSetField, OpGetProperty, OpSetProperty,
		OpGetUpvalue, OpSetUpvalue,
		OpNewArray, OpCast:
		return 1
	case OpCallMethod, OpClosure:
		// [methodIdx, argCount] and [funcIdx, upvalueCount] respectively.
		return 2
	default:
		return 0
	}
}
// InstructionSize returns the encoded length of op in bytes: one byte for
// the opcode plus two bytes for each operand reported by GetOperandCount.
func InstructionSize(op Opcode) int {
	const (
		opcodeBytes  = 1
		operandBytes = 2
	)
	return opcodeBytes + operandBytes*GetOperandCount(op)
}

290
compiler/compiler.go Normal file
View File

@ -0,0 +1,290 @@
package compiler
import "fmt"
// Compiler limits. AddLocal, AddUpvalue and AddConstant refuse to grow their
// tables past these sizes.
// NOTE(review): 256 matches the range of the uint8 UpvalueRef.Index and 65536
// matches the range of a 16-bit operand — presumably that is where these
// numbers come from; confirm before changing either side.
const (
MaxLocals = 256 // Maximum local variables per function
MaxUpvalues = 256 // Maximum upvalues per function
MaxConstants = 65536 // Maximum constants per chunk
)
// CompilerState holds the mutable state used while compiling one function
// (or the top-level script): the chunk being written, the constant-pool
// index, the local/upvalue tables, and bookkeeping for scopes and loops.
// Use NewCompilerState to obtain a value with all tables allocated.
type CompilerState struct {
Chunk *Chunk // Current chunk being compiled
Constants map[string]int // Constant pool index mapping (key built by valueKey -> index in Chunk.Constants)
Functions []Function // Compiled functions
Structs []Struct // Compiled structs
Locals []Local // Local variable stack
Upvalues []UpvalueRef // Upvalue definitions
ScopeDepth int // Current scope nesting level
FunctionType FunctionType // Type of function being compiled
BreakJumps []int // Break jump addresses for loops (operand offsets awaiting a patch)
ContinueJumps []int // Continue jump addresses for loops (operand offsets awaiting a patch)
LoopStart int // Start of current loop for continue; -1 when not inside a loop
LoopDepth int // Current loop nesting depth
}
// Local is the compile-time record of one local variable. AddLocal creates
// it with Depth -1 (declared but not yet initialized); MarkInitialized then
// sets Depth to the scope depth current at that time.
type Local struct {
Name string // Variable name
Depth int // Scope depth where declared; -1 while uninitialized
IsCaptured bool // Whether variable is captured by closure
Slot int // Stack slot index
}
// UpvalueRef is the compile-time description of one captured variable:
// which slot of the enclosing function it refers to, and whether that slot
// is one of the enclosing function's locals or one of its own upvalues.
type UpvalueRef struct {
Index uint8 // Index in enclosing function's locals or upvalues
IsLocal bool // True if captures local, false if captures upvalue
}
// FunctionType tells the compiler what kind of code body it is compiling.
type FunctionType uint8
// FunctionType values, numbered from 0 by iota; FunctionTypeScript is the
// zero value.
const (
FunctionTypeScript FunctionType = iota // Top-level script
FunctionTypeFunction // Regular function
FunctionTypeMethod // Struct method
)
// CompileError is the error type reported by the compiler. It carries a
// human-readable message and the source position it refers to. Line and
// Column are left at their zero value when the reporting site does not set
// them (AddLocal, for example, fills in only Message).
type CompileError struct {
Message string
Line int
Column int
}
// Error implements the error interface. The result has the form
// "Compile error at line L, column C: message".
func (ce CompileError) Error() string {
	const layout = "Compile error at line %d, column %d: %s"
	return fmt.Sprintf(layout, ce.Line, ce.Column, ce.Message)
}
// NewCompilerState returns a CompilerState ready to compile a body of the
// given functionType. It starts with a fresh chunk, empty (non-nil) tables,
// local and upvalue storage pre-sized to MaxLocals and MaxUpvalues, scope and
// loop depth 0, and LoopStart set to -1.
func NewCompilerState(functionType FunctionType) *CompilerState {
	state := new(CompilerState)

	state.FunctionType = functionType
	state.Chunk = NewChunk()
	state.Constants = map[string]int{}
	state.Functions = []Function{}
	state.Structs = []Struct{}

	// Capacity is reserved up front; length stays 0 until locals/upvalues are added.
	state.Locals = make([]Local, 0, MaxLocals)
	state.Upvalues = make([]UpvalueRef, 0, MaxUpvalues)

	// ScopeDepth and LoopDepth start at their zero value.
	state.BreakJumps = []int{}
	state.ContinueJumps = []int{}
	state.LoopStart = -1 // -1 is the "not inside a loop" marker checked by EmitContinue

	return state
}
// NewChunk returns an empty Chunk. Code and Lines start with capacity 256 and
// Constants with capacity 64; Functions and Structs start as empty, non-nil
// slices.
func NewChunk() *Chunk {
	const (
		initialCodeCap     = 256
		initialConstantCap = 64
	)

	chunk := &Chunk{}
	chunk.Code = make([]uint8, 0, initialCodeCap)
	chunk.Lines = make([]int, 0, initialCodeCap)
	chunk.Constants = make([]Value, 0, initialConstantCap)
	chunk.Functions = []Function{}
	chunk.Structs = []Struct{}
	return chunk
}
// Scope management methods

// BeginScope enters a new lexical scope by raising the scope depth by one.
// Every call is expected to be balanced by a later EndScope.
func (cs *CompilerState) BeginScope() {
	cs.ScopeDepth += 1
}
// EndScope leaves the current lexical scope. It lowers the scope depth by one
// and then discards, from the top of the local stack downwards, every local
// whose Depth is greater than the new scope depth. For each discarded local
// it emits one instruction: OpCloseUpvalue when the local was captured by a
// closure, OpPop otherwise.
func (cs *CompilerState) EndScope() {
	cs.ScopeDepth--

	for n := len(cs.Locals); n > 0; n = len(cs.Locals) {
		top := cs.Locals[n-1]
		if top.Depth <= cs.ScopeDepth {
			// This local (and everything below it) belongs to a surviving scope.
			break
		}

		op := OpPop
		if top.IsCaptured {
			op = OpCloseUpvalue
		}
		cs.EmitByte(uint8(op))
		cs.Locals = cs.Locals[:n-1]
	}
}
// Local variable management

// AddLocal declares a new local variable called name and pushes it on the
// local stack. The new local takes the next free slot and is created with
// Depth -1, which marks it as declared but not yet initialized (see
// MarkInitialized). It returns a CompileError when the function already holds
// MaxLocals locals.
func (cs *CompilerState) AddLocal(name string) error {
	slot := len(cs.Locals)
	if slot >= MaxLocals {
		return CompileError{Message: "too many local variables in function"}
	}

	cs.Locals = append(cs.Locals, Local{
		Name:       name,
		Depth:      -1, // uninitialized until MarkInitialized runs
		IsCaptured: false,
		Slot:       slot,
	})
	return nil
}
// MarkInitialized marks the most recently added local as initialized by
// setting its Depth to the current scope depth. It does nothing when there
// are no locals.
func (cs *CompilerState) MarkInitialized() {
	n := len(cs.Locals)
	if n == 0 {
		return
	}
	cs.Locals[n-1].Depth = cs.ScopeDepth
}
// ResolveLocal looks up name among the current locals, searching from the
// most recently declared one backwards so that inner declarations shadow
// outer ones.
//
// It returns the index of the matching local in cs.Locals, -2 if the
// innermost match is still uninitialized (Depth == -1), or -1 if no local has
// that name.
func (cs *CompilerState) ResolveLocal(name string) int {
	const (
		notFound      = -1
		uninitialized = -2
	)

	for slot := len(cs.Locals) - 1; slot >= 0; slot-- {
		if cs.Locals[slot].Name != name {
			continue
		}
		if cs.Locals[slot].Depth == -1 {
			return uninitialized
		}
		return slot
	}
	return notFound
}
// Upvalue management

// AddUpvalue registers a captured variable and returns its position in
// cs.Upvalues. If an upvalue with the same index and isLocal flag is already
// registered, its existing position is returned and nothing is added. It
// returns -1 when a new entry would be needed but MaxUpvalues are already
// registered.
func (cs *CompilerState) AddUpvalue(index uint8, isLocal bool) int {
	for slot := 0; slot < len(cs.Upvalues); slot++ {
		existing := cs.Upvalues[slot]
		if existing.Index == index && existing.IsLocal == isLocal {
			return slot
		}
	}

	next := len(cs.Upvalues)
	if next >= MaxUpvalues {
		return -1
	}
	cs.Upvalues = append(cs.Upvalues, UpvalueRef{Index: index, IsLocal: isLocal})
	return next
}
// Constant pool management

// AddConstant interns value in the current chunk's constant pool and returns
// its index. Values that produce the same valueKey share a single pool entry,
// so adding an already-known constant returns the existing index. It returns
// -1 when the value is new but the pool already holds MaxConstants entries.
func (cs *CompilerState) AddConstant(value Value) int {
	key := cs.valueKey(value)
	if known, ok := cs.Constants[key]; ok {
		return known
	}

	next := len(cs.Chunk.Constants)
	if next >= MaxConstants {
		return -1
	}

	cs.Chunk.Constants = append(cs.Chunk.Constants, value)
	cs.Constants[key] = next
	return next
}
// valueKey builds the string under which value is indexed in the constant
// pool map. Two values with the same key are treated as the same constant.
//
// Keys are "nil", "bool:true"/"bool:false", "number:<%g>" and
// "string:<text>" for the scalar kinds. Every other kind falls back to a key
// made of the payload's dynamic type and its %p formatting, which is an
// address for pointer-like payloads.
//
// The payload is checked with comma-ok type assertions: a Value whose Data
// does not hold the Go type implied by its Type tag (for example ValueNumber
// carrying an int, or a nil payload) gets the fallback key instead of causing
// a panic.
func (cs *CompilerState) valueKey(value Value) string {
	switch value.Type {
	case ValueNil:
		return "nil"
	case ValueBool:
		if b, ok := value.Data.(bool); ok {
			if b {
				return "bool:true"
			}
			return "bool:false"
		}
	case ValueNumber:
		if n, ok := value.Data.(float64); ok {
			return fmt.Sprintf("number:%g", n)
		}
	case ValueString:
		if s, ok := value.Data.(string); ok {
			return "string:" + s
		}
	}

	// Complex kinds, and scalars with an unexpected payload type.
	return fmt.Sprintf("%T:%p", value.Data, value.Data)
}
// Bytecode emission methods

// EmitByte appends one raw byte to the current chunk's code stream and adds a
// matching entry to the chunk's line table, so Lines gains one element per
// emitted byte. The line is recorded as 0 here; the caller is expected to set
// the real line number afterwards.
//
// The parameter is named b rather than byte so that it does not shadow the
// predeclared byte type inside the function.
func (cs *CompilerState) EmitByte(b uint8) {
	cs.Chunk.Code = append(cs.Chunk.Code, b)
	cs.Chunk.Lines = append(cs.Chunk.Lines, 0) // placeholder line number
}
// EmitBytes appends each of the given bytes to the current chunk, in order,
// by passing them one at a time to EmitByte.
func (cs *CompilerState) EmitBytes(bytes ...uint8) {
	for i := 0; i < len(bytes); i++ {
		cs.EmitByte(bytes[i])
	}
}
// EmitInstruction encodes op with its operands via EncodeInstruction and
// appends the resulting bytes to the current chunk.
func (cs *CompilerState) EmitInstruction(op Opcode, operands ...uint16) {
	cs.EmitBytes(EncodeInstruction(op, operands...)...)
}
// EmitJump emits the jump instruction op followed by a two-byte placeholder
// operand (0xFF 0xFF) and returns the offset of the first placeholder byte.
// Pass that offset to PatchJump once the jump target is known.
func (cs *CompilerState) EmitJump(op Opcode) int {
	const placeholder = 0xFF

	cs.EmitByte(uint8(op))
	operandAt := len(cs.Chunk.Code) // the operand starts right after the opcode byte
	cs.EmitByte(placeholder)
	cs.EmitByte(placeholder)
	return operandAt
}
// PatchJump fills in the two-byte operand at offset (as returned by EmitJump)
// so that the jump lands on the current end of the code stream. The stored
// value is the distance from the byte after the operand to the end of the
// code, written little-endian.
//
// If the distance is greater than 65535 it cannot be encoded in two bytes;
// the placeholder is then left untouched and no error is reported.
func (cs *CompilerState) PatchJump(offset int) {
	const maxJump = 65535

	distance := len(cs.Chunk.Code) - (offset + 2)
	if distance <= maxJump {
		code := cs.Chunk.Code
		code[offset] = uint8(distance & 0xFF)
		code[offset+1] = uint8((distance >> 8) & 0xFF)
	}
}
// Loop management

// EnterLoop records the current end of the code stream as the loop start
// (the target used for continue jumps) and increases the loop nesting depth.
//
// NOTE(review): LoopStart is overwritten unconditionally, and ExitLoop only
// ever resets it to -1 at depth 0, so after a nested loop ends the outer
// loop's start is not restored. Confirm whether nested loops need a stack.
func (cs *CompilerState) EnterLoop() {
	here := len(cs.Chunk.Code)
	cs.LoopStart = here
	cs.LoopDepth += 1
}
// ExitLoop closes the loop opened by the matching EnterLoop. It lowers the
// loop depth (resetting LoopStart to -1 when the outermost loop ends), patches
// every pending break jump to land on the current end of the code, patches
// every pending continue jump to land on the loop start, and clears both
// pending lists.
//
// The loop start is captured before LoopStart can be reset. Patching the
// continue jumps after the reset would compute their distance from the -1
// sentinel instead of from the real loop start.
//
// A continue jump goes backwards, so its distance is negative; only the low
// 16 bits are stored (two's complement), little-endian.
// NOTE(review): break jumps patched by PatchJump store an unsigned forward
// distance with the same opcode — confirm how the VM tells the two apart.
func (cs *CompilerState) ExitLoop() {
	loopStart := cs.LoopStart

	cs.LoopDepth--
	if cs.LoopDepth == 0 {
		cs.LoopStart = -1
	}

	// Patch break jumps: they land just past the loop.
	for _, jumpOffset := range cs.BreakJumps {
		cs.PatchJump(jumpOffset)
	}
	cs.BreakJumps = cs.BreakJumps[:0]

	// Patch continue jumps: they land back on the loop start.
	for _, jumpOffset := range cs.ContinueJumps {
		jump := loopStart - jumpOffset - 2
		if jump < 65535 {
			cs.Chunk.Code[jumpOffset] = uint8(jump & 0xFF)
			cs.Chunk.Code[jumpOffset+1] = uint8((jump >> 8) & 0xFF)
		}
	}
	cs.ContinueJumps = cs.ContinueJumps[:0]
}
// EmitBreak emits an unconditional jump with a placeholder target for a
// break statement and queues it so ExitLoop can patch it to the loop's end.
//
// Like EmitContinue, it emits nothing when no loop is being compiled
// (LoopDepth is 0). Without this guard a stray break would stay queued and be
// patched to the end of whichever loop happened to be compiled next.
func (cs *CompilerState) EmitBreak() {
	if cs.LoopDepth <= 0 {
		return
	}
	jumpOffset := cs.EmitJump(OpJump)
	cs.BreakJumps = append(cs.BreakJumps, jumpOffset)
}
// EmitContinue emits an unconditional jump with a placeholder target for a
// continue statement and queues it so ExitLoop can patch it to the loop
// start. It does nothing when LoopStart is -1, i.e. when no loop is active.
func (cs *CompilerState) EmitContinue() {
	if cs.LoopStart == -1 {
		return
	}
	pending := cs.EmitJump(OpJump)
	cs.ContinueJumps = append(cs.ContinueJumps, pending)
}