From cff5c0ae876b403afa1ea7c931bdf609963036df Mon Sep 17 00:00:00 2001
From: Sky Johnson
Date: Tue, 6 May 2025 11:14:36 -0500
Subject: [PATCH] pass 1

---
 compiler/compiler.go |  86 ++++++++++++++++++++++
 go.mod               |   3 +
 lexer/lexer.go       | 168 +++++++++++++++++++++++++++++++++++++++++++
 main.go              |  52 ++++++++++++++
 parser/ast.go        |  91 +++++++++++++++++++++++
 parser/parser.go     | 155 +++++++++++++++++++++++++++++++++++++++
 types/types.go       |  40 +++++++++++
 vm/vm.go             | 125 ++++++++++++++++++++++++++++++++
 8 files changed, 720 insertions(+)
 create mode 100644 compiler/compiler.go
 create mode 100644 go.mod
 create mode 100644 lexer/lexer.go
 create mode 100644 main.go
 create mode 100644 parser/ast.go
 create mode 100644 parser/parser.go
 create mode 100644 types/types.go
 create mode 100644 vm/vm.go

diff --git a/compiler/compiler.go b/compiler/compiler.go
new file mode 100644
index 0000000..eb7615c
--- /dev/null
+++ b/compiler/compiler.go
@@ -0,0 +1,86 @@
+// Package compiler translates a parsed Mako program into bytecode for the vm
+// package.
+package compiler
+
+import (
+	"git.sharkk.net/Sharkk/Mako/parser"
+	"git.sharkk.net/Sharkk/Mako/vm"
+)
+
+// Compile converts the AST in program to bytecode. Statements are compiled in
+// source order. Statements that are missing a value or use a node type the
+// compiler does not know are skipped, so the instruction stream never pops a
+// value that was not pushed. A nil program yields empty bytecode.
+func Compile(program *parser.Program) *vm.Bytecode {
+	c := &compiler{
+		constants:    []any{},
+		instructions: []vm.Instruction{},
+	}
+
+	if program != nil {
+		for _, stmt := range program.Statements {
+			c.compileStatement(stmt)
+		}
+	}
+
+	return &vm.Bytecode{
+		Constants:    c.constants,
+		Instructions: c.instructions,
+	}
+}
+
+// compiler accumulates the constant pool and instruction stream for one
+// Compile call.
+type compiler struct {
+	constants    []any
+	instructions []vm.Instruction
+}
+
+// compileStatement emits the instructions for a single statement. Nothing is
+// emitted when the statement's value cannot be compiled; emitting the
+// consuming opcode alone would make the VM pop from an empty stack.
+func (c *compiler) compileStatement(stmt parser.Statement) {
+	switch s := stmt.(type) {
+	case *parser.VariableStatement:
+		if s == nil || s.Name == nil || !c.compileExpression(s.Value) {
+			return
+		}
+		c.emit(vm.OpSetGlobal, c.addConstant(s.Name.Value))
+	case *parser.EchoStatement:
+		if s == nil || !c.compileExpression(s.Value) {
+			return
+		}
+		c.emit(vm.OpEcho, 0)
+	}
+}
+
+// compileExpression emits the instruction that pushes expr's value and
+// reports whether it emitted anything. It returns false for a nil or
+// unsupported expression.
+func (c *compiler) compileExpression(expr parser.Expression) bool {
+	switch e := expr.(type) {
+	case *parser.StringLiteral:
+		c.emit(vm.OpConstant, c.addConstant(e.Value))
+	case *parser.NumberLiteral:
+		c.emit(vm.OpConstant, c.addConstant(e.Value))
+	case *parser.Identifier:
+		c.emit(vm.OpGetGlobal, c.addConstant(e.Value))
+	default:
+		return false
+	}
+	return true
+}
+
+// addConstant appends value to the constant pool and returns its index.
+func (c *compiler) addConstant(value any) int {
+	c.constants = append(c.constants, value)
+	return len(c.constants) - 1
+}
+
+// emit appends one instruction with the given opcode and operand.
+func (c *compiler) emit(op vm.Opcode, operand int) {
+	c.instructions = append(c.instructions, vm.Instruction{
+		Opcode:  op,
+		Operand: operand,
+	})
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..9130d44
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module git.sharkk.net/Sharkk/Mako
+
+go 1.24.1
diff --git a/lexer/lexer.go b/lexer/lexer.go
new file mode 100644
index 0000000..75382aa
--- /dev/null
+++ b/lexer/lexer.go
@@ -0,0 +1,168 @@
+// Package lexer splits Mako source text into tokens.
+package lexer
+
+// TokenType identifies the kind of a Token.
+type TokenType byte
+
+// Token kinds. TokenIllegal marks a byte that starts no token or a string
+// with no closing quote; it is declared last so the earlier kinds keep their
+// values.
+const (
+	TokenEOF TokenType = iota
+	TokenIdentifier
+	TokenString
+	TokenNumber
+	TokenEqual
+	TokenEcho
+	TokenSemicolon
+	TokenIllegal
+)
+
+// Token is one lexical unit: its kind and the source text it carries.
+type Token struct {
+	Type  TokenType
+	Value string
+}
+
+// Lexer scans an input string one byte at a time. ch is the byte at pos, or 0
+// once the input is exhausted; readPos is the index of the next byte to read.
+type Lexer struct {
+	input   string
+	pos     int
+	readPos int
+	ch      byte
+}
+
+// New returns a Lexer positioned on the first byte of input.
+func New(input string) *Lexer {
+	l := &Lexer{input: input}
+	l.readChar()
+	return l
+}
+
+// readChar advances to the next byte. At the end of input it sets ch to 0 and
+// parks pos and readPos at len(input), so repeated reads past the end cannot
+// move pos beyond the string and break later slicing.
+func (l *Lexer) readChar() {
+	if l.readPos >= len(l.input) {
+		l.ch = 0
+		l.pos = len(l.input)
+		l.readPos = len(l.input)
+		return
+	}
+	l.ch = l.input[l.readPos]
+	l.pos = l.readPos
+	l.readPos++
+}
+
+// peekChar returns the byte after ch without consuming it, or 0 at the end of
+// input.
+func (l *Lexer) peekChar() byte {
+	if l.readPos >= len(l.input) {
+		return 0
+	}
+	return l.input[l.readPos]
+}
+
+// NextToken skips whitespace and returns the next token. It returns TokenEOF
+// at the end of input, and TokenIllegal for a byte that starts no token or a
+// string with no closing quote, so the parser can report the problem instead
+// of mistaking it for the end of the program.
+func (l *Lexer) NextToken() Token {
+	l.skipWhitespace()
+
+	var tok Token
+	switch l.ch {
+	case '=':
+		tok = Token{Type: TokenEqual, Value: "="}
+	case ';':
+		tok = Token{Type: TokenSemicolon, Value: ";"}
+	case '"':
+		// readString consumes the closing quote itself.
+		value, ok := l.readString()
+		if !ok {
+			return Token{Type: TokenIllegal, Value: `"` + value}
+		}
+		return Token{Type: TokenString, Value: value}
+	case 0:
+		tok = Token{Type: TokenEOF, Value: ""}
+	default:
+		// The read helpers stop on the first byte after the token, so these
+		// branches return without the trailing readChar.
+		switch {
+		case isLetter(l.ch):
+			word := l.readIdentifier()
+			if word == "echo" {
+				return Token{Type: TokenEcho, Value: word}
+			}
+			return Token{Type: TokenIdentifier, Value: word}
+		case isDigit(l.ch):
+			return Token{Type: TokenNumber, Value: l.readNumber()}
+		}
+		tok = Token{Type: TokenIllegal, Value: l.input[l.pos : l.pos+1]}
+	}
+
+	l.readChar()
+	return tok
+}
+
+// skipWhitespace advances past spaces, tabs, and line breaks.
+func (l *Lexer) skipWhitespace() {
+	for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
+		l.readChar()
+	}
+}
+
+// readIdentifier consumes a run of letters, digits, and underscores and
+// returns it.
+func (l *Lexer) readIdentifier() string {
+	start := l.pos
+	for isLetter(l.ch) || isDigit(l.ch) {
+		l.readChar()
+	}
+	return l.input[start:l.pos]
+}
+
+// readNumber consumes digits with an optional fractional part and returns the
+// text. The dot is taken only when a digit follows it, so "1." and "1.x" lex
+// the integer first.
+func (l *Lexer) readNumber() string {
+	start := l.pos
+	for isDigit(l.ch) {
+		l.readChar()
+	}
+	if l.ch == '.' && isDigit(l.peekChar()) {
+		l.readChar() // consume the dot
+		for isDigit(l.ch) {
+			l.readChar()
+		}
+	}
+	return l.input[start:l.pos]
+}
+
+// readString consumes a double-quoted string starting at the opening quote
+// and returns its contents without the quotes. ok is false when no closing
+// quote is found. Escape sequences are not interpreted.
+func (l *Lexer) readString() (value string, ok bool) {
+	start := l.pos + 1
+	for {
+		l.readChar()
+		if l.ch == '"' || l.ch == 0 {
+			break
+		}
+	}
+	value = l.input[start:l.pos]
+	ok = l.ch == '"'
+	l.readChar() // skip the closing quote
+	return value, ok
+}
+
+// isLetter reports whether ch is an ASCII letter or an underscore.
+func isLetter(ch byte) bool {
+	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
+}
+
+// isDigit reports whether ch is an ASCII digit.
+func isDigit(ch byte) bool {
+	return '0' <= ch && ch <= '9'
+}
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..faad6c4
--- /dev/null
+++ b/main.go
@@ -0,0 +1,52 @@
+// Mako is an interactive interpreter. It reads one line at a time from
+// standard input, parses and compiles it, and runs it on a single VM, so
+// globals set on one line are visible on later lines.
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+
+	"git.sharkk.net/Sharkk/Mako/compiler"
+	"git.sharkk.net/Sharkk/Mako/lexer"
+	"git.sharkk.net/Sharkk/Mako/parser"
+	"git.sharkk.net/Sharkk/Mako/vm"
+)
+
+func main() {
+	scanner := bufio.NewScanner(os.Stdin)
+	virtualMachine := vm.New()
+
+	fmt.Println("LuaGo Interpreter (type 'exit' to quit)")
+	for {
+		fmt.Print(">> ")
+		if !scanner.Scan() {
+			break
+		}
+
+		input := scanner.Text()
+		if input == "exit" {
+			break
+		}
+
+		p := parser.New(lexer.New(input))
+		program := p.ParseProgram()
+
+		if errs := p.Errors(); len(errs) > 0 {
+			for _, msg := range errs {
+				fmt.Printf("Error: %s\n", msg)
+			}
+			continue
+		}
+
+		virtualMachine.Run(compiler.Compile(program))
+	}
+
+	// Scan returns false for both end of input and a read failure; only a
+	// failure sets Err.
+	if err := scanner.Err(); err != nil {
+		fmt.Fprintf(os.Stderr, "reading input: %v\n", err)
+		os.Exit(1)
+	}
+}
diff --git a/parser/ast.go b/parser/ast.go
new file mode 100644
index 0000000..dd3780c
--- /dev/null
+++ b/parser/ast.go
@@ -0,0 +1,91 @@
+package parser
+
+import "git.sharkk.net/Sharkk/Mako/lexer"
+
+// Node is implemented by every AST node.
+type Node interface {
+	// TokenLiteral returns the text of the token the node was built from.
+	TokenLiteral() string
+}
+
+// Statement is a Node that can appear in Program.Statements.
+type Statement interface {
+	Node
+	statementNode()
+}
+
+// Expression is a Node that produces a value.
+type Expression interface {
+	Node
+	expressionNode()
+}
+
+// Program is the root of the AST: the parsed statements in source order.
+type Program struct {
+	Statements []Statement
+}
+
+// TokenLiteral returns the token text of the first statement, or "" for an
+// empty program.
+func (p *Program) TokenLiteral() string {
+	if len(p.Statements) > 0 {
+		return p.Statements[0].TokenLiteral()
+	}
+	return ""
+}
+
+// VariableStatement is an assignment of the form `name = value`.
+type VariableStatement struct {
+	Token lexer.Token
+	Name  *Identifier
+	Value Expression
+}
+
+func (vs *VariableStatement) statementNode() {}
+
+// TokenLiteral returns the text of the statement's token.
+func (vs *VariableStatement) TokenLiteral() string { return vs.Token.Value }
+
+// EchoStatement is a statement of the form `echo value`.
+type EchoStatement struct {
+	Token lexer.Token
+	Value Expression
+}
+
+func (es *EchoStatement) statementNode() {}
+
+// TokenLiteral returns the text of the statement's token.
+func (es *EchoStatement) TokenLiteral() string { return es.Token.Value }
+
+// Identifier is a reference to a variable by name.
+type Identifier struct {
+	Token lexer.Token
+	Value string
+}
+
+func (i *Identifier) expressionNode() {}
+
+// TokenLiteral returns the text of the identifier's token.
+func (i *Identifier) TokenLiteral() string { return i.Token.Value }
+
+// StringLiteral is a string constant. Value is the text between the quotes.
+type StringLiteral struct {
+	Token lexer.Token
+	Value string
+}
+
+func (sl *StringLiteral) expressionNode() {}
+
+// TokenLiteral returns the text of the literal's token.
+func (sl *StringLiteral) TokenLiteral() string { return sl.Token.Value }
+
+// NumberLiteral is a numeric constant. Value is the parsed number.
+type NumberLiteral struct {
+	Token lexer.Token
+	Value float64
+}
+
+func (nl *NumberLiteral) expressionNode() {}
+
+// TokenLiteral returns the text of the literal's token.
+func (nl *NumberLiteral) TokenLiteral() string { return nl.Token.Value }
diff --git a/parser/parser.go b/parser/parser.go
new file mode 100644
index 0000000..459f9bd
--- /dev/null
+++ b/parser/parser.go
@@ -0,0 +1,155 @@
+// Package parser builds an AST from the tokens produced by the lexer package.
+package parser
+
+import (
+	"fmt"
+	"strconv"
+
+	"git.sharkk.net/Sharkk/Mako/lexer"
+)
+
+// Parser turns a token stream into a Program using one token of lookahead.
+type Parser struct {
+	l         *lexer.Lexer
+	curToken  lexer.Token
+	peekToken lexer.Token
+	errors    []string
+}
+
+// New returns a Parser reading from l with the current and lookahead tokens
+// already loaded.
+func New(l *lexer.Lexer) *Parser {
+	p := &Parser{l: l, errors: []string{}}
+	p.nextToken()
+	p.nextToken()
+	return p
+}
+
+// nextToken shifts the lookahead into curToken and reads a new lookahead.
+func (p *Parser) nextToken() {
+	p.curToken = p.peekToken
+	p.peekToken = p.l.NextToken()
+}
+
+// Errors returns the messages recorded while parsing, in the order they
+// occurred.
+func (p *Parser) Errors() []string {
+	return p.errors
+}
+
+// errorf records a formatted parse error.
+func (p *Parser) errorf(format string, args ...any) {
+	p.errors = append(p.errors, fmt.Sprintf(format, args...))
+}
+
+// ParseProgram parses statements until the end of input. Statements that fail
+// to parse are left out of the result and described in Errors.
+func (p *Parser) ParseProgram() *Program {
+	program := &Program{Statements: []Statement{}}
+
+	for p.curToken.Type != lexer.TokenEOF {
+		if stmt := p.parseStatement(); stmt != nil {
+			program.Statements = append(program.Statements, stmt)
+		}
+		p.nextToken()
+	}
+
+	return program
+}
+
+// parseStatement parses the statement starting at curToken. It returns nil
+// for a stray semicolon and, after recording an error, for a token that
+// cannot start a statement or a statement whose value is missing.
+//
+// The concrete results are checked before being returned because a nil
+// *VariableStatement or *EchoStatement stored in a Statement is not equal to
+// nil, and ParseProgram would append it.
+func (p *Parser) parseStatement() Statement {
+	switch p.curToken.Type {
+	case lexer.TokenIdentifier:
+		if p.peekToken.Type != lexer.TokenEqual {
+			p.errorf("unexpected identifier %q: expected '=' after it", p.curToken.Value)
+			return nil
+		}
+		if stmt := p.parseVariableStatement(); stmt != nil {
+			return stmt
+		}
+	case lexer.TokenEcho:
+		if stmt := p.parseEchoStatement(); stmt != nil {
+			return stmt
+		}
+	case lexer.TokenSemicolon:
+		// Empty statement; nothing to parse.
+	default:
+		p.errorf("unexpected token %q", p.curToken.Value)
+	}
+	return nil
+}
+
+// parseVariableStatement parses `name = value` with an optional trailing
+// semicolon. curToken must be the name and peekToken the '='. It returns nil,
+// after recording an error, when the value is missing or invalid.
+func (p *Parser) parseVariableStatement() *VariableStatement {
+	stmt := &VariableStatement{
+		Token: p.curToken,
+		Name:  &Identifier{Token: p.curToken, Value: p.curToken.Value},
+	}
+
+	p.nextToken() // Skip identifier
+	p.nextToken() // Skip =
+
+	if stmt.Value = p.parseExpression(); stmt.Value == nil {
+		return nil
+	}
+	p.skipOptionalSemicolon()
+
+	return stmt
+}
+
+// parseEchoStatement parses `echo value` with an optional trailing semicolon.
+// curToken must be the echo keyword. It returns nil, after recording an
+// error, when the value is missing or invalid.
+func (p *Parser) parseEchoStatement() *EchoStatement {
+	stmt := &EchoStatement{Token: p.curToken}
+
+	p.nextToken() // Skip echo
+
+	if stmt.Value = p.parseExpression(); stmt.Value == nil {
+		return nil
+	}
+	p.skipOptionalSemicolon()
+
+	return stmt
+}
+
+// parseExpression parses the single-token expression at curToken: a string,
+// a number, or an identifier. It records an error and returns nil for
+// anything else.
+func (p *Parser) parseExpression() Expression {
+	switch p.curToken.Type {
+	case lexer.TokenString:
+		return &StringLiteral{Token: p.curToken, Value: p.curToken.Value}
+	case lexer.TokenNumber:
+		num, err := strconv.ParseFloat(p.curToken.Value, 64)
+		if err != nil {
+			p.errorf("could not parse %q as float", p.curToken.Value)
+			return nil
+		}
+		return &NumberLiteral{Token: p.curToken, Value: num}
+	case lexer.TokenIdentifier:
+		return &Identifier{Token: p.curToken, Value: p.curToken.Value}
+	case lexer.TokenEOF:
+		p.errorf("expected a value, got end of input")
+	default:
+		p.errorf("expected a value, got %q", p.curToken.Value)
+	}
+	return nil
+}
+
+// skipOptionalSemicolon advances onto a semicolon that directly follows the
+// current token.
+func (p *Parser) skipOptionalSemicolon() {
+	if p.peekToken.Type == lexer.TokenSemicolon {
+		p.nextToken()
+	}
+}
diff --git a/types/types.go b/types/types.go
new file mode 100644
index 0000000..812d015
--- /dev/null
+++ b/types/types.go
@@ -0,0 +1,40 @@
+// Package types defines the runtime values handled by the VM.
+package types
+
+// ValueType tags the kind of data stored in a Value.
+type ValueType byte
+
+// The value kinds.
+const (
+	TypeNull ValueType = iota
+	TypeNumber
+	TypeString
+	TypeBoolean
+)
+
+// Value is a tagged runtime value. The constructors below store a string,
+// float64, or bool in Data to match Type, and nil for TypeNull.
+type Value struct {
+	Type ValueType
+	Data any
+}
+
+// NewString returns a string Value holding s.
+func NewString(s string) Value {
+	return Value{Type: TypeString, Data: s}
+}
+
+// NewNumber returns a number Value holding n.
+func NewNumber(n float64) Value {
+	return Value{Type: TypeNumber, Data: n}
+}
+
+// NewBoolean returns a boolean Value holding b.
+func NewBoolean(b bool) Value {
+	return Value{Type: TypeBoolean, Data: b}
+}
+
+// NewNull returns the null Value.
+func NewNull() Value {
+	return Value{Type: TypeNull, Data: nil}
+}
diff --git a/vm/vm.go b/vm/vm.go
new file mode 100644
index 0000000..2ffe12c
--- /dev/null
+++ b/vm/vm.go
@@ -0,0 +1,125 @@
+// Package vm executes bytecode produced by the compiler package.
+package vm
+
+import (
+	"fmt"
+
+	"git.sharkk.net/Sharkk/Mako/types"
+)
+
+// Opcode identifies a VM instruction.
+type Opcode byte
+
+// The instruction set. Operands index into Bytecode.Constants.
+//
+//   - OpConstant pushes the constant at the operand index.
+//   - OpSetGlobal pops a value and stores it under the name at the operand index.
+//   - OpGetGlobal pushes the global named at the operand index, or null if unset.
+//   - OpEcho pops a value and prints it.
+const (
+	OpConstant Opcode = iota
+	OpSetGlobal
+	OpGetGlobal
+	OpEcho
+)
+
+// Instruction is one opcode with its operand.
+type Instruction struct {
+	Opcode  Opcode
+	Operand int
+}
+
+// Bytecode is a compiled program: a constant pool and the instructions that
+// refer to it.
+type Bytecode struct {
+	Constants    []any
+	Instructions []Instruction
+}
+
+// initialStackSize is the number of value slots allocated by New.
+const initialStackSize = 1024
+
+// VM is a stack machine. Globals persist across calls to Run.
+type VM struct {
+	constants []any
+	globals   map[string]types.Value
+	stack     []types.Value
+	sp        int // Stack pointer: index of the next free slot
+}
+
+// New returns a VM with no globals and an empty stack.
+func New() *VM {
+	return &VM{
+		globals: make(map[string]types.Value),
+		stack:   make([]types.Value, initialStackSize),
+	}
+}
+
+// Run executes the instructions in bytecode in order, echoing to standard
+// output. A nil bytecode is a no-op.
+//
+// Run panics if an operand is not a valid index into Constants, or if the
+// constant named by OpSetGlobal or OpGetGlobal is not a string.
+func (vm *VM) Run(bytecode *Bytecode) {
+	if bytecode == nil {
+		return
+	}
+	vm.constants = bytecode.Constants
+
+	for _, instruction := range bytecode.Instructions {
+		switch instruction.Opcode {
+		case OpConstant:
+			switch v := vm.constants[instruction.Operand].(type) {
+			case string:
+				vm.push(types.NewString(v))
+			case float64:
+				vm.push(types.NewNumber(v))
+			default:
+				// Push null for an unsupported constant so the instruction
+				// that consumes this value still finds one on the stack.
+				vm.push(types.NewNull())
+			}
+
+		case OpSetGlobal:
+			name := vm.constants[instruction.Operand].(string)
+			vm.globals[name] = vm.pop()
+
+		case OpGetGlobal:
+			name := vm.constants[instruction.Operand].(string)
+			if val, ok := vm.globals[name]; ok {
+				vm.push(val)
+			} else {
+				vm.push(types.NewNull())
+			}
+
+		case OpEcho:
+			switch value := vm.pop(); value.Type {
+			case types.TypeString, types.TypeNumber, types.TypeBoolean:
+				fmt.Println(value.Data)
+			case types.TypeNull:
+				fmt.Println("null")
+			}
+		}
+	}
+}
+
+// push places value on top of the stack. When every allocated slot is in use
+// the stack grows instead of indexing past its end.
+func (vm *VM) push(value types.Value) {
+	if vm.sp == len(vm.stack) {
+		vm.stack = append(vm.stack, value)
+	} else {
+		vm.stack[vm.sp] = value
+	}
+	vm.sp++
+}
+
+// pop removes and returns the top of the stack. Popping an empty stack
+// returns null and leaves sp at zero instead of indexing at -1.
+func (vm *VM) pop() types.Value {
+	if vm.sp == 0 {
+		return types.NewNull()
+	}
+	vm.sp--
+	return vm.stack[vm.sp]
+}