From 80b121a9ab6d401095e1b46e5eb8496423db3f6f Mon Sep 17 00:00:00 2001
From: Sky Johnson
Date: Sun, 8 Jun 2025 21:45:18 -0500
Subject: [PATCH] first commit

---
 .gitignore       |  26 ++++
 README.md        |  51 ++++++++++
 go.mod           |   3 +
 go.sum           |   2 +
 mako.go          |  33 +++++++
 parser/ast.go    |  80 ++++++++++++++++
 parser/lexer.go  | 183 ++++++++++++++++++++++++++++++++++
 parser/parser.go | 228 +++++++++++++++++++++++++++++++++++++++++++++++
 parser/token.go  |  69 ++++++++++++++
 9 files changed, 675 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 go.mod
 create mode 100644 go.sum
 create mode 100644 mako.go
 create mode 100644 parser/ast.go
 create mode 100644 parser/lexer.go
 create mode 100644 parser/parser.go
 create mode 100644 parser/token.go

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..51782d7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,26 @@
+# ---> Go
+# If you prefer the allow list template instead of the deny list, see community template:
+# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
+#
+# Binaries for programs and plugins
+*.exe
+*.exe~
+*.dll
+*.so
+*.dylib
+
+# Test binary, built with `go test -c`
+*.test
+
+# Output of the go coverage tool, specifically when used with LiteIDE
+*.out
+
+# Dependency directories (remove the comment below to include it)
+# vendor/
+
+# Go workspace file
+go.work
+go.work.sum
+
+# env file
+.env
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..2d83f2f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,51 @@
+# Mako
+
+A Lua-inspired scripting language, written in Go.
+
+```
+// C-style comments
+/*
+	C-style multiline comments
+*/
+
+/*
+	The language's intent and design should mimic Lua as much as possible.
+	There is an accessible global table; top-level variables live in it, while
+	variables inside blocks are implicitly local.
+*/
+var = 2 // in the global scope
+var2 = 4 // also in global scope
+echo var + var2 // outputs 6
+
+fn function_name(arg1, arg2)
+	var3 = "hi" // implicitly local to this block
+	var4 = var + var3 // var4 = "2hi"
+	var5 = "hello" + "world"
+	return var5
+end
+
+/*
+	Tables work like Lua's but are ordered hash maps in implementation, and are
+	coerced to arrays if it makes sense to do so. Trailing commas are optional.
+*/
+var6 = {
+	table1 = "foo",
+	"table2" = 42,
+	240 = anothertable
+}
+var6[lol] = "foo"
+
+if condition then
+	// do stuff
+elseif condition2 then
+	// do stuff
+else
+	// do stuff
+end
+
+var7 = condition ? left : right
+
+for k, v in any_table do
+	// ordered hash map, so ipairs/pairs not necessary
+end
+```
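The README's most load-bearing design decision is that tables are insertion-ordered hash maps, so a single `for k, v in t` loop can replace Lua's pairs/ipairs split. Here is a minimal sketch of how the runtime could back that in Go (the `OrderedTable` name and its methods are hypothetical; nothing below is part of this patch):

```
package parser

// OrderedTable is one possible backing for Mako tables: a map gives
// O(1) lookup while a key slice remembers first-insertion order.
type OrderedTable struct {
	keys   []string
	values map[string]any
}

func NewOrderedTable() *OrderedTable {
	return &OrderedTable{values: make(map[string]any)}
}

// Set inserts or updates a key; order is recorded only on first insert.
func (t *OrderedTable) Set(key string, value any) {
	if _, exists := t.values[key]; !exists {
		t.keys = append(t.keys, key)
	}
	t.values[key] = value
}

// Each visits entries in insertion order, which is exactly what the
// README's `for k, v in any_table` loop needs.
func (t *OrderedTable) Each(fn func(key string, value any)) {
	for _, k := range t.keys {
		fn(k, t.values[k])
	}
}
```

The side slice makes iteration deterministic at the cost of a little bookkeeping on insert; coercion to an array form could kick in when the keys turn out to be 1..n.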
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..9130d44
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module git.sharkk.net/Sharkk/Mako
+
+go 1.24.1
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..115793b
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,2 @@
+git.sharkk.net/Go/Assert v0.0.0-20250426205601-1b0e5ea6e7ac h1:B6iLK3nv2ubDfk5Ve9Z2sRPqpTgPWgsm7PyaWlwr3NY=
+git.sharkk.net/Go/Assert v0.0.0-20250426205601-1b0e5ea6e7ac/go.mod h1:7AMVm0RCtLlQfWsnKs6h/IdSfzj52/o0nR03rCW68gM=
diff --git a/mako.go b/mako.go
new file mode 100644
index 0000000..9a8c40a
--- /dev/null
+++ b/mako.go
@@ -0,0 +1,33 @@
+package main
+
+import (
+	"fmt"
+
+	"git.sharkk.net/Sharkk/Mako/parser"
+)
+
+func main() {
+	input := `
+	// This is a comment
+	name = "John"
+	age = 25
+	/* Block comment
+	   Multiple lines */
+	result = age + 10
+	`
+
+	lexer := parser.NewLexer(input)
+	p := parser.NewParser(lexer) // named "p" so it doesn't shadow the parser package
+	program := p.ParseProgram()
+
+	if len(p.Errors()) > 0 {
+		fmt.Println("Parse errors:")
+		for _, err := range p.Errors() {
+			fmt.Printf("  %s\n", err)
+		}
+		return
+	}
+
+	fmt.Println("AST:")
+	fmt.Print(program.String())
+}
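Given the sample input above and the String() methods in parser/ast.go below (numbers render with %.2f), a successful run of this driver should print:

```
AST:
name = "John"
age = 25.00
result = (age + 10.00)
```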
diff --git a/parser/ast.go b/parser/ast.go
new file mode 100644
index 0000000..f1aa459
--- /dev/null
+++ b/parser/ast.go
@@ -0,0 +1,80 @@
+package parser
+
+import "fmt"
+
+// Node represents any node in the AST
+type Node interface {
+	String() string
+}
+
+// Statement represents statement nodes
+type Statement interface {
+	Node
+	statementNode()
+}
+
+// Expression represents expression nodes
+type Expression interface {
+	Node
+	expressionNode()
+}
+
+// Program represents the root of the AST
+type Program struct {
+	Statements []Statement
+}
+
+func (p *Program) String() string {
+	var result string
+	for _, stmt := range p.Statements {
+		result += stmt.String() + "\n"
+	}
+	return result
+}
+
+// AssignStatement represents variable assignment
+type AssignStatement struct {
+	Name  *Identifier
+	Value Expression
+}
+
+func (as *AssignStatement) statementNode() {}
+func (as *AssignStatement) String() string {
+	return fmt.Sprintf("%s = %s", as.Name.String(), as.Value.String())
+}
+
+// Identifier represents identifiers
+type Identifier struct {
+	Value string
+}
+
+func (i *Identifier) expressionNode() {}
+func (i *Identifier) String() string  { return i.Value }
+
+// NumberLiteral represents numeric literals
+type NumberLiteral struct {
+	Value float64
+}
+
+func (nl *NumberLiteral) expressionNode() {}
+func (nl *NumberLiteral) String() string  { return fmt.Sprintf("%.2f", nl.Value) }
+
+// StringLiteral represents string literals
+type StringLiteral struct {
+	Value string
+}
+
+func (sl *StringLiteral) expressionNode() {}
+func (sl *StringLiteral) String() string  { return fmt.Sprintf(`"%s"`, sl.Value) }
+
+// InfixExpression represents binary operations
+type InfixExpression struct {
+	Left     Expression
+	Operator string
+	Right    Expression
+}
+
+func (ie *InfixExpression) expressionNode() {}
+func (ie *InfixExpression) String() string {
+	return fmt.Sprintf("(%s %s %s)", ie.Left.String(), ie.Operator, ie.Right.String())
+}
diff --git a/parser/lexer.go b/parser/lexer.go
new file mode 100644
index 0000000..b94a30f
--- /dev/null
+++ b/parser/lexer.go
@@ -0,0 +1,183 @@
+package parser
+
+// Lexer tokenizes input source code
+type Lexer struct {
+	input        string
+	position     int
+	readPosition int
+	ch           byte
+	line         int
+	column       int
+}
+
+// NewLexer creates a new lexer instance
+func NewLexer(input string) *Lexer {
+	l := &Lexer{
+		input:  input,
+		line:   1,
+		column: 0,
+	}
+	l.readChar()
+	return l
+}
+
+// readChar reads the next character and advances position
+func (l *Lexer) readChar() {
+	if l.readPosition >= len(l.input) {
+		l.ch = 0
+	} else {
+		l.ch = l.input[l.readPosition]
+	}
+	l.position = l.readPosition
+	l.readPosition++
+
+	if l.ch == '\n' {
+		l.line++
+		l.column = 0
+	} else {
+		l.column++
+	}
+}
+
+// peekChar returns the next character without advancing position
+func (l *Lexer) peekChar() byte {
+	if l.readPosition >= len(l.input) {
+		return 0
+	}
+	return l.input[l.readPosition]
+}
+
+// skipWhitespace skips whitespace characters
+func (l *Lexer) skipWhitespace() {
+	for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
+		l.readChar()
+	}
+}
+
+// skipComment skips both line and block comments
+func (l *Lexer) skipComment() {
+	if l.ch == '/' && l.peekChar() == '/' {
+		// Line comment
+		for l.ch != '\n' && l.ch != 0 {
+			l.readChar()
+		}
+	} else if l.ch == '/' && l.peekChar() == '*' {
+		// Block comment
+		l.readChar() // skip '/'
+		l.readChar() // skip '*'
+
+		for {
+			if l.ch == 0 {
+				break
+			}
+			if l.ch == '*' && l.peekChar() == '/' {
+				l.readChar() // skip '*'
+				l.readChar() // skip '/'
+				break
+			}
+			l.readChar()
+		}
+	}
+}
+
+// readIdentifier reads an identifier
+func (l *Lexer) readIdentifier() string {
+	position := l.position
+	for isLetter(l.ch) || isDigit(l.ch) {
+		l.readChar()
+	}
+	return l.input[position:l.position]
+}
+
+// readNumber reads a number (including decimals)
+func (l *Lexer) readNumber() string {
+	position := l.position
+	for isDigit(l.ch) {
+		l.readChar()
+	}
+
+	// Handle decimal points
+	if l.ch == '.' && isDigit(l.peekChar()) {
+		l.readChar()
+		for isDigit(l.ch) {
+			l.readChar()
+		}
+	}
+
+	return l.input[position:l.position]
+}
+
+// readString reads a string literal
+func (l *Lexer) readString() string {
+	position := l.position + 1
+	for {
+		l.readChar()
+		if l.ch == '"' || l.ch == 0 {
+			break
+		}
+	}
+	return l.input[position:l.position]
+}
+
+// NextToken returns the next token from the input
+func (l *Lexer) NextToken() Token {
+	var tok Token
+
+	l.skipWhitespace()
+
+	// Handle comments; loop so consecutive comments are all skipped
+	for l.ch == '/' && (l.peekChar() == '/' || l.peekChar() == '*') {
+		l.skipComment()
+		l.skipWhitespace()
+	}
+
+	tok.Line = l.line
+	tok.Column = l.column
+
+	switch l.ch {
+	case '=':
+		tok = Token{Type: ASSIGN, Literal: string(l.ch), Line: l.line, Column: l.column}
+	case '+':
+		tok = Token{Type: PLUS, Literal: string(l.ch), Line: l.line, Column: l.column}
+	case '-':
+		tok = Token{Type: MINUS, Literal: string(l.ch), Line: l.line, Column: l.column}
+	case '*':
+		tok = Token{Type: STAR, Literal: string(l.ch), Line: l.line, Column: l.column}
+	case '/':
+		tok = Token{Type: SLASH, Literal: string(l.ch), Line: l.line, Column: l.column}
+	case '(':
+		tok = Token{Type: LPAREN, Literal: string(l.ch), Line: l.line, Column: l.column}
+	case ')':
+		tok = Token{Type: RPAREN, Literal: string(l.ch), Line: l.line, Column: l.column}
+	case '"':
+		tok.Type = STRING
+		tok.Literal = l.readString()
+	case 0:
+		tok.Literal = ""
+		tok.Type = EOF
+	default:
+		if isLetter(l.ch) {
+			tok.Literal = l.readIdentifier()
+			tok.Type = lookupIdent(tok.Literal)
+			return tok
+		} else if isDigit(l.ch) {
+			tok.Type = NUMBER
+			tok.Literal = l.readNumber()
+			return tok
+		} else {
+			tok = Token{Type: ILLEGAL, Literal: string(l.ch), Line: l.line, Column: l.column}
+		}
+	}
+
+	l.readChar()
+	return tok
+}
+
+// Helper functions
+func isLetter(ch byte) bool {
+	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
+}
+
+func isDigit(ch byte) bool {
+	return '0' <= ch && ch <= '9'
+}
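To sanity-check the lexer in isolation, a scratch program along these lines works (a hypothetical snippet, not part of the patch; token types print as bare integers until TokenType gains a String method, see the note at the end):

```
package main

import (
	"fmt"

	"git.sharkk.net/Sharkk/Mako/parser"
)

func main() {
	lex := parser.NewLexer(`x = 1 + 2.5 // comments are skipped`)
	// Drain tokens until EOF; each token carries its line and column.
	for tok := lex.NextToken(); tok.Type != parser.EOF; tok = lex.NextToken() {
		fmt.Printf("%d:%d\t%v\t%q\n", tok.Line, tok.Column, tok.Type, tok.Literal)
	}
}
```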
diff --git a/parser/parser.go b/parser/parser.go
new file mode 100644
index 0000000..749172e
--- /dev/null
+++ b/parser/parser.go
@@ -0,0 +1,228 @@
+package parser
+
+import (
+	"fmt"
+	"strconv"
+)
+
+// Parser implements a recursive descent Pratt parser
+type Parser struct {
+	lexer *Lexer
+
+	curToken  Token
+	peekToken Token
+
+	prefixParseFns map[TokenType]func() Expression
+	infixParseFns  map[TokenType]func(Expression) Expression
+
+	errors []string
+}
+
+// NewParser creates a new parser instance
+func NewParser(lexer *Lexer) *Parser {
+	p := &Parser{
+		lexer:  lexer,
+		errors: []string{},
+	}
+
+	p.prefixParseFns = make(map[TokenType]func() Expression)
+	p.registerPrefix(IDENT, p.parseIdentifier)
+	p.registerPrefix(NUMBER, p.parseNumberLiteral)
+	p.registerPrefix(STRING, p.parseStringLiteral)
+	p.registerPrefix(LPAREN, p.parseGroupedExpression)
+
+	p.infixParseFns = make(map[TokenType]func(Expression) Expression)
+	p.registerInfix(PLUS, p.parseInfixExpression)
+	p.registerInfix(MINUS, p.parseInfixExpression)
+	p.registerInfix(SLASH, p.parseInfixExpression)
+	p.registerInfix(STAR, p.parseInfixExpression)
+
+	// Read two tokens, so curToken and peekToken are both set
+	p.nextToken()
+	p.nextToken()
+
+	return p
+}
+
+// registerPrefix registers a prefix parse function
+func (p *Parser) registerPrefix(tokenType TokenType, fn func() Expression) {
+	p.prefixParseFns[tokenType] = fn
+}
+
+// registerInfix registers an infix parse function
+func (p *Parser) registerInfix(tokenType TokenType, fn func(Expression) Expression) {
+	p.infixParseFns[tokenType] = fn
+}
+
+// nextToken advances to the next token
+func (p *Parser) nextToken() {
+	p.curToken = p.peekToken
+	p.peekToken = p.lexer.NextToken()
+}
+
+// ParseProgram parses the entire program
+func (p *Parser) ParseProgram() *Program {
+	program := &Program{}
+	program.Statements = []Statement{}
+
+	for !p.curTokenIs(EOF) {
+		stmt := p.parseStatement()
+		if stmt != nil {
+			program.Statements = append(program.Statements, stmt)
+		}
+		p.nextToken()
+	}
+
+	return program
+}
+
+// parseStatement parses a statement
+func (p *Parser) parseStatement() Statement {
+	if p.curTokenIs(IDENT) && p.peekTokenIs(ASSIGN) {
+		return p.parseAssignStatement()
+	}
+
+	// Skip unknown statements for now
+	return nil
+}
+
+// parseAssignStatement parses variable assignment
+func (p *Parser) parseAssignStatement() *AssignStatement {
+	stmt := &AssignStatement{}
+
+	if !p.curTokenIs(IDENT) {
+		return nil
+	}
+
+	stmt.Name = &Identifier{Value: p.curToken.Literal}
+
+	if !p.expectPeek(ASSIGN) {
+		return nil
+	}
+
+	p.nextToken()
+
+	stmt.Value = p.parseExpression(LOWEST)
+
+	return stmt
+}
+
+// parseExpression parses expressions using Pratt parsing
+func (p *Parser) parseExpression(precedence Precedence) Expression {
+	prefix := p.prefixParseFns[p.curToken.Type]
+	if prefix == nil {
+		p.noPrefixParseFnError(p.curToken.Type)
+		return nil
+	}
+
+	leftExp := prefix()
+
+	for !p.peekTokenIs(EOF) && precedence < p.peekPrecedence() {
+		infix := p.infixParseFns[p.peekToken.Type]
+		if infix == nil {
+			return leftExp
+		}
+
+		p.nextToken()
+		leftExp = infix(leftExp)
+	}
+
+	return leftExp
+}
+
+// Expression parsing functions
+func (p *Parser) parseIdentifier() Expression {
+	return &Identifier{Value: p.curToken.Literal}
+}
+
+func (p *Parser) parseNumberLiteral() Expression {
+	lit := &NumberLiteral{}
+
+	value, err := strconv.ParseFloat(p.curToken.Literal, 64)
+	if err != nil {
+		msg := fmt.Sprintf("could not parse %q as float", p.curToken.Literal)
+		p.errors = append(p.errors, msg)
+		return nil
+	}
+
+	lit.Value = value
+	return lit
+}
+
+func (p *Parser) parseStringLiteral() Expression {
+	return &StringLiteral{Value: p.curToken.Literal}
+}
+
+func (p *Parser) parseGroupedExpression() Expression {
+	p.nextToken()
+
+	exp := p.parseExpression(LOWEST)
+
+	if !p.expectPeek(RPAREN) {
+		return nil
+	}
+
+	return exp
+}
+
+func (p *Parser) parseInfixExpression(left Expression) Expression {
+	expression := &InfixExpression{
+		Left:     left,
+		Operator: p.curToken.Literal,
+	}
+
+	precedence := p.curPrecedence()
+	p.nextToken()
+	expression.Right = p.parseExpression(precedence)
+
+	return expression
+}
+
+// Helper methods
+func (p *Parser) curTokenIs(t TokenType) bool {
+	return p.curToken.Type == t
+}
+
+func (p *Parser) peekTokenIs(t TokenType) bool {
+	return p.peekToken.Type == t
+}
+
+func (p *Parser) expectPeek(t TokenType) bool {
+	if p.peekTokenIs(t) {
+		p.nextToken()
+		return true
+	} else {
+		p.peekError(t)
+		return false
+	}
+}
+
+func (p *Parser) peekError(t TokenType) {
+	msg := fmt.Sprintf("expected next token to be %v, got %v instead",
+		t, p.peekToken.Type)
+	p.errors = append(p.errors, msg)
+}
+
+func (p *Parser) noPrefixParseFnError(t TokenType) {
+	msg := fmt.Sprintf("no prefix parse function for %v found", t)
+	p.errors = append(p.errors, msg)
+}
+
+func (p *Parser) peekPrecedence() Precedence {
+	if prec, ok := precedences[p.peekToken.Type]; ok {
+		return prec
+	}
+	return LOWEST
+}
+
+func (p *Parser) curPrecedence() Precedence {
+	if prec, ok := precedences[p.curToken.Type]; ok {
+		return prec
+	}
+	return LOWEST
+}
+
+// Errors returns all parsing errors
+func (p *Parser) Errors() []string {
+	return p.errors
+}
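The `precedence < p.peekPrecedence()` test in parseExpression is what makes `*` bind tighter than `+`: the loop keeps absorbing operators only while they bind more strongly than the context that called it. A quick end-to-end check (hypothetical snippet, not part of the patch):

```
package main

import (
	"fmt"

	"git.sharkk.net/Sharkk/Mako/parser"
)

func main() {
	p := parser.NewParser(parser.NewLexer(`x = 1 + 2 * 3`))
	program := p.ParseProgram()
	// STAR maps to PRODUCT while PLUS only maps to SUM, so the
	// multiplication nests deeper: prints `x = (1.00 + (2.00 * 3.00))`.
	fmt.Print(program.String())
}
```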
diff --git a/parser/token.go b/parser/token.go
new file mode 100644
index 0000000..dca0bd1
--- /dev/null
+++ b/parser/token.go
@@ -0,0 +1,69 @@
+package parser
+
+// TokenType represents the type of a token
+type TokenType int
+
+const (
+	// Literals
+	IDENT TokenType = iota
+	NUMBER
+	STRING
+
+	// Operators
+	ASSIGN // =
+	PLUS   // +
+	MINUS  // -
+	STAR   // *
+	SLASH  // /
+
+	// Delimiters
+	LPAREN // (
+	RPAREN // )
+
+	// Keywords
+	VAR
+
+	// Special
+	EOF
+	ILLEGAL
+)
+
+// Token represents a single token
+type Token struct {
+	Type    TokenType
+	Literal string
+	Line    int
+	Column  int
+}
+
+// Precedence levels for Pratt parsing
+type Precedence int
+
+const (
+	_ Precedence = iota
+	LOWEST
+	SUM     // +
+	PRODUCT // *
+	PREFIX  // -x, !x
+	CALL    // function()
+)
+
+// precedences maps token types to their precedence levels
+var precedences = map[TokenType]Precedence{
+	PLUS:  SUM,
+	MINUS: SUM,
+	SLASH: PRODUCT,
+	STAR:  PRODUCT,
+}
+
+// lookupIdent checks if an identifier is a keyword
+func lookupIdent(ident string) TokenType {
+	keywords := map[string]TokenType{
+		"var": VAR,
+	}
+
+	if tok, ok := keywords[ident]; ok {
+		return tok
+	}
+	return IDENT
+}
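One loose end worth noting: peekError and noPrefixParseFnError format TokenType with %v, so messages show the enum's raw integer values. A natural follow-up, sketched here but not part of this commit, is a Stringer implementation in parser/token.go (it would need `import "fmt"` there):

```
// String makes TokenType satisfy fmt.Stringer, so %v-formatted
// parser errors show token names instead of raw integers.
func (t TokenType) String() string {
	names := [...]string{
		IDENT: "IDENT", NUMBER: "NUMBER", STRING: "STRING",
		ASSIGN: "ASSIGN", PLUS: "PLUS", MINUS: "MINUS",
		STAR: "STAR", SLASH: "SLASH", LPAREN: "LPAREN",
		RPAREN: "RPAREN", VAR: "VAR", EOF: "EOF", ILLEGAL: "ILLEGAL",
	}
	if int(t) < len(names) && names[t] != "" {
		return names[t]
	}
	return fmt.Sprintf("TokenType(%d)", int(t))
}
```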