first commit

2025-06-08 21:45:18 -05:00 · 2025-06-08 21:45:18 -05:00 · 80b121a9ab
commit 80b121a9ab
9 changed files with 674 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,26 @@
+# ---> Go
+# If you prefer the allow list template instead of the deny list, see community template:
+# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
+#
+# Binaries for programs and plugins
+*.exe
+*.exe~
+*.dll
+*.so
+*.dylib
+
+# Test binary, built with `go test -c`
+*.test
+
+# Output of the go coverage tool, specifically when used with LiteIDE
+*.out
+
+# Dependency directories (remove the comment below to include it)
+# vendor/
+
+# Go workspace file
+go.work
+go.work.sum
+
+# env file
+.env
--- a/README.md
+++ b/README.md
@ -0,0 +1,50 @@
+# Mako
+
+Scripting language!
+
+```
+// C-style comments
+/*
+	C-style multiline comments
+*/
+
+/*
+	The language's intent and design should mimic Lua as much as possible.
+	There is a global table that is accessible, but all variables are implicitly
+	local.
+*/
+var = 2 // in the global scope
+var2 = 4 // also in global scope
+echo var + var2 // outputs 6
+
+fn function_name(arg1, arg2)
+	var3 = "hi" // implicitly local to this block
+	var4 = var + var3 // var4 = "2hi"
+	var5 = "hello" + "world"
+	return var5
+end
+
+/*
+	Tables work like Lua's, but are implemented as ordered hash maps and are coerced to
+	arrays when it makes sense to do so. The trailing comma is optional.
+*/
+var6 = {
+	table1 = "foo",
+	"table2" = 42,
+	240 = anothertable
+}
+var6[lol] = "foo"
+
+if condition then
+	// do stuff
+elseif condition2 then
+	// do stuff
+else
+	// do stuff
+end
+
+var7 = condition ? left : right
+
+for k, v in any_table do
+	// tables are ordered hash maps, so ipairs/pairs are not necessary
+end
+```
--- a/go.mod
+++ b/go.mod
@ -0,0 +1,3 @@
+module git.sharkk.net/Sharkk/Mako
+
+go 1.24.1
--- a/go.sum
+++ b/go.sum
@ -0,0 +1,2 @@
+git.sharkk.net/Go/Assert v0.0.0-20250426205601-1b0e5ea6e7ac h1:B6iLK3nv2ubDfk5Ve9Z2sRPqpTgPWgsm7PyaWlwr3NY=
+git.sharkk.net/Go/Assert v0.0.0-20250426205601-1b0e5ea6e7ac/go.mod h1:7AMVm0RCtLlQfWsnKs6h/IdSfzj52/o0nR03rCW68gM=
--- a/mako.go
+++ b/mako.go
@ -0,0 +1,33 @@
+package main
+
+import (
+	"fmt"
+
+	"git.sharkk.net/Sharkk/Mako/parser"
+)
+
+// main is a small demo driver: it lexes and parses a hard-coded Mako snippet,
+// then prints either the collected parse errors or the resulting AST.
+func main() {
+	input := `
+	// This is a comment
+	name = "John"
+	age = 25
+	/* Block comment
+	   Multiple lines */
+	result = age + 10
+	`
+
+	// Short local names keep the imported parser package from being shadowed,
+	// so the package stays usable for the rest of the function.
+	lex := parser.NewLexer(input)
+	p := parser.NewParser(lex)
+	program := p.ParseProgram()
+
+	if errs := p.Errors(); len(errs) > 0 {
+		fmt.Println("Parse errors:")
+		for _, err := range errs {
+			fmt.Printf("  %s\n", err)
+		}
+		return
+	}
+
+	fmt.Println("AST:")
+	fmt.Print(program.String())
+}
--- a/parser/ast.go
+++ b/parser/ast.go
@ -0,0 +1,80 @@
+package parser
+
+import "fmt"
+
+// Node is implemented by every AST node; String returns the node's textual form.
+type Node interface {
+	String() string
+}
+
+// Statement is a Node that also carries the unexported statementNode marker method.
+type Statement interface {
+	Node
+	statementNode()
+}
+
+// Expression is a Node that also carries the unexported expressionNode marker method.
+type Expression interface {
+	Node
+	expressionNode()
+}
+
+// Program represents the root of the AST.
+type Program struct {
+	Statements []Statement
+}
+
+// String renders every statement in order, each followed by a newline.
+// A program with no statements yields the empty string.
+func (p *Program) String() string {
+	// Append into one growable buffer rather than using += on a string,
+	// which re-copies all of the accumulated text on every iteration.
+	var buf []byte
+	for _, stmt := range p.Statements {
+		buf = append(buf, stmt.String()...)
+		buf = append(buf, '\n')
+	}
+	return string(buf)
+}
+
+// AssignStatement is the AST node for `name = value`.
+type AssignStatement struct {
+	Name  *Identifier
+	Value Expression
+}
+
+// statementNode marks AssignStatement as a Statement.
+func (as *AssignStatement) statementNode() {}
+
+// String renders the assignment as "<name> = <value>".
+func (as *AssignStatement) String() string {
+	target, value := as.Name.String(), as.Value.String()
+	return target + " = " + value
+}
+
+// Identifier is the AST node for a bare name.
+type Identifier struct {
+	Value string
+}
+
+// expressionNode marks Identifier as an Expression.
+func (i *Identifier) expressionNode() {}
+
+// String returns the identifier's name unchanged.
+func (i *Identifier) String() string {
+	name := i.Value
+	return name
+}
+
+// NumberLiteral is the AST node for a numeric literal, stored as a float64.
+type NumberLiteral struct {
+	Value float64
+}
+
+// expressionNode marks NumberLiteral as an Expression.
+func (nl *NumberLiteral) expressionNode() {}
+
+// String formats the value with exactly two digits after the decimal point.
+func (nl *NumberLiteral) String() string {
+	const twoDecimals = "%.2f"
+	return fmt.Sprintf(twoDecimals, nl.Value)
+}
+
+// StringLiteral is the AST node for a string literal; Value holds the text
+// without its surrounding quotes.
+type StringLiteral struct {
+	Value string
+}
+
+// expressionNode marks StringLiteral as an Expression.
+func (sl *StringLiteral) expressionNode() {}
+
+// String returns the value wrapped in double quotes, with no escaping applied.
+func (sl *StringLiteral) String() string {
+	return `"` + sl.Value + `"`
+}
+
+// InfixExpression is the AST node for a binary operation: Left Operator Right.
+type InfixExpression struct {
+	Left     Expression
+	Operator string
+	Right    Expression
+}
+
+// expressionNode marks InfixExpression as an Expression.
+func (ie *InfixExpression) expressionNode() {}
+
+// String renders the operation fully parenthesized, e.g. "(a + b)".
+func (ie *InfixExpression) String() string {
+	lhs := ie.Left.String()
+	rhs := ie.Right.String()
+	return "(" + lhs + " " + ie.Operator + " " + rhs + ")"
+}
--- a/parser/lexer.go
+++ b/parser/lexer.go
@ -0,0 +1,183 @@
+package parser
+
+// Lexer tokenizes input source code, reading it one byte at a time.
+type Lexer struct {
+	input        string // source text being tokenized
+	position     int    // index in input of the byte held in ch
+	readPosition int    // index in input of the next byte to read
+	ch           byte   // current byte; 0 once the input is exhausted
+	line         int    // starts at 1; incremented each time a '\n' is read
+	column       int    // reset to 0 when a '\n' is read, incremented for any other read
+}
+
+// NewLexer returns a Lexer over input, positioned on the first byte and
+// counting lines from 1.
+func NewLexer(input string) *Lexer {
+	lex := new(Lexer)
+	lex.input = input
+	lex.line = 1
+	lex.readChar() // load the first byte into ch
+	return lex
+}
+
+// readChar loads the next byte into ch (0 at end of input), advances both
+// position indexes, and updates the line/column counters.
+func (l *Lexer) readChar() {
+	l.ch = 0
+	if l.readPosition < len(l.input) {
+		l.ch = l.input[l.readPosition]
+	}
+	l.position, l.readPosition = l.readPosition, l.readPosition+1
+
+	if l.ch != '\n' {
+		l.column++
+		return
+	}
+	// A newline starts a fresh line with the column counter back at zero.
+	l.line++
+	l.column = 0
+}
+
+// peekChar reports the byte that readChar would load next, or 0 at end of
+// input, without changing the lexer state.
+func (l *Lexer) peekChar() byte {
+	if next := l.readPosition; next < len(l.input) {
+		return l.input[next]
+	}
+	return 0
+}
+
+// skipWhitespace advances past any run of spaces, tabs, newlines and
+// carriage returns.
+func (l *Lexer) skipWhitespace() {
+	for {
+		switch l.ch {
+		case ' ', '\t', '\n', '\r':
+			l.readChar()
+		default:
+			return
+		}
+	}
+}
+
+// skipComment consumes one comment starting at the current byte: either a
+// "//" line comment or a "/* ... */" block comment. It does nothing when the
+// current position does not start a comment.
+func (l *Lexer) skipComment() {
+	if l.ch != '/' {
+		return
+	}
+
+	switch l.peekChar() {
+	case '/':
+		// Line comment: consume up to, but not including, the newline.
+		for l.ch != '\n' && l.ch != 0 {
+			l.readChar()
+		}
+	case '*':
+		// Block comment: step over the opening "/*".
+		l.readChar()
+		l.readChar()
+
+		// An unterminated block comment simply runs to end of input.
+		for l.ch != 0 {
+			if l.ch == '*' && l.peekChar() == '/' {
+				// Step over the closing "*/".
+				l.readChar()
+				l.readChar()
+				return
+			}
+			l.readChar()
+		}
+	}
+}
+
+// readIdentifier consumes a run of letters, underscores and digits starting at
+// the current byte and returns it.
+func (l *Lexer) readIdentifier() string {
+	start := l.position
+	for {
+		if !isLetter(l.ch) && !isDigit(l.ch) {
+			break
+		}
+		l.readChar()
+	}
+	return l.input[start:l.position]
+}
+
+// readNumber consumes an integer part and, when a '.' is immediately followed
+// by a digit, a fractional part; it returns the consumed text.
+func (l *Lexer) readNumber() string {
+	start := l.position
+
+	skipDigits := func() {
+		for isDigit(l.ch) {
+			l.readChar()
+		}
+	}
+
+	skipDigits()
+	// A '.' not followed by a digit is left for the next token.
+	if l.ch == '.' && isDigit(l.peekChar()) {
+		l.readChar() // consume '.'
+		skipDigits()
+	}
+
+	return l.input[start:l.position]
+}
+
+// readString returns the text between the opening quote under the cursor and
+// the next '"' (or end of input). The cursor is left on the closing quote;
+// no escape sequences are interpreted.
+func (l *Lexer) readString() string {
+	start := l.position + 1 // first byte after the opening quote
+	l.readChar()
+	for l.ch != '"' && l.ch != 0 {
+		l.readChar()
+	}
+	return l.input[start:l.position]
+}
+
+// NextToken returns the next token from the input
+func (l *Lexer) NextToken() Token {
+	var tok Token
+
+	l.skipWhitespace()
+
+	// Handle comments
+	if l.ch == '/' && (l.peekChar() == '/' || l.peekChar() == '*') {
+		l.skipComment()
+		l.skipWhitespace()
+	}
+
+	tok.Line = l.line
+	tok.Column = l.column
+
+	switch l.ch {
+	case '=':
+		tok = Token{Type: ASSIGN, Literal: string(l.ch), Line: l.line, Column: l.column}
+	case '+':
+		tok = Token{Type: PLUS, Literal: string(l.ch), Line: l.line, Column: l.column}
+	case '-':
+		tok = Token{Type: MINUS, Literal: string(l.ch), Line: l.line, Column: l.column}
+	case '*':
+		tok = Token{Type: STAR, Literal: string(l.ch), Line: l.line, Column: l.column}
+	case '/':
+		tok = Token{Type: SLASH, Literal: string(l.ch), Line: l.line, Column: l.column}
+	case '(':
+		tok = Token{Type: LPAREN, Literal: string(l.ch), Line: l.line, Column: l.column}
+	case ')':
+		tok = Token{Type: RPAREN, Literal: string(l.ch), Line: l.line, Column: l.column}
+	case '"':
+		tok.Type = STRING
+		tok.Literal = l.readString()
+	case 0:
+		tok.Literal = ""
+		tok.Type = EOF
+	default:
+		if isLetter(l.ch) {
+			tok.Literal = l.readIdentifier()
+			tok.Type = lookupIdent(tok.Literal)
+			return tok
+		} else if isDigit(l.ch) {
+			tok.Type = NUMBER
+			tok.Literal = l.readNumber()
+			return tok
+		} else {
+			tok = Token{Type: ILLEGAL, Literal: string(l.ch), Line: l.line, Column: l.column}
+		}
+	}
+
+	l.readChar()
+	return tok
+}
+
+// Helper functions
+
+// isLetter reports whether ch is an ASCII letter or an underscore.
+func isLetter(ch byte) bool {
+	switch {
+	case ch >= 'a' && ch <= 'z', ch >= 'A' && ch <= 'Z', ch == '_':
+		return true
+	}
+	return false
+}
+
+// isDigit reports whether ch is an ASCII decimal digit.
+func isDigit(ch byte) bool {
+	if ch < '0' || ch > '9' {
+		return false
+	}
+	return true
+}
--- a/parser/parser.go
+++ b/parser/parser.go
@ -0,0 +1,228 @@
+package parser
+
+import (
+	"fmt"
+	"strconv"
+)
+
+// Parser implements a recursive descent Pratt parser over tokens pulled from a Lexer
+type Parser struct {
+	lexer *Lexer // token source; read via NextToken
+
+	curToken  Token // token currently being examined
+	peekToken Token // token after curToken (one-token lookahead)
+
+	prefixParseFns map[TokenType]func() Expression           // keyed by the token type that starts an expression
+	infixParseFns  map[TokenType]func(Expression) Expression // keyed by operator token type; receives the left operand
+
+	errors []string // messages accumulated while parsing; returned by Errors
+}
+
+// NewParser builds a Parser that reads from lexer, registers the prefix and
+// infix parse functions, and primes the current/lookahead tokens.
+func NewParser(lexer *Lexer) *Parser {
+	p := &Parser{
+		lexer:          lexer,
+		errors:         []string{},
+		prefixParseFns: make(map[TokenType]func() Expression),
+		infixParseFns:  make(map[TokenType]func(Expression) Expression),
+	}
+
+	// Tokens that can begin an expression.
+	p.registerPrefix(IDENT, p.parseIdentifier)
+	p.registerPrefix(NUMBER, p.parseNumberLiteral)
+	p.registerPrefix(STRING, p.parseStringLiteral)
+	p.registerPrefix(LPAREN, p.parseGroupedExpression)
+
+	// Every binary operator shares the same infix handler.
+	for _, op := range []TokenType{PLUS, MINUS, SLASH, STAR} {
+		p.registerInfix(op, p.parseInfixExpression)
+	}
+
+	// Advance twice so that both curToken and peekToken are populated.
+	p.nextToken()
+	p.nextToken()
+
+	return p
+}
+
+// registerPrefix stores fn as the prefix parse function for tokenType, replacing any existing entry
+func (p *Parser) registerPrefix(tokenType TokenType, fn func() Expression) {
+	p.prefixParseFns[tokenType] = fn
+}
+
+// registerInfix stores fn as the infix parse function for tokenType, replacing any existing entry
+func (p *Parser) registerInfix(tokenType TokenType, fn func(Expression) Expression) {
+	p.infixParseFns[tokenType] = fn
+}
+
+// nextToken shifts the lookahead into curToken and pulls a fresh lookahead
+// token from the lexer.
+func (p *Parser) nextToken() {
+	p.curToken, p.peekToken = p.peekToken, p.lexer.NextToken()
+}
+
+// ParseProgram parses tokens until EOF and returns the resulting Program.
+// Positions where parseStatement yields nil are skipped one token at a time.
+func (p *Parser) ParseProgram() *Program {
+	program := &Program{Statements: []Statement{}}
+
+	for ; !p.curTokenIs(EOF); p.nextToken() {
+		if stmt := p.parseStatement(); stmt != nil {
+			program.Statements = append(program.Statements, stmt)
+		}
+	}
+
+	return program
+}
+
+// parseStatement parses the statement starting at the current token. Only
+// assignments (IDENT followed by ASSIGN) are recognized; anything else
+// yields nil so that the caller skips it.
+func (p *Parser) parseStatement() Statement {
+	if p.curTokenIs(IDENT) && p.peekTokenIs(ASSIGN) {
+		// Check the concrete pointer before converting it: returning a nil
+		// *AssignStatement as a Statement would produce a non-nil interface
+		// and slip past the caller's nil check.
+		if stmt := p.parseAssignStatement(); stmt != nil {
+			return stmt
+		}
+		return nil
+	}
+
+	// Skip unknown statements for now
+	return nil
+}
+
+// parseAssignStatement parses `IDENT = expression` starting at the current
+// token. It returns nil when the current token is not an identifier or is not
+// followed by ASSIGN.
+func (p *Parser) parseAssignStatement() *AssignStatement {
+	if !p.curTokenIs(IDENT) {
+		return nil
+	}
+	name := &Identifier{Value: p.curToken.Literal}
+
+	if !p.expectPeek(ASSIGN) {
+		return nil
+	}
+	p.nextToken() // move off '=' onto the first token of the value
+
+	return &AssignStatement{
+		Name:  name,
+		Value: p.parseExpression(LOWEST),
+	}
+}
+
+// parseExpression parses an expression with Pratt parsing: a prefix parse of
+// the current token, then infix operators for as long as the upcoming
+// operator has higher precedence than the given level. It records an error
+// and returns nil when the current token cannot start an expression.
+func (p *Parser) parseExpression(precedence Precedence) Expression {
+	prefix := p.prefixParseFns[p.curToken.Type]
+	if prefix == nil {
+		p.noPrefixParseFnError(p.curToken.Type)
+		return nil
+	}
+
+	left := prefix()
+
+	for {
+		if p.peekTokenIs(EOF) || precedence >= p.peekPrecedence() {
+			break
+		}
+
+		infix := p.infixParseFns[p.peekToken.Type]
+		if infix == nil {
+			break
+		}
+
+		p.nextToken()
+		left = infix(left)
+	}
+
+	return left
+}
+
+// Expression parsing functions
+
+// parseIdentifier wraps the current token's literal in an Identifier node.
+func (p *Parser) parseIdentifier() Expression {
+	name := p.curToken.Literal
+	return &Identifier{Value: name}
+}
+
+// parseNumberLiteral converts the current token's literal to a float64 node.
+// On a conversion failure it records an error and returns nil.
+func (p *Parser) parseNumberLiteral() Expression {
+	raw := p.curToken.Literal
+
+	value, err := strconv.ParseFloat(raw, 64)
+	if err != nil {
+		p.errors = append(p.errors, fmt.Sprintf("could not parse %q as float", raw))
+		return nil
+	}
+
+	return &NumberLiteral{Value: value}
+}
+
+// parseStringLiteral wraps the current token's literal in a StringLiteral node.
+func (p *Parser) parseStringLiteral() Expression {
+	text := p.curToken.Literal
+	return &StringLiteral{Value: text}
+}
+
+// parseGroupedExpression parses a parenthesized expression. The current token
+// is the opening '('; nil is returned (with an error recorded by expectPeek)
+// when the closing ')' is missing.
+func (p *Parser) parseGroupedExpression() Expression {
+	p.nextToken() // step past '('
+
+	inner := p.parseExpression(LOWEST)
+
+	if p.expectPeek(RPAREN) {
+		return inner
+	}
+	return nil
+}
+
+// parseInfixExpression builds a binary-operation node. The current token is
+// the operator; left is the operand already parsed, and the right operand is
+// parsed at the operator's own precedence.
+func (p *Parser) parseInfixExpression(left Expression) Expression {
+	operator := p.curToken.Literal
+	precedence := p.curPrecedence()
+
+	p.nextToken() // move onto the first token of the right operand
+
+	return &InfixExpression{
+		Left:     left,
+		Operator: operator,
+		Right:    p.parseExpression(precedence),
+	}
+}
+
+// Helper methods
+
+// curTokenIs reports whether the current token has type t.
+func (p *Parser) curTokenIs(t TokenType) bool {
+	current := p.curToken.Type
+	return current == t
+}
+
+// peekTokenIs reports whether the lookahead token has type t.
+func (p *Parser) peekTokenIs(t TokenType) bool {
+	upcoming := p.peekToken.Type
+	return upcoming == t
+}
+
+// expectPeek advances to the lookahead token and returns true when it has
+// type t; otherwise it records an error, leaves the position unchanged, and
+// returns false.
+func (p *Parser) expectPeek(t TokenType) bool {
+	if !p.peekTokenIs(t) {
+		p.peekError(t)
+		return false
+	}
+	p.nextToken()
+	return true
+}
+
+// peekError records that the lookahead token was expected to have type t.
+func (p *Parser) peekError(t TokenType) {
+	got := p.peekToken.Type
+	p.errors = append(p.errors,
+		fmt.Sprintf("expected next token to be %v, got %v instead", t, got))
+}
+
+// noPrefixParseFnError records that no prefix parse function is registered
+// for token type t.
+func (p *Parser) noPrefixParseFnError(t TokenType) {
+	p.errors = append(p.errors,
+		fmt.Sprintf("no prefix parse function for %v found", t))
+}
+
+// peekPrecedence returns the precedence of the lookahead token, or LOWEST
+// when its type has no entry in precedences.
+func (p *Parser) peekPrecedence() Precedence {
+	prec, found := precedences[p.peekToken.Type]
+	if !found {
+		return LOWEST
+	}
+	return prec
+}
+
+// curPrecedence returns the precedence of the current token, or LOWEST when
+// its type has no entry in precedences.
+func (p *Parser) curPrecedence() Precedence {
+	prec, found := precedences[p.curToken.Type]
+	if !found {
+		return LOWEST
+	}
+	return prec
+}
+
+// Errors returns the parser's error messages; the returned slice is the parser's own, not a copy
+func (p *Parser) Errors() []string {
+	return p.errors
+}
--- a/parser/token.go
+++ b/parser/token.go
@ -0,0 +1,69 @@
+package parser
+
+// TokenType represents the type of a token
+type TokenType int
+
+const (
+	// Literals
+	IDENT TokenType = iota
+	NUMBER
+	STRING
+
+	// Operators
+	ASSIGN // =
+	PLUS   // +
+	MINUS  // -
+	STAR   // *
+	SLASH  // /
+
+	// Delimiters
+	LPAREN // (
+	RPAREN // )
+
+	// Keywords
+	VAR
+
+	// Special
+	EOF
+	ILLEGAL
+)
+
+// String returns the constant's name, so token types formatted with %v or %s
+// (for example in parser error messages) read as "ASSIGN" rather than as a
+// bare integer. Values outside the declared constants yield "UNKNOWN".
+func (t TokenType) String() string {
+	switch t {
+	case IDENT:
+		return "IDENT"
+	case NUMBER:
+		return "NUMBER"
+	case STRING:
+		return "STRING"
+	case ASSIGN:
+		return "ASSIGN"
+	case PLUS:
+		return "PLUS"
+	case MINUS:
+		return "MINUS"
+	case STAR:
+		return "STAR"
+	case SLASH:
+		return "SLASH"
+	case LPAREN:
+		return "LPAREN"
+	case RPAREN:
+		return "RPAREN"
+	case VAR:
+		return "VAR"
+	case EOF:
+		return "EOF"
+	case ILLEGAL:
+		return "ILLEGAL"
+	default:
+		return "UNKNOWN"
+	}
+}
+
+// Token represents a single token produced by the lexer
+type Token struct {
+	Type    TokenType // kind of token
+	Literal string    // token text; for STRING tokens, the contents without the quotes
+	Line    int       // lexer line counter when the token was started
+	Column  int       // lexer column counter when the token was started
+}
+
+// Precedence levels for Pratt parsing; a larger value binds more tightly
+type Precedence int
+
+const (
+	_ Precedence = iota
+	LOWEST
+	SUM     // + and -
+	PRODUCT // * and /
+	PREFIX  // -x, !x (no token is mapped to this level in the precedences table)
+	CALL    // function() (no token is mapped to this level in the precedences table)
+)
+
+// precedences maps operator token types to their precedence levels; unlisted types are treated as LOWEST
+var precedences = map[TokenType]Precedence{
+	PLUS:  SUM,
+	MINUS: SUM,
+	SLASH: PRODUCT,
+	STAR:  PRODUCT,
+}
+
+// lookupIdent returns the keyword token type for ident, or IDENT when ident
+// is not a keyword.
+//
+// NOTE(review): the README sample uses `var` as an ordinary variable name
+// (`var = 2`), but it is classified as the VAR keyword here, so that line
+// would not parse as an assignment — confirm which is intended.
+func lookupIdent(ident string) TokenType {
+	// A switch avoids building a fresh keyword map for every identifier lexed.
+	switch ident {
+	case "var":
+		return VAR
+	default:
+		return IDENT
+	}
+}