diff --git a/go.mod b/go.mod
index ba8bdda..ec9e50d 100644
--- a/go.mod
+++ b/go.mod
@@ -2,4 +2,4 @@ module git.sharkk.net/Sharkk/Mako
 
 go 1.24.1
 
-require git.sharkk.net/Go/Assert v1.1.0
+require git.sharkk.net/Go/Assert v1.2.0
diff --git a/go.sum b/go.sum
index 54a850a..67378a1 100644
--- a/go.sum
+++ b/go.sum
@@ -1,2 +1,4 @@
 git.sharkk.net/Go/Assert v1.1.0 h1:1Nbu8C9vmv3gXaLR4S+NBXfQ01gnh3IHHD7PQRIVIe8=
 git.sharkk.net/Go/Assert v1.1.0/go.mod h1:7AMVm0RCtLlQfWsnKs6h/IdSfzj52/o0nR03rCW68gM=
+git.sharkk.net/Go/Assert v1.2.0 h1:E8N2JGagU9oQILSrLZZBKQF7q91sqkR2inpvDIIENfc=
+git.sharkk.net/Go/Assert v1.2.0/go.mod h1:7AMVm0RCtLlQfWsnKs6h/IdSfzj52/o0nR03rCW68gM=
diff --git a/parser/parser.go b/parser/parser.go
new file mode 100644
index 0000000..d659424
--- /dev/null
+++ b/parser/parser.go
@@ -0,0 +1,514 @@
+package parser
+
+import (
+	"git.sharkk.net/Sharkk/Mako/scanner"
+	"git.sharkk.net/Sharkk/Mako/types"
+)
+
+// Parser is a recursive-descent parser that pulls tokens from a scanner on
+// demand and builds statements from them.
+type Parser struct {
+	scanner   *scanner.Scanner   // Source of tokens
+	tokens    []types.Token      // Token buffer; always extends one token past current
+	current   int                // Index of the current token in tokens
+	errors    []*types.MakoError // Errors reported so far, in the order they occurred
+	hadError  bool               // True once any error has been seen
+	panicMode bool               // True while recovering; suppresses follow-on errors
+}
+
+// New creates a new parser for the given source
+func New(source string) *Parser {
+	p := &Parser{
+		scanner: scanner.New(source),
+	}
+
+	// Buffer two tokens up front so that currentToken and peek are both
+	// valid before the first call to advance.
+	p.tokens = append(p.tokens, p.scanner.NextToken())
+	p.tokens = append(p.tokens, p.scanner.NextToken())
+
+	return p
+}
+
+// Parse parses the source and returns the statements. Parsing continues
+// after an error; a statement that failed to parse may contain nil
+// expressions, so callers should consult Errors before using the result.
+func (p *Parser) Parse() []types.Statement {
+	var statements []types.Statement
+
+	for !p.check(types.EOF) {
+		statements = append(statements, p.declaration())
+	}
+
+	return statements
+}
+
+// Errors returns the errors reported while parsing, in the order they were
+// encountered. Errors raised while already recovering from an earlier one
+// are suppressed and do not appear here.
+func (p *Parser) Errors() []*types.MakoError {
+	return p.errors
+}
+
+// advance moves to the next token, reporting and skipping over any ERROR
+// tokens the scanner produced.
+func (p *Parser) advance() {
+	p.step()
+
+	for p.check(types.ERROR) {
+		p.error(p.currentToken().Lexeme)
+		p.step()
+	}
+}
+
+// step moves current forward by one and tops up the buffer so that peek
+// always has a token to return.
+func (p *Parser) step() {
+	p.current++
+
+	if p.current >= len(p.tokens)-1 {
+		p.tokens = append(p.tokens, p.scanner.NextToken())
+	}
+}
+
+// currentToken returns the current token
+func (p *Parser) currentToken() types.Token {
+	return p.tokens[p.current]
+}
+
+// previousToken returns the most recently consumed token. It must only be
+// called after at least one token has been consumed.
+func (p *Parser) previousToken() types.Token {
+	return p.tokens[p.current-1]
+}
+
+// peek returns the next token without consuming it
+func (p *Parser) peek() types.Token {
+	return p.tokens[p.current+1]
+}
+
+// check checks if the current token is of the given type
+func (p *Parser) check(t types.TokenType) bool {
+	return p.currentToken().Type == t
+}
+
+// checkAny checks if the current token is any of the given types
+func (p *Parser) checkAny(kinds ...types.TokenType) bool {
+	for _, kind := range kinds {
+		if p.check(kind) {
+			return true
+		}
+	}
+
+	return false
+}
+
+// match checks if the current token is any of the given types
+// If it is, it consumes the token and returns true
+func (p *Parser) match(kinds ...types.TokenType) bool {
+	if !p.checkAny(kinds...) {
+		return false
+	}
+
+	p.advance()
+	return true
+}
+
+// consume consumes the current token if it matches the given type
+// Otherwise, it reports an error and returns the current token unconsumed
+func (p *Parser) consume(t types.TokenType, message string) types.Token {
+	if p.check(t) {
+		token := p.currentToken()
+		p.advance()
+		return token
+	}
+
+	p.error(message)
+	return p.currentToken()
+}
+
+// error records a parse error at the current token and enters panic mode.
+// While already in panic mode the error is suppressed and nil is returned.
+func (p *Parser) error(message string) *types.MakoError {
+	p.hadError = true
+
+	if p.panicMode {
+		return nil
+	}
+
+	p.panicMode = true
+
+	token := p.currentToken()
+	err := types.NewError(message, token.Line, token.Column)
+	p.errors = append(p.errors, err)
+
+	return err
+}
+
+// synchronize leaves panic mode and skips tokens until it finds a statement
+// boundary: just after an 'end', or just before a statement keyword
+func (p *Parser) synchronize() {
+	p.panicMode = false
+
+	for !p.check(types.EOF) {
+		// There is no previous token if the error was on the very first one
+		if p.current > 0 && p.previousToken().Type == types.END {
+			return
+		}
+
+		switch p.currentToken().Type {
+		case types.FN, types.IF, types.RETURN, types.ECHO:
+			return
+		}
+
+		p.advance()
+	}
+}
+
+// declaration parses a declaration
+func (p *Parser) declaration() types.Statement {
+	start := p.current
+
+	var stmt types.Statement
+
+	// Only treat it as a function declaration if there's an identifier after 'fn'
+	if p.check(types.FN) && p.peek().Type == types.IDENTIFIER {
+		p.advance() // Consume the FN token
+		stmt = p.function()
+	} else {
+		stmt = p.statement()
+	}
+
+	// Recover only while an error is still pending. hadError never resets,
+	// so testing it here would re-run recovery after every later statement
+	// and throw away valid code.
+	if p.panicMode {
+		p.synchronize()
+	}
+
+	// A failed parse may consume nothing and synchronize may return at once,
+	// so force progress here; otherwise the caller's loop would never end.
+	if p.current == start && !p.check(types.EOF) {
+		p.advance()
+	}
+
+	return stmt
+}
+
+// function parses a function declaration
+func (p *Parser) function() types.Statement {
+	name := p.consume(types.IDENTIFIER, "Expected function name.")
+
+	p.consume(types.LEFT_PAREN, "Expected '(' after function name.")
+
+	params, isVariadic := p.parameters()
+	body := p.block(types.END)
+
+	p.consume(types.END, "Expected 'end' after function body.")
+
+	return types.FunctionStmt{
+		Name:       name,
+		Params:     params,
+		IsVariadic: isVariadic,
+		Body:       body,
+	}
+}
+
+// parameters parses a parameter list up to and including the closing ')'.
+// The opening '(' must already have been consumed. A '...' entry marks the
+// function as variadic and ends the list.
+func (p *Parser) parameters() ([]types.Token, bool) {
+	var params []types.Token
+	isVariadic := false
+
+	if !p.check(types.RIGHT_PAREN) {
+		for {
+			if p.match(types.ELLIPSIS) {
+				isVariadic = true
+				break
+			}
+
+			param := p.consume(types.IDENTIFIER, "Expected parameter name.")
+			params = append(params, param)
+
+			if !p.match(types.COMMA) {
+				break
+			}
+		}
+	}
+
+	p.consume(types.RIGHT_PAREN, "Expected ')' after parameters.")
+
+	return params, isVariadic
+}
+
+// block parses declarations until the current token is EOF or one of the
+// given terminators. The terminator itself is not consumed.
+func (p *Parser) block(terminators ...types.TokenType) []types.Statement {
+	var body []types.Statement
+
+	for !p.check(types.EOF) && !p.checkAny(terminators...) {
+		body = append(body, p.declaration())
+	}
+
+	return body
+}
+
+// statement parses a statement
+func (p *Parser) statement() types.Statement {
+	if p.match(types.IF) {
+		return p.ifStatement()
+	}
+
+	if p.match(types.RETURN) {
+		return p.returnStatement()
+	}
+
+	if p.match(types.ECHO) {
+		return p.echoStatement()
+	}
+
+	// Check for assignment
+	if p.check(types.IDENTIFIER) && p.peek().Type == types.EQUAL {
+		name := p.currentToken()
+		p.advance() // Consume the identifier
+		p.advance() // Consume the equals sign
+
+		value := p.expression()
+
+		return types.AssignStmt{
+			Name:  name,
+			Value: value,
+		}
+	}
+
+	return p.expressionStatement()
+}
+
+// ifStatement parses an if statement
+func (p *Parser) ifStatement() types.Statement {
+	condition := p.expression()
+
+	p.consume(types.THEN, "Expected 'then' after if condition.")
+
+	thenBranch := p.block(types.END, types.ELSEIF, types.ELSE)
+
+	var elseIfs []struct {
+		Condition types.Expression
+		Body      []types.Statement
+	}
+
+	// Parse 'elseif' branches
+	for p.match(types.ELSEIF) {
+		elseifCondition := p.expression()
+		p.consume(types.THEN, "Expected 'then' after elseif condition.")
+
+		body := p.block(types.END, types.ELSEIF, types.ELSE)
+
+		elseIfs = append(elseIfs, struct {
+			Condition types.Expression
+			Body      []types.Statement
+		}{
+			Condition: elseifCondition,
+			Body:      body,
+		})
+	}
+
+	var elseBranch []types.Statement
+
+	// Parse 'else' branch
+	if p.match(types.ELSE) {
+		elseBranch = p.block(types.END)
+	}
+
+	p.consume(types.END, "Expected 'end' after if statement.")
+
+	return types.IfStmt{
+		Condition:  condition,
+		ThenBranch: thenBranch,
+		ElseIfs:    elseIfs,
+		ElseBranch: elseBranch,
+	}
+}
+
+// returnStatement parses a return statement
+func (p *Parser) returnStatement() types.Statement {
+	keyword := p.previousToken()
+
+	// A bare 'return' can be followed by anything that closes the
+	// enclosing block, not just 'end'
+	var value types.Expression
+	if !p.checkAny(types.END, types.ELSEIF, types.ELSE, types.EOF) {
+		value = p.expression()
+	}
+
+	return types.ReturnStmt{
+		Keyword: keyword,
+		Value:   value,
+	}
+}
+
+// echoStatement parses an echo statement
+func (p *Parser) echoStatement() types.Statement {
+	keyword := p.previousToken()
+	value := p.expression()
+
+	return types.EchoStmt{
+		Keyword: keyword,
+		Value:   value,
+	}
+}
+
+// expressionStatement parses an expression statement
+func (p *Parser) expressionStatement() types.Statement {
+	expr := p.expression()
+	return types.ExpressionStmt{Expression: expr}
+}
+
+// expression parses an expression
+func (p *Parser) expression() types.Expression {
+	return p.or()
+}
+
+// binary parses a left-associative chain of the given operators, calling
+// operand to parse each side
+func (p *Parser) binary(operand func() types.Expression, operators ...types.TokenType) types.Expression {
+	expr := operand()
+
+	for p.match(operators...) {
+		operator := p.previousToken()
+		right := operand()
+		expr = types.BinaryExpr{
+			Left:     expr,
+			Operator: operator,
+			Right:    right,
+		}
+	}
+
+	return expr
+}
+
+// or parses a logical OR expression
+func (p *Parser) or() types.Expression {
+	return p.binary(p.and, types.OR)
+}
+
+// and parses a logical AND expression
+func (p *Parser) and() types.Expression {
+	return p.binary(p.equality, types.AND)
+}
+
+// equality parses an equality expression
+func (p *Parser) equality() types.Expression {
+	return p.binary(p.comparison, types.EQUAL_EQUAL, types.BANG_EQUAL)
+}
+
+// comparison parses a comparison expression
+func (p *Parser) comparison() types.Expression {
+	return p.binary(p.term, types.LESS, types.LESS_EQUAL, types.GREATER, types.GREATER_EQUAL)
+}
+
+// term parses a term expression
+func (p *Parser) term() types.Expression {
+	return p.binary(p.factor, types.PLUS, types.MINUS)
+}
+
+// factor parses a factor expression
+func (p *Parser) factor() types.Expression {
+	return p.binary(p.unary, types.STAR, types.SLASH)
+}
+
+// unary parses a unary expression
+func (p *Parser) unary() types.Expression {
+	if p.match(types.MINUS) {
+		operator := p.previousToken()
+		right := p.unary()
+		return types.UnaryExpr{
+			Operator: operator,
+			Right:    right,
+		}
+	}
+
+	return p.call()
+}
+
+// call parses a function call
+func (p *Parser) call() types.Expression {
+	expr := p.primary()
+
+	for p.match(types.LEFT_PAREN) {
+		var arguments []types.Expression
+
+		if !p.check(types.RIGHT_PAREN) {
+			for {
+				arguments = append(arguments, p.expression())
+
+				if !p.match(types.COMMA) {
+					break
+				}
+			}
+		}
+
+		paren := p.consume(types.RIGHT_PAREN, "Expected ')' after arguments.")
+
+		expr = types.CallExpr{
+			Callee:    expr,
+			Paren:     paren,
+			Arguments: arguments,
+		}
+	}
+
+	return expr
+}
+
+// primary parses a primary expression
+func (p *Parser) primary() types.Expression {
+	if p.match(types.NIL) {
+		return types.LiteralExpr{Value: nil}
+	}
+
+	if p.match(types.TRUE) {
+		return types.LiteralExpr{Value: true}
+	}
+
+	if p.match(types.FALSE) {
+		return types.LiteralExpr{Value: false}
+	}
+
+	if p.match(types.NUMBER, types.STRING) {
+		return types.LiteralExpr{Value: p.previousToken().Literal}
+	}
+
+	if p.match(types.IDENTIFIER) {
+		return types.VariableExpr{Name: p.previousToken()}
+	}
+
+	if p.match(types.LEFT_PAREN) {
+		expr := p.expression()
+		p.consume(types.RIGHT_PAREN, "Expected ')' after expression.")
+		return expr
+	}
+
+	if p.match(types.FN) {
+		return p.functionExpression()
+	}
+
+	p.error("Expected expression.")
+	return nil
+}
+
+// functionExpression parses an anonymous function; 'fn' is already consumed
+func (p *Parser) functionExpression() types.Expression {
+	p.consume(types.LEFT_PAREN, "Expected '(' after 'fn'.")
+
+	params, isVariadic := p.parameters()
+	body := p.block(types.END)
+
+	p.consume(types.END, "Expected 'end' after function body.")
+
+	return types.FunctionExpr{
+		Params:     params,
+		IsVariadic: isVariadic,
+		Body:       body,
+	}
+}
diff --git a/parser/parser_test.go b/parser/parser_test.go
new file mode 100644
index 0000000..a6760cb
--- /dev/null
+++ b/parser/parser_test.go
@@ -0,0 +1,274 @@
+package parser
+
+import (
+	"testing"
+
+	assert "git.sharkk.net/Go/Assert"
+	"git.sharkk.net/Sharkk/Mako/types"
+)
+
+func TestParserBasic(t *testing.T) {
+	// Test basic variable assignment
+	source := "x = 123"
+	p := New(source)
+	stmts := p.Parse()
+
+	assert.Equal(t, 1, len(stmts))
+	assignStmt, ok := stmts[0].(types.AssignStmt)
+	assert.True(t, ok)
+	assert.Equal(t, "x", assignStmt.Name.Lexeme)
+
+	literal, ok := assignStmt.Value.(types.LiteralExpr)
+	assert.True(t, ok)
+	assert.Equal(t, float64(123), literal.Value.(float64))
+}
+
+func TestParserFunction(t *testing.T) {
+	source := `fn add(a, b)
+		return a + b
+	end`
+
+	p := New(source)
+	stmts := p.Parse()
+
+	assert.Equal(t, 1, len(stmts))
+	funcStmt, ok := stmts[0].(types.FunctionStmt)
+	assert.True(t, ok)
+	assert.Equal(t, "add", funcStmt.Name.Lexeme)
+	assert.Equal(t, 2, len(funcStmt.Params))
+	assert.Equal(t, "a", funcStmt.Params[0].Lexeme)
+	assert.Equal(t, "b", funcStmt.Params[1].Lexeme)
+	assert.Equal(t, false, funcStmt.IsVariadic)
+	assert.Equal(t, 1, len(funcStmt.Body))
+}
+
+func TestParserVariadicFunction(t *testing.T) {
+	source := `fn concat(first, ...)
+		return first
+	end`
+
+	p := New(source)
+	stmts := p.Parse()
+
+	assert.Equal(t, 1, len(stmts))
+	funcStmt, ok := stmts[0].(types.FunctionStmt)
+	assert.True(t, ok)
+	assert.Equal(t, true, funcStmt.IsVariadic)
+	assert.Equal(t, 1, len(funcStmt.Params))
+}
+
+func TestParserIf(t *testing.T) {
+	source := `if x > 10 then
+		y = 20
+	elseif x < 5 then
+		y = 10
+	else
+		y = 15
+	end`
+
+	p := New(source)
+	stmts := p.Parse()
+
+	assert.Equal(t, 1, len(stmts))
+	ifStmt, ok := stmts[0].(types.IfStmt)
+	assert.True(t, ok)
+
+	// Check condition
+	binary, ok := ifStmt.Condition.(types.BinaryExpr)
+	assert.True(t, ok)
+	assert.Equal(t, types.GREATER, binary.Operator.Type)
+
+	// Check then branch
+	assert.Equal(t, 1, len(ifStmt.ThenBranch))
+
+	// Check elseif branch
+	assert.Equal(t, 1, len(ifStmt.ElseIfs))
+
+	// Check else branch
+	assert.Equal(t, 1, len(ifStmt.ElseBranch))
+}
+
+func TestParserEchoStatement(t *testing.T) {
+	source := `echo "Hello, world!"`
+
+	p := New(source)
+	stmts := p.Parse()
+
+	assert.Equal(t, 1, len(stmts))
+	echoStmt, ok := stmts[0].(types.EchoStmt)
+	assert.True(t, ok)
+
+	literal, ok := echoStmt.Value.(types.LiteralExpr)
+	assert.True(t, ok)
+	assert.DeepEqual(t, "Hello, world!", literal.Value)
+}
+
+func TestParserExpression(t *testing.T) {
+	source := "1 + 2 * 3"
+
+	p := New(source)
+	stmts := p.Parse()
+
+	assert.Equal(t, 1, len(stmts))
+	exprStmt, ok := stmts[0].(types.ExpressionStmt)
+	assert.True(t, ok)
+
+	// Verify that * has higher precedence than +
+	binary, ok := exprStmt.Expression.(types.BinaryExpr)
+	assert.True(t, ok)
+	assert.Equal(t, types.PLUS, binary.Operator.Type)
+
+	leftLiteral, ok := binary.Left.(types.LiteralExpr)
+	assert.True(t, ok)
+	assert.Equal(t, float64(1), leftLiteral.Value.(float64))
+
+	rightBinary, ok := binary.Right.(types.BinaryExpr)
+	assert.True(t, ok)
+	assert.Equal(t, types.STAR, rightBinary.Operator.Type)
+}
+
+func TestParserFunctionCall(t *testing.T) {
+	source := "add(1, 2)"
+
+	p := New(source)
+	stmts := p.Parse()
+
+	assert.Equal(t, 1, len(stmts))
+	exprStmt, ok := stmts[0].(types.ExpressionStmt)
+	assert.True(t, ok)
+
+	call, ok := exprStmt.Expression.(types.CallExpr)
+	assert.True(t, ok)
+
+	variable, ok := call.Callee.(types.VariableExpr)
+	assert.True(t, ok)
+	assert.Equal(t, "add", variable.Name.Lexeme)
+
+	assert.Equal(t, 2, len(call.Arguments))
+}
+
+func TestParserAnonymousFunction(t *testing.T) {
+	source := `fn(x) return x * x end`
+
+	p := New(source)
+	stmts := p.Parse()
+
+	assert.Equal(t, 1, len(stmts))
+	exprStmt, ok := stmts[0].(types.ExpressionStmt)
+	assert.True(t, ok)
+
+	fnExpr, ok := exprStmt.Expression.(types.FunctionExpr)
+	assert.True(t, ok)
+
+	assert.Equal(t, 1, len(fnExpr.Params))
+	assert.Equal(t, "x", fnExpr.Params[0].Lexeme)
+	assert.Equal(t, 1, len(fnExpr.Body))
+}
+
+func TestParserComplex(t *testing.T) {
+	source := `
+	fn fibonacci(n)
+		if n < 2 then
+			return n
+		else
+			return fibonacci(n - 1) + fibonacci(n - 2)
+		end
+	end
+
+	echo fibonacci(10)
+	`
+
+	p := New(source)
+	stmts := p.Parse()
+
+	assert.Equal(t, 2, len(stmts))
+	_, ok := stmts[0].(types.FunctionStmt)
+	assert.True(t, ok)
+
+	_, ok = stmts[1].(types.EchoStmt)
+	assert.True(t, ok)
+}
+
+func TestParserErrorHandling(t *testing.T) {
+	// Missing closing parenthesis
+	source := "fn add(a, b"
+
+	p := New(source)
+	_ = p.Parse()
+
+	assert.True(t, p.hadError)
+
+	// Continue parsing after error
+	source = `
+	x = 1
+	fn bad
+	y = 2
+	`
+
+	p = New(source)
+	stmts := p.Parse()
+
+	assert.True(t, p.hadError)
+	assert.Equal(t, 2, len(stmts)) // Should recover and parse the other statements
+}
+
+func TestParserComments(t *testing.T) {
+	source := `
+	// This is a comment
+	x = 1 // Inline comment
+	// Another comment
+	y = 2
+	`
+
+	p := New(source)
+	stmts := p.Parse()
+
+	assert.Equal(t, 2, len(stmts))
+	assert.False(t, p.hadError)
+}
+
+func TestParserErrorAtFirstToken(t *testing.T) {
+	// There is no previous token to inspect while recovering here
+	p := New(")")
+	_ = p.Parse()
+
+	assert.True(t, p.hadError)
+	assert.Equal(t, 1, len(p.Errors()))
+}
+
+func TestParserRecoveryMakesProgress(t *testing.T) {
+	// The stray 'else' follows an 'end', so recovery stops immediately and
+	// the parser has to skip the token itself
+	p := New("x end else")
+	_ = p.Parse()
+
+	assert.True(t, p.hadError)
+}
+
+func TestParserKeepsStatementsAfterRecovery(t *testing.T) {
+	source := `
+	x = )
+	echo 1
+	y = 2
+	`
+
+	p := New(source)
+	stmts := p.Parse()
+
+	assert.True(t, p.hadError)
+	assert.Equal(t, 3, len(stmts))
+}
+
+func TestParserBareReturnBeforeElse(t *testing.T) {
+	source := `if x then
+		return
+	else
+		return 1
+	end`
+
+	p := New(source)
+	stmts := p.Parse()
+
+	assert.Equal(t, 1, len(stmts))
+	assert.False(t, p.hadError)
+}