This commit is contained in:
Sky Johnson 2025-05-07 09:08:51 -05:00
parent 9716e88dcb
commit b24c0d3abd
4 changed files with 780 additions and 1 deletions

2
go.mod
View File

@ -2,4 +2,4 @@ module git.sharkk.net/Sharkk/Mako
go 1.24.1
require git.sharkk.net/Go/Assert v1.1.0
require git.sharkk.net/Go/Assert v1.2.0

2
go.sum
View File

@ -1,2 +1,4 @@
git.sharkk.net/Go/Assert v1.1.0 h1:1Nbu8C9vmv3gXaLR4S+NBXfQ01gnh3IHHD7PQRIVIe8=
git.sharkk.net/Go/Assert v1.1.0/go.mod h1:7AMVm0RCtLlQfWsnKs6h/IdSfzj52/o0nR03rCW68gM=
git.sharkk.net/Go/Assert v1.2.0 h1:E8N2JGagU9oQILSrLZZBKQF7q91sqkR2inpvDIIENfc=
git.sharkk.net/Go/Assert v1.2.0/go.mod h1:7AMVm0RCtLlQfWsnKs6h/IdSfzj52/o0nR03rCW68gM=

549
parser/parser.go Normal file
View File

@ -0,0 +1,549 @@
package parser
import (
"git.sharkk.net/Sharkk/Mako/scanner"
"git.sharkk.net/Sharkk/Mako/types"
)
// Parser holds everything the parsing methods share: the token source, a
// growing buffer of tokens already read from it, the read position inside that
// buffer, and the error-recovery flags.
type Parser struct {
	scanner *scanner.Scanner // source of tokens, pulled via NextToken
	tokens  []types.Token    // every token read so far; kept ahead of current for lookahead
	current int              // index in tokens of the token being examined

	// hadError is set by error and never cleared. panicMode is set by error
	// and cleared by synchronize.
	hadError, panicMode bool
}
// New returns a Parser over source with its lookahead buffer primed.
func New(source string) *Parser {
	sc := scanner.New(source)

	// Two tokens are read up front so that both currentToken and peek are
	// valid before the first call to advance.
	first := sc.NextToken()
	second := sc.NextToken()

	return &Parser{
		scanner: sc,
		tokens:  []types.Token{first, second},
	}
}
// Parse reads declarations until the current token is EOF and returns them in
// source order. The result is nil when the input holds no declarations.
func (p *Parser) Parse() []types.Statement {
	var program []types.Statement
	for {
		if p.check(types.EOF) {
			return program
		}
		program = append(program, p.declaration())
	}
}
// advance steps to the next token that is not an ERROR token. Each ERROR token
// encountered on the way is reported through error using its lexeme as the
// message, and then skipped.
func (p *Parser) advance() {
	for {
		p.current++

		// Keep one token buffered beyond current so peek stays in range.
		if p.current >= len(p.tokens)-1 {
			p.tokens = append(p.tokens, p.scanner.NextToken())
		}

		tok := p.currentToken()
		if tok.Type != types.ERROR {
			return
		}
		p.error(tok.Lexeme)
	}
}
// currentToken returns the token at the parser's read position.
func (p *Parser) currentToken() types.Token {
	idx := p.current
	return p.tokens[idx]
}
// previousToken returns the token immediately before the read position.
// It indexes p.current-1, so it must only be called after at least one token
// has been consumed.
func (p *Parser) previousToken() types.Token {
	idx := p.current - 1
	return p.tokens[idx]
}
// peek returns the token one past the read position without consuming
// anything.
func (p *Parser) peek() types.Token {
	idx := p.current + 1
	return p.tokens[idx]
}
// check reports whether the current token has type t. It never consumes.
func (p *Parser) check(t types.TokenType) bool {
	tok := p.currentToken()
	return tok.Type == t
}
// match consumes the current token and returns true if its type equals any of
// kinds. Otherwise it leaves the parser untouched and returns false.
//
// The parameter is deliberately not called "types": that name would shadow the
// imported types package for the whole function body.
func (p *Parser) match(kinds ...types.TokenType) bool {
	for _, kind := range kinds {
		if p.check(kind) {
			p.advance()
			return true
		}
	}
	return false
}
// consume expects the current token to have type t. On a match the token is
// consumed and returned. On a mismatch message is reported through error,
// nothing is consumed, and the offending current token is returned instead.
func (p *Parser) consume(t types.TokenType, message string) types.Token {
	if !p.check(t) {
		p.error(message)
		return p.currentToken()
	}

	tok := p.currentToken()
	p.advance()
	return tok
}
// error records that parsing has failed and switches the parser into panic
// mode. It returns a MakoError positioned at the current token, or nil when the
// parser was already in panic mode, so that follow-on errors are suppressed
// until synchronize clears the flag.
func (p *Parser) error(message string) *types.MakoError {
	alreadyPanicking := p.panicMode

	p.hadError = true
	p.panicMode = true

	if alreadyPanicking {
		return nil
	}

	at := p.currentToken()
	return types.NewError(message, at.Line, at.Column)
}
// synchronize leaves panic mode and discards tokens until the parser is at a
// likely statement boundary: just after an 'end', just before a token that
// starts a statement (fn, if, return, echo), or at EOF.
func (p *Parser) synchronize() {
	p.panicMode = false

	for !p.check(types.EOF) {
		// previousToken reads tokens[current-1]; when recovery starts before
		// any token has been consumed there is no previous token, and
		// looking at it would panic with an out-of-range index.
		if p.current > 0 && p.previousToken().Type == types.END {
			return
		}

		switch p.currentToken().Type {
		case types.FN, types.IF, types.RETURN, types.ECHO:
			return
		}

		p.advance()
	}
}
// declaration parses one declaration: a named function when the input is 'fn'
// followed by an identifier, otherwise an ordinary statement. If parsing left
// the parser in panic mode, it resynchronizes before returning so the next
// declaration starts from a clean state.
func (p *Parser) declaration() types.Statement {
	start := p.current

	var stmt types.Statement
	// Only treat 'fn' as a declaration when a name follows; a bare 'fn(' is an
	// anonymous function expression handled by the expression parser.
	if p.check(types.FN) && p.peek().Type == types.IDENTIFIER {
		p.advance() // consume 'fn'
		stmt = p.function()
	} else {
		stmt = p.statement()
	}

	// Recover only while an error is still unhandled. hadError is never
	// cleared, so keying recovery on it would run synchronize after every
	// later statement and silently skip valid code.
	if p.panicMode {
		// A failed parse may consume nothing. Step past the offending token so
		// callers that loop on declaration always make progress.
		if p.current == start && !p.check(types.EOF) {
			p.advance()
		}
		p.synchronize()
	}
	return stmt
}
// function parses the remainder of a named function declaration, after the
// 'fn' keyword has been consumed: name, parenthesised parameter list, body,
// and the closing 'end'.
func (p *Parser) function() types.Statement {
	name := p.consume(types.IDENTIFIER, "Expected function name.")
	p.consume(types.LEFT_PAREN, "Expected '(' after function name.")

	var (
		params   []types.Token
		variadic bool
	)
	// Parameters are comma separated. A '...' in parameter position marks the
	// function variadic and ends the list.
	for more := !p.check(types.RIGHT_PAREN); more; more = p.match(types.COMMA) {
		if p.match(types.ELLIPSIS) {
			variadic = true
			break
		}
		params = append(params, p.consume(types.IDENTIFIER, "Expected parameter name."))
	}
	p.consume(types.RIGHT_PAREN, "Expected ')' after parameters.")

	// The body is every declaration up to the closing 'end' (or EOF).
	var body []types.Statement
	for !(p.check(types.END) || p.check(types.EOF)) {
		body = append(body, p.declaration())
	}
	p.consume(types.END, "Expected 'end' after function body.")

	return types.FunctionStmt{
		Name:       name,
		Params:     params,
		IsVariadic: variadic,
		Body:       body,
	}
}
// statement dispatches on the current token: 'if', 'return' and 'echo' start
// their own statement forms, an identifier followed by '=' is an assignment,
// and anything else is parsed as an expression statement.
func (p *Parser) statement() types.Statement {
	switch {
	case p.match(types.IF):
		return p.ifStatement()
	case p.match(types.RETURN):
		return p.returnStatement()
	case p.match(types.ECHO):
		return p.echoStatement()
	case p.check(types.IDENTIFIER) && p.peek().Type == types.EQUAL:
		target := p.currentToken()
		p.advance() // identifier
		p.advance() // '='
		rhs := p.expression()
		return types.AssignStmt{Name: target, Value: rhs}
	}
	return p.expressionStatement()
}
// ifStatement parses an if statement after the 'if' keyword has been
// consumed: condition, 'then', the then-branch, any number of 'elseif'
// branches, an optional 'else' branch, and the closing 'end'.
func (p *Parser) ifStatement() types.Statement {
	type elseIf = struct {
		Condition types.Expression
		Body      []types.Statement
	}

	// branch collects declarations until a token that ends a conditional
	// branch: 'end', 'elseif', 'else', or EOF.
	branch := func() []types.Statement {
		var stmts []types.Statement
		for {
			switch p.currentToken().Type {
			case types.END, types.ELSEIF, types.ELSE, types.EOF:
				return stmts
			}
			stmts = append(stmts, p.declaration())
		}
	}

	cond := p.expression()
	p.consume(types.THEN, "Expected 'then' after if condition.")
	thenBody := branch()

	var alternatives []elseIf
	for p.match(types.ELSEIF) {
		altCond := p.expression()
		p.consume(types.THEN, "Expected 'then' after elseif condition.")
		alternatives = append(alternatives, elseIf{
			Condition: altCond,
			Body:      branch(),
		})
	}

	// The else branch runs to 'end' or EOF only; 'elseif' and 'else' are not
	// terminators here.
	var elseBody []types.Statement
	if p.match(types.ELSE) {
		for !(p.check(types.END) || p.check(types.EOF)) {
			elseBody = append(elseBody, p.declaration())
		}
	}
	p.consume(types.END, "Expected 'end' after if statement.")

	return types.IfStmt{
		Condition:  cond,
		ThenBranch: thenBody,
		ElseIfs:    alternatives,
		ElseBranch: elseBody,
	}
}
// returnStatement parses a return statement after the 'return' keyword has
// been consumed. The value is optional: when the next token closes the
// enclosing block, Value is left nil.
func (p *Parser) returnStatement() types.Statement {
	keyword := p.previousToken()

	var value types.Expression
	switch p.currentToken().Type {
	case types.END, types.ELSE, types.ELSEIF, types.EOF:
		// Bare return. 'else' and 'elseif' end an if-branch just as 'end'
		// does, so they must not be handed to the expression parser.
	default:
		value = p.expression()
	}

	return types.ReturnStmt{
		Keyword: keyword,
		Value:   value,
	}
}
// echoStatement parses an echo statement after the 'echo' keyword has been
// consumed: a single expression whose value is to be echoed.
func (p *Parser) echoStatement() types.Statement {
	echoTok := p.previousToken()
	operand := p.expression()

	var stmt types.EchoStmt
	stmt.Keyword = echoTok
	stmt.Value = operand
	return stmt
}
// expressionStatement parses a bare expression and wraps it as a statement.
func (p *Parser) expressionStatement() types.Statement {
	var stmt types.ExpressionStmt
	stmt.Expression = p.expression()
	return stmt
}
// expression parses a full expression, entering the precedence chain at its
// loosest-binding level (logical OR).
func (p *Parser) expression() types.Expression {
	expr := p.or()
	return expr
}
// or parses a left-associative chain of 'or' operators over and-expressions.
func (p *Parser) or() types.Expression {
	left := p.and()
	for {
		if !p.match(types.OR) {
			return left
		}
		op := p.previousToken()
		rhs := p.and()
		left = types.BinaryExpr{Left: left, Operator: op, Right: rhs}
	}
}
// and parses a left-associative chain of 'and' operators over equality
// expressions.
func (p *Parser) and() types.Expression {
	left := p.equality()
	for {
		if !p.match(types.AND) {
			return left
		}
		op := p.previousToken()
		rhs := p.equality()
		left = types.BinaryExpr{Left: left, Operator: op, Right: rhs}
	}
}
// equality parses a left-associative chain of '==' and '!=' operators over
// comparison expressions.
func (p *Parser) equality() types.Expression {
	left := p.comparison()
	for {
		if !p.match(types.EQUAL_EQUAL, types.BANG_EQUAL) {
			return left
		}
		op := p.previousToken()
		rhs := p.comparison()
		left = types.BinaryExpr{Left: left, Operator: op, Right: rhs}
	}
}
// comparison parses a left-associative chain of '<', '<=', '>' and '>='
// operators over term expressions.
func (p *Parser) comparison() types.Expression {
	left := p.term()
	for {
		if !p.match(types.LESS, types.LESS_EQUAL, types.GREATER, types.GREATER_EQUAL) {
			return left
		}
		op := p.previousToken()
		rhs := p.term()
		left = types.BinaryExpr{Left: left, Operator: op, Right: rhs}
	}
}
// term parses a left-associative chain of '+' and '-' operators over factor
// expressions.
func (p *Parser) term() types.Expression {
	left := p.factor()
	for {
		if !p.match(types.PLUS, types.MINUS) {
			return left
		}
		op := p.previousToken()
		rhs := p.factor()
		left = types.BinaryExpr{Left: left, Operator: op, Right: rhs}
	}
}
// factor parses a left-associative chain of '*' and '/' operators over unary
// expressions.
func (p *Parser) factor() types.Expression {
	left := p.unary()
	for {
		if !p.match(types.STAR, types.SLASH) {
			return left
		}
		op := p.previousToken()
		rhs := p.unary()
		left = types.BinaryExpr{Left: left, Operator: op, Right: rhs}
	}
}
// unary parses a prefix '-' applied to another unary expression, or falls
// through to a call expression when no '-' is present.
func (p *Parser) unary() types.Expression {
	if !p.match(types.MINUS) {
		return p.call()
	}

	minus := p.previousToken()
	operand := p.unary()
	return types.UnaryExpr{Operator: minus, Right: operand}
}
// call parses a primary expression followed by zero or more parenthesised
// argument lists. Each list wraps the expression so far in a CallExpr, so
// chained calls such as f(a)(b) nest left to right.
func (p *Parser) call() types.Expression {
	callee := p.primary()

	for p.match(types.LEFT_PAREN) {
		// Arguments are comma separated; an immediate ')' means none.
		var args []types.Expression
		for more := !p.check(types.RIGHT_PAREN); more; more = p.match(types.COMMA) {
			args = append(args, p.expression())
		}

		closing := p.consume(types.RIGHT_PAREN, "Expected ')' after arguments.")
		callee = types.CallExpr{
			Callee:    callee,
			Paren:     closing,
			Arguments: args,
		}
	}

	return callee
}
// primary parses the tightest-binding expressions: the literals nil, true,
// false, numbers and strings; identifiers; parenthesised expressions; and
// anonymous functions. If the current token starts none of these, it reports
// "Expected expression." and returns nil without consuming anything.
func (p *Parser) primary() types.Expression {
	switch {
	case p.match(types.NIL):
		return types.LiteralExpr{Value: nil}

	case p.match(types.TRUE):
		return types.LiteralExpr{Value: true}

	case p.match(types.FALSE):
		return types.LiteralExpr{Value: false}

	case p.match(types.NUMBER, types.STRING):
		lit := p.previousToken()
		return types.LiteralExpr{Value: lit.Literal}

	case p.match(types.IDENTIFIER):
		return types.VariableExpr{Name: p.previousToken()}

	case p.match(types.LEFT_PAREN):
		inner := p.expression()
		p.consume(types.RIGHT_PAREN, "Expected ')' after expression.")
		return inner

	case p.match(types.FN):
		// Anonymous function: 'fn' '(' params ')' body 'end'.
		p.consume(types.LEFT_PAREN, "Expected '(' after 'fn'.")

		var (
			params   []types.Token
			variadic bool
		)
		// A '...' in parameter position marks the function variadic and ends
		// the list.
		for more := !p.check(types.RIGHT_PAREN); more; more = p.match(types.COMMA) {
			if p.match(types.ELLIPSIS) {
				variadic = true
				break
			}
			params = append(params, p.consume(types.IDENTIFIER, "Expected parameter name."))
		}
		p.consume(types.RIGHT_PAREN, "Expected ')' after parameters.")

		var body []types.Statement
		for !(p.check(types.END) || p.check(types.EOF)) {
			body = append(body, p.declaration())
		}
		p.consume(types.END, "Expected 'end' after function body.")

		return types.FunctionExpr{
			Params:     params,
			IsVariadic: variadic,
			Body:       body,
		}
	}

	p.error("Expected expression.")
	return nil
}

228
parser/parser_test.go Normal file
View File

@ -0,0 +1,228 @@
package parser
import (
"testing"
assert "git.sharkk.net/Go/Assert"
"git.sharkk.net/Sharkk/Mako/types"
)
// TestParserBasic checks that a single assignment parses to an AssignStmt
// whose target is "x" and whose value is the numeric literal 123.
func TestParserBasic(t *testing.T) {
	program := New("x = 123").Parse()
	assert.Equal(t, 1, len(program))

	assign, isAssign := program[0].(types.AssignStmt)
	assert.True(t, isAssign)
	assert.Equal(t, "x", assign.Name.Lexeme)

	value, isLiteral := assign.Value.(types.LiteralExpr)
	assert.True(t, isLiteral)
	assert.Equal(t, float64(123), value.Value.(float64))
}
// TestParserFunction checks a named, non-variadic function declaration: its
// name, its two parameters in order, and a one-statement body.
func TestParserFunction(t *testing.T) {
	src := `fn add(a, b)
return a + b
end`
	program := New(src).Parse()
	assert.Equal(t, 1, len(program))

	fn, isFunc := program[0].(types.FunctionStmt)
	assert.True(t, isFunc)
	assert.Equal(t, "add", fn.Name.Lexeme)

	assert.Equal(t, 2, len(fn.Params))
	assert.Equal(t, "a", fn.Params[0].Lexeme)
	assert.Equal(t, "b", fn.Params[1].Lexeme)
	assert.Equal(t, false, fn.IsVariadic)

	assert.Equal(t, 1, len(fn.Body))
}
// TestParserVariadicFunction checks that a trailing '...' marks the function
// variadic and is not counted as a named parameter.
func TestParserVariadicFunction(t *testing.T) {
	src := `fn concat(first, ...)
return first
end`
	program := New(src).Parse()
	assert.Equal(t, 1, len(program))

	fn, isFunc := program[0].(types.FunctionStmt)
	assert.True(t, isFunc)
	assert.Equal(t, true, fn.IsVariadic)
	assert.Equal(t, 1, len(fn.Params))
}
// TestParserIf checks an if / elseif / else statement: the condition is a '>'
// comparison and each of the three branches holds exactly one entry.
func TestParserIf(t *testing.T) {
	src := `if x > 10 then
y = 20
elseif x < 5 then
y = 10
else
y = 15
end`
	program := New(src).Parse()
	assert.Equal(t, 1, len(program))

	conditional, isIf := program[0].(types.IfStmt)
	assert.True(t, isIf)

	// Condition.
	cond, isBinary := conditional.Condition.(types.BinaryExpr)
	assert.True(t, isBinary)
	assert.Equal(t, types.GREATER, cond.Operator.Type)

	// Then, elseif and else branches, in that order.
	assert.Equal(t, 1, len(conditional.ThenBranch))
	assert.Equal(t, 1, len(conditional.ElseIfs))
	assert.Equal(t, 1, len(conditional.ElseBranch))
}
// TestParserEchoStatement checks that echo with a string argument parses to an
// EchoStmt carrying that string as a literal.
func TestParserEchoStatement(t *testing.T) {
	program := New(`echo "Hello, world!"`).Parse()
	assert.Equal(t, 1, len(program))

	echo, isEcho := program[0].(types.EchoStmt)
	assert.True(t, isEcho)

	text, isLiteral := echo.Value.(types.LiteralExpr)
	assert.True(t, isLiteral)
	assert.DeepEqual(t, "Hello, world!", text.Value)
}
// TestParserExpression checks operator precedence: in "1 + 2 * 3" the root is
// '+', its left operand is the literal 1, and its right operand is the '*'
// expression.
func TestParserExpression(t *testing.T) {
	program := New("1 + 2 * 3").Parse()
	assert.Equal(t, 1, len(program))

	stmt, isExprStmt := program[0].(types.ExpressionStmt)
	assert.True(t, isExprStmt)

	// '*' binds tighter than '+', so '+' must be the root.
	sum, isBinary := stmt.Expression.(types.BinaryExpr)
	assert.True(t, isBinary)
	assert.Equal(t, types.PLUS, sum.Operator.Type)

	lhs, isLiteral := sum.Left.(types.LiteralExpr)
	assert.True(t, isLiteral)
	assert.Equal(t, float64(1), lhs.Value.(float64))

	product, isProduct := sum.Right.(types.BinaryExpr)
	assert.True(t, isProduct)
	assert.Equal(t, types.STAR, product.Operator.Type)
}
// TestParserFunctionCall checks that "add(1, 2)" parses to a CallExpr whose
// callee is the variable "add" and which carries two arguments.
func TestParserFunctionCall(t *testing.T) {
	program := New("add(1, 2)").Parse()
	assert.Equal(t, 1, len(program))

	stmt, isExprStmt := program[0].(types.ExpressionStmt)
	assert.True(t, isExprStmt)

	invocation, isCall := stmt.Expression.(types.CallExpr)
	assert.True(t, isCall)

	callee, isVariable := invocation.Callee.(types.VariableExpr)
	assert.True(t, isVariable)
	assert.Equal(t, "add", callee.Name.Lexeme)

	assert.Equal(t, 2, len(invocation.Arguments))
}
// TestParserAnonymousFunction checks that 'fn' followed directly by '(' parses
// as a FunctionExpr inside an expression statement, with one parameter "x" and
// a one-statement body.
func TestParserAnonymousFunction(t *testing.T) {
	program := New(`fn(x) return x * x end`).Parse()
	assert.Equal(t, 1, len(program))

	stmt, isExprStmt := program[0].(types.ExpressionStmt)
	assert.True(t, isExprStmt)

	lambda, isFuncExpr := stmt.Expression.(types.FunctionExpr)
	assert.True(t, isFuncExpr)

	assert.Equal(t, 1, len(lambda.Params))
	assert.Equal(t, "x", lambda.Params[0].Lexeme)
	assert.Equal(t, 1, len(lambda.Body))
}
// TestParserComplex parses a recursive function followed by an echo of a call
// to it, and checks that the two top-level statements have the expected kinds.
func TestParserComplex(t *testing.T) {
	src := `
fn fibonacci(n)
if n < 2 then
return n
else
return fibonacci(n - 1) + fibonacci(n - 2)
end
end
echo fibonacci(10)
`
	program := New(src).Parse()
	assert.Equal(t, 2, len(program))

	_, isFunc := program[0].(types.FunctionStmt)
	assert.True(t, isFunc)

	_, isEcho := program[1].(types.EchoStmt)
	assert.True(t, isEcho)
}
// TestParserErrorHandling checks that malformed input sets hadError, and that
// the parser still produces statements for input containing a broken
// declaration.
func TestParserErrorHandling(t *testing.T) {
	// Parameter list is never closed.
	unclosed := New("fn add(a, b")
	_ = unclosed.Parse()
	assert.True(t, unclosed.hadError)

	// A broken function declaration sits between two assignments.
	broken := New(`
x = 1
fn bad
y = 2
`)
	program := broken.Parse()
	assert.True(t, broken.hadError)
	assert.Equal(t, 2, len(program)) // Should recover and parse the other statements
}
// TestParserComments checks that full-line and trailing '//' comments yield no
// statements and no errors: only the two assignments are parsed.
func TestParserComments(t *testing.T) {
	src := `
// This is a comment
x = 1 // Inline comment
// Another comment
y = 2
`
	commented := New(src)
	program := commented.Parse()

	assert.Equal(t, 2, len(program))
	assert.False(t, commented.hadError)
}