520 lines
12 KiB
Go
520 lines
12 KiB
Go
package parser
|
|
|
|
import (
|
|
"fmt"
|
|
"strconv"
|
|
|
|
"git.sharkk.net/Sharkk/Mako/lexer"
|
|
)
|
|
|
|
// Operator precedence levels used by parseExpression, ordered from the
// loosest binding (LOWEST) to the tightest (INDEX).
//
// COMPARISON must remain the level immediately above LOWEST: the precedences
// table assigns the comparison operators the value LOWEST + 1. Without a
// dedicated level that value coincided with SUM, which made `a + 1 == b + 2`
// group as `((a + 1) == b) + 2`.
const (
	_ int = iota
	LOWEST
	COMPARISON // ==, !=, <, >, <=, >=
	SUM        // +, -
	PRODUCT    // *, /
	PREFIX     // -X or !X
	INDEX      // array[index]
)
|
|
|
|
var precedences = map[lexer.TokenType]int{
|
|
lexer.TokenPlus: SUM,
|
|
lexer.TokenMinus: SUM,
|
|
lexer.TokenStar: PRODUCT,
|
|
lexer.TokenSlash: PRODUCT,
|
|
lexer.TokenLeftBracket: INDEX,
|
|
lexer.TokenEqualEqual: LOWEST + 1,
|
|
lexer.TokenNotEqual: LOWEST + 1,
|
|
lexer.TokenLessThan: LOWEST + 1,
|
|
lexer.TokenGreaterThan: LOWEST + 1,
|
|
lexer.TokenLessEqual: LOWEST + 1,
|
|
lexer.TokenGreaterEqual: LOWEST + 1,
|
|
}
|
|
|
|
type (
|
|
prefixParseFn func() Expression
|
|
infixParseFn func(Expression) Expression
|
|
)
|
|
|
|
type Parser struct {
|
|
l *lexer.Lexer
|
|
errors []string
|
|
|
|
curToken lexer.Token
|
|
peekToken lexer.Token
|
|
|
|
prefixParseFns map[lexer.TokenType]prefixParseFn
|
|
infixParseFns map[lexer.TokenType]infixParseFn
|
|
}
|
|
|
|
func New(l *lexer.Lexer) *Parser {
|
|
p := &Parser{
|
|
l: l,
|
|
errors: []string{},
|
|
}
|
|
|
|
// Initialize prefix parse functions
|
|
p.prefixParseFns = make(map[lexer.TokenType]prefixParseFn)
|
|
p.registerPrefix(lexer.TokenIdentifier, p.parseIdentifier)
|
|
p.registerPrefix(lexer.TokenString, p.parseStringLiteral)
|
|
p.registerPrefix(lexer.TokenNumber, p.parseNumberLiteral)
|
|
p.registerPrefix(lexer.TokenLeftBrace, p.parseTableLiteral)
|
|
p.registerPrefix(lexer.TokenMinus, p.parsePrefixExpression)
|
|
p.registerPrefix(lexer.TokenLeftParen, p.parseGroupedExpression)
|
|
p.registerPrefix(lexer.TokenIf, p.parseIfExpression)
|
|
p.registerPrefix(lexer.TokenElse, p.parseUnexpectedToken)
|
|
p.registerPrefix(lexer.TokenEnd, p.parseUnexpectedToken)
|
|
p.registerPrefix(lexer.TokenThen, p.parseUnexpectedToken)
|
|
p.registerPrefix(lexer.TokenTrue, p.parseBooleanLiteral)
|
|
p.registerPrefix(lexer.TokenFalse, p.parseBooleanLiteral)
|
|
|
|
// Initialize infix parse functions
|
|
p.infixParseFns = make(map[lexer.TokenType]infixParseFn)
|
|
p.registerInfix(lexer.TokenPlus, p.parseInfixExpression)
|
|
p.registerInfix(lexer.TokenMinus, p.parseInfixExpression)
|
|
p.registerInfix(lexer.TokenStar, p.parseInfixExpression)
|
|
p.registerInfix(lexer.TokenSlash, p.parseInfixExpression)
|
|
p.registerInfix(lexer.TokenLeftBracket, p.parseIndexExpression)
|
|
|
|
// Register comparison operators
|
|
p.registerInfix(lexer.TokenEqualEqual, p.parseInfixExpression)
|
|
p.registerInfix(lexer.TokenNotEqual, p.parseInfixExpression)
|
|
p.registerInfix(lexer.TokenLessThan, p.parseInfixExpression)
|
|
p.registerInfix(lexer.TokenGreaterThan, p.parseInfixExpression)
|
|
p.registerInfix(lexer.TokenLessEqual, p.parseInfixExpression)
|
|
p.registerInfix(lexer.TokenGreaterEqual, p.parseInfixExpression)
|
|
|
|
// Read two tokens, so curToken and peekToken are both set
|
|
p.nextToken()
|
|
p.nextToken()
|
|
|
|
return p
|
|
}
|
|
|
|
func (p *Parser) registerPrefix(tokenType lexer.TokenType, fn prefixParseFn) {
|
|
p.prefixParseFns[tokenType] = fn
|
|
}
|
|
|
|
func (p *Parser) registerInfix(tokenType lexer.TokenType, fn infixParseFn) {
|
|
p.infixParseFns[tokenType] = fn
|
|
}
|
|
|
|
func (p *Parser) nextToken() {
|
|
p.curToken = p.peekToken
|
|
p.peekToken = p.l.NextToken()
|
|
}
|
|
|
|
func (p *Parser) curTokenIs(t lexer.TokenType) bool {
|
|
return p.curToken.Type == t
|
|
}
|
|
|
|
func (p *Parser) peekTokenIs(t lexer.TokenType) bool {
|
|
return p.peekToken.Type == t
|
|
}
|
|
|
|
func (p *Parser) expectPeek(t lexer.TokenType) bool {
|
|
if p.peekTokenIs(t) {
|
|
p.nextToken()
|
|
return true
|
|
}
|
|
p.peekError(t)
|
|
return false
|
|
}
|
|
|
|
func (p *Parser) peekError(t lexer.TokenType) {
|
|
msg := fmt.Sprintf("line %d: expected next token to be %d, got %d instead",
|
|
p.peekToken.Line, t, p.peekToken.Type)
|
|
p.errors = append(p.errors, msg)
|
|
}
|
|
|
|
func (p *Parser) Errors() []string {
|
|
return p.errors
|
|
}
|
|
|
|
func (p *Parser) peekPrecedence() int {
|
|
if p, ok := precedences[p.peekToken.Type]; ok {
|
|
return p
|
|
}
|
|
return LOWEST
|
|
}
|
|
|
|
func (p *Parser) curPrecedence() int {
|
|
if p, ok := precedences[p.curToken.Type]; ok {
|
|
return p
|
|
}
|
|
return LOWEST
|
|
}
|
|
|
|
func (p *Parser) ParseProgram() *Program {
|
|
program := &Program{Statements: []Statement{}}
|
|
|
|
for !p.curTokenIs(lexer.TokenEOF) {
|
|
stmt := p.parseStatement()
|
|
program.Statements = append(program.Statements, stmt)
|
|
p.nextToken()
|
|
}
|
|
|
|
return program
|
|
}
|
|
|
|
func (p *Parser) parseStatement() Statement {
|
|
switch p.curToken.Type {
|
|
case lexer.TokenIdentifier:
|
|
if p.peekTokenIs(lexer.TokenEqual) {
|
|
return p.parseVariableStatement()
|
|
} else if p.peekTokenIs(lexer.TokenLeftBracket) {
|
|
return p.parseIndexAssignmentStatement()
|
|
}
|
|
return p.parseExpressionStatement()
|
|
case lexer.TokenEcho:
|
|
return p.parseEchoStatement()
|
|
default:
|
|
return p.parseExpressionStatement()
|
|
}
|
|
}
|
|
|
|
// New method for expression statements
|
|
func (p *Parser) parseExpressionStatement() *ExpressionStatement {
|
|
stmt := &ExpressionStatement{Token: p.curToken}
|
|
|
|
stmt.Expression = p.parseExpression(LOWEST)
|
|
|
|
if p.peekTokenIs(lexer.TokenSemicolon) {
|
|
p.nextToken()
|
|
}
|
|
|
|
return stmt
|
|
}
|
|
|
|
// Add ExpressionStatement to ast.go
|
|
type ExpressionStatement struct {
|
|
Token lexer.Token
|
|
Expression Expression
|
|
}
|
|
|
|
func (es *ExpressionStatement) statementNode() {}
|
|
func (es *ExpressionStatement) TokenLiteral() string { return es.Token.Value }
|
|
|
|
func (p *Parser) parseBlockStatement() *BlockStatement {
|
|
block := &BlockStatement{Token: p.curToken}
|
|
block.Statements = []Statement{}
|
|
|
|
p.nextToken() // Skip '{'
|
|
|
|
for p.curToken.Type != lexer.TokenRightBrace && p.curToken.Type != lexer.TokenEOF {
|
|
stmt := p.parseStatement()
|
|
block.Statements = append(block.Statements, stmt)
|
|
p.nextToken()
|
|
}
|
|
|
|
return block
|
|
}
|
|
|
|
func (p *Parser) parseVariableStatement() *VariableStatement {
|
|
stmt := &VariableStatement{Token: p.curToken}
|
|
|
|
stmt.Name = &Identifier{Token: p.curToken, Value: p.curToken.Value}
|
|
|
|
if !p.expectPeek(lexer.TokenEqual) {
|
|
return nil
|
|
}
|
|
|
|
p.nextToken() // Skip the equals sign
|
|
|
|
stmt.Value = p.parseExpression(LOWEST)
|
|
|
|
if p.peekTokenIs(lexer.TokenSemicolon) {
|
|
p.nextToken()
|
|
}
|
|
|
|
return stmt
|
|
}
|
|
|
|
func (p *Parser) parseEchoStatement() *EchoStatement {
|
|
stmt := &EchoStatement{Token: p.curToken}
|
|
|
|
p.nextToken()
|
|
|
|
stmt.Value = p.parseExpression(LOWEST)
|
|
|
|
if p.peekTokenIs(lexer.TokenSemicolon) {
|
|
p.nextToken()
|
|
}
|
|
|
|
return stmt
|
|
}
|
|
|
|
func (p *Parser) parseIndexAssignmentStatement() *IndexAssignmentStatement {
|
|
stmt := &IndexAssignmentStatement{
|
|
Token: p.curToken,
|
|
Left: &Identifier{Token: p.curToken, Value: p.curToken.Value},
|
|
}
|
|
|
|
p.nextToken() // Skip identifier
|
|
if !p.expectPeek(lexer.TokenLeftBracket) {
|
|
return nil
|
|
}
|
|
|
|
p.nextToken() // Skip '['
|
|
stmt.Index = p.parseExpression(LOWEST)
|
|
|
|
if !p.expectPeek(lexer.TokenRightBracket) {
|
|
return nil
|
|
}
|
|
|
|
if !p.expectPeek(lexer.TokenEqual) {
|
|
return nil
|
|
}
|
|
|
|
p.nextToken() // Skip '='
|
|
stmt.Value = p.parseExpression(LOWEST)
|
|
|
|
if p.peekTokenIs(lexer.TokenSemicolon) {
|
|
p.nextToken()
|
|
}
|
|
|
|
return stmt
|
|
}
|
|
|
|
// Core expression parser with precedence climbing
|
|
func (p *Parser) parseExpression(precedence int) Expression {
|
|
prefix := p.prefixParseFns[p.curToken.Type]
|
|
if prefix == nil {
|
|
p.noPrefixParseFnError(p.curToken.Type)
|
|
return nil
|
|
}
|
|
leftExp := prefix()
|
|
|
|
// Continue while we have valid infix operators
|
|
// and stop at special tokens that end expressions
|
|
for !p.peekTokenIs(lexer.TokenSemicolon) &&
|
|
!p.peekTokenIs(lexer.TokenEnd) &&
|
|
!p.peekTokenIs(lexer.TokenThen) &&
|
|
!p.peekTokenIs(lexer.TokenElse) &&
|
|
precedence < p.peekPrecedence() {
|
|
|
|
infix := p.infixParseFns[p.peekToken.Type]
|
|
if infix == nil {
|
|
return leftExp
|
|
}
|
|
|
|
p.nextToken()
|
|
leftExp = infix(leftExp)
|
|
}
|
|
|
|
return leftExp
|
|
}
|
|
|
|
func (p *Parser) noPrefixParseFnError(t lexer.TokenType) {
|
|
msg := fmt.Sprintf("line %d: no prefix parse function for %d found",
|
|
p.curToken.Line, t)
|
|
p.errors = append(p.errors, msg)
|
|
}
|
|
|
|
// Expression parsing methods
|
|
func (p *Parser) parseIdentifier() Expression {
|
|
return &Identifier{Token: p.curToken, Value: p.curToken.Value}
|
|
}
|
|
|
|
func (p *Parser) parseStringLiteral() Expression {
|
|
return &StringLiteral{Token: p.curToken, Value: p.curToken.Value}
|
|
}
|
|
|
|
func (p *Parser) parseNumberLiteral() Expression {
|
|
lit := &NumberLiteral{Token: p.curToken}
|
|
|
|
value, err := strconv.ParseFloat(p.curToken.Value, 64)
|
|
if err != nil {
|
|
msg := fmt.Sprintf("could not parse %q as float", p.curToken.Value)
|
|
p.errors = append(p.errors, msg)
|
|
return nil
|
|
}
|
|
|
|
lit.Value = value
|
|
return lit
|
|
}
|
|
|
|
func (p *Parser) parseTableLiteral() Expression {
|
|
table := &TableLiteral{
|
|
Token: p.curToken, // This should be '{'
|
|
Pairs: make(map[Expression]Expression),
|
|
}
|
|
|
|
p.nextToken() // Skip '{'
|
|
|
|
if p.curTokenIs(lexer.TokenRightBrace) {
|
|
return table // Empty table
|
|
}
|
|
|
|
// Parse the first key-value pair
|
|
key := p.parseExpression(LOWEST)
|
|
|
|
if !p.expectPeek(lexer.TokenEqual) {
|
|
return nil
|
|
}
|
|
|
|
p.nextToken() // Skip '='
|
|
value := p.parseExpression(LOWEST)
|
|
table.Pairs[key] = value
|
|
|
|
// Parse remaining key-value pairs
|
|
for p.peekTokenIs(lexer.TokenComma) {
|
|
p.nextToken() // Skip current value
|
|
p.nextToken() // Skip comma
|
|
|
|
if p.curTokenIs(lexer.TokenRightBrace) {
|
|
break // Allow trailing comma
|
|
}
|
|
|
|
key = p.parseExpression(LOWEST)
|
|
|
|
if !p.expectPeek(lexer.TokenEqual) {
|
|
return nil
|
|
}
|
|
|
|
p.nextToken() // Skip '='
|
|
value = p.parseExpression(LOWEST)
|
|
table.Pairs[key] = value
|
|
}
|
|
|
|
if !p.expectPeek(lexer.TokenRightBrace) {
|
|
return nil
|
|
}
|
|
|
|
return table
|
|
}
|
|
|
|
func (p *Parser) parseIndexExpression(left Expression) Expression {
|
|
exp := &IndexExpression{
|
|
Token: p.curToken,
|
|
Left: left,
|
|
}
|
|
|
|
p.nextToken() // Skip '['
|
|
exp.Index = p.parseExpression(LOWEST)
|
|
|
|
if !p.expectPeek(lexer.TokenRightBracket) {
|
|
return nil
|
|
}
|
|
|
|
return exp
|
|
}
|
|
|
|
// New methods for arithmetic expressions
|
|
func (p *Parser) parsePrefixExpression() Expression {
|
|
expression := &PrefixExpression{
|
|
Token: p.curToken,
|
|
Operator: p.curToken.Value,
|
|
}
|
|
|
|
p.nextToken() // Skip the prefix token
|
|
expression.Right = p.parseExpression(PREFIX)
|
|
|
|
return expression
|
|
}
|
|
|
|
func (p *Parser) parseInfixExpression(left Expression) Expression {
|
|
expression := &InfixExpression{
|
|
Token: p.curToken,
|
|
Operator: p.curToken.Value,
|
|
Left: left,
|
|
}
|
|
|
|
precedence := p.curPrecedence()
|
|
p.nextToken() // Skip the operator
|
|
expression.Right = p.parseExpression(precedence)
|
|
|
|
return expression
|
|
}
|
|
|
|
func (p *Parser) parseGroupedExpression() Expression {
|
|
p.nextToken() // Skip '('
|
|
|
|
exp := p.parseExpression(LOWEST)
|
|
|
|
if !p.expectPeek(lexer.TokenRightParen) {
|
|
return nil
|
|
}
|
|
|
|
// Wrap in GroupedExpression to maintain the AST structure
|
|
return &GroupedExpression{
|
|
Token: p.curToken,
|
|
Expr: exp,
|
|
}
|
|
}
|
|
|
|
func (p *Parser) parseBooleanLiteral() Expression {
|
|
return &BooleanLiteral{
|
|
Token: p.curToken,
|
|
Value: p.curTokenIs(lexer.TokenTrue),
|
|
}
|
|
}
|
|
|
|
func (p *Parser) parseIfExpression() Expression {
|
|
expression := &IfExpression{Token: p.curToken}
|
|
|
|
p.nextToken() // Skip 'if'
|
|
|
|
// Parse condition
|
|
expression.Condition = p.parseExpression(LOWEST)
|
|
|
|
// Expect 'then' after condition
|
|
if !p.expectPeek(lexer.TokenThen) {
|
|
return nil
|
|
}
|
|
|
|
p.nextToken() // Skip 'then'
|
|
|
|
// Create a block statement for the consequence
|
|
consequence := &BlockStatement{Token: p.curToken}
|
|
consequence.Statements = []Statement{}
|
|
|
|
// Parse statements until we hit 'else' or 'end'
|
|
for !p.curTokenIs(lexer.TokenElse) && !p.curTokenIs(lexer.TokenEnd) && !p.curTokenIs(lexer.TokenEOF) {
|
|
stmt := p.parseStatement()
|
|
consequence.Statements = append(consequence.Statements, stmt)
|
|
p.nextToken()
|
|
}
|
|
|
|
expression.Consequence = consequence
|
|
|
|
// Check for 'else'
|
|
if p.curTokenIs(lexer.TokenElse) {
|
|
p.nextToken() // Skip 'else'
|
|
|
|
// Create a block statement for the alternative
|
|
alternative := &BlockStatement{Token: p.curToken}
|
|
alternative.Statements = []Statement{}
|
|
|
|
// Parse statements until we hit 'end'
|
|
for !p.curTokenIs(lexer.TokenEnd) && !p.curTokenIs(lexer.TokenEOF) {
|
|
stmt := p.parseStatement()
|
|
alternative.Statements = append(alternative.Statements, stmt)
|
|
p.nextToken()
|
|
}
|
|
|
|
expression.Alternative = alternative
|
|
}
|
|
|
|
// We should now be at the 'end' token
|
|
if !p.curTokenIs(lexer.TokenEnd) {
|
|
p.errors = append(p.errors, fmt.Sprintf("line %d: expected 'end' to close if expression",
|
|
p.curToken.Line))
|
|
return nil
|
|
}
|
|
|
|
return expression
|
|
}
|
|
|
|
func (p *Parser) parseErrorToken() Expression {
|
|
msg := fmt.Sprintf("unexpected token: %s", p.curToken.Value)
|
|
p.errors = append(p.errors, msg)
|
|
return nil
|
|
}
|
|
|
|
func (p *Parser) parseUnexpectedToken() Expression {
|
|
p.errors = append(p.errors, fmt.Sprintf("line %d: unexpected token: %s",
|
|
p.curToken.Line, p.curToken.Value))
|
|
return nil
|
|
}
|