Mako/parser/parser.go
2025-06-09 12:50:03 -05:00

492 lines
11 KiB
Go

package parser
import (
"fmt"
"strconv"
"strings"
)
// ParseError describes a single problem found while parsing, together with
// the source position and the token it was reported against.
type ParseError struct {
	Message      string // human-readable description of the problem
	Line, Column int    // position taken from the token the error refers to
	Token        Token  // token the error was reported against
}
// Error implements the error interface. The result has the form
// "Parse error at line L, column C: MESSAGE (near 'LITERAL')".
func (pe ParseError) Error() string {
	const layout = "Parse error at line %d, column %d: %s (near '%s')"
	return fmt.Sprintf(layout, pe.Line, pe.Column, pe.Message, pe.Token.Literal)
}
// Parser is a recursive descent parser that handles expressions Pratt-style:
// each token type may have a prefix and/or an infix parse function attached.
type Parser struct {
	// lexer supplies tokens through NextToken.
	lexer *Lexer

	// curToken is the token being examined; peekToken is the one after it.
	curToken, peekToken Token

	// Parse functions keyed by the token type that triggers them.
	prefixParseFns map[TokenType]func() Expression
	infixParseFns  map[TokenType]func(Expression) Expression

	// errors accumulates every problem reported while parsing.
	errors []ParseError
}
// NewParser builds a Parser that reads from lexer. It installs the prefix and
// infix parse functions and then reads two tokens so that both curToken and
// peekToken are populated before parsing starts.
func NewParser(lexer *Lexer) *Parser {
	p := &Parser{lexer: lexer, errors: []ParseError{}}

	// Tokens that can begin an expression.
	prefixes := []struct {
		tok TokenType
		fn  func() Expression
	}{
		{IDENT, p.parseIdentifier},
		{NUMBER, p.parseNumberLiteral},
		{STRING, p.parseStringLiteral},
		{TRUE, p.parseBooleanLiteral},
		{FALSE, p.parseBooleanLiteral},
		{NIL, p.parseNilLiteral},
		{LPAREN, p.parseGroupedExpression},
		{LBRACE, p.parseTableLiteral},
	}
	p.prefixParseFns = make(map[TokenType]func() Expression)
	for _, entry := range prefixes {
		p.registerPrefix(entry.tok, entry.fn)
	}

	// Binary operators all share the same infix handler.
	p.infixParseFns = make(map[TokenType]func(Expression) Expression)
	for _, tok := range []TokenType{PLUS, MINUS, SLASH, STAR} {
		p.registerInfix(tok, p.parseInfixExpression)
	}

	// The first call fills peekToken, the second shifts it into curToken.
	p.nextToken()
	p.nextToken()
	return p
}
// registerPrefix associates parse with tt in the prefix table, replacing any
// function previously stored for that token type.
func (p *Parser) registerPrefix(tt TokenType, parse func() Expression) {
	p.prefixParseFns[tt] = parse
}
// registerInfix associates parse with tt in the infix table, replacing any
// function previously stored for that token type.
func (p *Parser) registerInfix(tt TokenType, parse func(Expression) Expression) {
	p.infixParseFns[tt] = parse
}
// nextToken shifts the token window forward by one: peekToken becomes
// curToken and a fresh token is pulled from the lexer into peekToken.
func (p *Parser) nextToken() {
	p.curToken, p.peekToken = p.peekToken, p.lexer.NextToken()
}
// ParseProgram consumes tokens until EOF and returns the resulting Program.
// Only non-nil results of parseStatement are appended to Statements, and the
// parser advances one token after every attempt. Statements is always a
// non-nil slice, even when nothing was parsed.
func (p *Parser) ParseProgram() *Program {
	program := &Program{}
	program.Statements = []Statement{}

	for ; !p.curTokenIs(EOF); p.nextToken() {
		if stmt := p.parseStatement(); stmt != nil {
			program.Statements = append(program.Statements, stmt)
		}
	}
	return program
}
// parseStatement parses the statement that starts at curToken.
//
// The only statement form handled here is an assignment (an identifier
// followed by '='). Every other token records an error and yields nil, except
// EOF, which yields nil without recording anything.
//
// The returned Statement is a true nil interface on failure, so callers can
// rely on a plain "stmt != nil" check.
func (p *Parser) parseStatement() Statement {
	switch p.curToken.Type {
	case IDENT:
		if !p.peekTokenIs(ASSIGN) {
			p.addError("unexpected identifier, expected assignment or declaration")
			return nil
		}
		// parseAssignStatement returns a *AssignStatement. Returning a nil
		// pointer directly would wrap it in a non-nil Statement interface,
		// and ParseProgram would then append the failed statement. Convert a
		// failed parse into an untyped nil explicitly.
		if stmt := p.parseAssignStatement(); stmt != nil {
			return stmt
		}
		return nil
	case ASSIGN:
		p.addError("assignment operator '=' without left-hand side identifier")
		return nil
	case ILLEGAL:
		p.addError(fmt.Sprintf("unexpected token '%s'", p.curToken.Literal))
		return nil
	case EOF:
		return nil
	default:
		p.addError(fmt.Sprintf("unexpected token '%s', expected statement", p.curToken.Literal))
		return nil
	}
}
// parseAssignStatement parses "name = expression" with curToken on the
// identifier. It returns nil after recording an error when the identifier,
// the '=' or the right-hand expression is missing. On success curToken is
// left wherever parseExpression stopped.
func (p *Parser) parseAssignStatement() *AssignStatement {
	if !p.curTokenIs(IDENT) {
		p.addError("expected identifier for assignment")
		return nil
	}
	target := &Identifier{Value: p.curToken.Literal}

	if !p.expectPeek(ASSIGN) {
		return nil
	}

	p.nextToken() // step from '=' onto the first token of the right-hand side
	value := p.parseExpression(LOWEST)
	if value == nil {
		p.addError("expected expression after assignment operator")
		return nil
	}

	stmt := &AssignStatement{}
	stmt.Name = target
	stmt.Value = value
	return stmt
}
// parseExpression parses an expression starting at curToken using Pratt
// parsing. The prefix function for curToken produces the initial operand;
// after that, infix functions are applied for as long as the upcoming token
// is not EOF and binds more tightly than precedence.
//
// It returns nil when no prefix function exists for curToken (an error is
// recorded in that case) or when any prefix/infix function returns nil.
func (p *Parser) parseExpression(precedence Precedence) Expression {
	parsePrefix := p.prefixParseFns[p.curToken.Type]
	if parsePrefix == nil {
		p.noPrefixParseFnError(p.curToken.Type)
		return nil
	}

	// The loop ends as soon as a sub-parser fails (left becomes nil), the
	// next token does not bind tightly enough, or it has no infix handler.
	left := parsePrefix()
	for left != nil {
		if p.peekTokenIs(EOF) || precedence >= p.peekPrecedence() {
			break
		}
		parseInfix := p.infixParseFns[p.peekToken.Type]
		if parseInfix == nil {
			break
		}
		p.nextToken()
		left = parseInfix(left)
	}
	return left
}
// Expression parsing functions
// parseIdentifier wraps the current token's literal in an Identifier node.
func (p *Parser) parseIdentifier() Expression {
	name := p.curToken.Literal
	return &Identifier{Value: name}
}
// parseNumberLiteral converts the current token's literal into a
// NumberLiteral holding a float64.
//
// The literal's prefix selects how it is read:
//   - "0x"/"0X": the remainder must consist solely of hexadecimal digits;
//   - "0b"/"0B": the remainder must consist solely of binary digits;
//   - otherwise the whole literal is handed to strconv.ParseFloat, which also
//     covers scientific notation.
//
// Prefixed literals are read as unsigned 64-bit integers, so every value up
// to 2^64-1 is accepted before being converted to float64. strconv.ParseUint
// with an explicit base rejects empty input, signs, underscores and
// out-of-alphabet digits, so no separate validation pass is needed.
//
// On failure an error is recorded and nil is returned.
func (p *Parser) parseNumberLiteral() Expression {
	literal := p.curToken.Literal

	// base stays 0 for plain decimal/float literals; kind is the wording used
	// in the error message.
	base, kind := 0, "number"
	switch {
	case strings.HasPrefix(literal, "0x"), strings.HasPrefix(literal, "0X"):
		base, kind = 16, "hexadecimal number"
	case strings.HasPrefix(literal, "0b"), strings.HasPrefix(literal, "0B"):
		base, kind = 2, "binary number"
	}

	var (
		value float64
		err   error
	)
	if base != 0 {
		var n uint64
		n, err = strconv.ParseUint(literal[2:], base, 64) // skip the two-byte prefix
		value = float64(n)
	} else {
		value, err = strconv.ParseFloat(literal, 64)
	}
	if err != nil {
		p.addError(fmt.Sprintf("could not parse '%s' as %s", literal, kind))
		return nil
	}

	return &NumberLiteral{Value: value}
}
// parseStringLiteral wraps the current token's literal in a StringLiteral
// node without further processing.
func (p *Parser) parseStringLiteral() Expression {
	text := p.curToken.Literal
	return &StringLiteral{Value: text}
}
// parseBooleanLiteral builds a BooleanLiteral whose value is true exactly
// when the current token is TRUE.
func (p *Parser) parseBooleanLiteral() Expression {
	isTrue := p.curToken.Type == TRUE
	return &BooleanLiteral{Value: isTrue}
}
// parseNilLiteral returns a fresh NilLiteral node; the current token carries
// no further information to record.
func (p *Parser) parseNilLiteral() Expression {
	return new(NilLiteral)
}
// parseGroupedExpression parses a parenthesised expression with curToken on
// the opening parenthesis. It returns the inner expression itself (no wrapper
// node), or nil when the inner expression fails to parse or the closing
// parenthesis is missing.
func (p *Parser) parseGroupedExpression() Expression {
	p.nextToken() // step past '('

	inner := p.parseExpression(LOWEST)
	// expectPeek is only consulted when the inner expression parsed, so a
	// failed inner parse does not produce an extra "expected )" error.
	if inner == nil || !p.expectPeek(RPAREN) {
		return nil
	}
	return inner
}
// parseTableLiteral parses a brace-delimited table with curToken on the
// opening brace. Elements are separated by commas and come in two forms:
//
//   - "name = expr": a keyed pair whose Key is an Identifier;
//   - "expr": an array-style element with no Key set.
//
// An empty table and a single trailing comma before the closing brace are
// both accepted. On success curToken is left on the closing brace. On
// failure an error is recorded and nil is returned.
func (p *Parser) parseTableLiteral() Expression {
	table := &TableLiteral{}
	table.Pairs = []TablePair{}

	// Empty table: "{}".
	if p.peekTokenIs(RBRACE) {
		p.nextToken()
		return table
	}

	p.nextToken() // move onto the first element
	for {
		if p.curTokenIs(EOF) {
			p.addError("unexpected end of input, expected }")
			return nil
		}

		pair := TablePair{}
		if p.curTokenIs(IDENT) && p.peekTokenIs(ASSIGN) {
			// Keyed pair: remember the key, then skip over '='.
			pair.Key = &Identifier{Value: p.curToken.Literal}
			p.nextToken() // onto '='
			p.nextToken() // onto the first token of the value
			if p.curTokenIs(EOF) {
				p.addError("expected expression after assignment operator")
				return nil
			}
		}
		pair.Value = p.parseExpression(LOWEST)
		if pair.Value == nil {
			return nil
		}
		table.Pairs = append(table.Pairs, pair)

		if !p.peekTokenIs(COMMA) {
			break
		}
		p.nextToken() // onto ','

		// Trailing comma: stop while '}' is still the peek token so that the
		// expectPeek below consumes it. Stepping onto '}' first would make
		// expectPeek look for a second closing brace.
		if p.peekTokenIs(RBRACE) {
			break
		}

		p.nextToken() // onto the next element
		if p.curTokenIs(EOF) {
			p.addError("expected next token to be }")
			return nil
		}
	}

	if !p.expectPeek(RBRACE) {
		return nil
	}
	return table
}
// parseInfixExpression parses the right operand of a binary operator with
// curToken on the operator, and combines it with left into an
// InfixExpression. The right operand is parsed at the operator's own
// precedence. If it cannot be parsed, an error is recorded and nil is
// returned.
func (p *Parser) parseInfixExpression(left Expression) Expression {
	// Capture operator details before advancing past the operator token.
	operator := p.curToken.Literal
	precedence := p.curPrecedence()

	p.nextToken() // onto the first token of the right operand
	right := p.parseExpression(precedence)
	if right == nil {
		p.addError(fmt.Sprintf("expected expression after operator '%s'", operator))
		return nil
	}

	return &InfixExpression{Left: left, Operator: operator, Right: right}
}
// Helper methods
// curTokenIs reports whether the current token has type t.
func (p *Parser) curTokenIs(t TokenType) bool {
	return t == p.curToken.Type
}
// peekTokenIs reports whether the upcoming token has type t.
func (p *Parser) peekTokenIs(t TokenType) bool {
	return t == p.peekToken.Type
}
// expectPeek advances onto the upcoming token and returns true when that
// token has type t. Otherwise it leaves the parser where it is, records an
// "expected next token" error and returns false.
func (p *Parser) expectPeek(t TokenType) bool {
	if !p.peekTokenIs(t) {
		p.peekError(t)
		return false
	}
	p.nextToken()
	return true
}
// Error handling methods
// addError records message as a ParseError positioned at, and associated
// with, the current token.
func (p *Parser) addError(message string) {
	tok := p.curToken
	p.errors = append(p.errors, ParseError{
		Message: message,
		Line:    tok.Line,
		Column:  tok.Column,
		Token:   tok,
	})
}
// peekError records that the upcoming token was expected to have type t but
// did not. The error is positioned at, and associated with, the peek token
// rather than the current one.
func (p *Parser) peekError(t TokenType) {
	tok := p.peekToken
	want, got := tokenTypeString(t), tokenTypeString(tok.Type)
	p.errors = append(p.errors, ParseError{
		Message: fmt.Sprintf("expected next token to be %s, got %s instead", want, got),
		Line:    tok.Line,
		Column:  tok.Column,
		Token:   tok,
	})
}
// noPrefixParseFnError records an error for a token of type t that appeared
// where an expression was expected but has no prefix parse function. Common
// cases get a tailored message; everything else falls back to a generic
// "unexpected token" message.
func (p *Parser) noPrefixParseFnError(t TokenType) {
	switch t {
	case ASSIGN:
		p.addError("unexpected assignment operator, missing left-hand side identifier")
	case PLUS, MINUS, STAR, SLASH:
		p.addError(fmt.Sprintf("unexpected operator '%s', missing left operand", tokenTypeString(t)))
	case RPAREN:
		p.addError("unexpected closing parenthesis")
	case RBRACE:
		p.addError("unexpected closing brace")
	case EOF:
		p.addError("unexpected end of input")
	default:
		p.addError(fmt.Sprintf("unexpected token '%s'", tokenTypeString(t)))
	}
}
// peekPrecedence returns the precedence registered for the upcoming token's
// type in precedences, or LOWEST when it has no entry.
func (p *Parser) peekPrecedence() Precedence {
	prec, found := precedences[p.peekToken.Type]
	if !found {
		return LOWEST
	}
	return prec
}
// curPrecedence returns the precedence registered for the current token's
// type in precedences, or LOWEST when it has no entry.
func (p *Parser) curPrecedence() Precedence {
	prec, found := precedences[p.curToken.Type]
	if !found {
		return LOWEST
	}
	return prec
}
// Errors returns the parse errors recorded so far, in the order they were
// reported. The returned slice is the parser's own, not a copy.
func (p *Parser) Errors() []ParseError {
	return p.errors
}
// HasErrors reports whether at least one parse error has been recorded.
func (p *Parser) HasErrors() bool {
	return len(p.errors) != 0
}
// ErrorStrings returns the formatted text of every recorded error, in order.
// It exists for backward compatibility with callers that expect plain
// strings. The result is always non-nil, even when there are no errors.
func (p *Parser) ErrorStrings() []string {
	messages := make([]string, 0, len(p.errors))
	for _, parseErr := range p.errors {
		messages = append(messages, parseErr.Error())
	}
	return messages
}
// tokenTypeString maps a token type to the text used for it in error
// messages: a descriptive word for literals and keywords, the symbol itself
// for punctuation and operators, and "unknown" for anything not listed.
func tokenTypeString(t TokenType) string {
	name := "unknown"
	switch t {
	// Literals and names.
	case IDENT:
		name = "identifier"
	case NUMBER:
		name = "number"
	case STRING:
		name = "string"
	case TRUE, FALSE:
		name = "boolean"
	case NIL:
		name = "nil"

	// Operators.
	case ASSIGN:
		name = "="
	case PLUS:
		name = "+"
	case MINUS:
		name = "-"
	case STAR:
		name = "*"
	case SLASH:
		name = "/"

	// Delimiters.
	case LPAREN:
		name = "("
	case RPAREN:
		name = ")"
	case LBRACE:
		name = "{"
	case RBRACE:
		name = "}"
	case COMMA:
		name = ","

	// Keywords and special tokens.
	case VAR:
		name = "var"
	case EOF:
		name = "end of file"
	case ILLEGAL:
		name = "illegal token"
	}
	return name
}