// Package parser builds Program values from the tokens supplied by a Lexer.
package parser

import (
	"fmt"
	"strconv"
	"strings"
)

// ParseError represents a parsing error with location information.
type ParseError struct {
	Message string
	Line    int
	Column  int
	Token   Token
}

// Error renders the error with its line, column, message and the literal of
// the token it was recorded against.
func (pe ParseError) Error() string {
	return fmt.Sprintf("Parse error at line %d, column %d: %s (near '%s')",
		pe.Line, pe.Column, pe.Message, pe.Token.Literal)
}

// Parser implements a recursive descent Pratt parser.
type Parser struct {
	lexer *Lexer

	// curToken is the token being examined; peekToken is the one after it.
	curToken  Token
	peekToken Token

	// Parse functions keyed by the token type that triggers them.
	prefixParseFns map[TokenType]func() Expression
	infixParseFns  map[TokenType]func(Expression) Expression

	// errors accumulates every problem found; parsing continues after an error.
	errors []ParseError
}

// NewParser creates a new parser instance reading from lexer. It registers the
// prefix and infix parse functions and reads two tokens so that both curToken
// and peekToken are populated.
func NewParser(lexer *Lexer) *Parser {
	p := &Parser{
		lexer:  lexer,
		errors: []ParseError{},
	}

	p.prefixParseFns = make(map[TokenType]func() Expression)
	p.registerPrefix(IDENT, p.parseIdentifier)
	p.registerPrefix(NUMBER, p.parseNumberLiteral)
	p.registerPrefix(STRING, p.parseStringLiteral)
	p.registerPrefix(TRUE, p.parseBooleanLiteral)
	p.registerPrefix(FALSE, p.parseBooleanLiteral)
	p.registerPrefix(NIL, p.parseNilLiteral)
	p.registerPrefix(LPAREN, p.parseGroupedExpression)
	p.registerPrefix(LBRACE, p.parseTableLiteral)

	p.infixParseFns = make(map[TokenType]func(Expression) Expression)
	p.registerInfix(PLUS, p.parseInfixExpression)
	p.registerInfix(MINUS, p.parseInfixExpression)
	p.registerInfix(SLASH, p.parseInfixExpression)
	p.registerInfix(STAR, p.parseInfixExpression)

	// Read two tokens, so curToken and peekToken are both set.
	p.nextToken()
	p.nextToken()

	return p
}

// registerPrefix registers a prefix parse function for tokenType.
func (p *Parser) registerPrefix(tokenType TokenType, fn func() Expression) {
	p.prefixParseFns[tokenType] = fn
}

// registerInfix registers an infix parse function for tokenType.
func (p *Parser) registerInfix(tokenType TokenType, fn func(Expression) Expression) {
	p.infixParseFns[tokenType] = fn
}

// nextToken advances to the next token: peekToken becomes curToken and a new
// peekToken is read from the lexer.
func (p *Parser) nextToken() {
	p.curToken = p.peekToken
	p.peekToken = p.lexer.NextToken()
}

// ParseProgram parses the entire program. Statements that fail to parse are
// skipped (their errors are recorded) and parsing continues with the next token.
func (p *Parser) ParseProgram() *Program {
	program := &Program{}
	program.Statements = []Statement{}

	for !p.curTokenIs(EOF) {
		stmt := p.parseStatement()
		if stmt != nil {
			program.Statements = append(program.Statements, stmt)
		}
		p.nextToken()
	}

	return program
}

// parseStatement parses the statement starting at curToken. It returns nil
// when no statement could be parsed; in every such case except EOF an error
// has been recorded.
func (p *Parser) parseStatement() Statement {
	switch p.curToken.Type {
	case IDENT:
		if p.peekTokenIs(ASSIGN) {
			return p.parseAssignStatement()
		}
		p.addError("unexpected identifier, expected assignment or declaration")
		return nil
	case IF:
		return p.parseIfStatement()
	case ECHO:
		return p.parseEchoStatement()
	case ASSIGN:
		p.addError("assignment operator '=' without left-hand side identifier")
		return nil
	case ILLEGAL:
		p.addError(fmt.Sprintf("unexpected token '%s'", p.curToken.Literal))
		return nil
	case EOF:
		return nil
	default:
		p.addError(fmt.Sprintf("unexpected token '%s', expected statement", p.curToken.Literal))
		return nil
	}
}

// parseAssignStatement parses a variable assignment of the form
// "<identifier> = <expression>". curToken must be the identifier.
func (p *Parser) parseAssignStatement() *AssignStatement {
	stmt := &AssignStatement{}

	if !p.curTokenIs(IDENT) {
		p.addError("expected identifier for assignment")
		return nil
	}
	stmt.Name = &Identifier{Value: p.curToken.Literal}

	if !p.expectPeek(ASSIGN) {
		return nil
	}

	p.nextToken() // move past '='

	stmt.Value = p.ParseExpression(LOWEST)
	if stmt.Value == nil {
		p.addError("expected expression after assignment operator")
		return nil
	}

	return stmt
}

// parseEchoStatement parses an echo statement: "echo <expression>".
func (p *Parser) parseEchoStatement() *EchoStatement {
	stmt := &EchoStatement{}

	p.nextToken() // move past 'echo'

	stmt.Value = p.ParseExpression(LOWEST)
	if stmt.Value == nil {
		p.addError("expected expression after 'echo'")
		return nil
	}

	return stmt
}

// parseIfStatement parses if/elseif/else/end statements. The 'then' keyword
// after each condition is optional. On success curToken is left on 'end'.
func (p *Parser) parseIfStatement() *IfStatement {
	stmt := &IfStatement{}

	p.nextToken() // move past 'if'

	stmt.Condition = p.ParseExpression(LOWEST)
	if stmt.Condition == nil {
		p.addError("expected condition after 'if'")
		return nil
	}

	// Optional 'then' keyword.
	if p.peekTokenIs(THEN) {
		p.nextToken()
	}
	p.nextToken() // move past condition (and optional 'then')

	// Check if we immediately hit END (missing body).
	// NOTE(review): the message below talks about a missing 'end' although the
	// token found *is* 'end'; kept unchanged because callers may match on it —
	// confirm whether it should read as a missing-body error instead.
	if p.curTokenIs(END) {
		p.addError("expected 'end' to close if statement")
		return nil
	}

	// Parse if body.
	stmt.Body = p.parseBlockStatements(ELSEIF, ELSE, END)

	// Parse elseif clauses.
	for p.curTokenIs(ELSEIF) {
		elseif := ElseIfClause{}

		p.nextToken() // move past 'elseif'

		elseif.Condition = p.ParseExpression(LOWEST)
		if elseif.Condition == nil {
			p.addError("expected condition after 'elseif'")
			return nil
		}

		// Optional 'then' keyword.
		if p.peekTokenIs(THEN) {
			p.nextToken()
		}
		p.nextToken() // move past condition (and optional 'then')

		elseif.Body = p.parseBlockStatements(ELSEIF, ELSE, END)
		stmt.ElseIfs = append(stmt.ElseIfs, elseif)
	}

	// Parse else clause.
	if p.curTokenIs(ELSE) {
		p.nextToken() // move past 'else'
		stmt.Else = p.parseBlockStatements(END)
	}

	if !p.curTokenIs(END) {
		p.addError("expected 'end' to close if statement")
		return nil
	}

	return stmt
}

// parseBlockStatements parses statements until curToken is one of the
// terminator tokens or EOF. The terminator itself is not consumed.
func (p *Parser) parseBlockStatements(terminators ...TokenType) []Statement {
	statements := []Statement{}

	for !p.curTokenIs(EOF) && !p.isTerminator(terminators...) {
		stmt := p.parseStatement()
		if stmt != nil {
			statements = append(statements, stmt)
		}
		p.nextToken()
	}

	return statements
}

// isTerminator reports whether the current token is one of the terminators.
func (p *Parser) isTerminator(terminators ...TokenType) bool {
	for _, terminator := range terminators {
		if p.curTokenIs(terminator) {
			return true
		}
	}
	return false
}

// ParseExpression parses expressions using Pratt parsing. It parses a prefix
// expression at curToken and then keeps folding in infix operators while the
// upcoming operator binds tighter than precedence. It returns nil if any part
// fails to parse; an error has been recorded in that case.
func (p *Parser) ParseExpression(precedence Precedence) Expression {
	prefix := p.prefixParseFns[p.curToken.Type]
	if prefix == nil {
		p.noPrefixParseFnError(p.curToken.Type)
		return nil
	}

	leftExp := prefix()
	if leftExp == nil {
		return nil
	}

	for !p.peekTokenIs(EOF) && precedence < p.peekPrecedence() {
		infix := p.infixParseFns[p.peekToken.Type]
		if infix == nil {
			return leftExp
		}

		p.nextToken()

		leftExp = infix(leftExp)
		if leftExp == nil {
			return nil
		}
	}

	return leftExp
}

// Expression parsing functions

// parseIdentifier returns an Identifier for the current token's literal.
func (p *Parser) parseIdentifier() Expression {
	return &Identifier{Value: p.curToken.Literal}
}

// parseNumberLiteral converts the current token's literal into a
// NumberLiteral. Literals prefixed with 0x/0X are read as hexadecimal
// integers, 0b/0B as binary integers, and everything else is handed to
// strconv.ParseFloat. On failure an error is recorded and nil is returned.
func (p *Parser) parseNumberLiteral() Expression {
	literal := p.curToken.Literal

	var (
		value float64
		ok    bool
	)

	switch {
	case strings.HasPrefix(literal, "0x") || strings.HasPrefix(literal, "0X"):
		if value, ok = parseRadixDigits(literal[2:], "0123456789abcdefABCDEF", 16); !ok {
			p.addError(fmt.Sprintf("could not parse '%s' as hexadecimal number", literal))
			return nil
		}
	case strings.HasPrefix(literal, "0b") || strings.HasPrefix(literal, "0B"):
		if value, ok = parseRadixDigits(literal[2:], "01", 2); !ok {
			p.addError(fmt.Sprintf("could not parse '%s' as binary number", literal))
			return nil
		}
	default:
		// Parse as regular decimal (ParseFloat also accepts exponent notation).
		parsed, err := strconv.ParseFloat(literal, 64)
		if err != nil {
			p.addError(fmt.Sprintf("could not parse '%s' as number", literal))
			return nil
		}
		value = parsed
	}

	lit := &NumberLiteral{}
	lit.Value = value
	return lit
}

// parseRadixDigits converts digits (a literal with its 0x/0b prefix already
// removed) to a float64 using the given base. It reports false when digits is
// empty, contains a character outside alphabet, or does not fit in an int64.
// The explicit alphabet check rejects signs, which strconv.ParseInt would
// otherwise accept.
func parseRadixDigits(digits, alphabet string, base int) (float64, bool) {
	if digits == "" {
		return 0, false
	}
	for _, ch := range digits {
		if !strings.ContainsRune(alphabet, ch) {
			return 0, false
		}
	}

	n, err := strconv.ParseInt(digits, base, 64)
	if err != nil {
		return 0, false
	}
	return float64(n), true
}

// parseStringLiteral returns a StringLiteral for the current token's literal.
func (p *Parser) parseStringLiteral() Expression {
	return &StringLiteral{Value: p.curToken.Literal}
}

// parseBooleanLiteral returns a BooleanLiteral that is true when the current
// token is TRUE and false otherwise.
func (p *Parser) parseBooleanLiteral() Expression {
	return &BooleanLiteral{Value: p.curTokenIs(TRUE)}
}

// parseNilLiteral returns a NilLiteral.
func (p *Parser) parseNilLiteral() Expression {
	return &NilLiteral{}
}

// parseGroupedExpression parses "( <expression> )" and returns the inner
// expression. On success curToken is left on the closing parenthesis.
func (p *Parser) parseGroupedExpression() Expression {
	p.nextToken() // move past '('

	exp := p.ParseExpression(LOWEST)
	if exp == nil {
		return nil
	}

	if !p.expectPeek(RPAREN) {
		return nil
	}

	return exp
}

// parseTableLiteral parses a brace-delimited table. Elements are separated by
// commas and are either "key = value" pairs (key is an identifier or string)
// or bare values. A trailing comma before the closing brace is allowed. On
// success curToken is left on the closing brace.
func (p *Parser) parseTableLiteral() Expression {
	table := &TableLiteral{}
	table.Pairs = []TablePair{}

	// Empty table.
	if p.peekTokenIs(RBRACE) {
		p.nextToken()
		return table
	}

	p.nextToken() // move to the first element

	for {
		if p.curTokenIs(EOF) {
			p.addError("unexpected end of input, expected }")
			return nil
		}

		pair := TablePair{}

		// A key=value pair starts with an identifier or string followed by '='.
		if (p.curTokenIs(IDENT) || p.curTokenIs(STRING)) && p.peekTokenIs(ASSIGN) {
			if p.curTokenIs(IDENT) {
				pair.Key = &Identifier{Value: p.curToken.Literal}
			} else {
				pair.Key = &StringLiteral{Value: p.curToken.Literal}
			}

			p.nextToken() // move to =
			p.nextToken() // move past =

			if p.curTokenIs(EOF) {
				p.addError("expected expression after assignment operator")
				return nil
			}
		}

		// Both the keyed and the array-style form parse their value here.
		pair.Value = p.ParseExpression(LOWEST)
		if pair.Value == nil {
			return nil
		}

		table.Pairs = append(table.Pairs, pair)

		if !p.peekTokenIs(COMMA) {
			break
		}

		p.nextToken() // consume comma
		p.nextToken() // move to next element

		// Trailing comma: curToken is already the closing brace, so the table
		// is complete. Returning here (rather than falling through to
		// expectPeek) avoids demanding a second '}' after the real one.
		if p.curTokenIs(RBRACE) {
			return table
		}

		if p.curTokenIs(EOF) {
			p.addError("expected next token to be }")
			return nil
		}
	}

	if !p.expectPeek(RBRACE) {
		return nil
	}

	return table
}

// parseInfixExpression parses the right-hand side of a binary operator.
// curToken must be the operator and left the already-parsed left operand.
func (p *Parser) parseInfixExpression(left Expression) Expression {
	expression := &InfixExpression{
		Left:     left,
		Operator: p.curToken.Literal,
	}

	precedence := p.curPrecedence()
	p.nextToken() // move past the operator

	expression.Right = p.ParseExpression(precedence)
	if expression.Right == nil {
		p.addError(fmt.Sprintf("expected expression after operator '%s'", expression.Operator))
		return nil
	}

	return expression
}

// Helper methods

// curTokenIs reports whether the current token has type t.
func (p *Parser) curTokenIs(t TokenType) bool {
	return p.curToken.Type == t
}

// peekTokenIs reports whether the next token has type t.
func (p *Parser) peekTokenIs(t TokenType) bool {
	return p.peekToken.Type == t
}

// expectPeek advances to the next token and returns true if it has type t;
// otherwise it records an error, leaves the position unchanged and returns false.
func (p *Parser) expectPeek(t TokenType) bool {
	if p.peekTokenIs(t) {
		p.nextToken()
		return true
	}
	p.peekError(t)
	return false
}

// Error handling methods

// addError records message as a ParseError located at the current token.
func (p *Parser) addError(message string) {
	p.errors = append(p.errors, ParseError{
		Message: message,
		Line:    p.curToken.Line,
		Column:  p.curToken.Column,
		Token:   p.curToken,
	})
}

// peekError records that the next token was expected to have type t. The
// error is located at the peek token rather than the current one.
func (p *Parser) peekError(t TokenType) {
	message := fmt.Sprintf("expected next token to be %s, got %s instead",
		tokenTypeString(t), tokenTypeString(p.peekToken.Type))
	p.errors = append(p.errors, ParseError{
		Message: message,
		Line:    p.peekToken.Line,
		Column:  p.peekToken.Column,
		Token:   p.peekToken,
	})
}

// noPrefixParseFnError records an error for a token of type t that cannot
// start an expression, with a tailored message for common cases.
func (p *Parser) noPrefixParseFnError(t TokenType) {
	var message string
	switch t {
	case ASSIGN:
		message = "unexpected assignment operator, missing left-hand side identifier"
	case PLUS, MINUS, STAR, SLASH:
		message = fmt.Sprintf("unexpected operator '%s', missing left operand", tokenTypeString(t))
	case RPAREN:
		message = "unexpected closing parenthesis"
	case RBRACE:
		message = "unexpected closing brace"
	case EOF:
		message = "unexpected end of input"
	default:
		message = fmt.Sprintf("unexpected token '%s'", tokenTypeString(t))
	}
	p.addError(message)
}

// peekPrecedence returns the precedence registered for the next token's type,
// or LOWEST if it has none.
func (p *Parser) peekPrecedence() Precedence {
	if prec, ok := precedences[p.peekToken.Type]; ok {
		return prec
	}
	return LOWEST
}

// curPrecedence returns the precedence registered for the current token's
// type, or LOWEST if it has none.
func (p *Parser) curPrecedence() Precedence {
	if prec, ok := precedences[p.curToken.Type]; ok {
		return prec
	}
	return LOWEST
}

// Errors returns all parsing errors recorded so far.
func (p *Parser) Errors() []ParseError {
	return p.errors
}

// HasErrors returns true if there are any parsing errors.
func (p *Parser) HasErrors() bool {
	return len(p.errors) > 0
}

// ErrorStrings returns error messages as strings for backward compatibility.
func (p *Parser) ErrorStrings() []string {
	result := make([]string, len(p.errors))
	for i, err := range p.errors {
		result[i] = err.Error()
	}
	return result
}

// tokenTypeString returns a human-readable string for token types, or
// "unknown" for a type it does not list.
func tokenTypeString(t TokenType) string {
	switch t {
	case IDENT:
		return "identifier"
	case NUMBER:
		return "number"
	case STRING:
		return "string"
	case TRUE, FALSE:
		return "boolean"
	case NIL:
		return "nil"
	case ASSIGN:
		return "="
	case PLUS:
		return "+"
	case MINUS:
		return "-"
	case STAR:
		return "*"
	case SLASH:
		return "/"
	case LPAREN:
		return "("
	case RPAREN:
		return ")"
	case LBRACE:
		return "{"
	case RBRACE:
		return "}"
	case COMMA:
		return ","
	case VAR:
		return "var"
	case IF:
		return "if"
	case THEN:
		return "then"
	case ELSEIF:
		return "elseif"
	case ELSE:
		return "else"
	case END:
		return "end"
	case ECHO:
		return "echo"
	case EOF:
		return "end of file"
	case ILLEGAL:
		return "illegal token"
	default:
		return "unknown"
	}
}