package parser import ( "fmt" "strconv" "strings" ) // ParseError represents a parsing error with location information type ParseError struct { Message string Line int Column int Token Token } func (pe ParseError) Error() string { return fmt.Sprintf("Parse error at line %d, column %d: %s (near '%s')", pe.Line, pe.Column, pe.Message, pe.Token.Literal) } // Parser implements a recursive descent Pratt parser type Parser struct { lexer *Lexer curToken Token peekToken Token prefixParseFns map[TokenType]func() Expression infixParseFns map[TokenType]func(Expression) Expression errors []ParseError } // NewParser creates a new parser instance func NewParser(lexer *Lexer) *Parser { p := &Parser{ lexer: lexer, errors: []ParseError{}, } p.prefixParseFns = make(map[TokenType]func() Expression) p.registerPrefix(IDENT, p.parseIdentifier) p.registerPrefix(NUMBER, p.parseNumberLiteral) p.registerPrefix(STRING, p.parseStringLiteral) p.registerPrefix(TRUE, p.parseBooleanLiteral) p.registerPrefix(FALSE, p.parseBooleanLiteral) p.registerPrefix(NIL, p.parseNilLiteral) p.registerPrefix(LPAREN, p.parseGroupedExpression) p.registerPrefix(LBRACE, p.parseTableLiteral) p.registerPrefix(MINUS, p.parsePrefixExpression) p.infixParseFns = make(map[TokenType]func(Expression) Expression) p.registerInfix(PLUS, p.parseInfixExpression) p.registerInfix(MINUS, p.parseInfixExpression) p.registerInfix(SLASH, p.parseInfixExpression) p.registerInfix(STAR, p.parseInfixExpression) p.registerInfix(EQ, p.parseInfixExpression) p.registerInfix(NOT_EQ, p.parseInfixExpression) p.registerInfix(LT, p.parseInfixExpression) p.registerInfix(GT, p.parseInfixExpression) p.registerInfix(LT_EQ, p.parseInfixExpression) p.registerInfix(GT_EQ, p.parseInfixExpression) p.registerInfix(DOT, p.parseDotExpression) p.registerInfix(LBRACKET, p.parseIndexExpression) // Read two tokens, so curToken and peekToken are both set p.nextToken() p.nextToken() return p } // registerPrefix registers a prefix parse function func (p *Parser) registerPrefix(tokenType TokenType, fn func() Expression) { p.prefixParseFns[tokenType] = fn } // registerInfix registers an infix parse function func (p *Parser) registerInfix(tokenType TokenType, fn func(Expression) Expression) { p.infixParseFns[tokenType] = fn } // nextToken advances to the next token func (p *Parser) nextToken() { p.curToken = p.peekToken p.peekToken = p.lexer.NextToken() } // ParseProgram parses the entire program func (p *Parser) ParseProgram() *Program { program := &Program{} program.Statements = []Statement{} for !p.curTokenIs(EOF) { stmt := p.parseStatement() if stmt != nil { program.Statements = append(program.Statements, stmt) } p.nextToken() } return program } // parseStatement parses a statement func (p *Parser) parseStatement() Statement { switch p.curToken.Type { case IDENT: // Try to parse as assignment (handles both simple and member access) return p.parseAssignStatement() case IF: return p.parseIfStatement() case FOR: return p.parseForStatement() case WHILE: return p.parseWhileStatement() case ECHO: return p.parseEchoStatement() case BREAK: return p.parseBreakStatement() case EXIT: return p.parseExitStatement() case ASSIGN: p.addError("assignment operator '=' without left-hand side identifier") return nil case ILLEGAL: p.addError(fmt.Sprintf("unexpected token '%s'", p.curToken.Literal)) return nil case EOF: return nil default: p.addError(fmt.Sprintf("unexpected token '%s', expected statement", p.curToken.Literal)) return nil } } // parseAssignStatement parses variable assignment func (p *Parser) parseAssignStatement() *AssignStatement { stmt := &AssignStatement{} // Parse left-hand side expression (can be identifier or member access) stmt.Name = p.ParseExpression(LOWEST) if stmt.Name == nil { p.addError("expected expression for assignment left-hand side") return nil } // Check if next token is assignment operator if !p.peekTokenIs(ASSIGN) { p.addError("unexpected identifier, expected assignment or declaration") return nil } // Validate assignment target switch stmt.Name.(type) { case *Identifier, *DotExpression, *IndexExpression: // Valid assignment targets default: p.addError("invalid assignment target") return nil } if !p.expectPeek(ASSIGN) { return nil } p.nextToken() stmt.Value = p.ParseExpression(LOWEST) if stmt.Value == nil { p.addError("expected expression after assignment operator") return nil } return stmt } // parseEchoStatement parses echo statements func (p *Parser) parseEchoStatement() *EchoStatement { stmt := &EchoStatement{} p.nextToken() // move past 'echo' stmt.Value = p.ParseExpression(LOWEST) if stmt.Value == nil { p.addError("expected expression after 'echo'") return nil } return stmt } // parseBreakStatement parses break statements func (p *Parser) parseBreakStatement() *BreakStatement { return &BreakStatement{} } // parseExitStatement parses exit statements func (p *Parser) parseExitStatement() *ExitStatement { stmt := &ExitStatement{} // Check if there's an optional expression after 'exit' // Only parse expression if next token can start an expression if p.canStartExpression(p.peekToken.Type) { p.nextToken() // move past 'exit' stmt.Value = p.ParseExpression(LOWEST) if stmt.Value == nil { p.addError("expected expression after 'exit'") return nil } } return stmt } // canStartExpression checks if a token type can start an expression func (p *Parser) canStartExpression(tokenType TokenType) bool { switch tokenType { case IDENT, NUMBER, STRING, TRUE, FALSE, NIL, LPAREN, LBRACE, MINUS: return true default: return false } } // parseWhileStatement parses while loops: while condition do ... end func (p *Parser) parseWhileStatement() *WhileStatement { stmt := &WhileStatement{} p.nextToken() // move past 'while' stmt.Condition = p.ParseExpression(LOWEST) if stmt.Condition == nil { p.addError("expected condition after 'while'") return nil } if !p.expectPeek(DO) { p.addError("expected 'do' after while condition") return nil } p.nextToken() // move past 'do' // Parse loop body stmt.Body = p.parseBlockStatements(END) if !p.curTokenIs(END) { p.addError("expected 'end' to close while loop") return nil } return stmt } // parseForStatement parses for loops (both numeric and for-in) func (p *Parser) parseForStatement() Statement { p.nextToken() // move past 'for' if !p.curTokenIs(IDENT) { p.addError("expected identifier after 'for'") return nil } firstVar := &Identifier{Value: p.curToken.Literal} // Look ahead to determine which type of for loop if p.peekTokenIs(ASSIGN) { // Numeric for loop: for i = start, end, step do return p.parseNumericForStatement(firstVar) } else if p.peekTokenIs(COMMA) || p.peekTokenIs(IN) { // For-in loop: for k, v in expr do or for v in expr do return p.parseForInStatement(firstVar) } else { p.addError("expected '=', ',' or 'in' after for loop variable") return nil } } // parseNumericForStatement parses numeric for loops: for i = start, end, step do func (p *Parser) parseNumericForStatement(variable *Identifier) *ForStatement { stmt := &ForStatement{Variable: variable} if !p.expectPeek(ASSIGN) { return nil } p.nextToken() // move past '=' // Parse start expression stmt.Start = p.ParseExpression(LOWEST) if stmt.Start == nil { p.addError("expected start expression in for loop") return nil } if !p.expectPeek(COMMA) { p.addError("expected ',' after start expression in for loop") return nil } p.nextToken() // move past ',' // Parse end expression stmt.End = p.ParseExpression(LOWEST) if stmt.End == nil { p.addError("expected end expression in for loop") return nil } // Optional step expression if p.peekTokenIs(COMMA) { p.nextToken() // move to ',' p.nextToken() // move past ',' stmt.Step = p.ParseExpression(LOWEST) if stmt.Step == nil { p.addError("expected step expression in for loop") return nil } } if !p.expectPeek(DO) { p.addError("expected 'do' after for loop header") return nil } p.nextToken() // move past 'do' // Parse loop body stmt.Body = p.parseBlockStatements(END) if !p.curTokenIs(END) { p.addError("expected 'end' to close for loop") return nil } return stmt } // parseForInStatement parses for-in loops: for k, v in expr do or for v in expr do func (p *Parser) parseForInStatement(firstVar *Identifier) *ForInStatement { stmt := &ForInStatement{} if p.peekTokenIs(COMMA) { // Two variables: for k, v in expr do stmt.Key = firstVar p.nextToken() // move to ',' p.nextToken() // move past ',' if !p.curTokenIs(IDENT) { p.addError("expected identifier after ',' in for loop") return nil } stmt.Value = &Identifier{Value: p.curToken.Literal} } else { // Single variable: for v in expr do stmt.Value = firstVar } if !p.expectPeek(IN) { p.addError("expected 'in' in for loop") return nil } p.nextToken() // move past 'in' // Parse iterable expression stmt.Iterable = p.ParseExpression(LOWEST) if stmt.Iterable == nil { p.addError("expected expression after 'in' in for loop") return nil } if !p.expectPeek(DO) { p.addError("expected 'do' after for loop header") return nil } p.nextToken() // move past 'do' // Parse loop body stmt.Body = p.parseBlockStatements(END) if !p.curTokenIs(END) { p.addError("expected 'end' to close for loop") return nil } return stmt } // parseIfStatement parses if/elseif/else/end statements func (p *Parser) parseIfStatement() *IfStatement { stmt := &IfStatement{} p.nextToken() // move past 'if' stmt.Condition = p.ParseExpression(LOWEST) if stmt.Condition == nil { p.addError("expected condition after 'if'") return nil } // Optional 'then' keyword if p.peekTokenIs(THEN) { p.nextToken() } p.nextToken() // move past condition (and optional 'then') // Check if we immediately hit END (missing body) if p.curTokenIs(END) { p.addError("expected 'end' to close if statement") return nil } // Parse if body stmt.Body = p.parseBlockStatements(ELSEIF, ELSE, END) // Parse elseif clauses for p.curTokenIs(ELSEIF) { elseif := ElseIfClause{} p.nextToken() // move past 'elseif' elseif.Condition = p.ParseExpression(LOWEST) if elseif.Condition == nil { p.addError("expected condition after 'elseif'") return nil } // Optional 'then' keyword if p.peekTokenIs(THEN) { p.nextToken() } p.nextToken() // move past condition (and optional 'then') elseif.Body = p.parseBlockStatements(ELSEIF, ELSE, END) stmt.ElseIfs = append(stmt.ElseIfs, elseif) } // Parse else clause if p.curTokenIs(ELSE) { p.nextToken() // move past 'else' stmt.Else = p.parseBlockStatements(END) } if !p.curTokenIs(END) { p.addError("expected 'end' to close if statement") return nil } return stmt } // parseBlockStatements parses statements until one of the terminator tokens func (p *Parser) parseBlockStatements(terminators ...TokenType) []Statement { statements := []Statement{} for !p.curTokenIs(EOF) && !p.isTerminator(terminators...) { stmt := p.parseStatement() if stmt != nil { statements = append(statements, stmt) } p.nextToken() } return statements } // isTerminator checks if current token is one of the terminators func (p *Parser) isTerminator(terminators ...TokenType) bool { for _, terminator := range terminators { if p.curTokenIs(terminator) { return true } } return false } // ParseExpression parses expressions using Pratt parsing func (p *Parser) ParseExpression(precedence Precedence) Expression { prefix := p.prefixParseFns[p.curToken.Type] if prefix == nil { p.noPrefixParseFnError(p.curToken.Type) return nil } leftExp := prefix() if leftExp == nil { return nil } for !p.peekTokenIs(EOF) && precedence < p.peekPrecedence() { infix := p.infixParseFns[p.peekToken.Type] if infix == nil { return leftExp } p.nextToken() leftExp = infix(leftExp) if leftExp == nil { return nil } } return leftExp } // Expression parsing functions func (p *Parser) parseIdentifier() Expression { return &Identifier{Value: p.curToken.Literal} } func (p *Parser) parseNumberLiteral() Expression { lit := &NumberLiteral{} literal := p.curToken.Literal var value float64 var err error // Check for hexadecimal (0x/0X prefix) if strings.HasPrefix(literal, "0x") || strings.HasPrefix(literal, "0X") { // Validate hex format if len(literal) <= 2 { p.addError(fmt.Sprintf("could not parse '%s' as hexadecimal number", literal)) return nil } hexPart := literal[2:] for _, ch := range hexPart { if !((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) { p.addError(fmt.Sprintf("could not parse '%s' as hexadecimal number", literal)) return nil } } // Parse as hex and convert to float64 intVal, parseErr := strconv.ParseInt(literal, 0, 64) if parseErr != nil { p.addError(fmt.Sprintf("could not parse '%s' as hexadecimal number", literal)) return nil } value = float64(intVal) } else if strings.HasPrefix(literal, "0b") || strings.HasPrefix(literal, "0B") { // Validate binary format if len(literal) <= 2 { p.addError(fmt.Sprintf("could not parse '%s' as binary number", literal)) return nil } binaryPart := literal[2:] for _, ch := range binaryPart { if ch != '0' && ch != '1' { p.addError(fmt.Sprintf("could not parse '%s' as binary number", literal)) return nil } } // Parse binary manually since Go doesn't support 0b in ParseInt with base 0 binaryStr := literal[2:] // remove "0b" prefix intVal, parseErr := strconv.ParseInt(binaryStr, 2, 64) if parseErr != nil { p.addError(fmt.Sprintf("could not parse '%s' as binary number", literal)) return nil } value = float64(intVal) } else { // Parse as regular decimal (handles scientific notation automatically) value, err = strconv.ParseFloat(literal, 64) if err != nil { p.addError(fmt.Sprintf("could not parse '%s' as number", literal)) return nil } } lit.Value = value return lit } func (p *Parser) parseStringLiteral() Expression { return &StringLiteral{Value: p.curToken.Literal} } func (p *Parser) parseBooleanLiteral() Expression { return &BooleanLiteral{Value: p.curTokenIs(TRUE)} } func (p *Parser) parseNilLiteral() Expression { return &NilLiteral{} } func (p *Parser) parsePrefixExpression() Expression { expression := &PrefixExpression{ Operator: p.curToken.Literal, } p.nextToken() expression.Right = p.ParseExpression(PREFIX) if expression.Right == nil { p.addError(fmt.Sprintf("expected expression after prefix operator '%s'", expression.Operator)) return nil } return expression } func (p *Parser) parseGroupedExpression() Expression { p.nextToken() exp := p.ParseExpression(LOWEST) if exp == nil { return nil } if !p.expectPeek(RPAREN) { return nil } return exp } func (p *Parser) parseTableLiteral() Expression { table := &TableLiteral{} table.Pairs = []TablePair{} if p.peekTokenIs(RBRACE) { p.nextToken() return table } p.nextToken() for { // Check for EOF if p.curTokenIs(EOF) { p.addError("unexpected end of input, expected }") return nil } pair := TablePair{} // Check if this is a key=value pair (identifier or string key) if (p.curTokenIs(IDENT) || p.curTokenIs(STRING)) && p.peekTokenIs(ASSIGN) { if p.curTokenIs(IDENT) { pair.Key = &Identifier{Value: p.curToken.Literal} } else { pair.Key = &StringLiteral{Value: p.curToken.Literal} } p.nextToken() // move to = p.nextToken() // move past = // Check for EOF after = if p.curTokenIs(EOF) { p.addError("expected expression after assignment operator") return nil } pair.Value = p.ParseExpression(LOWEST) } else { // Array-style element pair.Value = p.ParseExpression(LOWEST) } if pair.Value == nil { return nil } table.Pairs = append(table.Pairs, pair) if !p.peekTokenIs(COMMA) { break } p.nextToken() // consume comma p.nextToken() // move to next element // Allow trailing comma if p.curTokenIs(RBRACE) { break } // Check for EOF after comma if p.curTokenIs(EOF) { p.addError("expected next token to be }") return nil } } if !p.expectPeek(RBRACE) { return nil } return table } func (p *Parser) parseInfixExpression(left Expression) Expression { expression := &InfixExpression{ Left: left, Operator: p.curToken.Literal, } precedence := p.curPrecedence() p.nextToken() expression.Right = p.ParseExpression(precedence) if expression.Right == nil { p.addError(fmt.Sprintf("expected expression after operator '%s'", expression.Operator)) return nil } return expression } func (p *Parser) parseDotExpression(left Expression) Expression { if !p.expectPeekIdent() { p.addError("expected identifier after '.'") return nil } return &DotExpression{ Left: left, Key: p.curToken.Literal, } } func (p *Parser) parseIndexExpression(left Expression) Expression { p.nextToken() // move past '[' index := p.ParseExpression(LOWEST) if index == nil { p.addError("expected expression inside brackets") return nil } if !p.expectPeek(RBRACKET) { p.addError("expected ']' after index expression") return nil } return &IndexExpression{ Left: left, Index: index, } } // Helper methods func (p *Parser) curTokenIs(t TokenType) bool { return p.curToken.Type == t } func (p *Parser) peekTokenIs(t TokenType) bool { return p.peekToken.Type == t } func (p *Parser) expectPeek(t TokenType) bool { if p.peekTokenIs(t) { p.nextToken() return true } p.peekError(t) return false } // expectPeekIdent accepts IDENT or keyword tokens as identifiers func (p *Parser) expectPeekIdent() bool { if p.peekTokenIs(IDENT) || p.isKeyword(p.peekToken.Type) { p.nextToken() return true } p.peekError(IDENT) return false } // isKeyword checks if a token type is a keyword that can be used as identifier func (p *Parser) isKeyword(t TokenType) bool { switch t { case TRUE, FALSE, NIL, IF, THEN, ELSEIF, ELSE, END, ECHO, FOR, WHILE, IN, DO, BREAK, EXIT: return true default: return false } } // Error handling methods func (p *Parser) addError(message string) { p.errors = append(p.errors, ParseError{ Message: message, Line: p.curToken.Line, Column: p.curToken.Column, Token: p.curToken, }) } func (p *Parser) peekError(t TokenType) { message := fmt.Sprintf("expected next token to be %s, got %s instead", tokenTypeString(t), tokenTypeString(p.peekToken.Type)) p.errors = append(p.errors, ParseError{ Message: message, Line: p.peekToken.Line, Column: p.peekToken.Column, Token: p.peekToken, }) } func (p *Parser) noPrefixParseFnError(t TokenType) { var message string switch t { case ASSIGN: message = "unexpected assignment operator, missing left-hand side identifier" case PLUS, MINUS, STAR, SLASH: message = fmt.Sprintf("unexpected operator '%s', missing left operand", tokenTypeString(t)) case RPAREN: message = "unexpected closing parenthesis" case RBRACE: message = "unexpected closing brace" case RBRACKET: message = "unexpected closing bracket" case EOF: message = "unexpected end of input" default: message = fmt.Sprintf("unexpected token '%s'", tokenTypeString(t)) } p.addError(message) } func (p *Parser) peekPrecedence() Precedence { if p, ok := precedences[p.peekToken.Type]; ok { return p } return LOWEST } func (p *Parser) curPrecedence() Precedence { if p, ok := precedences[p.curToken.Type]; ok { return p } return LOWEST } // Errors returns all parsing errors func (p *Parser) Errors() []ParseError { return p.errors } // HasErrors returns true if there are any parsing errors func (p *Parser) HasErrors() bool { return len(p.errors) > 0 } // ErrorStrings returns error messages as strings for backward compatibility func (p *Parser) ErrorStrings() []string { result := make([]string, len(p.errors)) for i, err := range p.errors { result[i] = err.Error() } return result } // tokenTypeString returns a human-readable string for token types func tokenTypeString(t TokenType) string { switch t { case IDENT: return "identifier" case NUMBER: return "number" case STRING: return "string" case TRUE, FALSE: return "boolean" case NIL: return "nil" case ASSIGN: return "=" case PLUS: return "+" case MINUS: return "-" case STAR: return "*" case SLASH: return "/" case DOT: return "." case EQ: return "==" case NOT_EQ: return "!=" case LT: return "<" case GT: return ">" case LT_EQ: return "<=" case GT_EQ: return ">=" case LPAREN: return "(" case RPAREN: return ")" case LBRACE: return "{" case RBRACE: return "}" case LBRACKET: return "[" case RBRACKET: return "]" case COMMA: return "," case IF: return "if" case THEN: return "then" case ELSEIF: return "elseif" case ELSE: return "else" case END: return "end" case ECHO: return "echo" case FOR: return "for" case WHILE: return "while" case IN: return "in" case DO: return "do" case BREAK: return "break" case EXIT: return "exit" case EOF: return "end of file" case ILLEGAL: return "illegal token" default: return "unknown" } }