diff --git a/parser/parser.go b/parser/parser.go index c8dd8da..6813d05 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -5,6 +5,19 @@ import ( "strconv" ) +// ParseError represents a parsing error with location information +type ParseError struct { + Message string + Line int + Column int + Token Token +} + +func (pe ParseError) Error() string { + return fmt.Sprintf("Parse error at line %d, column %d: %s (near '%s')", + pe.Line, pe.Column, pe.Message, pe.Token.Literal) +} + // Parser implements a recursive descent Pratt parser type Parser struct { lexer *Lexer @@ -15,14 +28,14 @@ type Parser struct { prefixParseFns map[TokenType]func() Expression infixParseFns map[TokenType]func(Expression) Expression - errors []string + errors []ParseError } // NewParser creates a new parser instance func NewParser(lexer *Lexer) *Parser { p := &Parser{ lexer: lexer, - errors: []string{}, + errors: []ParseError{}, } p.prefixParseFns = make(map[TokenType]func() Expression) @@ -81,12 +94,25 @@ func (p *Parser) ParseProgram() *Program { // parseStatement parses a statement func (p *Parser) parseStatement() Statement { - if p.curTokenIs(IDENT) && p.peekTokenIs(ASSIGN) { - return p.parseAssignStatement() + switch p.curToken.Type { + case IDENT: + if p.peekTokenIs(ASSIGN) { + return p.parseAssignStatement() + } + p.addError("unexpected identifier, expected assignment or declaration") + return nil + case ASSIGN: + p.addError("assignment operator '=' without left-hand side identifier") + return nil + case ILLEGAL: + p.addError(fmt.Sprintf("unexpected token '%s'", p.curToken.Literal)) + return nil + case EOF: + return nil + default: + p.addError(fmt.Sprintf("unexpected token '%s', expected statement", p.curToken.Literal)) + return nil } - - // Skip unknown statements for now - return nil } // parseAssignStatement parses variable assignment @@ -94,6 +120,7 @@ func (p *Parser) parseAssignStatement() *AssignStatement { stmt := &AssignStatement{} if !p.curTokenIs(IDENT) { + p.addError("expected identifier for assignment") return nil } @@ -106,6 +133,10 @@ func (p *Parser) parseAssignStatement() *AssignStatement { p.nextToken() stmt.Value = p.parseExpression(LOWEST) + if stmt.Value == nil { + p.addError("expected expression after assignment operator") + return nil + } return stmt } @@ -119,6 +150,9 @@ func (p *Parser) parseExpression(precedence Precedence) Expression { } leftExp := prefix() + if leftExp == nil { + return nil + } for !p.peekTokenIs(EOF) && precedence < p.peekPrecedence() { infix := p.infixParseFns[p.peekToken.Type] @@ -128,6 +162,9 @@ func (p *Parser) parseExpression(precedence Precedence) Expression { p.nextToken() leftExp = infix(leftExp) + if leftExp == nil { + return nil + } } return leftExp @@ -143,8 +180,7 @@ func (p *Parser) parseNumberLiteral() Expression { value, err := strconv.ParseFloat(p.curToken.Literal, 64) if err != nil { - msg := fmt.Sprintf("could not parse %q as float", p.curToken.Literal) - p.errors = append(p.errors, msg) + p.addError(fmt.Sprintf("could not parse '%s' as number", p.curToken.Literal)) return nil } @@ -168,6 +204,9 @@ func (p *Parser) parseGroupedExpression() Expression { p.nextToken() exp := p.parseExpression(LOWEST) + if exp == nil { + return nil + } if !p.expectPeek(RPAREN) { return nil @@ -186,6 +225,11 @@ func (p *Parser) parseInfixExpression(left Expression) Expression { p.nextToken() expression.Right = p.parseExpression(precedence) + if expression.Right == nil { + p.addError(fmt.Sprintf("expected expression after operator '%s'", expression.Operator)) + return nil + } + return expression } @@ -202,21 +246,48 @@ func (p *Parser) expectPeek(t TokenType) bool { if p.peekTokenIs(t) { p.nextToken() return true - } else { - p.peekError(t) - return false } + p.peekError(t) + return false +} + +// Error handling methods +func (p *Parser) addError(message string) { + p.errors = append(p.errors, ParseError{ + Message: message, + Line: p.curToken.Line, + Column: p.curToken.Column, + Token: p.curToken, + }) } func (p *Parser) peekError(t TokenType) { - msg := fmt.Sprintf("expected next token to be %v, got %v instead", - t, p.peekToken.Type) - p.errors = append(p.errors, msg) + message := fmt.Sprintf("expected next token to be %s, got %s instead", + tokenTypeString(t), tokenTypeString(p.peekToken.Type)) + p.errors = append(p.errors, ParseError{ + Message: message, + Line: p.peekToken.Line, + Column: p.peekToken.Column, + Token: p.peekToken, + }) } func (p *Parser) noPrefixParseFnError(t TokenType) { - msg := fmt.Sprintf("no prefix parse function for %v found", t) - p.errors = append(p.errors, msg) + var message string + switch t { + case ASSIGN: + message = "unexpected assignment operator, missing left-hand side identifier" + case PLUS, MINUS, STAR, SLASH: + message = fmt.Sprintf("unexpected operator '%s', missing left operand", tokenTypeString(t)) + case RPAREN: + message = "unexpected closing parenthesis" + case EOF: + message = "unexpected end of input" + default: + message = fmt.Sprintf("unexpected token '%s'", tokenTypeString(t)) + } + + p.addError(message) } func (p *Parser) peekPrecedence() Precedence { @@ -234,6 +305,58 @@ func (p *Parser) curPrecedence() Precedence { } // Errors returns all parsing errors -func (p *Parser) Errors() []string { +func (p *Parser) Errors() []ParseError { return p.errors } + +// HasErrors returns true if there are any parsing errors +func (p *Parser) HasErrors() bool { + return len(p.errors) > 0 +} + +// ErrorStrings returns error messages as strings for backward compatibility +func (p *Parser) ErrorStrings() []string { + result := make([]string, len(p.errors)) + for i, err := range p.errors { + result[i] = err.Error() + } + return result +} + +// tokenTypeString returns a human-readable string for token types +func tokenTypeString(t TokenType) string { + switch t { + case IDENT: + return "identifier" + case NUMBER: + return "number" + case STRING: + return "string" + case TRUE, FALSE: + return "boolean" + case NIL: + return "nil" + case ASSIGN: + return "=" + case PLUS: + return "+" + case MINUS: + return "-" + case STAR: + return "*" + case SLASH: + return "/" + case LPAREN: + return "(" + case RPAREN: + return ")" + case VAR: + return "var" + case EOF: + return "end of file" + case ILLEGAL: + return "illegal token" + default: + return "unknown" + } +} diff --git a/parser/parser_test.go b/parser/parser_test.go index d94ddb8..0208ca9 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -1,6 +1,7 @@ package parser import ( + "strings" "testing" ) @@ -19,19 +20,11 @@ func TestLiterals(t *testing.T) { for _, tt := range tests { t.Run(tt.input, func(t *testing.T) { + // Parse as expression directly - literals are not valid statements l := NewLexer(tt.input) p := NewParser(l) - program := p.ParseProgram() - checkParserErrors(t, p) - - if len(program.Statements) != 0 { - t.Fatalf("expected 0 statements for literal, got %d", len(program.Statements)) - } - - // Parse as expression - l = NewLexer(tt.input) - p = NewParser(l) expr := p.parseExpression(LOWEST) + checkParserErrors(t, p) switch expected := tt.expected.(type) { case float64: @@ -159,10 +152,16 @@ func TestParsingErrors(t *testing.T) { tests := []struct { input string expectedError string + line int + column int }{ - {"x =", "no prefix parse function"}, - {"= 5", "no prefix parse function"}, - {"(1 + 2", "expected next token to be"}, + {"= 5", "assignment operator '=' without left-hand side identifier", 1, 1}, + {"x =", "expected expression after assignment operator", 1, 3}, + {"(1 + 2", "expected next token to be )", 1, 7}, + {"+ 5", "unexpected operator '+'", 1, 1}, + {"1 +", "expected expression after operator '+'", 1, 3}, + {"@", "unexpected token '@'", 1, 1}, + {"invalid@", "unexpected identifier", 1, 1}, } for _, tt := range tests { @@ -170,11 +169,14 @@ func TestParsingErrors(t *testing.T) { l := NewLexer(tt.input) p := NewParser(l) - if tt.input == "x =" { - p.ParseProgram() - } else { - // Parse as expression to catch syntax errors + // Decide parsing strategy based on the type of error we're testing + switch tt.input { + case "(1 + 2", "+ 5", "1 +": + // These are expression-level errors p.parseExpression(LOWEST) + default: + // These are statement-level errors + p.ParseProgram() } errors := p.Errors() @@ -184,19 +186,69 @@ func TestParsingErrors(t *testing.T) { found := false for _, err := range errors { - if containsSubstring(err, tt.expectedError) { + if strings.Contains(err.Message, tt.expectedError) { found = true + if err.Line != tt.line { + t.Errorf("expected error at line %d, got line %d", tt.line, err.Line) + } break } } if !found { - t.Errorf("expected error containing %q, got %v", tt.expectedError, errors) + errorMsgs := make([]string, len(errors)) + for i, err := range errors { + errorMsgs[i] = err.Message + } + t.Errorf("expected error containing %q, got %v", tt.expectedError, errorMsgs) } }) } } +func TestErrorRecovery(t *testing.T) { + input := `x = 42 += 5 +y = "hello"` + + l := NewLexer(input) + p := NewParser(l) + program := p.ParseProgram() + + // Should have errors but still parse valid statements + if !p.HasErrors() { + t.Fatal("expected parsing errors") + } + + errors := p.Errors() + found := false + for _, err := range errors { + if strings.Contains(err.Message, "assignment operator '=' without left-hand side identifier") { + found = true + if err.Line != 2 { + t.Errorf("expected error at line 2, got line %d", err.Line) + } + break + } + } + + if !found { + t.Error("expected specific assignment error") + } + + // Should still have parsed the valid statements + validStatements := 0 + for _, stmt := range program.Statements { + if stmt != nil { + validStatements++ + } + } + + if validStatements < 2 { + t.Errorf("expected at least 2 valid statements, got %d", validStatements) + } +} + func TestProgram(t *testing.T) { input := `x = 42 y = "hello" @@ -224,6 +276,39 @@ z = true + false` } } +func TestErrorMessages(t *testing.T) { + tests := []struct { + input string + expectedMessage string + }{ + {"= 5", "Parse error at line 1, column 1: assignment operator '=' without left-hand side identifier (near '=')"}, + {"x =", "Parse error at line 1, column 3: expected expression after assignment operator (near '')"}, + {"(", "Parse error at line 1, column 1: unexpected end of input (near '')"}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + l := NewLexer(tt.input) + p := NewParser(l) + p.ParseProgram() + + if !p.HasErrors() { + t.Fatal("expected parsing errors") + } + + errors := p.Errors() + if len(errors) == 0 { + t.Fatal("expected at least one error") + } + + errorStr := errors[0].Error() + if !strings.Contains(errorStr, "Parse error at line") { + t.Errorf("expected formatted error message, got: %s", errorStr) + } + }) + } +} + // Helper functions for testing specific node types func testNumberLiteral(t *testing.T, expr Expression, expected float64) { t.Helper() @@ -315,17 +400,8 @@ func checkParserErrors(t *testing.T, p *Parser) { } t.Errorf("parser has %d errors", len(errors)) - for _, msg := range errors { - t.Errorf("parser error: %q", msg) + for _, err := range errors { + t.Errorf("parser error: %s", err.Error()) } t.FailNow() } - -func containsSubstring(s, substr string) bool { - for i := 0; i <= len(s)-len(substr); i++ { - if s[i:i+len(substr)] == substr { - return true - } - } - return false -}