diff --git a/parser/ast.go b/parser/ast.go index a684676..6313197 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -102,6 +102,61 @@ func (is *IfStatement) String() string { return result } +// ForStatement is the AST node for a numeric for loop: for i = start, end[, step] do ... end +type ForStatement struct { + Variable *Identifier + Start Expression + End Expression + Step Expression // optional; nil when the step clause is omitted, meaning a step of 1 + Body []Statement +} + +func (fs *ForStatement) statementNode() {} // empty marker method; presumably part of the Statement interface +func (fs *ForStatement) String() string { // renders the loop header, one tab-indented line per body statement, then "end" + var result string + if fs.Step != nil { + result += fmt.Sprintf("for %s = %s, %s, %s do\n", + fs.Variable.String(), fs.Start.String(), fs.End.String(), fs.Step.String()) + } else { + result += fmt.Sprintf("for %s = %s, %s do\n", + fs.Variable.String(), fs.Start.String(), fs.End.String()) + } + + for _, stmt := range fs.Body { + result += "\t" + stmt.String() + "\n" + } + + result += "end" + return result +} + +// ForInStatement is the AST node for an iterator for loop: for k, v in expr (or for v in expr) do ... 
end +type ForInStatement struct { + Key *Identifier // optional; nil for the single-variable form (for v in expr) + Value *Identifier + Iterable Expression + Body []Statement +} + +func (fis *ForInStatement) statementNode() {} // empty marker method; presumably part of the Statement interface +func (fis *ForInStatement) String() string { // renders the loop header (with the key only when Key is set), the tab-indented body, then "end" + var result string + if fis.Key != nil { + result += fmt.Sprintf("for %s, %s in %s do\n", + fis.Key.String(), fis.Value.String(), fis.Iterable.String()) + } else { + result += fmt.Sprintf("for %s in %s do\n", + fis.Value.String(), fis.Iterable.String()) + } + + for _, stmt := range fis.Body { + result += "\t" + stmt.String() + "\n" + } + + result += "end" + return result +} + // Identifier represents identifiers type Identifier struct { Value string diff --git a/parser/parser.go b/parser/parser.go index 54eba42..ef6a6f6 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -104,6 +104,8 @@ func (p *Parser) parseStatement() Statement { return p.parseAssignStatement() case IF: return p.parseIfStatement() + case FOR: + return p.parseForStatement() case ECHO: return p.parseEchoStatement() case ASSIGN: @@ -176,6 +178,144 @@ func (p *Parser) parseEchoStatement() *EchoStatement { return stmt } +// parseForStatement parses a for loop; the token after the first identifier selects the numeric ('=') or for-in (',' or 'in') form, and an error is recorded when neither matches +func (p *Parser) parseForStatement() Statement { + p.nextToken() // move past 'for' + + if !p.curTokenIs(IDENT) { + p.addError("expected identifier after 'for'") + return nil + } + + firstVar := &Identifier{Value: p.curToken.Literal} + + // Peek at the next token to decide which kind of for loop this is + if p.peekTokenIs(ASSIGN) { + // Numeric for loop: for i = start, end, step do + return p.parseNumericForStatement(firstVar) // NOTE(review): a nil pointer from this helper (and from parseForInStatement below) is returned as a non-nil Statement interface value; confirm callers handle that + } else if p.peekTokenIs(COMMA) || p.peekTokenIs(IN) { + // For-in loop: for k, v in expr do or for v in expr do + return p.parseForInStatement(firstVar) + } else { + p.addError("expected '=', ',' or 'in' after for loop variable") + return nil + } +} + +// parseNumericForStatement parses the rest of a numeric for loop (for i = start, end, step do ... end) given its already-read loop variable; it returns nil on any syntax error +func (p *Parser) 
parseNumericForStatement(variable *Identifier) *ForStatement { + stmt := &ForStatement{Variable: variable} + + if !p.expectPeek(ASSIGN) { + return nil + } + + p.nextToken() // move past '=' + + // Parse start expression + stmt.Start = p.ParseExpression(LOWEST) + if stmt.Start == nil { + p.addError("expected start expression in for loop") + return nil + } + + if !p.expectPeek(COMMA) { + p.addError("expected ',' after start expression in for loop") + return nil + } + + p.nextToken() // move past ',' + + // Parse end expression + stmt.End = p.ParseExpression(LOWEST) + if stmt.End == nil { + p.addError("expected end expression in for loop") + return nil + } + + // Optional step expression, present only when another ',' follows the end expression + if p.peekTokenIs(COMMA) { + p.nextToken() // move to ',' + p.nextToken() // move past ',' + + stmt.Step = p.ParseExpression(LOWEST) + if stmt.Step == nil { + p.addError("expected step expression in for loop") + return nil + } + } + + if !p.expectPeek(DO) { + p.addError("expected 'do' after for loop header") + return nil + } + + p.nextToken() // move past 'do' + + // Parse loop body; the current token must be 'end' afterwards + stmt.Body = p.parseBlockStatements(END) + + if !p.curTokenIs(END) { + p.addError("expected 'end' to close for loop") + return nil + } + + return stmt +} + +// parseForInStatement parses the rest of a for-in loop (for k, v in expr do or for v in expr do) given its first variable; it returns nil on any syntax error +func (p *Parser) parseForInStatement(firstVar *Identifier) *ForInStatement { + stmt := &ForInStatement{} + + if p.peekTokenIs(COMMA) { + // Two variables: for k, v in expr do + stmt.Key = firstVar + p.nextToken() // move to ',' + p.nextToken() // move past ',' + + if !p.curTokenIs(IDENT) { + p.addError("expected identifier after ',' in for loop") + return nil + } + + stmt.Value = &Identifier{Value: p.curToken.Literal} + } else { + // Single variable: for v in expr do + stmt.Value = firstVar + } + + if !p.expectPeek(IN) { + p.addError("expected 'in' in for loop") + return nil + } + + p.nextToken() // move past 'in' + + // Parse iterable expression + stmt.Iterable = 
p.ParseExpression(LOWEST) + if stmt.Iterable == nil { + p.addError("expected expression after 'in' in for loop") + return nil + } + + if !p.expectPeek(DO) { + p.addError("expected 'do' after for loop header") + return nil + } + + p.nextToken() // move past 'do' + + // Parse loop body; the current token must be 'end' afterwards + stmt.Body = p.parseBlockStatements(END) + + if !p.curTokenIs(END) { + p.addError("expected 'end' to close for loop") + return nil + } + + return stmt +} + // parseIfStatement parses if/elseif/else/end statements func (p *Parser) parseIfStatement() *IfStatement { stmt := &IfStatement{} @@ -482,7 +622,7 @@ func (p *Parser) parseInfixExpression(left Expression) Expression { } func (p *Parser) parseDotExpression(left Expression) Expression { - if !p.expectPeek(IDENT) { + if !p.expectPeekIdent() { // also accepts keyword tokens, so field names such as obj.end parse p.addError("expected identifier after '.'") return nil } @@ -531,6 +671,26 @@ func (p *Parser) expectPeek(t TokenType) bool { return false } +// expectPeekIdent advances and returns true if the next token is IDENT or a keyword token; otherwise it calls peekError(IDENT) and returns false +func (p *Parser) expectPeekIdent() bool { + if p.peekTokenIs(IDENT) || p.isKeyword(p.peekToken.Type) { + p.nextToken() + return true + } + p.peekError(IDENT) + return false +} + +// isKeyword reports whether t is one of the keyword token types listed below, which expectPeekIdent accepts in place of an identifier +func (p *Parser) isKeyword(t TokenType) bool { + switch t { + case TRUE, FALSE, NIL, VAR, IF, THEN, ELSEIF, ELSE, END, ECHO, FOR, IN, DO: + return true + default: + return false + } +} + // Error handling methods func (p *Parser) addError(message string) { p.errors = append(p.errors, ParseError{ @@ -660,6 +820,12 @@ func tokenTypeString(t TokenType) string { return "end" case ECHO: return "echo" + case FOR: + return "for" + case IN: + return "in" + case DO: + return "do" case EOF: return "end of file" case ILLEGAL: diff --git a/parser/tests/loops_test.go b/parser/tests/loops_test.go new file mode 100644 index 0000000..1fb2d7b --- /dev/null +++ b/parser/tests/loops_test.go @@ -0,0 +1,425 @@ +package parser_test + +import ( + "strings" + 
"testing" + + "git.sharkk.net/Sharkk/Mako/parser" +) + +func TestNumericForLoop(t *testing.T) { // table-driven: numeric for loops with and without a step must parse into a single ForStatement + tests := []struct { + input string + variable string + hasStep bool + desc string + }{ + {"for i = 1, 10 do\necho i\nend", "i", false, "basic numeric loop"}, + {"for j = 0, 5, 2 do\nx = j\nend", "j", true, "numeric loop with step"}, + {"for count = 1, 10, 2 do\necho count\nend", "count", true, "step loop"}, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + l := parser.NewLexer(tt.input) + p := parser.NewParser(l) + program := p.ParseProgram() + checkParserErrors(t, p) + + if len(program.Statements) != 1 { + t.Fatalf("expected 1 statement, got %d", len(program.Statements)) + } + + stmt, ok := program.Statements[0].(*parser.ForStatement) + if !ok { + t.Fatalf("expected ForStatement, got %T", program.Statements[0]) + } + + if stmt.Variable.Value != tt.variable { + t.Errorf("expected variable %s, got %s", tt.variable, stmt.Variable.Value) + } + + if stmt.Start == nil { + t.Error("expected start expression") + } + + if stmt.End == nil { + t.Error("expected end expression") + } + + if tt.hasStep && stmt.Step == nil { + t.Error("expected step expression") + } + + if !tt.hasStep && stmt.Step != nil { + t.Error("unexpected step expression") + } + + if len(stmt.Body) == 0 { + t.Error("expected non-empty body") + } + }) + } +} + +func TestForInLoop(t *testing.T) { // table-driven: one- and two-variable for-in loops must parse into a single ForInStatement + tests := []struct { + input string + hasKey bool + keyName string + valueName string + desc string + }{ + {"for v in arr do\necho v\nend", false, "", "v", "single variable for-in"}, + {"for k, v in table do\necho k\nend", true, "k", "v", "key-value for-in"}, + {"for index, item in list do\necho item\nend", true, "index", "item", "descriptive names"}, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + l := parser.NewLexer(tt.input) + p := parser.NewParser(l) + program := p.ParseProgram() + checkParserErrors(t, p) + + if len(program.Statements) != 1 { + t.Fatalf("expected 
1 statement, got %d", len(program.Statements)) + } + + stmt, ok := program.Statements[0].(*parser.ForInStatement) + if !ok { + t.Fatalf("expected ForInStatement, got %T", program.Statements[0]) + } + + if tt.hasKey { + if stmt.Key == nil { + t.Error("expected key variable") + } else if stmt.Key.Value != tt.keyName { + t.Errorf("expected key %s, got %s", tt.keyName, stmt.Key.Value) + } + } else { + if stmt.Key != nil { + t.Error("unexpected key variable") + } + } + + if stmt.Value == nil { + t.Error("expected value variable") + } else if stmt.Value.Value != tt.valueName { + t.Errorf("expected value %s, got %s", tt.valueName, stmt.Value.Value) + } + + if stmt.Iterable == nil { + t.Error("expected iterable expression") + } + + if len(stmt.Body) == 0 { + t.Error("expected non-empty body") + } + }) + } +} + +func TestNestedForLoops(t *testing.T) { // a numeric loop whose body holds a nested numeric loop and a nested for-in loop + input := `for i = 1, 3 do + for j = 1, 2 do + echo i + j + end + for k, v in table do + echo v + end +end` + + l := parser.NewLexer(input) + p := parser.NewParser(l) + program := p.ParseProgram() + checkParserErrors(t, p) + + if len(program.Statements) != 1 { + t.Fatalf("expected 1 statement, got %d", len(program.Statements)) + } + + outerLoop, ok := program.Statements[0].(*parser.ForStatement) + if !ok { + t.Fatalf("expected ForStatement, got %T", program.Statements[0]) + } + + if len(outerLoop.Body) != 2 { + t.Fatalf("expected 2 body statements, got %d", len(outerLoop.Body)) + } + + // First nested loop: numeric + innerNumeric, ok := outerLoop.Body[0].(*parser.ForStatement) + if !ok { + t.Fatalf("expected nested ForStatement, got %T", outerLoop.Body[0]) + } + + if innerNumeric.Variable.Value != "j" { + t.Errorf("expected variable j, got %s", innerNumeric.Variable.Value) + } + + // Second nested loop: for-in + innerForIn, ok := outerLoop.Body[1].(*parser.ForInStatement) + if !ok { + t.Fatalf("expected nested ForInStatement, got %T", outerLoop.Body[1]) + } + + if innerForIn.Key.Value != "k" || innerForIn.Value.Value != "v" { // NOTE(review): Key is dereferenced without a nil check; a nil Key would panic instead of failing the test 
+ t.Error("expected key k and value v in nested for-in loop") + } +} + +func TestForLoopWithComplexExpressions(t *testing.T) { // loop header bounds and iterable built from dot, index and infix expressions + input := `for i = start, table.size, step + 1 do + for k, v in data[index] do + result[k] = v + end +end` + + l := parser.NewLexer(input) + p := parser.NewParser(l) + program := p.ParseProgram() + checkParserErrors(t, p) + + if len(program.Statements) != 1 { + t.Fatalf("expected 1 statement, got %d", len(program.Statements)) + } + + stmt, ok := program.Statements[0].(*parser.ForStatement) + if !ok { + t.Fatalf("expected ForStatement, got %T", program.Statements[0]) + } + + // All three header expressions (start, end, step) must have been parsed + if stmt.Start == nil || stmt.End == nil || stmt.Step == nil { + t.Error("expected all expressions to be non-nil") + } + + // The body must be exactly the nested for-in loop, whose iterable is an index expression + if len(stmt.Body) != 1 { + t.Fatalf("expected 1 body statement, got %d", len(stmt.Body)) + } + + nestedStmt, ok := stmt.Body[0].(*parser.ForInStatement) + if !ok { + t.Fatalf("expected nested ForInStatement, got %T", stmt.Body[0]) + } + + if nestedStmt.Iterable == nil { + t.Error("expected iterable expression") + } +} + +func TestForLoopErrors(t *testing.T) { // each malformed loop must produce at least one error containing the expected message + tests := []struct { + input string + expectedError string + desc string + }{ + {"for do end", "expected identifier after 'for'", "missing variable"}, + {"for i do end", "expected '=', ',' or 'in' after for loop variable", "missing assignment or in"}, + {"for i = do end", "expected start expression in for loop", "missing start expression"}, + {"for i = 1 do end", "expected ',' after start expression in for loop", "missing comma"}, + {"for i = 1, do end", "expected end expression in for loop", "missing end expression"}, + {"for i = 1, 10 end", "expected 'do' after for loop header", "missing do"}, + {"for i = 1, 10 do", "expected 'end' to close for loop", "missing end"}, + {"for i, do end", "expected identifier after ',' in for loop", "missing second variable"}, + {"for i, j do end", 
"expected 'in' in for loop", "missing in keyword"}, + {"for i, j in do end", "expected expression after 'in' in for loop", "missing iterable"}, + {"for i in arr end", "expected 'do' after for loop header", "missing do in for-in"}, + {"for i in arr do", "expected 'end' to close for loop", "missing end in for-in"}, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + l := parser.NewLexer(tt.input) + p := parser.NewParser(l) + p.ParseProgram() + + if !p.HasErrors() { + t.Fatal("expected parsing errors") + } + + errors := p.Errors() + found := false + for _, err := range errors { + if strings.Contains(err.Message, tt.expectedError) { + found = true + break + } + } + + if !found { + errorMsgs := make([]string, len(errors)) + for i, err := range errors { + errorMsgs[i] = err.Message + } + t.Errorf("expected error containing %q, got %v", tt.expectedError, errorMsgs) + } + }) + } +} + +func TestForLoopStringRepresentation(t *testing.T) { // String() output of a parsed program must contain every expected fragment + tests := []struct { + input string + contains []string + desc string + }{ + { + "for i = 1, 10 do\necho i\nend", + []string{"for i = 1.00, 10.00 do", "echo i", "end"}, + "numeric for loop", + }, + { + "for i = 1, 10, 2 do\nx = i\nend", + []string{"for i = 1.00, 10.00, 2.00 do", "x = i", "end"}, + "numeric for loop with step", + }, + { + "for v in arr do\necho v\nend", + []string{"for v in arr do", "echo v", "end"}, + "single variable for-in", + }, + { + "for k, v in table do\necho k\nend", + []string{"for k, v in table do", "echo k", "end"}, + "key-value for-in", + }, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + l := parser.NewLexer(tt.input) + p := parser.NewParser(l) + program := p.ParseProgram() + checkParserErrors(t, p) + + programStr := program.String() + for _, contain := range tt.contains { + if !strings.Contains(programStr, contain) { + t.Errorf("expected string representation to contain %q, got:\n%s", contain, programStr) + } + } + }) + } +} + +func TestForLoopInMixedProgram(t 
*testing.T) { // loops interleaved with assignments and echo must keep their order and bodies + input := `arr = {1, 2, 3} +total = 0 +for i = 1, 3 do + total = total + arr[i] +end +echo total + +for k, v in arr do + echo v +end` + + l := parser.NewLexer(input) + p := parser.NewParser(l) + program := p.ParseProgram() + checkParserErrors(t, p) + + if len(program.Statements) != 5 { + t.Fatalf("expected 5 statements, got %d", len(program.Statements)) + } + + // First: table assignment + _, ok := program.Statements[0].(*parser.AssignStatement) + if !ok { + t.Fatalf("statement 0: expected AssignStatement, got %T", program.Statements[0]) + } + + // Second: variable assignment + _, ok = program.Statements[1].(*parser.AssignStatement) + if !ok { + t.Fatalf("statement 1: expected AssignStatement, got %T", program.Statements[1]) + } + + // Third: numeric for loop + forStmt, ok := program.Statements[2].(*parser.ForStatement) + if !ok { + t.Fatalf("statement 2: expected ForStatement, got %T", program.Statements[2]) + } + + if len(forStmt.Body) != 1 { + t.Errorf("expected 1 body statement in for loop, got %d", len(forStmt.Body)) + } + + // Fourth: echo statement + _, ok = program.Statements[3].(*parser.EchoStatement) + if !ok { + t.Fatalf("statement 3: expected EchoStatement, got %T", program.Statements[3]) + } + + // Fifth: for-in loop + forInStmt, ok := program.Statements[4].(*parser.ForInStatement) + if !ok { + t.Fatalf("statement 4: expected ForInStatement, got %T", program.Statements[4]) + } + + if len(forInStmt.Body) != 1 { + t.Errorf("expected 1 body statement in for-in loop, got %d", len(forInStmt.Body)) + } + + // Check that body contains echo statement + _, ok = forInStmt.Body[0].(*parser.EchoStatement) + if !ok { + t.Fatalf("for-in body: expected EchoStatement, got %T", forInStmt.Body[0]) + } +} + +func TestForLoopWithMemberAccess(t *testing.T) { // dot expressions in loop headers, including the keyword field name obj.end + input := `for i = obj.start, obj.end, obj.step do + data[i] = result + result.items[i] = data[i] +end + +for key, val in obj.items do + val.count = val.count + 1 +end` + + l := parser.NewLexer(input) + 
p := parser.NewParser(l) + program := p.ParseProgram() + checkParserErrors(t, p) + + if len(program.Statements) != 2 { + t.Fatalf("expected 2 statements, got %d", len(program.Statements)) + } + + // First: numeric for with member access in bounds + forStmt, ok := program.Statements[0].(*parser.ForStatement) + if !ok { + t.Fatalf("statement 0: expected ForStatement, got %T", program.Statements[0]) + } + + // Start, end and step must each be a dot expression + _, ok = forStmt.Start.(*parser.DotExpression) + if !ok { + t.Fatalf("expected DotExpression for start, got %T", forStmt.Start) + } + + _, ok = forStmt.End.(*parser.DotExpression) + if !ok { + t.Fatalf("expected DotExpression for end, got %T", forStmt.End) + } + + _, ok = forStmt.Step.(*parser.DotExpression) + if !ok { + t.Fatalf("expected DotExpression for step, got %T", forStmt.Step) + } + + // Second: for-in with member access + forInStmt, ok := program.Statements[1].(*parser.ForInStatement) + if !ok { + t.Fatalf("statement 1: expected ForInStatement, got %T", program.Statements[1]) + } + + _, ok = forInStmt.Iterable.(*parser.DotExpression) + if !ok { + t.Fatalf("expected DotExpression for iterable, got %T", forInStmt.Iterable) + } +} diff --git a/parser/token.go b/parser/token.go index a65f523..217be87 100644 --- a/parser/token.go +++ b/parser/token.go @@ -37,6 +37,9 @@ const ( ELSE END ECHO + FOR + IN + DO // Special EOF @@ -87,6 +90,9 @@ func lookupIdent(ident string) TokenType { "else": ELSE, "end": END, "echo": ECHO, + "for": FOR, + "in": IN, + "do": DO, } if tok, ok := keywords[ident]; ok {