diff --git a/parser/lexer.go b/parser/lexer.go index 43d2525..45efac4 100644 --- a/parser/lexer.go +++ b/parser/lexer.go @@ -172,6 +172,26 @@ func (l *Lexer) readString() string { return l.input[position:l.position] } +// readMultilineString reads a multiline string literal using [[ ]] syntax +func (l *Lexer) readMultilineString() string { + l.readChar() // skip first '[' + l.readChar() // skip second '[' + + start := l.position + for { + if l.ch == 0 { + break // EOF - return what we have + } + if l.ch == ']' && l.peekChar() == ']' { + content := l.input[start:l.position] + l.readChar() // skip first ']', positioned at second ']' + return content + } + l.readChar() + } + return l.input[start:l.position] +} + // NextToken returns the next token from the input func (l *Lexer) NextToken() Token { var tok Token @@ -211,6 +231,13 @@ func (l *Lexer) NextToken() Token { case '"': tok.Type = STRING tok.Literal = l.readString() + case '[': + if l.peekChar() == '[' { + tok.Type = STRING + tok.Literal = l.readMultilineString() + } else { + tok = Token{Type: ILLEGAL, Literal: string(l.ch), Line: l.line, Column: l.column} + } case 0: tok.Literal = "" tok.Type = EOF diff --git a/parser/parser.go b/parser/parser.go index 6d8052d..9e3d003 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -286,9 +286,13 @@ func (p *Parser) parseTableLiteral() Expression { pair := TablePair{} - // Check if this is a key=value pair - if p.curTokenIs(IDENT) && p.peekTokenIs(ASSIGN) { - pair.Key = &Identifier{Value: p.curToken.Literal} + // Check if this is a key=value pair (identifier or string key) + if (p.curTokenIs(IDENT) || p.curTokenIs(STRING)) && p.peekTokenIs(ASSIGN) { + if p.curTokenIs(IDENT) { + pair.Key = &Identifier{Value: p.curToken.Literal} + } else { + pair.Key = &StringLiteral{Value: p.curToken.Literal} + } p.nextToken() // move to = p.nextToken() // move past = diff --git a/parser/parser_test.go b/parser/parser_test.go index 1541ae4..087d03a 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -766,3 +766,259 @@ func checkParserErrors(t *testing.T, p *Parser) { } t.FailNow() } + +func TestMultilineStringLiterals(t *testing.T) { + tests := []struct { + input string + expected string + desc string + }{ + {`[[hello world]]`, "hello world", "basic multiline string"}, + {`[[]]`, "", "empty multiline string"}, + {`[[hello +world]]`, "hello\nworld", "multiline with newline"}, + {`[[hello [brackets] world]]`, "hello [brackets] world", "nested single brackets"}, + {`[[line1 +line2 +line3]]`, "line1\nline2\nline3", "multiple lines"}, + {`[[ tab and spaces ]]`, "\ttab and spaces\t", "tabs and spaces"}, + {`[[special chars: @#$%^&*()]]`, "special chars: @#$%^&*()", "special characters"}, + {`[["quotes" and 'apostrophes']]`, `"quotes" and 'apostrophes'`, "quotes inside multiline"}, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + l := NewLexer(tt.input) + p := NewParser(l) + expr := p.parseExpression(LOWEST) + checkParserErrors(t, p) + + testStringLiteral(t, expr, tt.expected) + }) + } +} + +func TestMultilineStringAssignments(t *testing.T) { + tests := []struct { + input string + identifier string + expected string + desc string + }{ + {`text = [[hello world]]`, "text", "hello world", "basic assignment"}, + {`empty = [[]]`, "empty", "", "empty multiline assignment"}, + {`multiline = [[line1 +line2]]`, "multiline", "line1\nline2", "multiline content assignment"}, + {`sql = [[SELECT * FROM users WHERE id = 1]]`, "sql", "SELECT * FROM users WHERE id = 1", "SQL query assignment"}, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + l := NewLexer(tt.input) + p := NewParser(l) + program := p.ParseProgram() + checkParserErrors(t, p) + + if len(program.Statements) != 1 { + t.Fatalf("expected 1 statement, got %d", len(program.Statements)) + } + + stmt, ok := program.Statements[0].(*AssignStatement) + if !ok { + t.Fatalf("expected AssignStatement, got %T", program.Statements[0]) + } + + if stmt.Name.Value != tt.identifier { + t.Errorf("expected identifier %s, got %s", tt.identifier, stmt.Name.Value) + } + + testStringLiteral(t, stmt.Value, tt.expected) + }) + } +} + +func TestMultilineStringInTables(t *testing.T) { + tests := []struct { + input string + expected string + desc string + }{ + {`{[[hello]], [[world]]}`, `{"hello", "world"}`, "multiline strings in array"}, + {`{msg = [[hello world]]}`, `{msg = "hello world"}`, "multiline string in hash"}, + {`{[[key1]], "value1", key2 = [[value2]]}`, `{"key1", "value1", key2 = "value2"}`, "mixed strings in table"}, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + l := NewLexer(tt.input) + p := NewParser(l) + expr := p.parseExpression(LOWEST) + checkParserErrors(t, p) + + if expr.String() != tt.expected { + t.Errorf("expected %s, got %s", tt.expected, expr.String()) + } + }) + } +} + +func TestMultilineStringExpressions(t *testing.T) { + tests := []struct { + input string + expected string + desc string + }{ + {`[[hello]] + [[world]]`, `("hello" + "world")`, "multiline string concatenation"}, + {`([[hello]])`, `"hello"`, "parenthesized multiline string"}, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + l := NewLexer(tt.input) + p := NewParser(l) + expr := p.parseExpression(LOWEST) + checkParserErrors(t, p) + + if expr.String() != tt.expected { + t.Errorf("expected %s, got %s", tt.expected, expr.String()) + } + }) + } +} + +func TestLexerMultilineStringTokens(t *testing.T) { + tests := []struct { + input string + expected []Token + desc string + }{ + { + `[[hello]] = [[world]]`, + []Token{ + {Type: STRING, Literal: "hello"}, + {Type: ASSIGN, Literal: "="}, + {Type: STRING, Literal: "world"}, + {Type: EOF, Literal: ""}, + }, + "multiline string tokens", + }, + { + `x = [[multiline +content]]`, + []Token{ + {Type: IDENT, Literal: "x"}, + {Type: ASSIGN, Literal: "="}, + {Type: STRING, Literal: "multiline\ncontent"}, + {Type: EOF, Literal: ""}, + }, + "multiline with newline tokens", + }, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + l := NewLexer(tt.input) + + for i, expectedToken := range tt.expected { + tok := l.NextToken() + if tok.Type != expectedToken.Type { + t.Errorf("token %d: expected type %v, got %v", i, expectedToken.Type, tok.Type) + } + if tok.Literal != expectedToken.Literal { + t.Errorf("token %d: expected literal %q, got %q", i, expectedToken.Literal, tok.Literal) + } + } + }) + } +} + +func TestMultilineStringEdgeCases(t *testing.T) { + tests := []struct { + input string + expected string + desc string + }{ + {`[[]]`, "", "empty multiline string"}, + {`[[a]]`, "a", "single character"}, + {`[[[]]]`, "[", "single brackets inside - first ]] is closing"}, + {`[[[nested]]]`, "[nested", "nested brackets - first ]] is closing"}, + {`[[]end]]`, "]end", "closing bracket in content"}, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + l := NewLexer(tt.input) + p := NewParser(l) + expr := p.parseExpression(LOWEST) + checkParserErrors(t, p) + + testStringLiteral(t, expr, tt.expected) + }) + } +} + +func TestMultilineStringErrors(t *testing.T) { + tests := []struct { + input string + expectedError string + desc string + }{ + {`[hello`, "unexpected token", "single bracket"}, + {`[[hello`, "", "unclosed multiline string - no error expected"}, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + l := NewLexer(tt.input) + p := NewParser(l) + p.ParseProgram() + + errors := p.Errors() + if len(errors) == 0 && tt.expectedError != "" { + t.Fatalf("expected parsing errors, got none") + } + + if tt.expectedError != "" { + found := false + for _, err := range errors { + if strings.Contains(err.Message, tt.expectedError) { + found = true + break + } + } + + if !found { + errorMsgs := make([]string, len(errors)) + for i, err := range errors { + errorMsgs[i] = err.Message + } + t.Errorf("expected error containing %q, got %v", tt.expectedError, errorMsgs) + } + } + }) + } +} + +func TestMixedStringTypes(t *testing.T) { + tests := []struct { + input string + expected string + desc string + }{ + {`"regular" + [[multiline]]`, `("regular" + "multiline")`, "regular + multiline"}, + {`{[[key1]] = "value1", "key2" = [[value2]]}`, `{"key1" = "value1", "key2" = "value2"}`, "mixed in table"}, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + l := NewLexer(tt.input) + p := NewParser(l) + expr := p.parseExpression(LOWEST) + checkParserErrors(t, p) + + if expr.String() != tt.expected { + t.Errorf("expected %s, got %s", tt.expected, expr.String()) + } + }) + } +}