multiline string support

This commit is contained in:
Sky Johnson 2025-06-09 14:08:08 -05:00
parent 82c29cba13
commit 7c99792706
3 changed files with 290 additions and 3 deletions

View File

@ -172,6 +172,26 @@ func (l *Lexer) readString() string {
return l.input[position:l.position]
}
// readMultilineString scans a [[ ... ]] string literal and returns the text
// between the delimiters, unmodified.
//
// It must be called with the lexer positioned on the first '[' of the opening
// "[[". On a terminated literal the lexer is left on the second ']' of the
// closing "]]". The first "]]" encountered ends the literal, so a single ']'
// may appear in the body. If the input ends before a closing "]]" is found,
// everything read so far is returned and no error is reported.
func (l *Lexer) readMultilineString() string {
	// Step over the two opening brackets.
	l.readChar()
	l.readChar()

	begin := l.position
	for l.ch != 0 {
		// Anything that is not the start of "]]" belongs to the body.
		if l.ch != ']' || l.peekChar() != ']' {
			l.readChar()
			continue
		}

		// Capture the body before consuming the first ']' so that neither
		// closing bracket ends up in the result.
		body := l.input[begin:l.position]
		l.readChar() // now positioned on the second ']'
		return body
	}

	// Reached end of input without a closing "]]": return what was collected.
	return l.input[begin:l.position]
}
// NextToken returns the next token from the input
func (l *Lexer) NextToken() Token {
var tok Token
@ -211,6 +231,13 @@ func (l *Lexer) NextToken() Token {
case '"':
tok.Type = STRING
tok.Literal = l.readString()
case '[':
if l.peekChar() == '[' {
tok.Type = STRING
tok.Literal = l.readMultilineString()
} else {
tok = Token{Type: ILLEGAL, Literal: string(l.ch), Line: l.line, Column: l.column}
}
case 0:
tok.Literal = ""
tok.Type = EOF

View File

@ -286,9 +286,13 @@ func (p *Parser) parseTableLiteral() Expression {
pair := TablePair{}
// Check if this is a key=value pair
if p.curTokenIs(IDENT) && p.peekTokenIs(ASSIGN) {
pair.Key = &Identifier{Value: p.curToken.Literal}
// Check if this is a key=value pair (identifier or string key)
if (p.curTokenIs(IDENT) || p.curTokenIs(STRING)) && p.peekTokenIs(ASSIGN) {
if p.curTokenIs(IDENT) {
pair.Key = &Identifier{Value: p.curToken.Literal}
} else {
pair.Key = &StringLiteral{Value: p.curToken.Literal}
}
p.nextToken() // move to =
p.nextToken() // move past =

View File

@ -766,3 +766,259 @@ func checkParserErrors(t *testing.T, p *Parser) {
}
t.FailNow()
}
// TestMultilineStringLiterals parses standalone [[ ... ]] literals as
// expressions and hands each result to testStringLiteral together with the
// text expected between the delimiters. Any parser error fails the subtest.
func TestMultilineStringLiterals(t *testing.T) {
	tests := []struct {
		input    string
		expected string
		desc     string
	}{
		{`[[hello world]]`, "hello world", "basic multiline string"},
		{`[[]]`, "", "empty multiline string"},
		{`[[hello
world]]`, "hello\nworld", "multiline with newline"},
		{`[[hello [brackets] world]]`, "hello [brackets] world", "nested single brackets"},
		{`[[line1
line2
line3]]`, "line1\nline2\nline3", "multiple lines"},
		// Written as an interpreted string so the tab characters are explicit:
		// raw strings cannot express \t, and literal tabs are invisible and
		// easily replaced by spaces, which would make the input disagree with
		// the expected value.
		{"[[\ttab and spaces\t]]", "\ttab and spaces\t", "tabs and spaces"},
		{`[[special chars: @#$%^&*()]]`, "special chars: @#$%^&*()", "special characters"},
		{`[["quotes" and 'apostrophes']]`, `"quotes" and 'apostrophes'`, "quotes inside multiline"},
	}

	for _, tt := range tests {
		t.Run(tt.desc, func(t *testing.T) {
			l := NewLexer(tt.input)
			p := NewParser(l)

			expr := p.parseExpression(LOWEST)
			checkParserErrors(t, p)
			testStringLiteral(t, expr, tt.expected)
		})
	}
}
func TestMultilineStringAssignments(t *testing.T) {
tests := []struct {
input string
identifier string
expected string
desc string
}{
{`text = [[hello world]]`, "text", "hello world", "basic assignment"},
{`empty = [[]]`, "empty", "", "empty multiline assignment"},
{`multiline = [[line1
line2]]`, "multiline", "line1\nline2", "multiline content assignment"},
{`sql = [[SELECT * FROM users WHERE id = 1]]`, "sql", "SELECT * FROM users WHERE id = 1", "SQL query assignment"},
}
for _, tt := range tests {
t.Run(tt.desc, func(t *testing.T) {
l := NewLexer(tt.input)
p := NewParser(l)
program := p.ParseProgram()
checkParserErrors(t, p)
if len(program.Statements) != 1 {
t.Fatalf("expected 1 statement, got %d", len(program.Statements))
}
stmt, ok := program.Statements[0].(*AssignStatement)
if !ok {
t.Fatalf("expected AssignStatement, got %T", program.Statements[0])
}
if stmt.Name.Value != tt.identifier {
t.Errorf("expected identifier %s, got %s", tt.identifier, stmt.Name.Value)
}
testStringLiteral(t, stmt.Value, tt.expected)
})
}
}
func TestMultilineStringInTables(t *testing.T) {
tests := []struct {
input string
expected string
desc string
}{
{`{[[hello]], [[world]]}`, `{"hello", "world"}`, "multiline strings in array"},
{`{msg = [[hello world]]}`, `{msg = "hello world"}`, "multiline string in hash"},
{`{[[key1]], "value1", key2 = [[value2]]}`, `{"key1", "value1", key2 = "value2"}`, "mixed strings in table"},
}
for _, tt := range tests {
t.Run(tt.desc, func(t *testing.T) {
l := NewLexer(tt.input)
p := NewParser(l)
expr := p.parseExpression(LOWEST)
checkParserErrors(t, p)
if expr.String() != tt.expected {
t.Errorf("expected %s, got %s", tt.expected, expr.String())
}
})
}
}
// TestMultilineStringExpressions parses [[ ... ]] literals used as operands
// of larger expressions and compares the String() output of the parsed
// expression with the expected rendering.
func TestMultilineStringExpressions(t *testing.T) {
	type exprCase struct {
		source   string
		rendered string
		name     string
	}

	table := []exprCase{
		{source: `[[hello]] + [[world]]`, rendered: `("hello" + "world")`, name: "multiline string concatenation"},
		{source: `([[hello]])`, rendered: `"hello"`, name: "parenthesized multiline string"},
	}

	for _, c := range table {
		t.Run(c.name, func(t *testing.T) {
			lexer := NewLexer(c.source)
			parser := NewParser(lexer)

			parsed := parser.parseExpression(LOWEST)
			checkParserErrors(t, parser)

			actual := parsed.String()
			if actual == c.rendered {
				return
			}
			t.Errorf("expected %s, got %s", c.rendered, actual)
		})
	}
}
// TestLexerMultilineStringTokens drives the lexer directly and checks the
// Type and Literal of each token it produces against an expected sequence.
// Only as many tokens as are listed in the expectation are read.
func TestLexerMultilineStringTokens(t *testing.T) {
	type lexCase struct {
		src   string
		want  []Token
		label string
	}

	cases := []lexCase{
		{
			src: `[[hello]] = [[world]]`,
			want: []Token{
				{Type: STRING, Literal: "hello"},
				{Type: ASSIGN, Literal: "="},
				{Type: STRING, Literal: "world"},
				{Type: EOF, Literal: ""},
			},
			label: "multiline string tokens",
		},
		{
			src: `x = [[multiline
content]]`,
			want: []Token{
				{Type: IDENT, Literal: "x"},
				{Type: ASSIGN, Literal: "="},
				{Type: STRING, Literal: "multiline\ncontent"},
				{Type: EOF, Literal: ""},
			},
			label: "multiline with newline tokens",
		},
	}

	for _, tc := range cases {
		t.Run(tc.label, func(t *testing.T) {
			lexer := NewLexer(tc.src)

			for idx := range tc.want {
				want := tc.want[idx]
				got := lexer.NextToken()

				// Type and literal are reported independently so a single
				// bad token can surface both mismatches.
				if got.Type != want.Type {
					t.Errorf("token %d: expected type %v, got %v", idx, want.Type, got.Type)
				}
				if got.Literal != want.Literal {
					t.Errorf("token %d: expected literal %q, got %q", idx, want.Literal, got.Literal)
				}
			}
		})
	}
}
func TestMultilineStringEdgeCases(t *testing.T) {
tests := []struct {
input string
expected string
desc string
}{
{`[[]]`, "", "empty multiline string"},
{`[[a]]`, "a", "single character"},
{`[[[]]]`, "[", "single brackets inside - first ]] is closing"},
{`[[[nested]]]`, "[nested", "nested brackets - first ]] is closing"},
{`[[]end]]`, "]end", "closing bracket in content"},
}
for _, tt := range tests {
t.Run(tt.desc, func(t *testing.T) {
l := NewLexer(tt.input)
p := NewParser(l)
expr := p.parseExpression(LOWEST)
checkParserErrors(t, p)
testStringLiteral(t, expr, tt.expected)
})
}
}
func TestMultilineStringErrors(t *testing.T) {
tests := []struct {
input string
expectedError string
desc string
}{
{`[hello`, "unexpected token", "single bracket"},
{`[[hello`, "", "unclosed multiline string - no error expected"},
}
for _, tt := range tests {
t.Run(tt.desc, func(t *testing.T) {
l := NewLexer(tt.input)
p := NewParser(l)
p.ParseProgram()
errors := p.Errors()
if len(errors) == 0 && tt.expectedError != "" {
t.Fatalf("expected parsing errors, got none")
}
if tt.expectedError != "" {
found := false
for _, err := range errors {
if strings.Contains(err.Message, tt.expectedError) {
found = true
break
}
}
if !found {
errorMsgs := make([]string, len(errors))
for i, err := range errors {
errorMsgs[i] = err.Message
}
t.Errorf("expected error containing %q, got %v", tt.expectedError, errorMsgs)
}
}
})
}
}
// TestMixedStringTypes parses expressions that combine quoted strings with
// [[ ... ]] strings, including string-keyed table entries, and compares the
// String() output of the parsed expression with the expected rendering.
func TestMixedStringTypes(t *testing.T) {
	type mixedCase struct {
		input  string
		output string
		title  string
	}

	scenarios := []mixedCase{
		{
			input:  `"regular" + [[multiline]]`,
			output: `("regular" + "multiline")`,
			title:  "regular + multiline",
		},
		{
			input:  `{[[key1]] = "value1", "key2" = [[value2]]}`,
			output: `{"key1" = "value1", "key2" = "value2"}`,
			title:  "mixed in table",
		},
	}

	for i := range scenarios {
		sc := scenarios[i]
		t.Run(sc.title, func(t *testing.T) {
			lx := NewLexer(sc.input)
			ps := NewParser(lx)
			node := ps.parseExpression(LOWEST)
			checkParserErrors(t, ps)

			if text := node.String(); text != sc.output {
				t.Errorf("expected %s, got %s", sc.output, text)
			}
		})
	}
}