multiline string support
This commit is contained in:
parent
82c29cba13
commit
7c99792706
@ -172,6 +172,26 @@ func (l *Lexer) readString() string {
|
||||
return l.input[position:l.position]
|
||||
}
|
||||
|
||||
// readMultilineString reads a multiline string literal using [[ ]] syntax
|
||||
func (l *Lexer) readMultilineString() string {
|
||||
l.readChar() // skip first '['
|
||||
l.readChar() // skip second '['
|
||||
|
||||
start := l.position
|
||||
for {
|
||||
if l.ch == 0 {
|
||||
break // EOF - return what we have
|
||||
}
|
||||
if l.ch == ']' && l.peekChar() == ']' {
|
||||
content := l.input[start:l.position]
|
||||
l.readChar() // skip first ']', positioned at second ']'
|
||||
return content
|
||||
}
|
||||
l.readChar()
|
||||
}
|
||||
return l.input[start:l.position]
|
||||
}
|
||||
|
||||
// NextToken returns the next token from the input
|
||||
func (l *Lexer) NextToken() Token {
|
||||
var tok Token
|
||||
@ -211,6 +231,13 @@ func (l *Lexer) NextToken() Token {
|
||||
case '"':
|
||||
tok.Type = STRING
|
||||
tok.Literal = l.readString()
|
||||
case '[':
|
||||
if l.peekChar() == '[' {
|
||||
tok.Type = STRING
|
||||
tok.Literal = l.readMultilineString()
|
||||
} else {
|
||||
tok = Token{Type: ILLEGAL, Literal: string(l.ch), Line: l.line, Column: l.column}
|
||||
}
|
||||
case 0:
|
||||
tok.Literal = ""
|
||||
tok.Type = EOF
|
||||
|
@ -286,9 +286,13 @@ func (p *Parser) parseTableLiteral() Expression {
|
||||
|
||||
pair := TablePair{}
|
||||
|
||||
// Check if this is a key=value pair
|
||||
if p.curTokenIs(IDENT) && p.peekTokenIs(ASSIGN) {
|
||||
pair.Key = &Identifier{Value: p.curToken.Literal}
|
||||
// Check if this is a key=value pair (identifier or string key)
|
||||
if (p.curTokenIs(IDENT) || p.curTokenIs(STRING)) && p.peekTokenIs(ASSIGN) {
|
||||
if p.curTokenIs(IDENT) {
|
||||
pair.Key = &Identifier{Value: p.curToken.Literal}
|
||||
} else {
|
||||
pair.Key = &StringLiteral{Value: p.curToken.Literal}
|
||||
}
|
||||
p.nextToken() // move to =
|
||||
p.nextToken() // move past =
|
||||
|
||||
|
@ -766,3 +766,259 @@ func checkParserErrors(t *testing.T, p *Parser) {
|
||||
}
|
||||
t.FailNow()
|
||||
}
|
||||
|
||||
func TestMultilineStringLiterals(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expected string
|
||||
desc string
|
||||
}{
|
||||
{`[[hello world]]`, "hello world", "basic multiline string"},
|
||||
{`[[]]`, "", "empty multiline string"},
|
||||
{`[[hello
|
||||
world]]`, "hello\nworld", "multiline with newline"},
|
||||
{`[[hello [brackets] world]]`, "hello [brackets] world", "nested single brackets"},
|
||||
{`[[line1
|
||||
line2
|
||||
line3]]`, "line1\nline2\nline3", "multiple lines"},
|
||||
{`[[ tab and spaces ]]`, "\ttab and spaces\t", "tabs and spaces"},
|
||||
{`[[special chars: @#$%^&*()]]`, "special chars: @#$%^&*()", "special characters"},
|
||||
{`[["quotes" and 'apostrophes']]`, `"quotes" and 'apostrophes'`, "quotes inside multiline"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.desc, func(t *testing.T) {
|
||||
l := NewLexer(tt.input)
|
||||
p := NewParser(l)
|
||||
expr := p.parseExpression(LOWEST)
|
||||
checkParserErrors(t, p)
|
||||
|
||||
testStringLiteral(t, expr, tt.expected)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultilineStringAssignments(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
identifier string
|
||||
expected string
|
||||
desc string
|
||||
}{
|
||||
{`text = [[hello world]]`, "text", "hello world", "basic assignment"},
|
||||
{`empty = [[]]`, "empty", "", "empty multiline assignment"},
|
||||
{`multiline = [[line1
|
||||
line2]]`, "multiline", "line1\nline2", "multiline content assignment"},
|
||||
{`sql = [[SELECT * FROM users WHERE id = 1]]`, "sql", "SELECT * FROM users WHERE id = 1", "SQL query assignment"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.desc, func(t *testing.T) {
|
||||
l := NewLexer(tt.input)
|
||||
p := NewParser(l)
|
||||
program := p.ParseProgram()
|
||||
checkParserErrors(t, p)
|
||||
|
||||
if len(program.Statements) != 1 {
|
||||
t.Fatalf("expected 1 statement, got %d", len(program.Statements))
|
||||
}
|
||||
|
||||
stmt, ok := program.Statements[0].(*AssignStatement)
|
||||
if !ok {
|
||||
t.Fatalf("expected AssignStatement, got %T", program.Statements[0])
|
||||
}
|
||||
|
||||
if stmt.Name.Value != tt.identifier {
|
||||
t.Errorf("expected identifier %s, got %s", tt.identifier, stmt.Name.Value)
|
||||
}
|
||||
|
||||
testStringLiteral(t, stmt.Value, tt.expected)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultilineStringInTables(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expected string
|
||||
desc string
|
||||
}{
|
||||
{`{[[hello]], [[world]]}`, `{"hello", "world"}`, "multiline strings in array"},
|
||||
{`{msg = [[hello world]]}`, `{msg = "hello world"}`, "multiline string in hash"},
|
||||
{`{[[key1]], "value1", key2 = [[value2]]}`, `{"key1", "value1", key2 = "value2"}`, "mixed strings in table"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.desc, func(t *testing.T) {
|
||||
l := NewLexer(tt.input)
|
||||
p := NewParser(l)
|
||||
expr := p.parseExpression(LOWEST)
|
||||
checkParserErrors(t, p)
|
||||
|
||||
if expr.String() != tt.expected {
|
||||
t.Errorf("expected %s, got %s", tt.expected, expr.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultilineStringExpressions(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expected string
|
||||
desc string
|
||||
}{
|
||||
{`[[hello]] + [[world]]`, `("hello" + "world")`, "multiline string concatenation"},
|
||||
{`([[hello]])`, `"hello"`, "parenthesized multiline string"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.desc, func(t *testing.T) {
|
||||
l := NewLexer(tt.input)
|
||||
p := NewParser(l)
|
||||
expr := p.parseExpression(LOWEST)
|
||||
checkParserErrors(t, p)
|
||||
|
||||
if expr.String() != tt.expected {
|
||||
t.Errorf("expected %s, got %s", tt.expected, expr.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestLexerMultilineStringTokens(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expected []Token
|
||||
desc string
|
||||
}{
|
||||
{
|
||||
`[[hello]] = [[world]]`,
|
||||
[]Token{
|
||||
{Type: STRING, Literal: "hello"},
|
||||
{Type: ASSIGN, Literal: "="},
|
||||
{Type: STRING, Literal: "world"},
|
||||
{Type: EOF, Literal: ""},
|
||||
},
|
||||
"multiline string tokens",
|
||||
},
|
||||
{
|
||||
`x = [[multiline
|
||||
content]]`,
|
||||
[]Token{
|
||||
{Type: IDENT, Literal: "x"},
|
||||
{Type: ASSIGN, Literal: "="},
|
||||
{Type: STRING, Literal: "multiline\ncontent"},
|
||||
{Type: EOF, Literal: ""},
|
||||
},
|
||||
"multiline with newline tokens",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.desc, func(t *testing.T) {
|
||||
l := NewLexer(tt.input)
|
||||
|
||||
for i, expectedToken := range tt.expected {
|
||||
tok := l.NextToken()
|
||||
if tok.Type != expectedToken.Type {
|
||||
t.Errorf("token %d: expected type %v, got %v", i, expectedToken.Type, tok.Type)
|
||||
}
|
||||
if tok.Literal != expectedToken.Literal {
|
||||
t.Errorf("token %d: expected literal %q, got %q", i, expectedToken.Literal, tok.Literal)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultilineStringEdgeCases(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expected string
|
||||
desc string
|
||||
}{
|
||||
{`[[]]`, "", "empty multiline string"},
|
||||
{`[[a]]`, "a", "single character"},
|
||||
{`[[[]]]`, "[", "single brackets inside - first ]] is closing"},
|
||||
{`[[[nested]]]`, "[nested", "nested brackets - first ]] is closing"},
|
||||
{`[[]end]]`, "]end", "closing bracket in content"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.desc, func(t *testing.T) {
|
||||
l := NewLexer(tt.input)
|
||||
p := NewParser(l)
|
||||
expr := p.parseExpression(LOWEST)
|
||||
checkParserErrors(t, p)
|
||||
|
||||
testStringLiteral(t, expr, tt.expected)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultilineStringErrors(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expectedError string
|
||||
desc string
|
||||
}{
|
||||
{`[hello`, "unexpected token", "single bracket"},
|
||||
{`[[hello`, "", "unclosed multiline string - no error expected"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.desc, func(t *testing.T) {
|
||||
l := NewLexer(tt.input)
|
||||
p := NewParser(l)
|
||||
p.ParseProgram()
|
||||
|
||||
errors := p.Errors()
|
||||
if len(errors) == 0 && tt.expectedError != "" {
|
||||
t.Fatalf("expected parsing errors, got none")
|
||||
}
|
||||
|
||||
if tt.expectedError != "" {
|
||||
found := false
|
||||
for _, err := range errors {
|
||||
if strings.Contains(err.Message, tt.expectedError) {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
errorMsgs := make([]string, len(errors))
|
||||
for i, err := range errors {
|
||||
errorMsgs[i] = err.Message
|
||||
}
|
||||
t.Errorf("expected error containing %q, got %v", tt.expectedError, errorMsgs)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMixedStringTypes(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expected string
|
||||
desc string
|
||||
}{
|
||||
{`"regular" + [[multiline]]`, `("regular" + "multiline")`, "regular + multiline"},
|
||||
{`{[[key1]] = "value1", "key2" = [[value2]]}`, `{"key1" = "value1", "key2" = "value2"}`, "mixed in table"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.desc, func(t *testing.T) {
|
||||
l := NewLexer(tt.input)
|
||||
p := NewParser(l)
|
||||
expr := p.parseExpression(LOWEST)
|
||||
checkParserErrors(t, p)
|
||||
|
||||
if expr.String() != tt.expected {
|
||||
t.Errorf("expected %s, got %s", tt.expected, expr.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user