Compare commits

..

6 Commits

Author SHA1 Message Date
7c99792706 multiline string support 2025-06-09 14:08:08 -05:00
82c29cba13 hex/binary/sci not 2025-06-09 12:50:03 -05:00
2db5c3bfe5 tables/arrays 2025-06-09 12:39:37 -05:00
a744c12baf enhance errors 2025-06-09 12:18:34 -05:00
1fdd6ed28c first parser test pass 2025-06-09 12:01:20 -05:00
e22a844dd4 true/false/nil 2025-06-09 09:58:47 -05:00
5 changed files with 1494 additions and 25 deletions

View File

@ -67,6 +67,25 @@ type StringLiteral struct {
func (sl *StringLiteral) expressionNode() {} func (sl *StringLiteral) expressionNode() {}
func (sl *StringLiteral) String() string { return fmt.Sprintf(`"%s"`, sl.Value) } func (sl *StringLiteral) String() string { return fmt.Sprintf(`"%s"`, sl.Value) }
// BooleanLiteral is the AST node for the literals `true` and `false`.
type BooleanLiteral struct {
	// Value is the Go boolean the literal denotes.
	Value bool
}

// expressionNode is an empty marker method (presumably what places this
// type in the Expression node family; the interface is declared elsewhere).
func (bl *BooleanLiteral) expressionNode() {}

// String returns "true" or "false" according to Value.
func (bl *BooleanLiteral) String() string {
	switch bl.Value {
	case true:
		return "true"
	default:
		return "false"
	}
}
// NilLiteral is the AST node for the literal `nil`. It carries no data.
type NilLiteral struct{}

// expressionNode is an empty marker method (presumably what places this
// type in the Expression node family; the interface is declared elsewhere).
func (nl *NilLiteral) expressionNode() {}

// String always returns the text "nil".
func (nl *NilLiteral) String() string {
	const text = "nil"
	return text
}
// InfixExpression represents binary operations // InfixExpression represents binary operations
type InfixExpression struct { type InfixExpression struct {
Left Expression Left Expression
@ -78,3 +97,59 @@ func (ie *InfixExpression) expressionNode() {}
func (ie *InfixExpression) String() string { func (ie *InfixExpression) String() string {
return fmt.Sprintf("(%s %s %s)", ie.Left.String(), ie.Operator, ie.Right.String()) return fmt.Sprintf("(%s %s %s)", ie.Left.String(), ie.Operator, ie.Right.String())
} }
// TablePair is one entry of a table literal: either a keyed
// `key = value` field or a positional (array-style) element.
type TablePair struct {
	Key   Expression // nil for positional (array-style) elements
	Value Expression
}

// String renders the pair as text: "key = value" when a key is present,
// otherwise just the value. Value must be non-nil.
func (tp *TablePair) String() string {
	if tp.Key != nil {
		return fmt.Sprintf("%s = %s", tp.Key.String(), tp.Value.String())
	}
	return tp.Value.String()
}
// TableLiteral is the AST node for a `{ ... }` table constructor.
type TableLiteral struct {
	// Pairs holds the table's entries in slice order.
	Pairs []TablePair
}

// expressionNode is an empty marker method (presumably what places this
// type in the Expression node family; the interface is declared elsewhere).
func (tl *TableLiteral) expressionNode() {}

// String renders the table as its entries separated by ", " and wrapped
// in braces; an empty table renders as "{}".
func (tl *TableLiteral) String() string {
	rendered := make([]string, 0, len(tl.Pairs))
	for i := range tl.Pairs {
		rendered = append(rendered, tl.Pairs[i].String())
	}
	return fmt.Sprintf("{%s}", joinStrings(rendered, ", "))
}

// IsArray reports whether no entry has a key, i.e. the table consists
// solely of positional elements. An empty table counts as an array.
func (tl *TableLiteral) IsArray() bool {
	for i := range tl.Pairs {
		if tl.Pairs[i].Key != nil {
			return false
		}
	}
	return true
}
// joinStrings concatenates the elements of strs, placing sep between
// consecutive elements. It returns "" for an empty (or nil) slice and the
// sole element unchanged for a one-element slice.
//
// The result is assembled in a single buffer sized up front, so the cost
// is linear in the output length rather than re-copying the accumulated
// string on every iteration.
func joinStrings(strs []string, sep string) string {
	switch len(strs) {
	case 0:
		return ""
	case 1:
		return strs[0]
	}

	// Exact output size: all elements plus one separator per gap.
	size := len(sep) * (len(strs) - 1)
	for _, s := range strs {
		size += len(s)
	}

	buf := make([]byte, 0, size)
	buf = append(buf, strs[0]...)
	for _, s := range strs[1:] {
		buf = append(buf, sep...)
		buf = append(buf, s...)
	}
	return string(buf)
}

View File

@ -89,21 +89,74 @@ func (l *Lexer) readIdentifier() string {
return l.input[position:l.position] return l.input[position:l.position]
} }
// readNumber reads a number (including decimals) // readNumber reads a number (decimal, hex, binary, or scientific notation)
func (l *Lexer) readNumber() string { func (l *Lexer) readNumber() string {
position := l.position position := l.position
// Check for hex (0x/0X) or binary (0b/0B) prefix
if l.ch == '0' && (l.peekChar() == 'x' || l.peekChar() == 'X') {
return l.readHexNumber()
}
if l.ch == '0' && (l.peekChar() == 'b' || l.peekChar() == 'B') {
return l.readBinaryNumber()
}
// Read regular decimal number
for isDigit(l.ch) { for isDigit(l.ch) {
l.readChar() l.readChar()
} }
// Handle decimal points // Handle decimal point
if l.ch == '.' && isDigit(l.peekChar()) { if l.ch == '.' && isDigit(l.peekChar()) {
l.readChar() l.readChar() // consume '.'
for isDigit(l.ch) { for isDigit(l.ch) {
l.readChar() l.readChar()
} }
} }
// Handle scientific notation (e/E)
if l.ch == 'e' || l.ch == 'E' {
l.readChar() // consume 'e'/'E'
// Optional +/- sign
if l.ch == '+' || l.ch == '-' {
l.readChar()
}
// Continue reading digits for the exponent
for isDigit(l.ch) {
l.readChar()
}
}
return l.input[position:l.position]
}
// readHexNumber reads a hexadecimal literal whose leading '0' is the
// lexer's current character and returns the whole lexeme, prefix included.
//
// After the two-character prefix it keeps consuming while the current
// character is a hex digit, anything isLetter accepts, or a decimal digit.
// Letters outside a-f are therefore swallowed into the lexeme as well, so
// a malformed literal comes back as one token instead of being split.
func (l *Lexer) readHexNumber() string {
	start := l.position

	// Step over the "0x" / "0X" prefix.
	l.readChar()
	l.readChar()

	for {
		if !(isHexDigit(l.ch) || isLetter(l.ch) || isDigit(l.ch)) {
			break
		}
		l.readChar()
	}

	return l.input[start:l.position]
}
// readBinaryNumber reads a binary number (0b...)
func (l *Lexer) readBinaryNumber() string {
position := l.position
l.readChar() // skip '0'
l.readChar() // skip 'b'/'B'
// Continue reading until we hit a non-digit character
for isDigit(l.ch) || isLetter(l.ch) {
l.readChar()
}
return l.input[position:l.position] return l.input[position:l.position]
} }
@ -119,6 +172,26 @@ func (l *Lexer) readString() string {
return l.input[position:l.position] return l.input[position:l.position]
} }
// readMultilineString reads a `[[ ... ]]` string literal and returns the
// text between the delimiters. It must be called with the lexer's current
// character on the first '['.
//
// When the closing "]]" is found, the lexer is left on the second ']'.
// If the input ends first (current character 0), everything read after
// the opening delimiter is returned as-is.
func (l *Lexer) readMultilineString() string {
	// Step over the opening "[[".
	l.readChar()
	l.readChar()

	begin := l.position
	for l.ch != 0 {
		if l.ch == ']' && l.peekChar() == ']' {
			body := l.input[begin:l.position]
			l.readChar() // move from the first ']' onto the second
			return body
		}
		l.readChar()
	}

	// Input ended before "]]" was seen.
	return l.input[begin:l.position]
}
// NextToken returns the next token from the input // NextToken returns the next token from the input
func (l *Lexer) NextToken() Token { func (l *Lexer) NextToken() Token {
var tok Token var tok Token
@ -149,9 +222,22 @@ func (l *Lexer) NextToken() Token {
tok = Token{Type: LPAREN, Literal: string(l.ch), Line: l.line, Column: l.column} tok = Token{Type: LPAREN, Literal: string(l.ch), Line: l.line, Column: l.column}
case ')': case ')':
tok = Token{Type: RPAREN, Literal: string(l.ch), Line: l.line, Column: l.column} tok = Token{Type: RPAREN, Literal: string(l.ch), Line: l.line, Column: l.column}
case '{':
tok = Token{Type: LBRACE, Literal: string(l.ch), Line: l.line, Column: l.column}
case '}':
tok = Token{Type: RBRACE, Literal: string(l.ch), Line: l.line, Column: l.column}
case ',':
tok = Token{Type: COMMA, Literal: string(l.ch), Line: l.line, Column: l.column}
case '"': case '"':
tok.Type = STRING tok.Type = STRING
tok.Literal = l.readString() tok.Literal = l.readString()
case '[':
if l.peekChar() == '[' {
tok.Type = STRING
tok.Literal = l.readMultilineString()
} else {
tok = Token{Type: ILLEGAL, Literal: string(l.ch), Line: l.line, Column: l.column}
}
case 0: case 0:
tok.Literal = "" tok.Literal = ""
tok.Type = EOF tok.Type = EOF
@ -181,3 +267,11 @@ func isLetter(ch byte) bool {
func isDigit(ch byte) bool { func isDigit(ch byte) bool {
return '0' <= ch && ch <= '9' return '0' <= ch && ch <= '9'
} }
// isHexDigit reports whether ch is an ASCII hexadecimal digit:
// 0-9, a-f, or A-F.
func isHexDigit(ch byte) bool {
	switch {
	case isDigit(ch):
		return true
	case 'a' <= ch && ch <= 'f', 'A' <= ch && ch <= 'F':
		return true
	default:
		return false
	}
}
// isBinaryDigit reports whether ch is the ASCII character '0' or '1'.
func isBinaryDigit(ch byte) bool {
	switch ch {
	case '0', '1':
		return true
	}
	return false
}

View File

@ -3,8 +3,22 @@ package parser
import ( import (
"fmt" "fmt"
"strconv" "strconv"
"strings"
) )
// ParseError describes one parsing failure together with the source
// location and token it was recorded against.
type ParseError struct {
	Message string // human-readable description of the problem
	Line    int    // line recorded for the error
	Column  int    // column recorded for the error
	Token   Token  // token recorded for the error; its Literal is shown in Error
}

// Error formats the error as
// "Parse error at line L, column C: MESSAGE (near 'LITERAL')".
func (pe ParseError) Error() string {
	const layout = "Parse error at line %d, column %d: %s (near '%s')"
	return fmt.Sprintf(layout, pe.Line, pe.Column, pe.Message, pe.Token.Literal)
}
// Parser implements a recursive descent Pratt parser // Parser implements a recursive descent Pratt parser
type Parser struct { type Parser struct {
lexer *Lexer lexer *Lexer
@ -15,21 +29,25 @@ type Parser struct {
prefixParseFns map[TokenType]func() Expression prefixParseFns map[TokenType]func() Expression
infixParseFns map[TokenType]func(Expression) Expression infixParseFns map[TokenType]func(Expression) Expression
errors []string errors []ParseError
} }
// NewParser creates a new parser instance // NewParser creates a new parser instance
func NewParser(lexer *Lexer) *Parser { func NewParser(lexer *Lexer) *Parser {
p := &Parser{ p := &Parser{
lexer: lexer, lexer: lexer,
errors: []string{}, errors: []ParseError{},
} }
p.prefixParseFns = make(map[TokenType]func() Expression) p.prefixParseFns = make(map[TokenType]func() Expression)
p.registerPrefix(IDENT, p.parseIdentifier) p.registerPrefix(IDENT, p.parseIdentifier)
p.registerPrefix(NUMBER, p.parseNumberLiteral) p.registerPrefix(NUMBER, p.parseNumberLiteral)
p.registerPrefix(STRING, p.parseStringLiteral) p.registerPrefix(STRING, p.parseStringLiteral)
p.registerPrefix(TRUE, p.parseBooleanLiteral)
p.registerPrefix(FALSE, p.parseBooleanLiteral)
p.registerPrefix(NIL, p.parseNilLiteral)
p.registerPrefix(LPAREN, p.parseGroupedExpression) p.registerPrefix(LPAREN, p.parseGroupedExpression)
p.registerPrefix(LBRACE, p.parseTableLiteral)
p.infixParseFns = make(map[TokenType]func(Expression) Expression) p.infixParseFns = make(map[TokenType]func(Expression) Expression)
p.registerInfix(PLUS, p.parseInfixExpression) p.registerInfix(PLUS, p.parseInfixExpression)
@ -78,12 +96,25 @@ func (p *Parser) ParseProgram() *Program {
// parseStatement parses a statement // parseStatement parses a statement
func (p *Parser) parseStatement() Statement { func (p *Parser) parseStatement() Statement {
if p.curTokenIs(IDENT) && p.peekTokenIs(ASSIGN) { switch p.curToken.Type {
case IDENT:
if p.peekTokenIs(ASSIGN) {
return p.parseAssignStatement() return p.parseAssignStatement()
} }
p.addError("unexpected identifier, expected assignment or declaration")
// Skip unknown statements for now
return nil return nil
case ASSIGN:
p.addError("assignment operator '=' without left-hand side identifier")
return nil
case ILLEGAL:
p.addError(fmt.Sprintf("unexpected token '%s'", p.curToken.Literal))
return nil
case EOF:
return nil
default:
p.addError(fmt.Sprintf("unexpected token '%s', expected statement", p.curToken.Literal))
return nil
}
} }
// parseAssignStatement parses variable assignment // parseAssignStatement parses variable assignment
@ -91,6 +122,7 @@ func (p *Parser) parseAssignStatement() *AssignStatement {
stmt := &AssignStatement{} stmt := &AssignStatement{}
if !p.curTokenIs(IDENT) { if !p.curTokenIs(IDENT) {
p.addError("expected identifier for assignment")
return nil return nil
} }
@ -103,6 +135,10 @@ func (p *Parser) parseAssignStatement() *AssignStatement {
p.nextToken() p.nextToken()
stmt.Value = p.parseExpression(LOWEST) stmt.Value = p.parseExpression(LOWEST)
if stmt.Value == nil {
p.addError("expected expression after assignment operator")
return nil
}
return stmt return stmt
} }
@ -116,6 +152,9 @@ func (p *Parser) parseExpression(precedence Precedence) Expression {
} }
leftExp := prefix() leftExp := prefix()
if leftExp == nil {
return nil
}
for !p.peekTokenIs(EOF) && precedence < p.peekPrecedence() { for !p.peekTokenIs(EOF) && precedence < p.peekPrecedence() {
infix := p.infixParseFns[p.peekToken.Type] infix := p.infixParseFns[p.peekToken.Type]
@ -125,6 +164,9 @@ func (p *Parser) parseExpression(precedence Precedence) Expression {
p.nextToken() p.nextToken()
leftExp = infix(leftExp) leftExp = infix(leftExp)
if leftExp == nil {
return nil
}
} }
return leftExp return leftExp
@ -137,13 +179,61 @@ func (p *Parser) parseIdentifier() Expression {
func (p *Parser) parseNumberLiteral() Expression { func (p *Parser) parseNumberLiteral() Expression {
lit := &NumberLiteral{} lit := &NumberLiteral{}
literal := p.curToken.Literal
value, err := strconv.ParseFloat(p.curToken.Literal, 64) var value float64
if err != nil { var err error
msg := fmt.Sprintf("could not parse %q as float", p.curToken.Literal)
p.errors = append(p.errors, msg) // Check for hexadecimal (0x/0X prefix)
if strings.HasPrefix(literal, "0x") || strings.HasPrefix(literal, "0X") {
// Validate hex format
if len(literal) <= 2 {
p.addError(fmt.Sprintf("could not parse '%s' as hexadecimal number", literal))
return nil return nil
} }
hexPart := literal[2:]
for _, ch := range hexPart {
if !((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) {
p.addError(fmt.Sprintf("could not parse '%s' as hexadecimal number", literal))
return nil
}
}
// Parse as hex and convert to float64
intVal, parseErr := strconv.ParseInt(literal, 0, 64)
if parseErr != nil {
p.addError(fmt.Sprintf("could not parse '%s' as hexadecimal number", literal))
return nil
}
value = float64(intVal)
} else if strings.HasPrefix(literal, "0b") || strings.HasPrefix(literal, "0B") {
// Validate binary format
if len(literal) <= 2 {
p.addError(fmt.Sprintf("could not parse '%s' as binary number", literal))
return nil
}
binaryPart := literal[2:]
for _, ch := range binaryPart {
if ch != '0' && ch != '1' {
p.addError(fmt.Sprintf("could not parse '%s' as binary number", literal))
return nil
}
}
// Strip the prefix and parse with an explicit base of 2 (strconv.ParseInt with base 0 only recognizes the "0b" prefix on Go 1.13 and later)
binaryStr := literal[2:] // remove "0b" prefix
intVal, parseErr := strconv.ParseInt(binaryStr, 2, 64)
if parseErr != nil {
p.addError(fmt.Sprintf("could not parse '%s' as binary number", literal))
return nil
}
value = float64(intVal)
} else {
// Parse as regular decimal (handles scientific notation automatically)
value, err = strconv.ParseFloat(literal, 64)
if err != nil {
p.addError(fmt.Sprintf("could not parse '%s' as number", literal))
return nil
}
}
lit.Value = value lit.Value = value
return lit return lit
@ -153,10 +243,21 @@ func (p *Parser) parseStringLiteral() Expression {
return &StringLiteral{Value: p.curToken.Literal} return &StringLiteral{Value: p.curToken.Literal}
} }
// parseBooleanLiteral builds a BooleanLiteral from the current token:
// Value is true when the current token is TRUE and false otherwise.
func (p *Parser) parseBooleanLiteral() Expression {
	lit := &BooleanLiteral{}
	lit.Value = p.curTokenIs(TRUE)
	return lit
}
// parseNilLiteral returns a fresh NilLiteral node. It does not inspect or
// advance the parser's tokens.
func (p *Parser) parseNilLiteral() Expression {
	return new(NilLiteral)
}
func (p *Parser) parseGroupedExpression() Expression { func (p *Parser) parseGroupedExpression() Expression {
p.nextToken() p.nextToken()
exp := p.parseExpression(LOWEST) exp := p.parseExpression(LOWEST)
if exp == nil {
return nil
}
if !p.expectPeek(RPAREN) { if !p.expectPeek(RPAREN) {
return nil return nil
@ -165,6 +266,80 @@ func (p *Parser) parseGroupedExpression() Expression {
return exp return exp
} }
// parseTableLiteral parses a table constructor such as `{}`, `{1, 2}` or
// `{a = 1, "b" = 2,}` into a *TableLiteral.
//
// It expects the current token to be the opening '{' (it is registered as
// the prefix parser for LBRACE). On success the current token is the
// closing '}' and the returned table has a non-nil Pairs slice. On failure
// it returns nil; an error has been recorded either here or by the helper
// that failed (parseExpression / expectPeek).
//
// Entries are separated by commas and a single trailing comma is allowed.
// An entry of the form `IDENT = expr` or `STRING = expr` becomes a keyed
// pair; anything else is parsed as a positional element with a nil Key.
func (p *Parser) parseTableLiteral() Expression {
	table := &TableLiteral{Pairs: []TablePair{}}

	// Empty table: "{}".
	if p.peekTokenIs(RBRACE) {
		p.nextToken()
		return table
	}

	p.nextToken() // move onto the first token of the first entry

	for {
		if p.curTokenIs(EOF) {
			p.addError("unexpected end of input, expected }")
			return nil
		}

		pair := TablePair{}

		// Keyed entry: an identifier or string immediately followed by '='.
		if (p.curTokenIs(IDENT) || p.curTokenIs(STRING)) && p.peekTokenIs(ASSIGN) {
			if p.curTokenIs(IDENT) {
				pair.Key = &Identifier{Value: p.curToken.Literal}
			} else {
				pair.Key = &StringLiteral{Value: p.curToken.Literal}
			}

			p.nextToken() // onto '='
			p.nextToken() // onto the first token of the value

			if p.curTokenIs(EOF) {
				p.addError("expected expression after assignment operator")
				return nil
			}
		}

		// Keyed and positional entries share the same value parsing.
		pair.Value = p.parseExpression(LOWEST)
		if pair.Value == nil {
			return nil
		}
		table.Pairs = append(table.Pairs, pair)

		if !p.peekTokenIs(COMMA) {
			break
		}

		p.nextToken() // onto ','
		p.nextToken() // onto the token after ','

		// Trailing comma: the current token is already the closing brace,
		// so the table is complete. Falling through to expectPeek here
		// would wrongly demand a second '}' after this one.
		if p.curTokenIs(RBRACE) {
			return table
		}

		if p.curTokenIs(EOF) {
			p.addError("expected next token to be }")
			return nil
		}
	}

	// The last entry was not followed by a comma, so '}' must come next.
	if !p.expectPeek(RBRACE) {
		return nil
	}

	return table
}
func (p *Parser) parseInfixExpression(left Expression) Expression { func (p *Parser) parseInfixExpression(left Expression) Expression {
expression := &InfixExpression{ expression := &InfixExpression{
Left: left, Left: left,
@ -175,6 +350,11 @@ func (p *Parser) parseInfixExpression(left Expression) Expression {
p.nextToken() p.nextToken()
expression.Right = p.parseExpression(precedence) expression.Right = p.parseExpression(precedence)
if expression.Right == nil {
p.addError(fmt.Sprintf("expected expression after operator '%s'", expression.Operator))
return nil
}
return expression return expression
} }
@ -191,21 +371,50 @@ func (p *Parser) expectPeek(t TokenType) bool {
if p.peekTokenIs(t) { if p.peekTokenIs(t) {
p.nextToken() p.nextToken()
return true return true
} else { }
p.peekError(t) p.peekError(t)
return false return false
} }
// Error handling methods
func (p *Parser) addError(message string) {
p.errors = append(p.errors, ParseError{
Message: message,
Line: p.curToken.Line,
Column: p.curToken.Column,
Token: p.curToken,
})
} }
func (p *Parser) peekError(t TokenType) { func (p *Parser) peekError(t TokenType) {
msg := fmt.Sprintf("expected next token to be %v, got %v instead", message := fmt.Sprintf("expected next token to be %s, got %s instead",
t, p.peekToken.Type) tokenTypeString(t), tokenTypeString(p.peekToken.Type))
p.errors = append(p.errors, msg) p.errors = append(p.errors, ParseError{
Message: message,
Line: p.peekToken.Line,
Column: p.peekToken.Column,
Token: p.peekToken,
})
} }
func (p *Parser) noPrefixParseFnError(t TokenType) { func (p *Parser) noPrefixParseFnError(t TokenType) {
msg := fmt.Sprintf("no prefix parse function for %v found", t) var message string
p.errors = append(p.errors, msg) switch t {
case ASSIGN:
message = "unexpected assignment operator, missing left-hand side identifier"
case PLUS, MINUS, STAR, SLASH:
message = fmt.Sprintf("unexpected operator '%s', missing left operand", tokenTypeString(t))
case RPAREN:
message = "unexpected closing parenthesis"
case RBRACE:
message = "unexpected closing brace"
case EOF:
message = "unexpected end of input"
default:
message = fmt.Sprintf("unexpected token '%s'", tokenTypeString(t))
}
p.addError(message)
} }
func (p *Parser) peekPrecedence() Precedence { func (p *Parser) peekPrecedence() Precedence {
@ -223,6 +432,64 @@ func (p *Parser) curPrecedence() Precedence {
} }
// Errors returns all parsing errors // Errors returns all parsing errors
func (p *Parser) Errors() []string { func (p *Parser) Errors() []ParseError {
return p.errors return p.errors
} }
// HasErrors reports whether at least one parse error has been recorded.
func (p *Parser) HasErrors() bool {
	return len(p.errors) != 0
}
// ErrorStrings returns the Error() text of every recorded parse error, in
// recording order, for callers that still expect plain strings. The
// result is always a non-nil slice, empty when there are no errors.
func (p *Parser) ErrorStrings() []string {
	messages := make([]string, 0, len(p.errors))
	for idx := range p.errors {
		messages = append(messages, p.errors[idx].Error())
	}
	return messages
}
// tokenTypeString maps a token type to the human-readable text used in
// error messages: a word for value-like tokens ("identifier", "number",
// "string", "boolean", "nil"), the literal symbol for operators and
// delimiters, and "unknown" for any type not listed here.
func tokenTypeString(t TokenType) string {
	label := "unknown"

	switch t {
	// Value-like tokens.
	case IDENT:
		label = "identifier"
	case NUMBER:
		label = "number"
	case STRING:
		label = "string"
	case TRUE, FALSE:
		label = "boolean"
	case NIL:
		label = "nil"

	// Operators.
	case ASSIGN:
		label = "="
	case PLUS:
		label = "+"
	case MINUS:
		label = "-"
	case STAR:
		label = "*"
	case SLASH:
		label = "/"

	// Delimiters.
	case LPAREN:
		label = "("
	case RPAREN:
		label = ")"
	case LBRACE:
		label = "{"
	case RBRACE:
		label = "}"
	case COMMA:
		label = ","

	// Keywords and special tokens.
	case VAR:
		label = "var"
	case EOF:
		label = "end of file"
	case ILLEGAL:
		label = "illegal token"
	}

	return label
}

1024
parser/parser_test.go Normal file

File diff suppressed because it is too large Load Diff

View File

@ -8,6 +8,9 @@ const (
IDENT TokenType = iota IDENT TokenType = iota
NUMBER NUMBER
STRING STRING
TRUE
FALSE
NIL
// Operators // Operators
ASSIGN // = ASSIGN // =
@ -19,6 +22,9 @@ const (
// Delimiters // Delimiters
LPAREN // ( LPAREN // (
RPAREN // ) RPAREN // )
LBRACE // {
RBRACE // }
COMMA // ,
// Keywords // Keywords
VAR VAR
@ -60,6 +66,9 @@ var precedences = map[TokenType]Precedence{
func lookupIdent(ident string) TokenType { func lookupIdent(ident string) TokenType {
keywords := map[string]TokenType{ keywords := map[string]TokenType{
"var": VAR, "var": VAR,
"true": TRUE,
"false": FALSE,
"nil": NIL,
} }
if tok, ok := keywords[ident]; ok { if tok, ok := keywords[ident]; ok {