From a531dedc5cc0abd34f076d74f1b03a3e3df16d72 Mon Sep 17 00:00:00 2001 From: Sky Johnson Date: Sun, 2 Mar 2025 05:58:12 -0600 Subject: [PATCH] ref 3 --- config.go | 106 +++++++- parser.go | 320 +++++++++++++++------- scanner.go | 761 ++++++++++++++++++++++++++++++----------------------- 3 files changed, 756 insertions(+), 431 deletions(-) diff --git a/config.go b/config.go index 629aa86..e8b7667 100644 --- a/config.go +++ b/config.go @@ -189,6 +189,108 @@ func (c *Config) GetMap(key string) (map[string]any, error) { // Load parses a config from a reader func Load(r io.Reader) (*Config, error) { - parser := NewParser(r) - return parser.Parse() + scanner := NewScanner(r) + config := NewConfig() + + for { + err := scanner.SkipWhitespace() + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + + b, err := scanner.PeekByte() + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + + // Handle comments + if b == '-' { + peekBytes, err := scanner.PeekBytes(2) + if err == nil && len(peekBytes) == 2 && peekBytes[1] == '-' { + err = scanner.scanComment() + if err != nil { + return nil, err + } + continue + } + } + + // Process key-value pair + if isLetter(b) { + // Read name + nameToken, err := scanner.scanName(scanner.line, scanner.col) + if err != nil { + return nil, err + } + name := string(nameToken.Value) + + // Skip whitespace + err = scanner.SkipWhitespace() + if err != nil { + return nil, err + } + + // Must be followed by = or { + b, err = scanner.PeekByte() + if err != nil { + return nil, err + } + + if b != '=' && b != '{' { + return nil, scanner.Error("expected '=' or '{' after name") + } + + var value any + if b == '=' { + _, _ = scanner.ReadByte() // consume = + err = scanner.SkipWhitespace() + if err != nil { + return nil, err + } + + value, err = scanner.ScanValue() + if err != nil { + return nil, err + } + } else { // b == '{' + _, _ = scanner.ReadByte() // consume { + value, err = scanner.scanObjectOrArray() + if err != nil { + return nil, err + } + } + + // Store in config + config.data[name] = value + } else { + return nil, scanner.Error("expected name at top level") + } + } + + return config, nil +} + +// Helpers + +func isLetter(b byte) bool { + return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') +} + +func isDigit(b byte) bool { + return b >= '0' && b <= '9' +} + +func hasDot(s string) bool { + for i := 0; i < len(s); i++ { + if s[i] == '.' { + return true + } + } + return false } diff --git a/parser.go b/parser.go index 356b01d..843953c 100644 --- a/parser.go +++ b/parser.go @@ -3,6 +3,7 @@ package config import ( "fmt" "io" + "strconv" ) // Parser parses configuration files @@ -11,6 +12,7 @@ type Parser struct { config *Config currentObject map[string]any stack []map[string]any + currentToken Token } // NewParser creates a new parser with a reader and empty config @@ -24,9 +26,10 @@ func NewParser(r io.Reader) *Parser { } } -// Error creates an error with line information from the scanner +// Error creates an error with line information from the current token func (p *Parser) Error(msg string) error { - return fmt.Errorf("line %d: %s", p.scanner.line, msg) + return fmt.Errorf("line %d, column %d: %s", + p.currentToken.Line, p.currentToken.Column, msg) } // Parse parses the config file and returns a Config @@ -53,119 +56,244 @@ func (p *Parser) popObject() { } } +// nextToken gets the next meaningful token (skipping comments) +func (p *Parser) nextToken() (Token, error) { + for { + token, err := p.scanner.NextToken() + if err != nil { + return token, err + } + + // Skip comment tokens + if token.Type != TokenComment { + p.currentToken = token + return token, nil + } + } +} + // parseContent is the main parsing function func (p *Parser) parseContent() error { - skipErr := p.scanner.SkipWhitespace() - for ; skipErr == nil; skipErr = p.scanner.SkipWhitespace() { - r, peekErr := p.scanner.PeekRune() - if peekErr == io.EOF { + for { + token, err := p.nextToken() + if err != nil { + return err + } + + // Check for end of file + if token.Type == TokenEOF { break } - if peekErr != nil { - return peekErr + + // We expect top level entries to be names + if token.Type != TokenName { + return p.Error("expected name at top level") } - // Handle comments - if r == '-' { - if err := p.scanner.ScanComment(); err != nil { - return err - } - continue + // Get the property name + name := string(token.Value) + + // Get the next token (should be = or {) + token, err = p.nextToken() + if err != nil { + return err } - // Handle name=value pairs or named objects - if isLetter(r) { - name, err := p.scanner.ScanName() + var value any + + if token.Type == TokenEquals { + // It's a standard key=value assignment + value, err = p.parseValue() if err != nil { return err } - - if err = p.scanner.SkipWhitespace(); err != nil { + } else if token.Type == TokenOpenBrace { + // It's a map/array without '=' + value, err = p.parseObject() + if err != nil { return err } - - r, err = p.scanner.PeekRune() - if err != nil && err != io.EOF { - return err - } - - // Assignment or direct map/array - if r == '=' { - // It's a standard key=value pair - p.scanner.ReadRune() // consume '=' - - if err = p.scanner.SkipWhitespace(); err != nil { - return err - } - - value, err := p.scanner.ScanValue() - if err != nil { - return err - } - - // Store the value directly - if mapValue, ok := value.(map[string]any); ok { - // Add an entry in current object - newMap := make(map[string]any, 8) // Pre-allocate with capacity - p.currentObject[name] = newMap - - // Process the map contents - p.pushObject(newMap) - - // Copy values from scanned map to our object - for k, v := range mapValue { - p.currentObject[k] = v - } - - p.popObject() - } else { - // Direct storage for primitives and arrays - p.currentObject[name] = value - } - } else if r == '{' { - // It's a map/array without '=' - value, err := p.scanner.ScanValue() - if err != nil { - return err - } - - // Store the complex value directly - if mapValue, ok := value.(map[string]any); ok { - // Add an entry in current object - newMap := make(map[string]any, 8) // Pre-allocate with capacity - p.currentObject[name] = newMap - - // Process the map contents - p.pushObject(newMap) - - // Copy values from scanned map to our object - for k, v := range mapValue { - p.currentObject[k] = v - } - - p.popObject() - } else { - // Direct storage for arrays - p.currentObject[name] = value - } - } else { - return p.Error("expected '=' or '{' after name") - } - - continue + } else { + return p.Error("expected '=' or '{' after name") } - return p.Error("unexpected character") - } + // Store the value in the config + if mapValue, ok := value.(map[string]any); ok { + // Add an entry in current object + newMap := make(map[string]any, 8) // Pre-allocate with capacity + p.currentObject[name] = newMap - if skipErr != nil && skipErr != io.EOF { - return skipErr + // Process the map contents + p.pushObject(newMap) + + // Copy values from scanned map to our object + for k, v := range mapValue { + p.currentObject[k] = v + } + + p.popObject() + } else { + // Direct storage for primitives and arrays + p.currentObject[name] = value + } } return nil } -// Helper function -func isLetter(r rune) bool { - return (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') +// parseValue parses a value after an equals sign +func (p *Parser) parseValue() (any, error) { + token, err := p.nextToken() + if err != nil { + return nil, err + } + + switch token.Type { + case TokenString: + return string(token.Value), nil + + case TokenNumber: + strValue := string(token.Value) + // Check if it's a float or int + for i := 0; i < len(strValue); i++ { + if strValue[i] == '.' { + // It's a float + val, err := strconv.ParseFloat(strValue, 64) + if err != nil { + return nil, p.Error(fmt.Sprintf("invalid float: %s", strValue)) + } + return val, nil + } + } + // It's an integer + val, err := strconv.ParseInt(strValue, 10, 64) + if err != nil { + return nil, p.Error(fmt.Sprintf("invalid integer: %s", strValue)) + } + return val, nil + + case TokenBoolean: + return string(token.Value) == "true", nil + + case TokenOpenBrace: + // It's a map or array + return p.parseObject() + + case TokenName: + // Treat as a string value + return string(token.Value), nil + + default: + return nil, p.Error(fmt.Sprintf("unexpected token: %v", token.Type)) + } +} + +// parseObject parses a map or array +func (p *Parser) parseObject() (any, error) { + contents := make(map[string]any) + var arrayElements []any + isArray := true + + for { + token, err := p.nextToken() + if err != nil { + return nil, err + } + + // Check for end of object + if token.Type == TokenCloseBrace { + if isArray && len(contents) == 0 { + return arrayElements, nil + } + return contents, nil + } + + // If we find a name, it could be a map entry or array element + if token.Type == TokenName { + // Get the name value + name := string(token.Value) + + // Get the next token to determine if it's a map entry or array element + nextToken, err := p.nextToken() + if err != nil { + return nil, err + } + + if nextToken.Type == TokenEquals { + // It's a key-value pair + value, err := p.parseValue() + if err != nil { + return nil, err + } + + isArray = false + contents[name] = value + } else if nextToken.Type == TokenOpenBrace { + // It's a nested object + objValue, err := p.parseObject() + if err != nil { + return nil, err + } + + isArray = false + contents[name] = objValue + } else { + // Put the token back and treat the name as an array element + p.scanner.UnreadToken(nextToken) + + // Try to convert to appropriate type + var value any = name + + // Do some type inference for common values + if name == "true" { + value = true + } else if name == "false" { + value = false + } else if isDigit(name[0]) || (len(name) > 1 && name[0] == '-' && isDigit(name[1])) { + // Try to parse as number + if hasDot(name) { + if f, err := strconv.ParseFloat(name, 64); err == nil { + value = f + } + } else { + if i, err := strconv.ParseInt(name, 10, 64); err == nil { + value = i + } + } + } + + arrayElements = append(arrayElements, value) + } + } else if token.Type == TokenString || token.Type == TokenNumber || token.Type == TokenBoolean { + // Direct array element + var value any + + switch token.Type { + case TokenString: + value = string(token.Value) + case TokenNumber: + strVal := string(token.Value) + if hasDot(strVal) { + f, _ := strconv.ParseFloat(strVal, 64) + value = f + } else { + i, _ := strconv.ParseInt(strVal, 10, 64) + value = i + } + case TokenBoolean: + value = string(token.Value) == "true" + } + + arrayElements = append(arrayElements, value) + } else if token.Type == TokenOpenBrace { + // Nested object in array + nestedObj, err := p.parseObject() + if err != nil { + return nil, err + } + arrayElements = append(arrayElements, nestedObj) + } else { + return nil, p.Error(fmt.Sprintf("unexpected token in object: %v", token.Type)) + } + } } diff --git a/scanner.go b/scanner.go index c559ba1..6b4d63a 100644 --- a/scanner.go +++ b/scanner.go @@ -2,19 +2,44 @@ package config import ( "bufio" + "bytes" "errors" "fmt" "io" "strconv" - "unicode" ) +// TokenType represents the type of token +type TokenType int + +const ( + TokenError TokenType = iota + TokenEOF + TokenName + TokenString + TokenNumber + TokenBoolean + TokenEquals + TokenOpenBrace + TokenCloseBrace + TokenComment +) + +// Token represents a lexical token +type Token struct { + Type TokenType + Value []byte + Line int + Column int +} + // Scanner handles the low-level parsing of the configuration format type Scanner struct { reader *bufio.Reader line int // Current line number col int // Current column position - buffer []rune + buffer []byte + token Token // Current token } // NewScanner creates a new scanner with the given reader @@ -23,32 +48,45 @@ func NewScanner(r io.Reader) *Scanner { reader: bufio.NewReader(r), line: 1, // Start at line 1 col: 0, - buffer: make([]rune, 0, 64), + buffer: make([]byte, 0, 128), // Pre-allocate with reasonable capacity } } -// ReadRune reads a single rune from the input -func (s *Scanner) ReadRune() (rune, int, error) { - r, i, err := s.reader.ReadRune() +// ReadByte reads a single byte from the input +func (s *Scanner) ReadByte() (byte, error) { + b, err := s.reader.ReadByte() if err == nil { - if r == '\n' { + if b == '\n' { s.line++ s.col = 0 } else { s.col++ } } - return r, i, err + return b, err } -// PeekRune looks at the next rune without consuming it -func (s *Scanner) PeekRune() (rune, error) { - r, _, err := s.reader.ReadRune() +// PeekByte looks at the next byte without consuming it +func (s *Scanner) PeekByte() (byte, error) { + b, err := s.reader.Peek(1) if err != nil { return 0, err } - s.reader.UnreadRune() - return r, nil + return b[0], nil +} + +// PeekBytes looks at the next n bytes without consuming them +func (s *Scanner) PeekBytes(n int) ([]byte, error) { + return s.reader.Peek(n) +} + +// UnreadByte pushes back a byte to the reader +func (s *Scanner) UnreadByte() error { + err := s.reader.UnreadByte() + if err == nil && s.col > 0 { + s.col-- + } + return err } // Error creates an error with line and column information @@ -59,81 +97,149 @@ func (s *Scanner) Error(msg string) error { // SkipWhitespace skips whitespace characters func (s *Scanner) SkipWhitespace() error { for { - r, err := s.PeekRune() + b, err := s.PeekByte() if err == io.EOF { return nil } if err != nil { return err } - if !unicode.IsSpace(r) { + + // Fast check for common whitespace bytes + if b != ' ' && b != '\t' && b != '\n' && b != '\r' { return nil } - _, _, err = s.ReadRune() + + _, err = s.ReadByte() if err != nil { return err } } } -// peekAndCheckRune checks if the next rune matches expected without consuming it -func (s *Scanner) peekAndCheckRune(expected rune) (bool, error) { - r, err := s.PeekRune() +// NextToken scans and returns the next token +func (s *Scanner) NextToken() (Token, error) { + if s.token.Type != TokenError { + // We have a stored token + token := s.token + s.token = Token{Type: TokenError} // Reset + return token, nil + } + + // No stored token, scan a new one + // Skip whitespace + err := s.SkipWhitespace() + if err == io.EOF { + return Token{Type: TokenEOF}, nil + } if err != nil { - return false, err - } - return r == expected, nil -} - -// consumeIfMatch consumes the next rune if it matches expected -func (s *Scanner) consumeIfMatch(expected rune) (bool, error) { - matches, err := s.peekAndCheckRune(expected) - if err != nil || !matches { - return false, err + return Token{Type: TokenError, Value: []byte(err.Error())}, err } - _, _, err = s.ReadRune() // consume the rune - return err == nil, err + b, err := s.PeekByte() + if err != nil { + if err == io.EOF { + return Token{Type: TokenEOF}, nil + } + return Token{Type: TokenError, Value: []byte(err.Error())}, err + } + + // Record start position for error reporting + startLine, startColumn := s.line, s.col + + // Process based on first character + switch { + case b == '=': + _, _ = s.ReadByte() // consume equals + return Token{Type: TokenEquals, Line: startLine, Column: startColumn}, nil + + case b == '{': + _, _ = s.ReadByte() // consume open brace + return Token{Type: TokenOpenBrace, Line: startLine, Column: startColumn}, nil + + case b == '}': + _, _ = s.ReadByte() // consume close brace + return Token{Type: TokenCloseBrace, Line: startLine, Column: startColumn}, nil + + case b == '-': + // Could be a comment or a negative number + peekBytes, err := s.PeekBytes(2) + if err == nil && len(peekBytes) == 2 && peekBytes[1] == '-' { + err = s.scanComment() + if err != nil { + return Token{Type: TokenError, Value: []byte(err.Error())}, err + } + return Token{Type: TokenComment, Line: startLine, Column: startColumn}, nil + } + + // Check if it's a negative number + if err == nil && len(peekBytes) == 2 && isDigit(peekBytes[1]) { + return s.scanNumber(startLine, startColumn) + } + + // Just a single dash + _, _ = s.ReadByte() // consume dash + return Token{Type: TokenError, Value: []byte("unexpected '-'")}, + fmt.Errorf("unexpected '-' at line %d, column %d", startLine, startColumn) + + case b == '"': + return s.scanString(startLine, startColumn) + + case isLetter(b): + return s.scanName(startLine, startColumn) + + case isDigit(b): + return s.scanNumber(startLine, startColumn) + + default: + _, _ = s.ReadByte() // consume the unexpected character + err := fmt.Errorf("unexpected character: %c", b) + return Token{Type: TokenError, Value: []byte(err.Error()), Line: startLine, Column: startColumn}, err + } } -// ScanComment processes a comment -func (s *Scanner) ScanComment() error { +func (s *Scanner) UnreadToken(token Token) { + s.token = token // Store the token to be returned next +} + +// scanComment processes a comment +func (s *Scanner) scanComment() error { // Consume the first dash - _, _, err := s.ReadRune() + _, err := s.ReadByte() if err != nil { return err } // Check for second dash - r, _, err := s.ReadRune() + b, err := s.ReadByte() if err != nil { return err } - if r != '-' { + if b != '-' { return s.Error("invalid comment") } // Check for block comment [[ - r, err = s.PeekRune() - if err == nil && r == '[' { - _, _, _ = s.ReadRune() // consume first [ - r, err = s.PeekRune() - if err == nil && r == '[' { - _, _, _ = s.ReadRune() // consume second [ + b, err = s.PeekByte() + if err == nil && b == '[' { + _, _ = s.ReadByte() // consume first [ + b, err = s.PeekByte() + if err == nil && b == '[' { + _, _ = s.ReadByte() // consume second [ return s.scanBlockComment() } } - // Line comment + // Line comment - consume until newline or EOF for { - r, _, err := s.ReadRune() + b, err := s.ReadByte() if err == io.EOF { return nil } if err != nil { return err } - if r == '\n' { + if b == '\n' { return nil } } @@ -142,276 +248,47 @@ func (s *Scanner) ScanComment() error { // scanBlockComment processes a block comment func (s *Scanner) scanBlockComment() error { for { - r, _, err := s.ReadRune() + b, err := s.ReadByte() if err != nil { return s.Error("unclosed block comment") } - if r == ']' { - r, err = s.PeekRune() - if err == nil && r == ']' { - _, _, _ = s.ReadRune() // consume second ] + if b == ']' { + b, err = s.PeekByte() + if err == nil && b == ']' { + _, _ = s.ReadByte() // consume second ] return nil } } } } -// ScanName reads a name identifier -func (s *Scanner) ScanName() (string, error) { - s.buffer = s.buffer[:0] // Reset buffer - - // Read first character - r, _, err := s.ReadRune() - if err != nil { - return "", err - } - - if !unicode.IsLetter(r) { - return "", s.Error("name must start with letter") - } - s.buffer = append(s.buffer, r) - - // Read rest of name - for { - r, err := s.PeekRune() - if err == io.EOF { - break - } - if err != nil { - return "", err - } - if !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_' { - break - } - s.buffer = append(s.buffer, r) - _, _, _ = s.ReadRune() - } - - return string(s.buffer), nil -} - -// processArrayElement processes a single array element -func (s *Scanner) processArrayElement() (any, error) { - r, err := s.PeekRune() - if err != nil { - return nil, err - } - - // Handle identifier-like elements - if unicode.IsLetter(r) { - name, err := s.ScanName() - if err != nil { - return nil, err - } - - // Try to convert to appropriate type - convertedValue, err := s.ConvertValue(name) - if err == nil { - return convertedValue, nil - } - return name, nil - } - - // Handle other element types (strings, numbers, etc.) - return s.ScanValue() -} - -// processMapEntry processes a key-value pair in a map -func (s *Scanner) processMapEntry() (string, any, bool, error) { - name, err := s.ScanName() - if err != nil { - return "", nil, false, err - } - - err = s.SkipWhitespace() - if err != nil { - return "", nil, false, err - } - - // Check for equals sign - isEquals, err := s.consumeIfMatch('=') - if err != nil && err != io.EOF { - return "", nil, false, err - } - - if isEquals { - value, err := s.ScanValue() - if err != nil { - return "", nil, false, err - } - return name, value, true, nil // true indicates this is a map entry - } - - // Check for opening brace (nested map/array) - isBrace, err := s.peekAndCheckRune('{') - if err != nil && err != io.EOF { - return "", nil, false, err - } - - if isBrace { - value, err := s.ScanValue() - if err != nil { - return "", nil, false, err - } - return name, value, true, nil // true indicates this is a map entry - } - - // If neither equals nor brace, it's an array element (name as string) - return name, name, false, nil // false indicates this is not a map entry -} - -// ScanValue processes and returns a value from the config -func (s *Scanner) ScanValue() (any, error) { - err := s.SkipWhitespace() - if err != nil { - return nil, err - } - - r, err := s.PeekRune() - if err != nil { - return nil, err - } - - // Check if it's an array/map - if r == '{' { - return s.ScanArrayOrMap() - } - - // Check if it's a quoted string - if r == '"' { - return s.ScanString() - } - - // Otherwise, treat it as a simple value - var value []rune - for { - r, err := s.PeekRune() - if err == io.EOF { - break - } - if err != nil { - return nil, err - } - if unicode.IsSpace(r) || r == '}' { - break - } - value = append(value, r) - _, _, _ = s.ReadRune() - } - - if len(value) == 0 { - return nil, s.Error("empty value") - } - - // Convert value to appropriate type - return s.ConvertValue(string(value)) -} - -// ScanArrayOrMap processes arrays and maps -func (s *Scanner) ScanArrayOrMap() (any, error) { - // Consume opening brace - _, _, err := s.ReadRune() - if err != nil { - return nil, err - } - - // Initialize a new map to store contents - contents := make(map[string]any) - // And a slice to track array elements - var arrayElements []any - isArray := true - - for { - err := s.SkipWhitespace() - if err != nil { - return nil, err - } - - r, err := s.PeekRune() - if err == io.EOF { - return nil, s.Error("unclosed array/map") - } - if err != nil { - return nil, err - } - - // Check for closing brace - if r == '}' { - _, _, _ = s.ReadRune() // consume the closing brace - break - } - - // Handle comments inside arrays/maps - if r == '-' { - err = s.ScanComment() - if err != nil { - return nil, err - } - continue - } - - // If we find a named property, it might be a map entry - if unicode.IsLetter(r) { - name, value, isMapEntry, err := s.processMapEntry() - if err != nil { - return nil, err - } - - if isMapEntry { - // It's a key-value pair for a map - isArray = false - contents[name] = value - } else { - // It's an array element - arrayElements = append(arrayElements, value) - } - continue - } - - // Handle array elements that start with quotes, numbers, etc. - value, err := s.processArrayElement() - if err != nil { - return nil, err - } - arrayElements = append(arrayElements, value) - } - - // Check for array/map distinction and return appropriate result - if isArray && len(contents) == 0 { - return arrayElements, nil - } - return contents, nil -} - -// ScanString reads a quoted string -func (s *Scanner) ScanString() (any, error) { - // Consume opening quote - _, _, err := s.ReadRune() - if err != nil { - return nil, err - } - - // Reset buffer while preserving capacity +// scanString scans a quoted string +func (s *Scanner) scanString(startLine, startColumn int) (Token, error) { + // Reset buffer s.buffer = s.buffer[:0] - // Avoid strings.Builder as it creates a new array internally - // and instead use our rune buffer directly + // Consume opening quote + _, err := s.ReadByte() + if err != nil { + return Token{Type: TokenError, Value: []byte(err.Error())}, err + } + for { - r, _, err := s.ReadRune() + b, err := s.ReadByte() if err != nil { - return nil, s.Error("unterminated string") + return Token{Type: TokenError, Value: []byte("unterminated string")}, errors.New("unterminated string") } - if r == '"' { + if b == '"' { break } // Handle escape sequences - if r == '\\' { - escaped, _, err := s.ReadRune() + if b == '\\' { + escaped, err := s.ReadByte() if err != nil { - return nil, err + return Token{Type: TokenError, Value: []byte("unterminated escape sequence")}, errors.New("unterminated escape sequence") } switch escaped { case '"': @@ -423,63 +300,281 @@ func (s *Scanner) ScanString() (any, error) { case 't': s.buffer = append(s.buffer, '\t') default: - s.buffer = append(s.buffer, '\\', escaped) + s.buffer = append(s.buffer, '\\') + s.buffer = append(s.buffer, escaped) } } else { - s.buffer = append(s.buffer, r) + s.buffer = append(s.buffer, b) } } - // Convert rune slice to string once at the end - return string(s.buffer), nil + return Token{ + Type: TokenString, + Value: append([]byte(nil), s.buffer...), // Make a copy of the buffer + Line: startLine, + Column: startColumn, + }, nil } -// ConvertValue converts string values to their appropriate types -func (s *Scanner) ConvertValue(value string) (any, error) { - // Fast path for booleans - if value == "true" { - return true, nil - } - if value == "false" { - return false, nil +// scanName scans an identifier +func (s *Scanner) scanName(startLine, startColumn int) (Token, error) { + // Reset buffer + s.buffer = s.buffer[:0] + + // Read first character + b, err := s.ReadByte() + if err != nil { + return Token{Type: TokenError, Value: []byte(err.Error())}, err } - // Early exit for empty values - if len(value) == 0 { - return nil, errors.New("empty value") + if !isLetter(b) { + return Token{Type: TokenError, Value: []byte("name must start with letter")}, s.Error("name must start with letter") } + s.buffer = append(s.buffer, b) - // Check for number type in one pass - isNegative := value[0] == '-' - startIdx := 0 - if isNegative { - if len(value) == 1 { - return nil, errors.New("invalid value: -") + // Read rest of name + for { + b, err := s.PeekByte() + if err == io.EOF { + break } - startIdx = 1 + if err != nil { + return Token{Type: TokenError, Value: []byte(err.Error())}, err + } + if !isLetter(b) && !isDigit(b) && b != '_' { + break + } + s.buffer = append(s.buffer, b) + _, _ = s.ReadByte() } + // Check if it's a boolean + if bytes.Equal(s.buffer, []byte("true")) || bytes.Equal(s.buffer, []byte("false")) { + return Token{ + Type: TokenBoolean, + Value: append([]byte(nil), s.buffer...), // Make a copy of the buffer + Line: startLine, + Column: startColumn, + }, nil + } + + return Token{ + Type: TokenName, + Value: append([]byte(nil), s.buffer...), // Make a copy of the buffer + Line: startLine, + Column: startColumn, + }, nil +} + +// scanNumber scans a numeric value +func (s *Scanner) scanNumber(startLine, startColumn int) (Token, error) { + // Reset buffer + s.buffer = s.buffer[:0] + + // Read first character (might be a minus sign or digit) + b, err := s.ReadByte() + if err != nil { + return Token{Type: TokenError, Value: []byte(err.Error())}, err + } + s.buffer = append(s.buffer, b) + + // Scan the rest of the number hasDot := false - for i := startIdx; i < len(value); i++ { - if value[i] == '.' { - if hasDot { - return nil, errors.New("invalid number format") + for { + b, err := s.PeekByte() + if err != nil { + if err == io.EOF { + break } + return Token{Type: TokenError, Value: []byte(err.Error())}, err + } + + if b == '.' && !hasDot { hasDot = true - } else if value[i] < '0' || value[i] > '9' { - return nil, errors.New("invalid value format: " + value) + _, _ = s.ReadByte() + s.buffer = append(s.buffer, b) + } else if isDigit(b) { + _, _ = s.ReadByte() + s.buffer = append(s.buffer, b) + } else { + break } } - // Process as integer or float based on presence of decimal - if !hasDot { - return strconv.ParseInt(value, 10, 64) - } - - // Float (ensure not ending with dot) - if value[len(value)-1] != '.' { - return strconv.ParseFloat(value, 64) - } - - return nil, errors.New("invalid value format: " + value) + return Token{ + Type: TokenNumber, + Value: append([]byte(nil), s.buffer...), // Make a copy of the buffer + Line: startLine, + Column: startColumn, + }, nil +} + +// ScanValue processes a value and returns its Go representation +func (s *Scanner) ScanValue() (any, error) { + token, err := s.NextToken() + if err != nil { + return nil, err + } + + switch token.Type { + case TokenString: + return string(token.Value), nil + + case TokenBoolean: + if bytes.Equal(token.Value, []byte("true")) { + return true, nil + } + return false, nil + + case TokenNumber: + // Convert to number + value := string(token.Value) + if bytes.Contains(token.Value, []byte(".")) { + // Float + return strconv.ParseFloat(value, 64) + } + // Integer + return strconv.ParseInt(value, 10, 64) + + case TokenOpenBrace: + // Object or array + return s.scanObjectOrArray() + + case TokenName: + // Name identifier - could be a special value or just a string + return string(token.Value), nil + + default: + return nil, fmt.Errorf("unexpected token type %v at line %d, column %d", token.Type, token.Line, token.Column) + } +} + +// scanObjectOrArray processes a map or array enclosed in braces +func (s *Scanner) scanObjectOrArray() (any, error) { + // Initialize collections + contents := make(map[string]any) + var arrayElements []any + isArray := true + + for { + err := s.SkipWhitespace() + if err != nil { + return nil, err + } + + b, err := s.PeekByte() + if err == io.EOF { + return nil, errors.New("unclosed object/array") + } + if err != nil { + return nil, err + } + + // Check for closing brace + if b == '}' { + _, _ = s.ReadByte() // consume closing brace + if isArray && len(contents) == 0 { + return arrayElements, nil + } + return contents, nil + } + + // Handle comments + if b == '-' { + peekBytes, err := s.PeekBytes(2) + if err == nil && len(peekBytes) == 2 && peekBytes[1] == '-' { + err = s.scanComment() + if err != nil { + return nil, err + } + continue + } + } + + // Process key-value pair or array element + if isLetter(b) { + // Read name + nameToken, err := s.scanName(s.line, s.col) + if err != nil { + return nil, err + } + name := string(nameToken.Value) + + // Skip whitespace + err = s.SkipWhitespace() + if err != nil { + return nil, err + } + + // Check if it's followed by = or { + b, err = s.PeekByte() + if err != nil && err != io.EOF { + return nil, err + } + + if b == '=' { + // It's a key-value pair + _, _ = s.ReadByte() // consume = + err = s.SkipWhitespace() + if err != nil { + return nil, err + } + + value, err := s.ScanValue() + if err != nil { + return nil, err + } + + isArray = false + contents[name] = value + } else if b == '{' { + // It's a nested object/array + _, _ = s.ReadByte() // consume { + value, err := s.scanObjectOrArray() + if err != nil { + return nil, err + } + + isArray = false + contents[name] = value + } else { + // It's a simple name as an array element + // Try to convert to appropriate type first + var value any = name + // Try common conversions + if name == "true" { + value = true + } else if name == "false" { + value = false + } else if isDigit(name[0]) || (len(name) > 1 && name[0] == '-' && isDigit(name[1])) { + // Looks like a number, try to convert + if hasDot(name) { + if f, err := strconv.ParseFloat(name, 64); err == nil { + value = f + } + } else { + if i, err := strconv.ParseInt(name, 10, 64); err == nil { + value = i + } + } + } + + arrayElements = append(arrayElements, value) + } + } else if b == '"' { + // String value - must be an array element + value, err := s.ScanValue() + if err != nil { + return nil, err + } + arrayElements = append(arrayElements, value) + } else { + // Other value type - must be an array element + value, err := s.ScanValue() + if err != nil { + return nil, err + } + arrayElements = append(arrayElements, value) + } + } }