From 88c917210d39f93ce6193d393e5f8644cfc1645e Mon Sep 17 00:00:00 2001 From: Sky Johnson Date: Sun, 2 Mar 2025 06:05:58 -0600 Subject: [PATCH] ref 4 --- config.go | 118 ++++++----------------- parser.go | 65 +++++-------- scanner.go | 267 ++++++----------------------------------------------- token.go | 25 +++++ 4 files changed, 109 insertions(+), 366 deletions(-) create mode 100644 token.go diff --git a/config.go b/config.go index e8b7667..028ad39 100644 --- a/config.go +++ b/config.go @@ -189,91 +189,8 @@ func (c *Config) GetMap(key string) (map[string]any, error) { // Load parses a config from a reader func Load(r io.Reader) (*Config, error) { - scanner := NewScanner(r) - config := NewConfig() - - for { - err := scanner.SkipWhitespace() - if err == io.EOF { - break - } - if err != nil { - return nil, err - } - - b, err := scanner.PeekByte() - if err == io.EOF { - break - } - if err != nil { - return nil, err - } - - // Handle comments - if b == '-' { - peekBytes, err := scanner.PeekBytes(2) - if err == nil && len(peekBytes) == 2 && peekBytes[1] == '-' { - err = scanner.scanComment() - if err != nil { - return nil, err - } - continue - } - } - - // Process key-value pair - if isLetter(b) { - // Read name - nameToken, err := scanner.scanName(scanner.line, scanner.col) - if err != nil { - return nil, err - } - name := string(nameToken.Value) - - // Skip whitespace - err = scanner.SkipWhitespace() - if err != nil { - return nil, err - } - - // Must be followed by = or { - b, err = scanner.PeekByte() - if err != nil { - return nil, err - } - - if b != '=' && b != '{' { - return nil, scanner.Error("expected '=' or '{' after name") - } - - var value any - if b == '=' { - _, _ = scanner.ReadByte() // consume = - err = scanner.SkipWhitespace() - if err != nil { - return nil, err - } - - value, err = scanner.ScanValue() - if err != nil { - return nil, err - } - } else { // b == '{' - _, _ = scanner.ReadByte() // consume { - value, err = scanner.scanObjectOrArray() - if err != nil { - return nil, err - } - } - - // Store in config - config.data[name] = value - } else { - return nil, scanner.Error("expected name at top level") - } - } - - return config, nil + parser := NewParser(r) + return parser.Parse() } // Helpers @@ -286,11 +203,36 @@ func isDigit(b byte) bool { return b >= '0' && b <= '9' } -func hasDot(s string) bool { +// ParseNumber converts a string to a number (int64 or float64) +func ParseNumber(s string) (any, error) { + // Check if it has a decimal point for i := 0; i < len(s); i++ { if s[i] == '.' { - return true + // It's a float + return strconv.ParseFloat(s, 64) } } - return false + // It's an integer + return strconv.ParseInt(s, 10, 64) +} + +// isDigitOrMinus checks if a string starts with a digit or minus sign +func isDigitOrMinus(s string) bool { + if len(s) == 0 { + return false + } + return isDigit(s[0]) || (s[0] == '-' && len(s) > 1 && isDigit(s[1])) +} + +// parseStringAsNumber tries to parse a string as a number (float or int) +func parseStringAsNumber(s string) (any, error) { + // Check if it has a decimal point + for i := 0; i < len(s); i++ { + if s[i] == '.' { + // It's a float + return strconv.ParseFloat(s, 64) + } + } + // It's an integer + return strconv.ParseInt(s, 10, 64) } diff --git a/parser.go b/parser.go index 843953c..80c03a3 100644 --- a/parser.go +++ b/parser.go @@ -41,21 +41,6 @@ func (p *Parser) Parse() (*Config, error) { return p.config, nil } -// pushObject enters a new object scope -func (p *Parser) pushObject(obj map[string]any) { - p.stack = append(p.stack, p.currentObject) - p.currentObject = obj -} - -// popObject exits the current object scope -func (p *Parser) popObject() { - n := len(p.stack) - if n > 0 { - p.currentObject = p.stack[n-1] - p.stack = p.stack[:n-1] - } -} - // nextToken gets the next meaningful token (skipping comments) func (p *Parser) nextToken() (Token, error) { for { @@ -124,14 +109,20 @@ func (p *Parser) parseContent() error { p.currentObject[name] = newMap // Process the map contents - p.pushObject(newMap) + p.stack = append(p.stack, p.currentObject) + p.currentObject = newMap // Copy values from scanned map to our object for k, v := range mapValue { p.currentObject[k] = v } - p.popObject() + // Restore parent object + n := len(p.stack) + if n > 0 { + p.currentObject = p.stack[n-1] + p.stack = p.stack[:n-1] + } } else { // Direct storage for primitives and arrays p.currentObject[name] = value @@ -154,7 +145,6 @@ func (p *Parser) parseValue() (any, error) { case TokenNumber: strValue := string(token.Value) - // Check if it's a float or int for i := 0; i < len(strValue); i++ { if strValue[i] == '.' { // It's a float @@ -208,8 +198,9 @@ func (p *Parser) parseObject() (any, error) { return contents, nil } - // If we find a name, it could be a map entry or array element - if token.Type == TokenName { + // Handle based on token type + switch token.Type { + case TokenName: // Get the name value name := string(token.Value) @@ -241,30 +232,26 @@ func (p *Parser) parseObject() (any, error) { // Put the token back and treat the name as an array element p.scanner.UnreadToken(nextToken) - // Try to convert to appropriate type + // Convert to appropriate type if possible var value any = name - // Do some type inference for common values + // Try to infer type if name == "true" { value = true } else if name == "false" { value = false - } else if isDigit(name[0]) || (len(name) > 1 && name[0] == '-' && isDigit(name[1])) { + } else if isDigitOrMinus(name) { // Try to parse as number - if hasDot(name) { - if f, err := strconv.ParseFloat(name, 64); err == nil { - value = f - } - } else { - if i, err := strconv.ParseInt(name, 10, 64); err == nil { - value = i - } + numValue, err := parseStringAsNumber(name) + if err == nil { + value = numValue } } arrayElements = append(arrayElements, value) } - } else if token.Type == TokenString || token.Type == TokenNumber || token.Type == TokenBoolean { + + case TokenString, TokenNumber, TokenBoolean: // Direct array element var value any @@ -273,26 +260,22 @@ func (p *Parser) parseObject() (any, error) { value = string(token.Value) case TokenNumber: strVal := string(token.Value) - if hasDot(strVal) { - f, _ := strconv.ParseFloat(strVal, 64) - value = f - } else { - i, _ := strconv.ParseInt(strVal, 10, 64) - value = i - } + value, _ = parseStringAsNumber(strVal) case TokenBoolean: value = string(token.Value) == "true" } arrayElements = append(arrayElements, value) - } else if token.Type == TokenOpenBrace { + + case TokenOpenBrace: // Nested object in array nestedObj, err := p.parseObject() if err != nil { return nil, err } arrayElements = append(arrayElements, nestedObj) - } else { + + default: return nil, p.Error(fmt.Sprintf("unexpected token in object: %v", token.Type)) } } diff --git a/scanner.go b/scanner.go index 6b4d63a..0d1bf17 100644 --- a/scanner.go +++ b/scanner.go @@ -2,37 +2,11 @@ package config import ( "bufio" - "bytes" "errors" "fmt" "io" - "strconv" ) -// TokenType represents the type of token -type TokenType int - -const ( - TokenError TokenType = iota - TokenEOF - TokenName - TokenString - TokenNumber - TokenBoolean - TokenEquals - TokenOpenBrace - TokenCloseBrace - TokenComment -) - -// Token represents a lexical token -type Token struct { - Type TokenType - Value []byte - Line int - Column int -} - // Scanner handles the low-level parsing of the configuration format type Scanner struct { reader *bufio.Reader @@ -117,6 +91,11 @@ func (s *Scanner) SkipWhitespace() error { } } +// UnreadToken stores a token to be returned by the next call to NextToken +func (s *Scanner) UnreadToken(token Token) { + s.token = token +} + // NextToken scans and returns the next token func (s *Scanner) NextToken() (Token, error) { if s.token.Type != TokenError { @@ -126,7 +105,6 @@ func (s *Scanner) NextToken() (Token, error) { return token, nil } - // No stored token, scan a new one // Skip whitespace err := s.SkipWhitespace() if err == io.EOF { @@ -198,10 +176,6 @@ func (s *Scanner) NextToken() (Token, error) { } } -func (s *Scanner) UnreadToken(token Token) { - s.token = token // Store the token to be returned next -} - // scanComment processes a comment func (s *Scanner) scanComment() error { // Consume the first dash @@ -220,13 +194,23 @@ func (s *Scanner) scanComment() error { } // Check for block comment [[ - b, err = s.PeekByte() - if err == nil && b == '[' { + if b1, err := s.PeekByte(); err == nil && b1 == '[' { _, _ = s.ReadByte() // consume first [ - b, err = s.PeekByte() - if err == nil && b == '[' { + if b2, err := s.PeekByte(); err == nil && b2 == '[' { _, _ = s.ReadByte() // consume second [ - return s.scanBlockComment() + // Process block comment + for { + b, err := s.ReadByte() + if err != nil { + return s.Error("unclosed block comment") + } + if b == ']' { + if n, err := s.PeekByte(); err == nil && n == ']' { + _, _ = s.ReadByte() // consume second ] + return nil + } + } + } } } @@ -245,24 +229,6 @@ func (s *Scanner) scanComment() error { } } -// scanBlockComment processes a block comment -func (s *Scanner) scanBlockComment() error { - for { - b, err := s.ReadByte() - if err != nil { - return s.Error("unclosed block comment") - } - - if b == ']' { - b, err = s.PeekByte() - if err == nil && b == ']' { - _, _ = s.ReadByte() // consume second ] - return nil - } - } - } -} - // scanString scans a quoted string func (s *Scanner) scanString(startLine, startColumn int) (Token, error) { // Reset buffer @@ -300,17 +266,17 @@ func (s *Scanner) scanString(startLine, startColumn int) (Token, error) { case 't': s.buffer = append(s.buffer, '\t') default: - s.buffer = append(s.buffer, '\\') - s.buffer = append(s.buffer, escaped) + s.buffer = append(s.buffer, '\\', escaped) } } else { s.buffer = append(s.buffer, b) } } + // Return token using buffer directly - we'll copy in NextToken if needed return Token{ Type: TokenString, - Value: append([]byte(nil), s.buffer...), // Make a copy of the buffer + Value: s.buffer, Line: startLine, Column: startColumn, }, nil @@ -349,18 +315,15 @@ func (s *Scanner) scanName(startLine, startColumn int) (Token, error) { } // Check if it's a boolean - if bytes.Equal(s.buffer, []byte("true")) || bytes.Equal(s.buffer, []byte("false")) { - return Token{ - Type: TokenBoolean, - Value: append([]byte(nil), s.buffer...), // Make a copy of the buffer - Line: startLine, - Column: startColumn, - }, nil + tokenType := TokenName + if len(s.buffer) == 4 && (s.buffer[0] == 't' && s.buffer[1] == 'r' && s.buffer[2] == 'u' && s.buffer[3] == 'e' || + s.buffer[0] == 'f' && s.buffer[1] == 'a' && s.buffer[2] == 'l' && s.buffer[3] == 's' && s.buffer[4] == 'e') { + tokenType = TokenBoolean } return Token{ - Type: TokenName, - Value: append([]byte(nil), s.buffer...), // Make a copy of the buffer + Type: tokenType, + Value: s.buffer, Line: startLine, Column: startColumn, }, nil @@ -403,178 +366,8 @@ func (s *Scanner) scanNumber(startLine, startColumn int) (Token, error) { return Token{ Type: TokenNumber, - Value: append([]byte(nil), s.buffer...), // Make a copy of the buffer + Value: s.buffer, Line: startLine, Column: startColumn, }, nil } - -// ScanValue processes a value and returns its Go representation -func (s *Scanner) ScanValue() (any, error) { - token, err := s.NextToken() - if err != nil { - return nil, err - } - - switch token.Type { - case TokenString: - return string(token.Value), nil - - case TokenBoolean: - if bytes.Equal(token.Value, []byte("true")) { - return true, nil - } - return false, nil - - case TokenNumber: - // Convert to number - value := string(token.Value) - if bytes.Contains(token.Value, []byte(".")) { - // Float - return strconv.ParseFloat(value, 64) - } - // Integer - return strconv.ParseInt(value, 10, 64) - - case TokenOpenBrace: - // Object or array - return s.scanObjectOrArray() - - case TokenName: - // Name identifier - could be a special value or just a string - return string(token.Value), nil - - default: - return nil, fmt.Errorf("unexpected token type %v at line %d, column %d", token.Type, token.Line, token.Column) - } -} - -// scanObjectOrArray processes a map or array enclosed in braces -func (s *Scanner) scanObjectOrArray() (any, error) { - // Initialize collections - contents := make(map[string]any) - var arrayElements []any - isArray := true - - for { - err := s.SkipWhitespace() - if err != nil { - return nil, err - } - - b, err := s.PeekByte() - if err == io.EOF { - return nil, errors.New("unclosed object/array") - } - if err != nil { - return nil, err - } - - // Check for closing brace - if b == '}' { - _, _ = s.ReadByte() // consume closing brace - if isArray && len(contents) == 0 { - return arrayElements, nil - } - return contents, nil - } - - // Handle comments - if b == '-' { - peekBytes, err := s.PeekBytes(2) - if err == nil && len(peekBytes) == 2 && peekBytes[1] == '-' { - err = s.scanComment() - if err != nil { - return nil, err - } - continue - } - } - - // Process key-value pair or array element - if isLetter(b) { - // Read name - nameToken, err := s.scanName(s.line, s.col) - if err != nil { - return nil, err - } - name := string(nameToken.Value) - - // Skip whitespace - err = s.SkipWhitespace() - if err != nil { - return nil, err - } - - // Check if it's followed by = or { - b, err = s.PeekByte() - if err != nil && err != io.EOF { - return nil, err - } - - if b == '=' { - // It's a key-value pair - _, _ = s.ReadByte() // consume = - err = s.SkipWhitespace() - if err != nil { - return nil, err - } - - value, err := s.ScanValue() - if err != nil { - return nil, err - } - - isArray = false - contents[name] = value - } else if b == '{' { - // It's a nested object/array - _, _ = s.ReadByte() // consume { - value, err := s.scanObjectOrArray() - if err != nil { - return nil, err - } - - isArray = false - contents[name] = value - } else { - // It's a simple name as an array element - // Try to convert to appropriate type first - var value any = name - // Try common conversions - if name == "true" { - value = true - } else if name == "false" { - value = false - } else if isDigit(name[0]) || (len(name) > 1 && name[0] == '-' && isDigit(name[1])) { - // Looks like a number, try to convert - if hasDot(name) { - if f, err := strconv.ParseFloat(name, 64); err == nil { - value = f - } - } else { - if i, err := strconv.ParseInt(name, 10, 64); err == nil { - value = i - } - } - } - - arrayElements = append(arrayElements, value) - } - } else if b == '"' { - // String value - must be an array element - value, err := s.ScanValue() - if err != nil { - return nil, err - } - arrayElements = append(arrayElements, value) - } else { - // Other value type - must be an array element - value, err := s.ScanValue() - if err != nil { - return nil, err - } - arrayElements = append(arrayElements, value) - } - } -} diff --git a/token.go b/token.go new file mode 100644 index 0000000..59d6bf9 --- /dev/null +++ b/token.go @@ -0,0 +1,25 @@ +package config + +// TokenType represents the type of token +type TokenType int + +const ( + TokenError TokenType = iota + TokenEOF + TokenName + TokenString + TokenNumber + TokenBoolean + TokenEquals + TokenOpenBrace + TokenCloseBrace + TokenComment +) + +// Token represents a lexical token +type Token struct { + Type TokenType + Value []byte + Line int + Column int +}