From 8258a967a2896cc5478764751e08b456b62db698 Mon Sep 17 00:00:00 2001 From: Sky Johnson Date: Mon, 3 Mar 2025 07:12:15 -0600 Subject: [PATCH] ref 6 --- config.go | 335 ++++++++++++++++++++++++++++++++++++++++++++++++++++- parser.go | 282 -------------------------------------------- scanner.go | 51 +++++--- token.go | 2 +- 4 files changed, 367 insertions(+), 303 deletions(-) delete mode 100644 parser.go diff --git a/config.go b/config.go index 028ad39..ba8a19b 100644 --- a/config.go +++ b/config.go @@ -4,18 +4,26 @@ import ( "fmt" "io" "strconv" + "sync" ) -// Config holds a single hierarchical structure like JSON +// Config holds a single hierarchical structure like JSON and handles parsing type Config struct { - data map[string]any + data map[string]any + scanner *Scanner + currentObject map[string]any + stack []map[string]any + currentToken Token } // NewConfig creates a new empty config func NewConfig() *Config { - return &Config{ - data: make(map[string]any), + cfg := &Config{ + data: make(map[string]any, 16), // Pre-allocate with expected capacity + stack: make([]map[string]any, 0, 8), } + cfg.currentObject = cfg.data + return cfg } // Get retrieves a value from the config using dot notation @@ -187,10 +195,312 @@ func (c *Config) GetMap(key string) (map[string]any, error) { return nil, fmt.Errorf("value for key %s is not a map", key) } +// --- Parser Methods (integrated into Config) --- + +// Error creates an error with line information from the current token +func (c *Config) Error(msg string) error { + return fmt.Errorf("line %d, column %d: %s", + c.currentToken.Line, c.currentToken.Column, msg) +} + +// Parse parses the config from a reader +func (c *Config) Parse(r io.Reader) error { + c.scanner = NewScanner(r) + c.currentObject = c.data + err := c.parseContent() + + // Clean up scanner resources even on success + if c.scanner != nil { + ReleaseScanner(c.scanner) + c.scanner = nil + } + + return err +} + +// nextToken gets the next meaningful token (skipping comments) +func (c *Config) nextToken() (Token, error) { + for { + token, err := c.scanner.NextToken() + if err != nil { + return token, err + } + + // Skip comment tokens + if token.Type != TokenComment { + c.currentToken = token + return token, nil + } + } +} + +// parseContent is the main parsing function +func (c *Config) parseContent() error { + for { + token, err := c.nextToken() + if err != nil { + return err + } + + // Check for end of file + if token.Type == TokenEOF { + break + } + + // We expect top level entries to be names + if token.Type != TokenName { + return c.Error("expected name at top level") + } + + // Get the property name - copy to create a stable key + nameBytes := token.Value + name := string(nameBytes) + + // Get the next token (should be = or {) + token, err = c.nextToken() + if err != nil { + return err + } + + var value any + + if token.Type == TokenEquals { + // It's a standard key=value assignment + value, err = c.parseValue() + if err != nil { + return err + } + } else if token.Type == TokenOpenBrace { + // It's a map/array without '=' + value, err = c.parseObject() + if err != nil { + return err + } + } else { + return c.Error("expected '=' or '{' after name") + } + + // Store the value in the config + if mapValue, ok := value.(map[string]any); ok { + // Add an entry in current object + newMap := make(map[string]any, 8) // Pre-allocate with capacity + c.currentObject[name] = newMap + + // Process the map contents + c.stack = append(c.stack, c.currentObject) + c.currentObject = newMap + + // Copy values from scanned map to our object + for k, v := range mapValue { + c.currentObject[k] = v + } + + // Restore parent object + n := len(c.stack) + if n > 0 { + c.currentObject = c.stack[n-1] + c.stack = c.stack[:n-1] + } + } else { + // Direct storage for primitives and arrays + c.currentObject[name] = value + } + } + + return nil +} + +// valuePool to reuse maps and slices for common value types +var valuePool = sync.Pool{ + New: func() interface{} { + return make(map[string]any, 8) + }, +} + +// parseValue parses a value after an equals sign +func (c *Config) parseValue() (any, error) { + token, err := c.nextToken() + if err != nil { + return nil, err + } + + switch token.Type { + case TokenString: + // Copy the value for string stability + return string(token.Value), nil + + case TokenNumber: + strValue := string(token.Value) + for i := 0; i < len(strValue); i++ { + if strValue[i] == '.' { + // It's a float + val, err := strconv.ParseFloat(strValue, 64) + if err != nil { + return nil, c.Error(fmt.Sprintf("invalid float: %s", strValue)) + } + return val, nil + } + } + // It's an integer + val, err := strconv.ParseInt(strValue, 10, 64) + if err != nil { + return nil, c.Error(fmt.Sprintf("invalid integer: %s", strValue)) + } + return val, nil + + case TokenBoolean: + return bytesEqual(token.Value, []byte("true")), nil + + case TokenOpenBrace: + // It's a map or array + return c.parseObject() + + case TokenName: + // Treat as a string value - copy to create a stable string + return string(token.Value), nil + + default: + return nil, c.Error(fmt.Sprintf("unexpected token: %v", token.Type)) + } +} + +// parseObject parses a map or array +func (c *Config) parseObject() (any, error) { + // Get a map from the pool + contents := valuePool.Get().(map[string]any) + // Clear the map to reuse it + for k := range contents { + delete(contents, k) + } + + // Ensure map is returned to pool on function exit + defer func() { + // Only return to pool if we're using array (contents becomes unused) + // If we're returning contents directly, don't return to pool + if contents != nil { + valuePool.Put(contents) + } + }() + + // Use pre-allocated capacity for array elements to avoid reallocations + arrayElements := make([]any, 0, 8) + isArray := true + + for { + token, err := c.nextToken() + if err != nil { + return nil, err + } + + // Check for end of object + if token.Type == TokenCloseBrace { + if isArray && len(contents) == 0 { + // Using array, set contents to nil to signal in defer that it should be returned to pool + contentsToReturn := contents + contents = nil + valuePool.Put(contentsToReturn) + return arrayElements, nil + } + + // We're returning contents directly, set to nil to signal in defer not to return to pool + result := contents + contents = nil + return result, nil + } + + // Handle based on token type + switch token.Type { + case TokenName: + // Get the name value - must copy for stability + name := string(token.Value) + + // Get the next token to determine if it's a map entry or array element + nextToken, err := c.nextToken() + if err != nil { + return nil, err + } + + if nextToken.Type == TokenEquals { + // It's a key-value pair + value, err := c.parseValue() + if err != nil { + return nil, err + } + + isArray = false + contents[name] = value + } else if nextToken.Type == TokenOpenBrace { + // It's a nested object + objValue, err := c.parseObject() + if err != nil { + return nil, err + } + + isArray = false + contents[name] = objValue + } else { + // Put the token back and treat the name as an array element + c.scanner.UnreadToken(nextToken) + + // Convert to appropriate type if possible + var value any = name + + // Try to infer type + if name == "true" { + value = true + } else if name == "false" { + value = false + } else if isDigitOrMinus(name) { + // Try to parse as number + numValue, err := parseStringAsNumber(name) + if err == nil { + value = numValue + } + } + + arrayElements = append(arrayElements, value) + } + + case TokenString, TokenNumber, TokenBoolean: + // Direct array element + var value any + + switch token.Type { + case TokenString: + value = string(token.Value) + case TokenNumber: + strVal := string(token.Value) + value, _ = parseStringAsNumber(strVal) + case TokenBoolean: + value = bytesEqual(token.Value, []byte("true")) + } + + arrayElements = append(arrayElements, value) + + case TokenOpenBrace: + // Nested object in array + nestedObj, err := c.parseObject() + if err != nil { + return nil, err + } + arrayElements = append(arrayElements, nestedObj) + + default: + return nil, c.Error(fmt.Sprintf("unexpected token in object: %v", token.Type)) + } + } +} + // Load parses a config from a reader func Load(r io.Reader) (*Config, error) { - parser := NewParser(r) - return parser.Parse() + config := NewConfig() + err := config.Parse(r) + + if err != nil { + return nil, err + } + + return config, nil } // Helpers @@ -216,6 +526,19 @@ func ParseNumber(s string) (any, error) { return strconv.ParseInt(s, 10, 64) } +// bytesEqual compares a byte slice with either a string or byte slice +func bytesEqual(b []byte, s []byte) bool { + if len(b) != len(s) { + return false + } + for i := 0; i < len(b); i++ { + if b[i] != s[i] { + return false + } + } + return true +} + // isDigitOrMinus checks if a string starts with a digit or minus sign func isDigitOrMinus(s string) bool { if len(s) == 0 { diff --git a/parser.go b/parser.go deleted file mode 100644 index 80c03a3..0000000 --- a/parser.go +++ /dev/null @@ -1,282 +0,0 @@ -package config - -import ( - "fmt" - "io" - "strconv" -) - -// Parser parses configuration files -type Parser struct { - scanner *Scanner - config *Config - currentObject map[string]any - stack []map[string]any - currentToken Token -} - -// NewParser creates a new parser with a reader and empty config -func NewParser(r io.Reader) *Parser { - config := NewConfig() - return &Parser{ - scanner: NewScanner(r), - config: config, - currentObject: config.data, - stack: make([]map[string]any, 0, 8), // Pre-allocate stack with reasonable capacity - } -} - -// Error creates an error with line information from the current token -func (p *Parser) Error(msg string) error { - return fmt.Errorf("line %d, column %d: %s", - p.currentToken.Line, p.currentToken.Column, msg) -} - -// Parse parses the config file and returns a Config -func (p *Parser) Parse() (*Config, error) { - err := p.parseContent() - if err != nil { - return nil, err - } - return p.config, nil -} - -// nextToken gets the next meaningful token (skipping comments) -func (p *Parser) nextToken() (Token, error) { - for { - token, err := p.scanner.NextToken() - if err != nil { - return token, err - } - - // Skip comment tokens - if token.Type != TokenComment { - p.currentToken = token - return token, nil - } - } -} - -// parseContent is the main parsing function -func (p *Parser) parseContent() error { - for { - token, err := p.nextToken() - if err != nil { - return err - } - - // Check for end of file - if token.Type == TokenEOF { - break - } - - // We expect top level entries to be names - if token.Type != TokenName { - return p.Error("expected name at top level") - } - - // Get the property name - name := string(token.Value) - - // Get the next token (should be = or {) - token, err = p.nextToken() - if err != nil { - return err - } - - var value any - - if token.Type == TokenEquals { - // It's a standard key=value assignment - value, err = p.parseValue() - if err != nil { - return err - } - } else if token.Type == TokenOpenBrace { - // It's a map/array without '=' - value, err = p.parseObject() - if err != nil { - return err - } - } else { - return p.Error("expected '=' or '{' after name") - } - - // Store the value in the config - if mapValue, ok := value.(map[string]any); ok { - // Add an entry in current object - newMap := make(map[string]any, 8) // Pre-allocate with capacity - p.currentObject[name] = newMap - - // Process the map contents - p.stack = append(p.stack, p.currentObject) - p.currentObject = newMap - - // Copy values from scanned map to our object - for k, v := range mapValue { - p.currentObject[k] = v - } - - // Restore parent object - n := len(p.stack) - if n > 0 { - p.currentObject = p.stack[n-1] - p.stack = p.stack[:n-1] - } - } else { - // Direct storage for primitives and arrays - p.currentObject[name] = value - } - } - - return nil -} - -// parseValue parses a value after an equals sign -func (p *Parser) parseValue() (any, error) { - token, err := p.nextToken() - if err != nil { - return nil, err - } - - switch token.Type { - case TokenString: - return string(token.Value), nil - - case TokenNumber: - strValue := string(token.Value) - for i := 0; i < len(strValue); i++ { - if strValue[i] == '.' { - // It's a float - val, err := strconv.ParseFloat(strValue, 64) - if err != nil { - return nil, p.Error(fmt.Sprintf("invalid float: %s", strValue)) - } - return val, nil - } - } - // It's an integer - val, err := strconv.ParseInt(strValue, 10, 64) - if err != nil { - return nil, p.Error(fmt.Sprintf("invalid integer: %s", strValue)) - } - return val, nil - - case TokenBoolean: - return string(token.Value) == "true", nil - - case TokenOpenBrace: - // It's a map or array - return p.parseObject() - - case TokenName: - // Treat as a string value - return string(token.Value), nil - - default: - return nil, p.Error(fmt.Sprintf("unexpected token: %v", token.Type)) - } -} - -// parseObject parses a map or array -func (p *Parser) parseObject() (any, error) { - contents := make(map[string]any) - var arrayElements []any - isArray := true - - for { - token, err := p.nextToken() - if err != nil { - return nil, err - } - - // Check for end of object - if token.Type == TokenCloseBrace { - if isArray && len(contents) == 0 { - return arrayElements, nil - } - return contents, nil - } - - // Handle based on token type - switch token.Type { - case TokenName: - // Get the name value - name := string(token.Value) - - // Get the next token to determine if it's a map entry or array element - nextToken, err := p.nextToken() - if err != nil { - return nil, err - } - - if nextToken.Type == TokenEquals { - // It's a key-value pair - value, err := p.parseValue() - if err != nil { - return nil, err - } - - isArray = false - contents[name] = value - } else if nextToken.Type == TokenOpenBrace { - // It's a nested object - objValue, err := p.parseObject() - if err != nil { - return nil, err - } - - isArray = false - contents[name] = objValue - } else { - // Put the token back and treat the name as an array element - p.scanner.UnreadToken(nextToken) - - // Convert to appropriate type if possible - var value any = name - - // Try to infer type - if name == "true" { - value = true - } else if name == "false" { - value = false - } else if isDigitOrMinus(name) { - // Try to parse as number - numValue, err := parseStringAsNumber(name) - if err == nil { - value = numValue - } - } - - arrayElements = append(arrayElements, value) - } - - case TokenString, TokenNumber, TokenBoolean: - // Direct array element - var value any - - switch token.Type { - case TokenString: - value = string(token.Value) - case TokenNumber: - strVal := string(token.Value) - value, _ = parseStringAsNumber(strVal) - case TokenBoolean: - value = string(token.Value) == "true" - } - - arrayElements = append(arrayElements, value) - - case TokenOpenBrace: - // Nested object in array - nestedObj, err := p.parseObject() - if err != nil { - return nil, err - } - arrayElements = append(arrayElements, nestedObj) - - default: - return nil, p.Error(fmt.Sprintf("unexpected token in object: %v", token.Type)) - } - } -} diff --git a/scanner.go b/scanner.go index 7b9fc72..c70584f 100644 --- a/scanner.go +++ b/scanner.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "io" + "sync" ) // Pre-declared errors to reduce allocations @@ -19,19 +20,41 @@ var ( // Scanner handles the low-level parsing of the configuration format type Scanner struct { reader *bufio.Reader - line int // Current line number - col int // Current column position + line int + col int buffer []byte - token Token // Current token + token Token // Current token for unread } -// NewScanner creates a new scanner with the given reader +// scannerPool helps reuse scanner objects +var scannerPool = sync.Pool{ + New: func() interface{} { + return &Scanner{ + line: 1, + col: 0, + buffer: make([]byte, 0, 128), + } + }, +} + +// NewScanner creates a new scanner from a pool func NewScanner(r io.Reader) *Scanner { - return &Scanner{ - reader: bufio.NewReader(r), - line: 1, // Start at line 1 - col: 0, - buffer: make([]byte, 0, 128), // Pre-allocate with reasonable capacity + s := scannerPool.Get().(*Scanner) + s.reader = bufio.NewReader(r) + s.line = 1 + s.col = 0 + s.buffer = s.buffer[:0] + s.token = Token{Type: TokenError} + return s +} + +// ReleaseScanner returns a scanner to the pool +func ReleaseScanner(s *Scanner) { + if s != nil { + // Clear references but keep allocated memory + s.reader = nil + s.buffer = s.buffer[:0] + scannerPool.Put(s) } } @@ -282,7 +305,7 @@ func (s *Scanner) scanString(startLine, startColumn int) (Token, error) { } } - // Use the buffer directly - consumer is responsible for copying if needed + // Return token with buffer value - important: consumer must copy if needed return Token{ Type: TokenString, Value: s.buffer, @@ -323,15 +346,15 @@ func (s *Scanner) scanName(startLine, startColumn int) (Token, error) { _, _ = s.ReadByte() } - // Check if it's a boolean - fixed comparison + // Check if it's a boolean - use direct byte comparison tokenType := TokenName - if string(s.buffer) == "true" || string(s.buffer) == "false" { + if bytesEqual(s.buffer, []byte("true")) || bytesEqual(s.buffer, []byte("false")) { tokenType = TokenBoolean } return Token{ Type: tokenType, - Value: s.buffer, + Value: s.buffer, // Direct buffer reference - consumer must copy! Line: startLine, Column: startColumn, }, nil @@ -374,7 +397,7 @@ func (s *Scanner) scanNumber(startLine, startColumn int) (Token, error) { return Token{ Type: TokenNumber, - Value: s.buffer, + Value: s.buffer, // Direct buffer reference - consumer must copy! Line: startLine, Column: startColumn, }, nil diff --git a/token.go b/token.go index 59d6bf9..9170548 100644 --- a/token.go +++ b/token.go @@ -19,7 +19,7 @@ const ( // Token represents a lexical token type Token struct { Type TokenType - Value []byte + Value []byte // Not modified after returning - caller must copy if needed Line int Column int }