package config

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"sync"
)

// Pre-declared errors to reduce allocations.
var (
	ErrUnterminatedString  = errors.New("unterminated string")
	ErrUnterminatedEscape  = errors.New("unterminated escape sequence")
	ErrUnterminatedComment = errors.New("unclosed block comment")
	ErrInvalidComment      = errors.New("invalid comment")
	ErrNameStartWithLetter = errors.New("name must start with letter")
)

// Scanner handles the low-level parsing of the configuration format.
//
// String, name and number tokens returned by NextToken carry a Value that
// aliases the scanner's internal buffer. The buffer is overwritten by the
// next scanned token, so callers that keep a Value must copy it first.
type Scanner struct {
	reader *bufio.Reader
	line   int // current line, starting at 1
	col    int // bytes consumed on the current line

	// prevLine and prevCol hold the position as it was before the most
	// recent successful ReadByte, so that UnreadByte can restore it exactly
	// (including stepping back over a newline).
	prevLine int
	prevCol  int

	buffer []byte // scratch space shared by all scanned token values
	token  Token  // token pushed back by UnreadToken; TokenError means "none"
}

// scannerPool helps reuse scanner objects and their scratch buffers.
var scannerPool = sync.Pool{
	New: func() interface{} {
		return &Scanner{
			line:     1,
			col:      0,
			prevLine: 1,
			prevCol:  0,
			buffer:   make([]byte, 0, 128),
		}
	},
}

// NewScanner takes a scanner from the pool and resets it to read from r.
// Hand the scanner back with ReleaseScanner once it is no longer needed.
func NewScanner(r io.Reader) *Scanner {
	s := scannerPool.Get().(*Scanner)
	s.reader = bufio.NewReader(r)
	s.line, s.col = 1, 0
	s.prevLine, s.prevCol = 1, 0
	s.buffer = s.buffer[:0]
	s.token = Token{Type: TokenError}
	return s
}

// ReleaseScanner returns a scanner to the pool. A nil scanner is ignored.
// The scanner must not be used after it has been released.
func ReleaseScanner(s *Scanner) {
	if s == nil {
		return
	}
	// Drop the reader reference but keep the allocated scratch buffer.
	s.reader = nil
	s.buffer = s.buffer[:0]
	scannerPool.Put(s)
}

// ReadByte reads a single byte from the input and advances the tracked
// line/column position. The position is left untouched when the read fails.
func (s *Scanner) ReadByte() (byte, error) {
	b, err := s.reader.ReadByte()
	if err != nil {
		return b, err
	}

	// Remember where we were so UnreadByte can undo this step.
	s.prevLine, s.prevCol = s.line, s.col
	if b == '\n' {
		s.line++
		s.col = 0
	} else {
		s.col++
	}
	return b, nil
}

// PeekByte looks at the next byte without consuming it.
func (s *Scanner) PeekByte() (byte, error) {
	next, err := s.reader.Peek(1)
	if err != nil {
		return 0, err
	}
	return next[0], nil
}

// PeekBytes looks at the next n bytes without consuming them.
func (s *Scanner) PeekBytes(n int) ([]byte, error) {
	return s.reader.Peek(n)
}

// UnreadByte pushes the most recently read byte back to the reader and
// restores the position recorded before that byte was read. This also undoes
// the line increment when the byte was a newline.
//
// The underlying bufio.Reader only supports un-reading the single most
// recently read byte; when it refuses, its error is returned and the position
// is unchanged.
func (s *Scanner) UnreadByte() error {
	if err := s.reader.UnreadByte(); err != nil {
		return err
	}
	s.line, s.col = s.prevLine, s.prevCol
	return nil
}

// Error creates an error with the scanner's current line and column.
func (s *Scanner) Error(msg string) error {
	return fmt.Errorf("line %d, column %d: %s", s.line, s.col, msg)
}

// SkipWhitespace consumes spaces, tabs, carriage returns and newlines.
// Reaching end of input is not an error: it returns nil.
func (s *Scanner) SkipWhitespace() error {
	for {
		b, err := s.PeekByte()
		if errors.Is(err, io.EOF) {
			return nil
		}
		if err != nil {
			return err
		}

		switch b {
		case ' ', '\t', '\n', '\r':
			if _, err := s.ReadByte(); err != nil {
				return err
			}
		default:
			return nil
		}
	}
}

// UnreadToken stores a token to be returned by the next call to NextToken.
// Only one token can be held; a later call replaces an earlier one.
func (s *Scanner) UnreadToken(token Token) {
	s.token = token
}

// NextToken scans and returns the next token.
//
// A token stored with UnreadToken is returned first. At end of input a
// TokenEOF token is returned with a nil error. On failure the returned token
// has type TokenError with the message in Value, and the error is non-nil.
func (s *Scanner) NextToken() (Token, error) {
	// TokenError doubles as the "no stored token" marker.
	if s.token.Type != TokenError {
		stored := s.token
		s.token = Token{Type: TokenError}
		return stored, nil
	}

	if err := s.SkipWhitespace(); err != nil {
		if errors.Is(err, io.EOF) {
			return Token{Type: TokenEOF}, nil
		}
		return Token{Type: TokenError, Value: []byte(err.Error())}, err
	}

	b, err := s.PeekByte()
	if err != nil {
		if errors.Is(err, io.EOF) {
			return Token{Type: TokenEOF}, nil
		}
		return Token{Type: TokenError, Value: []byte(err.Error())}, err
	}

	// Record the start position before anything is consumed.
	startLine, startColumn := s.line, s.col

	switch {
	case b == '=':
		_, _ = s.ReadByte() // cannot fail: the byte was just peeked
		return Token{Type: TokenEquals, Line: startLine, Column: startColumn}, nil

	case b == '{':
		_, _ = s.ReadByte() // cannot fail: the byte was just peeked
		return Token{Type: TokenOpenBrace, Line: startLine, Column: startColumn}, nil

	case b == '}':
		_, _ = s.ReadByte() // cannot fail: the byte was just peeked
		return Token{Type: TokenCloseBrace, Line: startLine, Column: startColumn}, nil

	case b == '-':
		// A dash starts either a comment ("--") or a negative number.
		ahead, peekErr := s.PeekBytes(2)
		if peekErr == nil && len(ahead) == 2 {
			if ahead[1] == '-' {
				if err := s.scanComment(); err != nil {
					return Token{Type: TokenError, Value: []byte(err.Error()), Line: startLine, Column: startColumn}, err
				}
				return Token{Type: TokenComment, Line: startLine, Column: startColumn}, nil
			}
			if isDigit(ahead[1]) {
				return s.scanNumber(startLine, startColumn)
			}
		}

		// Just a single dash: consume it so the caller can make progress.
		_, _ = s.ReadByte()
		const msg = "unexpected '-'"
		return Token{Type: TokenError, Value: []byte(msg), Line: startLine, Column: startColumn}, s.Error(msg)

	case b == '"':
		return s.scanString(startLine, startColumn)

	case isLetter(b):
		return s.scanName(startLine, startColumn)

	case isDigit(b):
		return s.scanNumber(startLine, startColumn)

	default:
		// Consume the offending byte so the caller can make progress.
		_, _ = s.ReadByte()
		msg := fmt.Sprintf("unexpected character: %c", b)
		return Token{Type: TokenError, Value: []byte(msg), Line: startLine, Column: startColumn}, s.Error(msg)
	}
}

// scanComment consumes a comment starting at "--".
//
// "--[[" opens a block comment that runs until the next "]]"; anything else
// is a line comment that runs until the next newline or end of input. It
// returns ErrInvalidComment when the input does not start with "--" and
// ErrUnterminatedComment when a block comment is never closed.
func (s *Scanner) scanComment() error {
	// First dash.
	if _, err := s.ReadByte(); err != nil {
		return err
	}

	// Second dash.
	second, err := s.ReadByte()
	if err != nil {
		return err
	}
	if second != '-' {
		return ErrInvalidComment
	}

	// Look for the "[[" that opens a block comment. A lone "[" is simply
	// swallowed as part of a line comment.
	if next, err := s.PeekByte(); err == nil && next == '[' {
		_, _ = s.ReadByte() // consume first [
		if next, err := s.PeekByte(); err == nil && next == '[' {
			_, _ = s.ReadByte() // consume second [
			return s.skipBlockComment()
		}
	}

	// Line comment: consume until newline or EOF.
	for {
		b, err := s.ReadByte()
		if errors.Is(err, io.EOF) {
			return nil
		}
		if err != nil {
			return err
		}
		if b == '\n' {
			return nil
		}
	}
}

// skipBlockComment consumes input up to and including the closing "]]" of a
// block comment. Any read failure before the terminator is reported as
// ErrUnterminatedComment.
func (s *Scanner) skipBlockComment() error {
	for {
		b, err := s.ReadByte()
		if err != nil {
			return ErrUnterminatedComment
		}
		if b != ']' {
			continue
		}
		if next, err := s.PeekByte(); err == nil && next == ']' {
			_, _ = s.ReadByte() // consume second ]
			return nil
		}
	}
}

// scanString scans a double-quoted string starting at the opening quote.
//
// The escapes \" \\ \n and \t are decoded; any other escape is kept verbatim
// as a backslash followed by the escaped byte. The returned Value aliases the
// scanner's internal buffer, so the consumer must copy it if it is kept.
func (s *Scanner) scanString(startLine, startColumn int) (Token, error) {
	s.buffer = s.buffer[:0]

	// Opening quote.
	if _, err := s.ReadByte(); err != nil {
		return Token{Type: TokenError, Value: []byte(err.Error()), Line: startLine, Column: startColumn}, err
	}

	for {
		b, err := s.ReadByte()
		if err != nil {
			return Token{
				Type:   TokenError,
				Value:  []byte(ErrUnterminatedString.Error()),
				Line:   startLine,
				Column: startColumn,
			}, ErrUnterminatedString
		}
		if b == '"' {
			break
		}
		if b != '\\' {
			s.buffer = append(s.buffer, b)
			continue
		}

		// Escape sequence.
		escaped, err := s.ReadByte()
		if err != nil {
			return Token{
				Type:   TokenError,
				Value:  []byte(ErrUnterminatedEscape.Error()),
				Line:   startLine,
				Column: startColumn,
			}, ErrUnterminatedEscape
		}
		switch escaped {
		case '"', '\\':
			s.buffer = append(s.buffer, escaped)
		case 'n':
			s.buffer = append(s.buffer, '\n')
		case 't':
			s.buffer = append(s.buffer, '\t')
		default:
			// Unknown escape: keep it exactly as written.
			s.buffer = append(s.buffer, '\\', escaped)
		}
	}

	return Token{
		Type:   TokenString,
		Value:  s.buffer, // direct buffer reference - consumer must copy!
		Line:   startLine,
		Column: startColumn,
	}, nil
}

// scanName scans an identifier: a letter followed by letters, digits or
// underscores. The names "true" and "false" yield a TokenBoolean instead of a
// TokenName. The returned Value aliases the scanner's internal buffer.
func (s *Scanner) scanName(startLine, startColumn int) (Token, error) {
	s.buffer = s.buffer[:0]

	first, err := s.ReadByte()
	if err != nil {
		return Token{Type: TokenError, Value: []byte(err.Error()), Line: startLine, Column: startColumn}, err
	}
	if !isLetter(first) {
		return Token{
			Type:   TokenError,
			Value:  []byte(ErrNameStartWithLetter.Error()),
			Line:   startLine,
			Column: startColumn,
		}, ErrNameStartWithLetter
	}
	s.buffer = append(s.buffer, first)

	// Rest of the name.
	for {
		b, err := s.PeekByte()
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			return Token{Type: TokenError, Value: []byte(err.Error()), Line: startLine, Column: startColumn}, err
		}
		if !isLetter(b) && !isDigit(b) && b != '_' {
			break
		}
		s.buffer = append(s.buffer, b)
		_, _ = s.ReadByte() // cannot fail: the byte was just peeked
	}

	// Comparing via string(...) in a switch does not allocate.
	tokenType := TokenName
	switch string(s.buffer) {
	case "true", "false":
		tokenType = TokenBoolean
	}

	return Token{
		Type:   tokenType,
		Value:  s.buffer, // direct buffer reference - consumer must copy!
		Line:   startLine,
		Column: startColumn,
	}, nil
}

// scanNumber scans a numeric value: a first byte that the caller has already
// identified as a minus sign or digit, followed by digits and at most one
// decimal point. The returned Value aliases the scanner's internal buffer.
func (s *Scanner) scanNumber(startLine, startColumn int) (Token, error) {
	s.buffer = s.buffer[:0]

	// First byte (minus sign or digit) is taken as-is.
	first, err := s.ReadByte()
	if err != nil {
		return Token{Type: TokenError, Value: []byte(err.Error()), Line: startLine, Column: startColumn}, err
	}
	s.buffer = append(s.buffer, first)

	seenDot := false
scan:
	for {
		b, err := s.PeekByte()
		if err != nil {
			if errors.Is(err, io.EOF) {
				break
			}
			return Token{Type: TokenError, Value: []byte(err.Error()), Line: startLine, Column: startColumn}, err
		}

		switch {
		case b == '.' && !seenDot:
			seenDot = true
		case isDigit(b):
			// digit: accepted below
		default:
			break scan
		}
		_, _ = s.ReadByte() // cannot fail: the byte was just peeked
		s.buffer = append(s.buffer, b)
	}

	return Token{
		Type:   TokenNumber,
		Value:  s.buffer, // direct buffer reference - consumer must copy!
		Line:   startLine,
		Column: startColumn,
	}, nil
}