Fin/scanner.go

404 lines
9.2 KiB
Go

package scf
import (
"bufio"
"errors"
"fmt"
"io"
"sync"
)
// Sentinel errors returned by the scanner. They are created once at package
// initialisation so the scanning routines can hand back an existing value
// instead of building a new error on every failure.
var (
	// ErrInvalidComment is returned by scanComment when the byte after the
	// first '-' is not a second '-'.
	ErrInvalidComment = errors.New("invalid comment")

	// ErrNameStartWithLetter is returned by scanName when the first byte of
	// an identifier is not a letter.
	ErrNameStartWithLetter = errors.New("name must start with letter")

	// ErrUnterminatedComment is returned by scanComment when reading fails
	// before the closing "]]" of a block comment is seen.
	ErrUnterminatedComment = errors.New("unclosed block comment")

	// ErrUnterminatedEscape is returned by scanString when reading fails
	// right after a backslash inside a string.
	ErrUnterminatedEscape = errors.New("unterminated escape sequence")

	// ErrUnterminatedString is returned by scanString when reading fails
	// before the closing quote is seen.
	ErrUnterminatedString = errors.New("unterminated string")
)
// Scanner is the byte-level tokenizer for the configuration format. It wraps
// a buffered reader, tracks the current position for diagnostics, and owns a
// reusable scratch buffer in which token text is assembled.
type Scanner struct {
	// reader is the buffered input; it is set by NewScanner and cleared by
	// ReleaseScanner.
	reader *bufio.Reader

	// line starts at 1 and is incremented by ReadByte on every '\n'.
	// col is the number of bytes consumed on the current line.
	line, col int

	// buffer is the working slice that token text is appended to. It is
	// re-sliced from *bufferRef at the start of every string/name/number scan.
	buffer []byte

	// bufferRef points at the byte slice obtained from GetByteSlice.
	bufferRef *[]byte

	// token holds a token pushed back with UnreadToken. A Type of TokenError
	// means no token is pending.
	token Token
}
// scannerPool recycles Scanner values between uses. A freshly created Scanner
// gets its scratch buffer from GetByteSlice and starts at line 1, column 0.
var scannerPool = sync.Pool{
	New: func() any {
		ref := GetByteSlice()
		sc := &Scanner{
			bufferRef: ref,
			line:      1,
		}
		// Start with an empty view over the obtained storage.
		sc.buffer = (*ref)[:0]
		return sc
	},
}
// NewScanner takes a Scanner from the pool and prepares it to read from r.
// The position is reset to line 1, column 0, the scratch buffer is emptied,
// and any pending pushed-back token is discarded. Hand the scanner back with
// ReleaseScanner when finished.
func NewScanner(r io.Reader) *Scanner {
	sc := scannerPool.Get().(*Scanner)

	sc.line, sc.col = 1, 0
	sc.token = Token{Type: TokenError} // TokenError marks "no pending token"
	sc.buffer = (*sc.bufferRef)[:0]
	sc.reader = bufio.NewReaderSize(r, 1024)

	return sc
}
// ReleaseScanner hands s back to the pool for reuse. A nil scanner is
// ignored. The reader reference is dropped and the scratch buffer is emptied,
// but the buffer's underlying storage is kept for the next user.
func ReleaseScanner(s *Scanner) {
	if s == nil {
		return
	}
	s.buffer = (*s.bufferRef)[:0]
	s.reader = nil
	scannerPool.Put(s)
}
// ReadByte consumes one byte from the input and updates the position: a
// newline advances the line counter and resets the column to 0, any other
// byte advances the column by one. The position is left untouched when the
// underlying read fails.
func (s *Scanner) ReadByte() (byte, error) {
	c, err := s.reader.ReadByte()
	if err != nil {
		return c, err
	}

	switch c {
	case '\n':
		s.line++
		s.col = 0
	default:
		s.col++
	}
	return c, nil
}
// PeekByte returns the next input byte without consuming it. On failure it
// returns 0 together with the error from the underlying reader.
func (s *Scanner) PeekByte() (byte, error) {
	ahead, err := s.reader.Peek(1)
	if err == nil {
		return ahead[0], nil
	}
	return 0, err
}
// PeekBytes returns the next n input bytes without consuming them. The slice
// and error come straight from the underlying reader's Peek, so the slice may
// be shorter than n when an error is reported.
func (s *Scanner) PeekBytes(n int) ([]byte, error) {
	ahead, err := s.reader.Peek(n)
	return ahead, err
}
// UnreadByte pushes the most recently read byte back onto the reader and
// rewinds the position bookkeeping that ReadByte applied for it.
//
// For an ordinary byte the column is decremented. For a newline the line
// counter is decremented instead, so that reading the newline again does not
// count the same line twice. The column of the previous line is not tracked,
// so it stays at 0 until the newline is read again.
//
// It returns the underlying reader's error, leaving the position unchanged,
// when no byte can be unread.
func (s *Scanner) UnreadByte() error {
	if err := s.reader.UnreadByte(); err != nil {
		return err
	}

	// The byte that was just pushed back is now the next buffered byte, so
	// this Peek only inspects the buffer and performs no I/O.
	if next, err := s.reader.Peek(1); err == nil && next[0] == '\n' {
		if s.line > 1 {
			s.line--
		}
		return nil
	}

	if s.col > 0 {
		s.col--
	}
	return nil
}
// Error builds an error whose text is msg prefixed with the scanner's current
// line and column.
func (s *Scanner) Error(msg string) error {
	const layout = "line %d, column %d: %s"
	return fmt.Errorf(layout, s.line, s.col, msg)
}
// SkipWhitespace consumes spaces, tabs, carriage returns and newlines until
// the next byte is something else or the input ends. Reaching end of input is
// not an error; any other read failure is returned to the caller.
func (s *Scanner) SkipWhitespace() error {
	for {
		next, err := s.PeekByte()
		switch {
		case err == io.EOF:
			return nil
		case err != nil:
			return err
		}

		switch next {
		case ' ', '\t', '\n', '\r':
			// Consume through ReadByte so line/column stay accurate.
			if _, err := s.ReadByte(); err != nil {
				return err
			}
		default:
			return nil
		}
	}
}
// UnreadToken remembers token so that the next call to NextToken returns it
// instead of scanning the input. Only one token is held; a second call
// overwrites the first. NextToken treats a stored token of Type TokenError as
// "nothing pending", so pushing back such a token has no effect.
func (s *Scanner) UnreadToken(token Token) {
	s.token = token
}
// NextToken returns the next token from the input.
//
// A token previously stored with UnreadToken is returned first. Otherwise
// leading whitespace is skipped and the token kind is chosen from the first
// byte:
//
//	'{' / '}'   brace tokens
//	"--"        comment (TokenComment, carries no value)
//	'-' + digit negative number
//	'"'         quoted string
//	letter      name or boolean
//	digit       number
//
// End of input yields a TokenEOF token with a nil error. Any other byte is
// consumed and reported as a TokenError token together with a non-nil error.
// Every token built here, including error tokens, carries the line and column
// at which the token started.
func (s *Scanner) NextToken() (Token, error) {
	if s.token.Type != TokenError {
		// Hand out the pushed-back token and clear the slot.
		token := s.token
		s.token = Token{Type: TokenError}
		return token, nil
	}

	if err := s.SkipWhitespace(); err != nil {
		if err == io.EOF {
			return Token{Type: TokenEOF, Line: s.line, Column: s.col}, nil
		}
		return Token{Type: TokenError, Value: []byte(err.Error()), Line: s.line, Column: s.col}, err
	}

	b, err := s.PeekByte()
	if err != nil {
		if err == io.EOF {
			return Token{Type: TokenEOF, Line: s.line, Column: s.col}, nil
		}
		return Token{Type: TokenError, Value: []byte(err.Error()), Line: s.line, Column: s.col}, err
	}

	// Remember where the token starts; the position moves as bytes are consumed.
	startLine, startColumn := s.line, s.col

	switch {
	case b == '{':
		_, _ = s.ReadByte() // the byte was just peeked, so this cannot fail
		return Token{Type: TokenOpenBrace, Line: startLine, Column: startColumn}, nil

	case b == '}':
		_, _ = s.ReadByte() // the byte was just peeked, so this cannot fail
		return Token{Type: TokenCloseBrace, Line: startLine, Column: startColumn}, nil

	case b == '-':
		// Look one byte further to tell a comment from a negative number.
		peekBytes, err := s.PeekBytes(2)
		if err == nil && len(peekBytes) == 2 {
			switch {
			case peekBytes[1] == '-':
				if err := s.scanComment(); err != nil {
					return Token{Type: TokenError, Value: []byte(err.Error()), Line: startLine, Column: startColumn}, err
				}
				return Token{Type: TokenComment, Line: startLine, Column: startColumn}, nil
			case isDigit(peekBytes[1]):
				return s.scanNumber(startLine, startColumn)
			}
		}

		// A dash followed by anything else (or by end of input) is invalid.
		_, _ = s.ReadByte() // consume the dash so scanning can move on
		const msg = "unexpected '-'"
		return Token{Type: TokenError, Value: []byte(msg), Line: startLine, Column: startColumn}, s.Error(msg)

	case b == '"':
		return s.scanString(startLine, startColumn)

	case isLetter(b):
		return s.scanName(startLine, startColumn)

	case isDigit(b):
		return s.scanNumber(startLine, startColumn)

	default:
		_, _ = s.ReadByte() // consume the offending byte so scanning can move on
		msg := fmt.Sprintf("unexpected character: %c", b)
		return Token{Type: TokenError, Value: []byte(msg), Line: startLine, Column: startColumn}, s.Error(msg)
	}
}
// scanComment consumes a comment starting at the current position.
//
// The comment must begin with "--". If "[[" follows immediately, it is a
// block comment that runs until the next "]]"; otherwise it is a line comment
// that runs through the next newline or to end of input. A lone '[' after the
// dashes is consumed and the rest is treated as a line comment.
//
// It returns ErrInvalidComment when the second byte is not '-',
// ErrUnterminatedComment when reading fails inside a block comment, and the
// reader's error for other read failures.
func (s *Scanner) scanComment() error {
	// Leading "--".
	if _, err := s.ReadByte(); err != nil {
		return err
	}
	second, err := s.ReadByte()
	if err != nil {
		return err
	}
	if second != '-' {
		return ErrInvalidComment
	}

	// takeBracket consumes the next byte only if it is '['.
	takeBracket := func() bool {
		c, err := s.PeekByte()
		if err != nil || c != '[' {
			return false
		}
		_, _ = s.ReadByte()
		return true
	}

	if takeBracket() && takeBracket() {
		// Block comment: read until a ']' is directly followed by another ']'.
		for {
			c, err := s.ReadByte()
			if err != nil {
				return ErrUnterminatedComment
			}
			if c != ']' {
				continue
			}
			if next, err := s.PeekByte(); err == nil && next == ']' {
				_, _ = s.ReadByte()
				return nil
			}
		}
	}

	// Line comment: discard everything up to and including the newline.
	for {
		c, err := s.ReadByte()
		switch {
		case err == io.EOF:
			return nil
		case err != nil:
			return err
		case c == '\n':
			return nil
		}
	}
}
// scanString scans a double-quoted string starting at the opening quote and
// returns a TokenString whose Value is the unescaped contents.
//
// Recognised escapes are \" \\ \n and \t; any other escaped byte is kept
// verbatim together with its backslash. startLine and startColumn give the
// position of the opening quote and are stamped on the returned token,
// including error tokens.
//
// The token's Value aliases the scanner's scratch buffer, which is reused by
// the next string/name/number scan; callers must copy it to retain it.
//
// It returns ErrUnterminatedString when reading fails before the closing
// quote and ErrUnterminatedEscape when reading fails right after a backslash.
func (s *Scanner) scanString(startLine, startColumn int) (Token, error) {
	s.buffer = (*s.bufferRef)[:0]

	// fail builds the error token/err pair at the string's start position.
	fail := func(err error) (Token, error) {
		return Token{
			Type:   TokenError,
			Value:  []byte(err.Error()),
			Line:   startLine,
			Column: startColumn,
		}, err
	}

	// Opening quote.
	if _, err := s.ReadByte(); err != nil {
		return fail(err)
	}

	for {
		b, err := s.ReadByte()
		if err != nil {
			return fail(ErrUnterminatedString)
		}
		if b == '"' {
			break
		}
		if b != '\\' {
			s.buffer = append(s.buffer, b)
			continue
		}

		escaped, err := s.ReadByte()
		if err != nil {
			return fail(ErrUnterminatedEscape)
		}
		switch escaped {
		case '"', '\\':
			s.buffer = append(s.buffer, escaped)
		case 'n':
			s.buffer = append(s.buffer, '\n')
		case 't':
			s.buffer = append(s.buffer, '\t')
		default:
			// Unknown escape: keep it as written.
			s.buffer = append(s.buffer, '\\', escaped)
		}
	}

	return Token{
		Type:   TokenString,
		Value:  s.buffer,
		Line:   startLine,
		Column: startColumn,
	}, nil
}
// scanName scans an identifier: a letter followed by any run of letters,
// digits and underscores. The identifiers "true" and "false" are returned as
// TokenBoolean; everything else is a TokenName.
//
// startLine and startColumn give the position of the first byte and are
// stamped on the returned token, including error tokens.
//
// The token's Value aliases the scanner's scratch buffer, which is reused by
// the next string/name/number scan; callers must copy it to retain it.
//
// It returns ErrNameStartWithLetter when the first byte is not a letter, and
// the reader's error for read failures other than end of input.
func (s *Scanner) scanName(startLine, startColumn int) (Token, error) {
	s.buffer = (*s.bufferRef)[:0]

	first, err := s.ReadByte()
	if err != nil {
		return Token{Type: TokenError, Value: []byte(err.Error()), Line: startLine, Column: startColumn}, err
	}
	if !isLetter(first) {
		return Token{
			Type:   TokenError,
			Value:  []byte(ErrNameStartWithLetter.Error()),
			Line:   startLine,
			Column: startColumn,
		}, ErrNameStartWithLetter
	}
	s.buffer = append(s.buffer, first)

	for {
		b, err := s.PeekByte()
		if err == io.EOF {
			break // end of input also ends the name
		}
		if err != nil {
			return Token{Type: TokenError, Value: []byte(err.Error()), Line: startLine, Column: startColumn}, err
		}
		if !isLetter(b) && !isDigit(b) && b != '_' {
			break
		}
		s.buffer = append(s.buffer, b)
		_, _ = s.ReadByte() // the byte was just peeked, so this cannot fail
	}

	tokenType := TokenName
	if bytesEqual(s.buffer, []byte("true")) || bytesEqual(s.buffer, []byte("false")) {
		tokenType = TokenBoolean
	}

	return Token{
		Type:   tokenType,
		Value:  s.buffer,
		Line:   startLine,
		Column: startColumn,
	}, nil
}
// scanNumber scans a numeric literal and returns it as a TokenNumber whose
// Value is the literal's text.
//
// The first byte is taken unconditionally (NextToken calls this only when it
// is a digit, or a '-' followed by a digit). After that, digits are consumed
// along with at most one '.'; a second '.' or any other byte ends the number.
// No digit is required after the dot, so "1." is accepted as written.
//
// startLine and startColumn give the position of the first byte and are
// stamped on the returned token, including error tokens.
//
// The token's Value aliases the scanner's scratch buffer, which is reused by
// the next string/name/number scan; callers must copy it to retain it.
func (s *Scanner) scanNumber(startLine, startColumn int) (Token, error) {
	s.buffer = (*s.bufferRef)[:0]

	first, err := s.ReadByte()
	if err != nil {
		return Token{Type: TokenError, Value: []byte(err.Error()), Line: startLine, Column: startColumn}, err
	}
	s.buffer = append(s.buffer, first)

	hasDot := false
	for {
		b, err := s.PeekByte()
		if err == io.EOF {
			break // end of input also ends the number
		}
		if err != nil {
			return Token{Type: TokenError, Value: []byte(err.Error()), Line: startLine, Column: startColumn}, err
		}

		switch {
		case b == '.' && !hasDot:
			hasDot = true
		case isDigit(b):
			// plain digit, nothing extra to track
		default:
			return Token{
				Type:   TokenNumber,
				Value:  s.buffer,
				Line:   startLine,
				Column: startColumn,
			}, nil
		}

		_, _ = s.ReadByte() // the byte was just peeked, so this cannot fail
		s.buffer = append(s.buffer, b)
	}

	return Token{
		Type:   TokenNumber,
		Value:  s.buffer,
		Line:   startLine,
		Column: startColumn,
	}, nil
}