Fin/scanner.go
2025-03-01 21:10:04 -06:00

485 lines
9.4 KiB
Go

package config
import (
"bufio"
"errors"
"fmt"
"io"
"strconv"
"strings"
"unicode"
)
// Scanner is the low-level reader for the configuration format. It pulls
// runes from a buffered reader and keeps track of the current position so
// that errors can be reported with a line and column.
type Scanner struct {
	reader    *bufio.Reader // buffered source of input runes
	line, col int           // current line and column; updated by ReadRune
	buffer    []rune        // scratch storage reused by ScanName between calls
}
// NewScanner returns a Scanner that reads from r through a buffered reader.
// The position starts at line 1, column 0, and the name buffer is created
// empty with capacity for 64 runes.
func NewScanner(r io.Reader) *Scanner {
	s := new(Scanner)
	s.reader = bufio.NewReader(r)
	s.line = 1 // lines are counted from 1
	s.col = 0
	s.buffer = make([]rune, 0, 64)
	return s
}
// ReadRune consumes the next rune from the input and updates the tracked
// position: a newline increments the line counter and resets the column to
// 0, any other rune increments the column. When the underlying read fails
// the position is left unchanged. The three results are passed through from
// the underlying reader's ReadRune.
func (s *Scanner) ReadRune() (rune, int, error) {
	ch, size, err := s.reader.ReadRune()
	if err != nil {
		return ch, size, err
	}
	switch ch {
	case '\n':
		s.line++
		s.col = 0
	default:
		s.col++
	}
	return ch, size, nil
}
// PeekRune returns the next rune without consuming it. It reads directly
// from the underlying reader and pushes the rune back, so the tracked line
// and column are not affected. On a read error it returns 0 and that error.
func (s *Scanner) PeekRune() (rune, error) {
	next, _, err := s.reader.ReadRune()
	if err == nil {
		// Push the rune back so the next read returns it again; the result
		// of UnreadRune is deliberately discarded.
		_ = s.reader.UnreadRune()
		return next, nil
	}
	return 0, err
}
// Error builds an error whose message is msg prefixed with the scanner's
// current line and column.
func (s *Scanner) Error(msg string) error {
	const format = "line %d, column %d: %s"
	return fmt.Errorf(format, s.line, s.col, msg)
}
// SkipWhitespace consumes runes for as long as unicode.IsSpace reports them
// as whitespace. It stops without error at the first non-space rune or at
// end of input; any other read error is returned.
func (s *Scanner) SkipWhitespace() error {
	for {
		next, err := s.PeekRune()
		switch {
		case err == io.EOF:
			return nil
		case err != nil:
			return err
		case !unicode.IsSpace(next):
			return nil
		}
		// Consume through ReadRune so the line/column counters stay current.
		if _, _, err := s.ReadRune(); err != nil {
			return err
		}
	}
}
// peekAndCheckRune reports whether the next rune equals expected, without
// consuming it. If peeking fails it returns false and the peek error.
func (s *Scanner) peekAndCheckRune(expected rune) (bool, error) {
	next, err := s.PeekRune()
	if err == nil {
		return next == expected, nil
	}
	return false, err
}
// consumeIfMatch consumes the next rune only when it equals expected. It
// returns true when a rune was consumed. A peek error is returned with
// false; a mismatch returns false with no error and leaves the input
// untouched.
func (s *Scanner) consumeIfMatch(expected rune) (bool, error) {
	ok, err := s.peekAndCheckRune(expected)
	if err != nil {
		return false, err
	}
	if !ok {
		return false, nil
	}
	if _, _, err := s.ReadRune(); err != nil {
		return false, err
	}
	return true, nil
}
// ScanComment consumes a comment. The first rune is consumed without being
// inspected (ScanArrayOrMap calls this after peeking a '-'); the second must
// be '-' or an "invalid comment" error is returned. If the dashes are
// followed by "[[" the rest is handled by scanBlockComment. Otherwise the
// comment runs to the next newline or to end of input, both of which end it
// without error.
func (s *Scanner) ScanComment() error {
	// Leading dash.
	if _, _, err := s.ReadRune(); err != nil {
		return err
	}

	// Second dash is mandatory.
	second, _, err := s.ReadRune()
	if err != nil {
		return err
	}
	if second != '-' {
		return s.Error("invalid comment")
	}

	// "--[[" opens a block comment. A lone '[' is consumed here and then
	// simply becomes part of the line comment handled below.
	if next, err := s.PeekRune(); err == nil && next == '[' {
		_, _, _ = s.ReadRune() // first '[', just peeked
		if next, err = s.PeekRune(); err == nil && next == '[' {
			_, _, _ = s.ReadRune() // second '[', just peeked
			return s.scanBlockComment()
		}
	}

	// Line comment: discard everything up to and including the newline.
	for {
		ch, _, err := s.ReadRune()
		switch {
		case err == io.EOF:
			return nil
		case err != nil:
			return err
		case ch == '\n':
			return nil
		}
	}
}
// scanBlockComment discards input up to and including the closing "]]" of a
// block comment. Any read error before the terminator is found (including
// end of input) is reported as an "unclosed block comment" error.
func (s *Scanner) scanBlockComment() error {
	for {
		ch, _, err := s.ReadRune()
		if err != nil {
			return s.Error("unclosed block comment")
		}
		if ch != ']' {
			continue
		}
		// One ']' seen; the comment ends only if another follows directly.
		if next, peekErr := s.PeekRune(); peekErr == nil && next == ']' {
			_, _, _ = s.ReadRune() // second ']', just peeked
			return nil
		}
	}
}
// ScanName reads an identifier: a letter followed by any number of letters,
// digits or underscores. The first rune is always consumed; if it is not a
// letter a "name must start with letter" error is returned. Scanning stops
// (without consuming) at the first rune that cannot be part of a name, or at
// end of input. The scanner's internal buffer is reset and reused on every
// call.
func (s *Scanner) ScanName() (string, error) {
	s.buffer = s.buffer[:0]

	first, _, err := s.ReadRune()
	if err != nil {
		return "", err
	}
	if !unicode.IsLetter(first) {
		return "", s.Error("name must start with letter")
	}
	s.buffer = append(s.buffer, first)

	for {
		next, err := s.PeekRune()
		if err != nil {
			if err == io.EOF {
				break // end of input terminates the name
			}
			return "", err
		}
		isNameRune := next == '_' || unicode.IsLetter(next) || unicode.IsDigit(next)
		if !isNameRune {
			break
		}
		s.buffer = append(s.buffer, next)
		_, _, _ = s.ReadRune() // consume the rune that was just peeked
	}
	return string(s.buffer), nil
}
// processArrayElement reads one array element. When the next rune is a
// letter the element is read with ScanName and offered to ConvertValue; if
// that conversion fails the name itself is returned as a string. Every other
// kind of element is delegated to ScanValue.
func (s *Scanner) processArrayElement() (any, error) {
	next, err := s.PeekRune()
	if err != nil {
		return nil, err
	}

	// Quoted strings, numbers, nested braces, and so on.
	if !unicode.IsLetter(next) {
		return s.ScanValue()
	}

	// Identifier-like element.
	name, err := s.ScanName()
	if err != nil {
		return nil, err
	}
	if converted, convErr := s.ConvertValue(name); convErr == nil {
		return converted, nil
	}
	return name, nil
}
// processMapEntry reads a name and decides whether it starts a map entry or
// is a bare array element.
//
// It returns the name, the associated value, and a flag that is true for a
// map entry and false for an array element:
//
//   - "name = value" and "name { ... }" are map entries; the value is read
//     with ScanValue.
//   - A name followed by anything else (or by end of input) is an array
//     element. Its value is the result of ConvertValue when the name converts
//     (so bare true/false become booleans, matching how ScanValue and
//     processArrayElement treat them), and the name string otherwise.
//
// Read errors other than io.EOF are returned as-is.
func (s *Scanner) processMapEntry() (string, any, bool, error) {
	name, err := s.ScanName()
	if err != nil {
		return "", nil, false, err
	}
	if err := s.SkipWhitespace(); err != nil {
		return "", nil, false, err
	}

	// "name = value": the equals sign is consumed here.
	isEquals, err := s.consumeIfMatch('=')
	if err != nil && err != io.EOF {
		return "", nil, false, err
	}

	// "name { ... }": the brace is only peeked; ScanValue consumes it.
	isBrace := false
	if !isEquals {
		isBrace, err = s.peekAndCheckRune('{')
		if err != nil && err != io.EOF {
			return "", nil, false, err
		}
	}

	if isEquals || isBrace {
		value, err := s.ScanValue()
		if err != nil {
			return "", nil, false, err
		}
		return name, value, true, nil
	}

	// Bare name: an array element. Convert it when possible so that
	// booleans inside braces get the same type as booleans elsewhere.
	if converted, convErr := s.ConvertValue(name); convErr == nil {
		return name, converted, false, nil
	}
	return name, name, false, nil
}
// ScanValue skips leading whitespace and reads one value. A '{' starts an
// array or map (ScanArrayOrMap) and a '"' starts a quoted string
// (ScanString). Anything else is collected up to the next whitespace rune,
// '}' or end of input and passed to ConvertValue. An "empty value" error is
// returned when nothing could be collected; end of input before any value is
// returned as the underlying read error.
func (s *Scanner) ScanValue() (any, error) {
	if err := s.SkipWhitespace(); err != nil {
		return nil, err
	}
	first, err := s.PeekRune()
	if err != nil {
		return nil, err
	}

	switch first {
	case '{':
		return s.ScanArrayOrMap()
	case '"':
		return s.ScanString()
	}

	// Bare token: gather runes until a delimiter is reached.
	var raw []rune
	for {
		next, err := s.PeekRune()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		if next == '}' || unicode.IsSpace(next) {
			break
		}
		raw = append(raw, next)
		_, _, _ = s.ReadRune() // consume the rune that was just peeked
	}
	if len(raw) == 0 {
		return nil, s.Error("empty value")
	}
	return s.ConvertValue(string(raw))
}
// ScanArrayOrMap reads a brace-delimited collection. The opening brace is
// consumed unconditionally (ScanValue calls this after peeking '{').
//
// Inside the braces, whitespace is skipped and "--" comments are consumed
// with ScanComment. Elements that start with a letter go through
// processMapEntry, which classifies them as map entries or array elements;
// all other elements (quoted strings, numbers including negative ones,
// nested braces) are read with processArrayElement.
//
// The result is a map[string]any when at least one map entry was seen, and
// otherwise an []any of the array elements (nil for empty braces). Note that
// when map entries are present, any array elements collected alongside them
// are not part of the returned map.
//
// End of input before the closing brace yields an "unclosed array/map"
// error.
func (s *Scanner) ScanArrayOrMap() (any, error) {
	// Consume the opening brace.
	if _, _, err := s.ReadRune(); err != nil {
		return nil, err
	}

	contents := make(map[string]any)
	var arrayElements []any

	for {
		if err := s.SkipWhitespace(); err != nil {
			return nil, err
		}
		r, err := s.PeekRune()
		if err == io.EOF {
			return nil, s.Error("unclosed array/map")
		}
		if err != nil {
			return nil, err
		}

		if r == '}' {
			_, _, _ = s.ReadRune() // closing brace, just peeked
			break
		}

		if r == '-' {
			// Only "--" starts a comment; a single '-' begins a value such
			// as a negative number. The Peek error is ignored on purpose: a
			// short read returns fewer bytes, which fails the comparison and
			// lets the value path report the problem.
			ahead, _ := s.reader.Peek(2)
			if string(ahead) == "--" {
				if err := s.ScanComment(); err != nil {
					return nil, err
				}
				continue
			}
		}

		// A leading letter may be "name = value", "name { ... }" or a bare
		// name used as an array element.
		if unicode.IsLetter(r) {
			name, value, isMapEntry, err := s.processMapEntry()
			if err != nil {
				return nil, err
			}
			if isMapEntry {
				contents[name] = value
			} else {
				arrayElements = append(arrayElements, value)
			}
			continue
		}

		// Quoted strings, numbers, nested collections, and so on.
		value, err := s.processArrayElement()
		if err != nil {
			return nil, err
		}
		arrayElements = append(arrayElements, value)
	}

	// No map entries at all means the braces held an array.
	if len(contents) == 0 {
		return arrayElements, nil
	}
	return contents, nil
}
// ScanString reads a double-quoted string and returns its contents as a
// string. The opening quote is consumed unconditionally (ScanValue calls
// this after peeking '"').
//
// Recognised escapes are \" , \\ , \n and \t. Any other escaped rune is kept
// verbatim together with its backslash.
//
// If the input ends before the closing quote, including directly after a
// backslash, a positioned "unterminated string" error is returned.
func (s *Scanner) ScanString() (any, error) {
	// Consume the opening quote.
	if _, _, err := s.ReadRune(); err != nil {
		return nil, err
	}

	var builder strings.Builder
	builder.Grow(64) // typical strings are short; avoid early regrowth

	for {
		r, _, err := s.ReadRune()
		if err != nil {
			return nil, s.Error("unterminated string")
		}

		switch r {
		case '"':
			return builder.String(), nil
		case '\\':
			escaped, _, err := s.ReadRune()
			if err != nil {
				// Running out of input mid-escape is still an unterminated
				// string; report it with position like the case above
				// instead of leaking the bare read error.
				return nil, s.Error("unterminated string")
			}
			switch escaped {
			case '"':
				builder.WriteRune('"')
			case '\\':
				builder.WriteRune('\\')
			case 'n':
				builder.WriteRune('\n')
			case 't':
				builder.WriteRune('\t')
			default:
				// Unknown escape: keep it exactly as written.
				builder.WriteRune('\\')
				builder.WriteRune(escaped)
			}
		default:
			builder.WriteRune(r)
		}
	}
}
// ConvertValue converts a bare token to a typed value.
//
//   - "true" and "false" become bool.
//   - An optional leading '-' followed only by decimal digits becomes int64.
//   - The same with exactly one '.', not in the last position, becomes
//     float64.
//
// Everything else is rejected with an error: the empty string, a lone "-",
// more than one '.', a trailing '.', or any other character. Numbers that do
// not fit the target type are rejected with strconv's error. On every error
// the returned value is nil.
func (s *Scanner) ConvertValue(value string) (any, error) {
	switch value {
	case "true":
		return true, nil
	case "false":
		return false, nil
	case "":
		return nil, errors.New("empty value")
	case "-":
		return nil, errors.New("invalid value: -")
	}

	// Validate the characters after an optional sign in a single pass.
	digits := value
	if digits[0] == '-' {
		digits = digits[1:]
	}
	hasDot := false
	for i := 0; i < len(digits); i++ {
		switch c := digits[i]; {
		case c == '.':
			if hasDot {
				return nil, errors.New("invalid number format")
			}
			hasDot = true
		case c < '0' || c > '9':
			return nil, errors.New("invalid value format: " + value)
		}
	}

	if !hasDot {
		n, err := strconv.ParseInt(value, 10, 64)
		if err != nil {
			// strconv returns a clamped value alongside range errors; do not
			// hand that to the caller.
			return nil, err
		}
		return n, nil
	}

	// A float must not end with the decimal point.
	if value[len(value)-1] == '.' {
		return nil, errors.New("invalid value format: " + value)
	}
	f, err := strconv.ParseFloat(value, 64)
	if err != nil {
		return nil, err
	}
	return f, nil
}