package fin import ( "bufio" "errors" "fmt" "io" "sync" ) var ( ErrUnterminatedString = errors.New("unterminated string") ErrUnterminatedEscape = errors.New("unterminated escape sequence") ErrUnterminatedComment = errors.New("unclosed block comment") ErrInvalidComment = errors.New("invalid comment") ErrNameStartWithLetter = errors.New("name must start with letter") ) type Scanner struct { reader *bufio.Reader line int col int buffer []byte bufferRef *[]byte token Token } var scannerPool = sync.Pool{ New: func() any { bufferRef := GetByteSlice() return &Scanner{ line: 1, col: 0, bufferRef: bufferRef, buffer: (*bufferRef)[:0], } }, } func NewScanner(r io.Reader) *Scanner { s := scannerPool.Get().(*Scanner) s.reader = bufio.NewReaderSize(r, 1024) s.line = 1 s.col = 0 s.resetBuffer() s.token = Token{Type: TokenError} return s } func ReleaseScanner(s *Scanner) { if s != nil { s.reader = nil s.resetBuffer() scannerPool.Put(s) } } // Helper to reset buffer consistently func (s *Scanner) resetBuffer() { s.buffer = (*s.bufferRef)[:0] } // Helper for creating error tokens func (s *Scanner) errorToken(msg string, line, col int) (Token, error) { err := fmt.Errorf("line %d, column %d: %s", line, col, msg) return Token{Type: TokenError, Value: []byte(msg), Line: line, Column: col}, err } func (s *Scanner) ReadByte() (byte, error) { b, err := s.reader.ReadByte() if err == nil { if b == '\n' { s.line++ s.col = 0 } else { s.col++ } } return b, err } func (s *Scanner) PeekByte() (byte, error) { b, err := s.reader.Peek(1) if err != nil { return 0, err } return b[0], nil } func (s *Scanner) PeekBytes(n int) ([]byte, error) { return s.reader.Peek(n) } func (s *Scanner) UnreadByte() error { err := s.reader.UnreadByte() if err == nil && s.col > 0 { s.col-- } return err } func (s *Scanner) Error(msg string) error { return fmt.Errorf("line %d, column %d: %s", s.line, s.col, msg) } func (s *Scanner) SkipWhitespace() error { for { b, err := s.PeekByte() if err == io.EOF { return nil } if err != nil { return err } if b != ' ' && b != '\t' && b != '\n' && b != '\r' { return nil } _, err = s.ReadByte() if err != nil { return err } } } func (s *Scanner) UnreadToken(token Token) { s.token = token } func (s *Scanner) NextToken() (Token, error) { if s.token.Type != TokenError { token := s.token s.token = Token{Type: TokenError} return token, nil } if err := s.SkipWhitespace(); err != nil { if err == io.EOF { return Token{Type: TokenEOF, Line: s.line, Column: s.col}, nil } return s.errorToken(err.Error(), s.line, s.col) } b, err := s.PeekByte() if err != nil { if err == io.EOF { return Token{Type: TokenEOF, Line: s.line, Column: s.col}, nil } return s.errorToken(err.Error(), s.line, s.col) } startLine, startColumn := s.line, s.col switch { case b == '{': _, _ = s.ReadByte() return Token{Type: TokenOpenBrace, Line: startLine, Column: startColumn}, nil case b == '}': _, _ = s.ReadByte() return Token{Type: TokenCloseBrace, Line: startLine, Column: startColumn}, nil case b == '-': peekBytes, err := s.PeekBytes(2) if err == nil && len(peekBytes) == 2 && peekBytes[1] == '-' { if err := s.scanComment(); err != nil { return s.errorToken(err.Error(), startLine, startColumn) } return Token{Type: TokenComment, Line: startLine, Column: startColumn}, nil } if err == nil && len(peekBytes) == 2 && isDigit(peekBytes[1]) { return s.scanNumber(startLine, startColumn) } _, _ = s.ReadByte() return s.errorToken("unexpected '-'", startLine, startColumn) case b == '"': return s.scanString(startLine, startColumn) case isLetter(b): return s.scanName(startLine, startColumn) case isDigit(b): return s.scanNumber(startLine, startColumn) default: _, _ = s.ReadByte() msg := fmt.Sprintf("unexpected character: %c", b) return s.errorToken(msg, startLine, startColumn) } } func (s *Scanner) scanComment() error { _, err := s.ReadByte() // consume first dash if err != nil { return err } b, err := s.ReadByte() // consume second dash if err != nil { return err } if b != '-' { return ErrInvalidComment } // Check for block comment if b1, err := s.PeekByte(); err == nil && b1 == '[' { _, _ = s.ReadByte() if b2, err := s.PeekByte(); err == nil && b2 == '[' { _, _ = s.ReadByte() for { b, err := s.ReadByte() if err != nil { return ErrUnterminatedComment } if b == ']' { if n, err := s.PeekByte(); err == nil && n == ']' { _, _ = s.ReadByte() return nil } } } } } // Line comment for { b, err := s.ReadByte() if err == io.EOF { return nil } if err != nil { return err } if b == '\n' { return nil } } } func (s *Scanner) scanString(startLine, startColumn int) (Token, error) { s.resetBuffer() _, err := s.ReadByte() // consume opening quote if err != nil { return s.errorToken(err.Error(), startLine, startColumn) } for { b, err := s.ReadByte() if err != nil { return s.errorToken(ErrUnterminatedString.Error(), startLine, startColumn) } if b == '"' { break } if b == '\\' { escaped, err := s.ReadByte() if err != nil { return s.errorToken(ErrUnterminatedEscape.Error(), startLine, startColumn) } switch escaped { case '"', '\\': s.buffer = append(s.buffer, escaped) case 'n': s.buffer = append(s.buffer, '\n') case 't': s.buffer = append(s.buffer, '\t') default: s.buffer = append(s.buffer, '\\', escaped) } } else { s.buffer = append(s.buffer, b) } } return Token{Type: TokenString, Value: s.buffer, Line: startLine, Column: startColumn}, nil } func (s *Scanner) scanName(startLine, startColumn int) (Token, error) { s.resetBuffer() b, err := s.ReadByte() if err != nil { return s.errorToken(err.Error(), startLine, startColumn) } if !isLetter(b) { return s.errorToken(ErrNameStartWithLetter.Error(), startLine, startColumn) } s.buffer = append(s.buffer, b) for { b, err := s.PeekByte() if err == io.EOF { break } if err != nil { return s.errorToken(err.Error(), startLine, startColumn) } if !isLetter(b) && !isDigit(b) && b != '_' { break } s.buffer = append(s.buffer, b) _, _ = s.ReadByte() } tokenType := TokenName if bytesEqual(s.buffer, []byte("true")) || bytesEqual(s.buffer, []byte("false")) { tokenType = TokenBoolean } return Token{Type: tokenType, Value: s.buffer, Line: startLine, Column: startColumn}, nil } func (s *Scanner) scanNumber(startLine, startColumn int) (Token, error) { s.resetBuffer() b, err := s.ReadByte() if err != nil { return s.errorToken(err.Error(), startLine, startColumn) } s.buffer = append(s.buffer, b) hasDot := false for { b, err := s.PeekByte() if err != nil { if err == io.EOF { break } return s.errorToken(err.Error(), startLine, startColumn) } if b == '.' && !hasDot { hasDot = true _, _ = s.ReadByte() s.buffer = append(s.buffer, b) } else if isDigit(b) { _, _ = s.ReadByte() s.buffer = append(s.buffer, b) } else { break } } return Token{Type: TokenNumber, Value: s.buffer, Line: startLine, Column: startColumn}, nil }