337 lines
7.2 KiB
Go
337 lines
7.2 KiB
Go
package fin
|
|
|
|
import (
|
|
"bufio"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"sync"
|
|
)
|
|
|
|
// Sentinel errors describing the lexical problems the scanner can detect.
var (
	// ErrUnterminatedString reports a string literal with no closing quote.
	ErrUnterminatedString = errors.New("unterminated string")

	// ErrUnterminatedEscape reports input that ends right after a backslash
	// inside a string literal.
	ErrUnterminatedEscape = errors.New("unterminated escape sequence")

	// ErrUnterminatedComment reports a block comment that is never closed.
	ErrUnterminatedComment = errors.New("unclosed block comment")

	// ErrInvalidComment reports a comment that does not start with two dashes.
	ErrInvalidComment = errors.New("invalid comment")

	// ErrNameStartWithLetter reports a name whose first byte is not a letter.
	ErrNameStartWithLetter = errors.New("name must start with letter")
)
|
|
|
|
type Scanner struct {
|
|
reader *bufio.Reader
|
|
line int
|
|
col int
|
|
buffer []byte
|
|
bufferRef *[]byte
|
|
token Token
|
|
}
|
|
|
|
var scannerPool = sync.Pool{
|
|
New: func() any {
|
|
bufferRef := GetByteSlice()
|
|
return &Scanner{
|
|
line: 1,
|
|
col: 0,
|
|
bufferRef: bufferRef,
|
|
buffer: (*bufferRef)[:0],
|
|
}
|
|
},
|
|
}
|
|
|
|
func NewScanner(r io.Reader) *Scanner {
|
|
s := scannerPool.Get().(*Scanner)
|
|
s.reader = bufio.NewReaderSize(r, 1024)
|
|
s.line = 1
|
|
s.col = 0
|
|
s.resetBuffer()
|
|
s.token = Token{Type: TokenError}
|
|
return s
|
|
}
|
|
|
|
func ReleaseScanner(s *Scanner) {
|
|
if s != nil {
|
|
s.reader = nil
|
|
s.resetBuffer()
|
|
scannerPool.Put(s)
|
|
}
|
|
}
|
|
|
|
// Helper to reset buffer consistently
|
|
func (s *Scanner) resetBuffer() {
|
|
s.buffer = (*s.bufferRef)[:0]
|
|
}
|
|
|
|
// Helper for creating error tokens
|
|
func (s *Scanner) errorToken(msg string, line, col int) (Token, error) {
|
|
err := fmt.Errorf("line %d, column %d: %s", line, col, msg)
|
|
return Token{Type: TokenError, Value: []byte(msg), Line: line, Column: col}, err
|
|
}
|
|
|
|
func (s *Scanner) ReadByte() (byte, error) {
|
|
b, err := s.reader.ReadByte()
|
|
if err == nil {
|
|
if b == '\n' {
|
|
s.line++
|
|
s.col = 0
|
|
} else {
|
|
s.col++
|
|
}
|
|
}
|
|
return b, err
|
|
}
|
|
|
|
func (s *Scanner) PeekByte() (byte, error) {
|
|
b, err := s.reader.Peek(1)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
return b[0], nil
|
|
}
|
|
|
|
func (s *Scanner) PeekBytes(n int) ([]byte, error) {
|
|
return s.reader.Peek(n)
|
|
}
|
|
|
|
func (s *Scanner) UnreadByte() error {
|
|
err := s.reader.UnreadByte()
|
|
if err == nil && s.col > 0 {
|
|
s.col--
|
|
}
|
|
return err
|
|
}
|
|
|
|
func (s *Scanner) Error(msg string) error {
|
|
return fmt.Errorf("line %d, column %d: %s", s.line, s.col, msg)
|
|
}
|
|
|
|
func (s *Scanner) SkipWhitespace() error {
|
|
for {
|
|
b, err := s.PeekByte()
|
|
if err == io.EOF {
|
|
return nil
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if b != ' ' && b != '\t' && b != '\n' && b != '\r' {
|
|
return nil
|
|
}
|
|
_, err = s.ReadByte()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) UnreadToken(token Token) {
|
|
s.token = token
|
|
}
|
|
|
|
func (s *Scanner) NextToken() (Token, error) {
|
|
if s.token.Type != TokenError {
|
|
token := s.token
|
|
s.token = Token{Type: TokenError}
|
|
return token, nil
|
|
}
|
|
|
|
if err := s.SkipWhitespace(); err != nil {
|
|
if err == io.EOF {
|
|
return Token{Type: TokenEOF, Line: s.line, Column: s.col}, nil
|
|
}
|
|
return s.errorToken(err.Error(), s.line, s.col)
|
|
}
|
|
|
|
b, err := s.PeekByte()
|
|
if err != nil {
|
|
if err == io.EOF {
|
|
return Token{Type: TokenEOF, Line: s.line, Column: s.col}, nil
|
|
}
|
|
return s.errorToken(err.Error(), s.line, s.col)
|
|
}
|
|
|
|
startLine, startColumn := s.line, s.col
|
|
|
|
switch {
|
|
case b == '{':
|
|
_, _ = s.ReadByte()
|
|
return Token{Type: TokenOpenBrace, Line: startLine, Column: startColumn}, nil
|
|
case b == '}':
|
|
_, _ = s.ReadByte()
|
|
return Token{Type: TokenCloseBrace, Line: startLine, Column: startColumn}, nil
|
|
case b == '-':
|
|
peekBytes, err := s.PeekBytes(2)
|
|
if err == nil && len(peekBytes) == 2 && peekBytes[1] == '-' {
|
|
if err := s.scanComment(); err != nil {
|
|
return s.errorToken(err.Error(), startLine, startColumn)
|
|
}
|
|
return Token{Type: TokenComment, Line: startLine, Column: startColumn}, nil
|
|
}
|
|
if err == nil && len(peekBytes) == 2 && isDigit(peekBytes[1]) {
|
|
return s.scanNumber(startLine, startColumn)
|
|
}
|
|
_, _ = s.ReadByte()
|
|
return s.errorToken("unexpected '-'", startLine, startColumn)
|
|
case b == '"':
|
|
return s.scanString(startLine, startColumn)
|
|
case isLetter(b):
|
|
return s.scanName(startLine, startColumn)
|
|
case isDigit(b):
|
|
return s.scanNumber(startLine, startColumn)
|
|
default:
|
|
_, _ = s.ReadByte()
|
|
msg := fmt.Sprintf("unexpected character: %c", b)
|
|
return s.errorToken(msg, startLine, startColumn)
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) scanComment() error {
|
|
_, err := s.ReadByte() // consume first dash
|
|
if err != nil {
|
|
return err
|
|
}
|
|
b, err := s.ReadByte() // consume second dash
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if b != '-' {
|
|
return ErrInvalidComment
|
|
}
|
|
|
|
// Check for block comment
|
|
if b1, err := s.PeekByte(); err == nil && b1 == '[' {
|
|
_, _ = s.ReadByte()
|
|
if b2, err := s.PeekByte(); err == nil && b2 == '[' {
|
|
_, _ = s.ReadByte()
|
|
for {
|
|
b, err := s.ReadByte()
|
|
if err != nil {
|
|
return ErrUnterminatedComment
|
|
}
|
|
if b == ']' {
|
|
if n, err := s.PeekByte(); err == nil && n == ']' {
|
|
_, _ = s.ReadByte()
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Line comment
|
|
for {
|
|
b, err := s.ReadByte()
|
|
if err == io.EOF {
|
|
return nil
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if b == '\n' {
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) scanString(startLine, startColumn int) (Token, error) {
|
|
s.resetBuffer()
|
|
_, err := s.ReadByte() // consume opening quote
|
|
if err != nil {
|
|
return s.errorToken(err.Error(), startLine, startColumn)
|
|
}
|
|
|
|
for {
|
|
b, err := s.ReadByte()
|
|
if err != nil {
|
|
return s.errorToken(ErrUnterminatedString.Error(), startLine, startColumn)
|
|
}
|
|
if b == '"' {
|
|
break
|
|
}
|
|
if b == '\\' {
|
|
escaped, err := s.ReadByte()
|
|
if err != nil {
|
|
return s.errorToken(ErrUnterminatedEscape.Error(), startLine, startColumn)
|
|
}
|
|
switch escaped {
|
|
case '"', '\\':
|
|
s.buffer = append(s.buffer, escaped)
|
|
case 'n':
|
|
s.buffer = append(s.buffer, '\n')
|
|
case 't':
|
|
s.buffer = append(s.buffer, '\t')
|
|
default:
|
|
s.buffer = append(s.buffer, '\\', escaped)
|
|
}
|
|
} else {
|
|
s.buffer = append(s.buffer, b)
|
|
}
|
|
}
|
|
|
|
return Token{Type: TokenString, Value: s.buffer, Line: startLine, Column: startColumn}, nil
|
|
}
|
|
|
|
func (s *Scanner) scanName(startLine, startColumn int) (Token, error) {
|
|
s.resetBuffer()
|
|
b, err := s.ReadByte()
|
|
if err != nil {
|
|
return s.errorToken(err.Error(), startLine, startColumn)
|
|
}
|
|
if !isLetter(b) {
|
|
return s.errorToken(ErrNameStartWithLetter.Error(), startLine, startColumn)
|
|
}
|
|
s.buffer = append(s.buffer, b)
|
|
|
|
for {
|
|
b, err := s.PeekByte()
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
return s.errorToken(err.Error(), startLine, startColumn)
|
|
}
|
|
if !isLetter(b) && !isDigit(b) && b != '_' {
|
|
break
|
|
}
|
|
s.buffer = append(s.buffer, b)
|
|
_, _ = s.ReadByte()
|
|
}
|
|
|
|
tokenType := TokenName
|
|
if bytesEqual(s.buffer, []byte("true")) || bytesEqual(s.buffer, []byte("false")) {
|
|
tokenType = TokenBoolean
|
|
}
|
|
|
|
return Token{Type: tokenType, Value: s.buffer, Line: startLine, Column: startColumn}, nil
|
|
}
|
|
|
|
func (s *Scanner) scanNumber(startLine, startColumn int) (Token, error) {
|
|
s.resetBuffer()
|
|
b, err := s.ReadByte()
|
|
if err != nil {
|
|
return s.errorToken(err.Error(), startLine, startColumn)
|
|
}
|
|
s.buffer = append(s.buffer, b)
|
|
|
|
hasDot := false
|
|
for {
|
|
b, err := s.PeekByte()
|
|
if err != nil {
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
return s.errorToken(err.Error(), startLine, startColumn)
|
|
}
|
|
if b == '.' && !hasDot {
|
|
hasDot = true
|
|
_, _ = s.ReadByte()
|
|
s.buffer = append(s.buffer, b)
|
|
} else if isDigit(b) {
|
|
_, _ = s.ReadByte()
|
|
s.buffer = append(s.buffer, b)
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
|
|
return Token{Type: TokenNumber, Value: s.buffer, Line: startLine, Column: startColumn}, nil
|
|
}
|