// Package scanner implements the lexical scanner (tokenizer) for the Mako language.
package scanner
import (
"strconv"
"git.sharkk.net/Sharkk/Mako/types"
)
// Scanner holds the state needed for scanning
// Scanner holds the state needed for scanning.
// It walks the source byte-by-byte (ASCII-oriented; multi-byte UTF-8
// sequences are not treated specially) and tracks position for error
// reporting.
type Scanner struct {
	source  string // full input text being scanned
	start   int    // start of the current lexeme (byte index into source)
	current int    // current position in the source (byte index)
	line    int    // current line number (1-based; see New)
	column  int    // current column number (1-based; reset to 0 on newline, then advance() brings it to 1)
}
// New creates a new scanner for the given source
// New creates a scanner positioned at the beginning of source,
// starting at line 1, column 1.
func New(source string) *Scanner {
	s := &Scanner{source: source}
	s.line, s.column = 1, 1
	return s
}
// NextToken returns the next token from the source
// NextToken returns the next token from the source.
//
// Whitespace is skipped first, and comments (both `//` to end-of-line
// and `/* ... */` multiline) produce no token: scanning simply resumes
// after them. The original implementation recursed into NextToken after
// each comment, so N consecutive comments cost N stack frames; the outer
// loop below replaces that recursion with iteration while keeping the
// emitted tokens and error messages identical.
func (s *Scanner) NextToken() types.Token {
	for {
		s.skipWhitespace()
		s.start = s.current

		if s.isAtEnd() {
			return s.makeToken(types.EOF)
		}

		c := s.advance()

		// Identifiers/keywords start with a letter or '_'.
		if isAlpha(c) {
			return s.identifier()
		}
		// Numbers start with a digit (no leading '.' form).
		if isDigit(c) {
			return s.number()
		}

		switch c {
		case '(':
			return s.makeToken(types.LEFT_PAREN)
		case ')':
			return s.makeToken(types.RIGHT_PAREN)
		case ',':
			return s.makeToken(types.COMMA)
		case '+':
			return s.makeToken(types.PLUS)
		case '-':
			return s.makeToken(types.MINUS)
		case '*':
			return s.makeToken(types.STAR)
		case '/':
			if s.match('/') {
				// Single-line comment: discard through end of line,
				// then scan again from the top of the loop.
				for s.peek() != '\n' && !s.isAtEnd() {
					s.advance()
				}
				continue
			}
			if s.match('*') {
				// Multiline comment: discard until the closing */,
				// keeping line/column bookkeeping in sync.
				for !(s.peek() == '*' && s.peekNext() == '/') && !s.isAtEnd() {
					if s.peek() == '\n' {
						s.line++
						s.column = 0
					}
					s.advance()
				}
				if s.isAtEnd() {
					return s.errorToken("Unclosed multiline comment.")
				}
				// Consume the closing */
				s.advance() // *
				s.advance() // /
				continue
			}
			return s.makeToken(types.SLASH)
		case '.':
			if s.match('.') {
				if s.match('.') {
					return s.makeToken(types.ELLIPSIS)
				}
				// Error for '..' without the third '.'
				return s.errorToken("Expected '...' (ellipsis).")
			}
			// A lone '.' is not a valid token (numbers require a leading digit).
			return s.errorToken("Unexpected '.'.")
		case '=':
			if s.match('=') {
				return s.makeToken(types.EQUAL_EQUAL)
			}
			return s.makeToken(types.EQUAL)
		case '!':
			if s.match('=') {
				return s.makeToken(types.BANG_EQUAL)
			}
			// Bare '!' is not an operator in this language.
			return s.errorToken("Unexpected character.")
		case '<':
			if s.match('=') {
				return s.makeToken(types.LESS_EQUAL)
			}
			return s.makeToken(types.LESS)
		case '>':
			if s.match('=') {
				return s.makeToken(types.GREATER_EQUAL)
			}
			return s.makeToken(types.GREATER)
		case '"':
			return s.string()
		}

		return s.errorToken("Unexpected character.")
	}
}
// ScanTokens scans all tokens in the source and returns them
// ScanTokens scans the entire source and returns every token,
// including the terminating EOF token.
func (s *Scanner) ScanTokens() []types.Token {
	var out []types.Token
	for tok := s.NextToken(); ; tok = s.NextToken() {
		out = append(out, tok)
		if tok.Type == types.EOF {
			return out
		}
	}
}
// Helper methods for scanning
// isAtEnd reports whether the cursor has consumed all of the source.
func (s *Scanner) isAtEnd() bool {
	return len(s.source) <= s.current
}
// advance consumes and returns the current byte, moving the cursor and
// column forward. Callers must guard with isAtEnd/peek: indexing past
// the end of source panics.
func (s *Scanner) advance() byte {
	b := s.source[s.current]
	s.current, s.column = s.current+1, s.column+1
	return b
}
// peek returns the current byte without consuming it, or 0 at EOF.
func (s *Scanner) peek() byte {
	if s.current < len(s.source) {
		return s.source[s.current]
	}
	return 0
}
// peekNext returns the byte one past the cursor without consuming
// anything, or 0 when fewer than two bytes remain.
func (s *Scanner) peekNext() byte {
	if next := s.current + 1; next < len(s.source) {
		return s.source[next]
	}
	return 0
}
// match conditionally consumes the current byte: it advances and
// returns true only when that byte equals expected; otherwise the
// cursor is left untouched and it returns false.
func (s *Scanner) match(expected byte) bool {
	if s.current >= len(s.source) {
		return false
	}
	if s.source[s.current] != expected {
		return false
	}
	s.current++
	s.column++
	return true
}
// makeToken builds a token of the given type for the current lexeme
// (source[start:current]) with no literal value.
func (s *Scanner) makeToken(tokenType types.TokenType) types.Token {
	return s.makeTokenWithLiteral(tokenType, nil)
}
// makeTokenWithLiteral builds a token for the current lexeme
// (source[start:current]) carrying the given literal value. The start
// column is back-computed by subtracting the lexeme length from the
// current column.
//
// NOTE(review): that back-computation (and the recorded line) is only
// correct for lexemes that fit on one line; a multiline string literal
// would report its end line and a possibly negative column. Confirm
// whether multiline tokens need a recorded start position.
func (s *Scanner) makeTokenWithLiteral(tokenType types.TokenType, literal any) types.Token {
	text := s.source[s.start:s.current]
	tok := types.Token{
		Type:    tokenType,
		Lexeme:  text,
		Literal: literal,
		Line:    s.line,
		Column:  s.column - len(text),
	}
	return tok
}
// errorToken builds an ERROR token whose lexeme carries the error
// message, stamped with the scanner's current position.
func (s *Scanner) errorToken(message string) types.Token {
	tok := types.Token{Type: types.ERROR, Line: s.line, Column: s.column}
	tok.Lexeme = message
	return tok
}
// skipWhitespace consumes spaces, carriage returns, tabs, and newlines,
// updating line/column bookkeeping for each newline. It stops at the
// first non-whitespace byte (or EOF).
func (s *Scanner) skipWhitespace() {
	for !s.isAtEnd() {
		switch s.peek() {
		case '\n':
			s.line++
			s.column = 0 // advance() below brings this to column 1
			s.advance()
		case ' ', '\r', '\t':
			s.advance()
		default:
			return
		}
	}
}
// string scans a double-quoted string literal; the opening quote has
// already been consumed. Newlines are permitted inside strings and bump
// the line counter. No escape sequences are recognized.
func (s *Scanner) string() types.Token {
	for !s.isAtEnd() && s.peek() != '"' {
		if s.peek() == '\n' {
			s.line++
			s.column = 0
		}
		s.advance()
	}

	if s.isAtEnd() {
		return s.errorToken("Unterminated string.")
	}

	s.advance() // closing quote

	// The literal value excludes the surrounding quotes.
	return s.makeTokenWithLiteral(types.STRING, s.source[s.start+1:s.current-1])
}
// number scans a numeric literal; the first digit has already been
// consumed. A fractional part is accepted only when a digit follows the
// '.'. All numbers are represented as float64.
func (s *Scanner) number() types.Token {
	s.consumeDigits()

	// Fractional part: '.' must be followed by at least one digit,
	// otherwise the '.' is left for the caller to handle.
	if s.peek() == '.' && isDigit(s.peekNext()) {
		s.advance() // consume the '.'
		s.consumeDigits()
	}

	value, err := strconv.ParseFloat(s.source[s.start:s.current], 64)
	if err != nil {
		return s.errorToken("Invalid number.")
	}
	return s.makeTokenWithLiteral(types.NUMBER, value)
}

// consumeDigits advances past a run of ASCII digits.
func (s *Scanner) consumeDigits() {
	for isDigit(s.peek()) {
		s.advance()
	}
}
// identifier scans an identifier or keyword; the first character has
// already been consumed. Boolean keywords carry their value as the
// token literal; every other token type (including NIL) keeps a nil
// literal.
func (s *Scanner) identifier() types.Token {
	for isAlphaNumeric(s.peek()) {
		s.advance()
	}

	word := s.source[s.start:s.current]
	kind := s.keywordType(word)

	var lit any // stays nil for NIL and all non-keyword tokens
	switch kind {
	case types.TRUE:
		lit = true
	case types.FALSE:
		lit = false
	}
	return s.makeTokenWithLiteral(kind, lit)
}
func (s *Scanner) keywordType(text string) types.TokenType {
switch text {
case "and":
return types.AND
case "or":
return types.OR
case "if":
return types.IF
case "elseif":
return types.ELSEIF
case "else":
return types.ELSE
case "then":
return types.THEN
case "end":
return types.END
case "fn":
return types.FN
case "return":
return types.RETURN
case "echo":
return types.ECHO
case "true":
return types.TRUE
case "false":
return types.FALSE
case "nil":
return types.NIL
default:
return types.IDENTIFIER
}
}
// Helper functions
// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	return '0' <= c && c <= '9'
}
// isAlpha reports whether c can start an identifier: an ASCII letter or
// an underscore.
func isAlpha(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	default:
		return c == '_'
	}
}
// isAlphaNumeric reports whether c may appear inside an identifier: an
// ASCII letter, an ASCII digit, or an underscore. The ranges are spelled
// out inline so the predicate is self-contained.
func isAlphaNumeric(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	default:
		return c == '_'
	}
}