// Package scanner implements the lexical scanner (tokenizer) for the Mako language.
package scanner
import (
"strconv"
"git.sharkk.net/Sharkk/Mako/types"
)
// Scanner holds the state needed for scanning
// Scanner holds the state needed for scanning.
// It walks the source byte-by-byte (ASCII-oriented; multi-byte UTF-8
// sequences are not treated specially) and tracks position for error
// reporting.
type Scanner struct {
	source  string // full input text being scanned
	start   int    // start of the current lexeme (byte index into source)
	current int    // current position in the source (byte index)
	line    int    // current line number (1-based; see New)
	column  int    // current column number (1-based; reset to 0 on newline, then advance() brings it to 1)
}
// New creates a new scanner for the given source
// New creates a scanner positioned at the beginning of source,
// starting at line 1, column 1.
func New(source string) *Scanner {
	s := &Scanner{source: source}
	s.line, s.column = 1, 1
	return s
}
// NextToken returns the next token from the source
// NextToken returns the next token from the source.
//
// Whitespace is skipped first, and comments (both `//` to end-of-line
// and `/* ... */` multiline) produce no token: scanning simply resumes
// after them. The original implementation recursed into NextToken after
// each comment, so N consecutive comments cost N stack frames; the outer
// loop below replaces that recursion with iteration while keeping the
// emitted tokens and error messages identical.
func (s *Scanner) NextToken() types.Token {
	for {
		s.skipWhitespace()
		s.start = s.current

		if s.isAtEnd() {
			return s.makeToken(types.EOF)
		}

		c := s.advance()

		// Identifiers/keywords start with a letter or '_'.
		if isAlpha(c) {
			return s.identifier()
		}
		// Numbers start with a digit (no leading '.' form).
		if isDigit(c) {
			return s.number()
		}

		switch c {
		case '(':
			return s.makeToken(types.LEFT_PAREN)
		case ')':
			return s.makeToken(types.RIGHT_PAREN)
		case ',':
			return s.makeToken(types.COMMA)
		case '+':
			return s.makeToken(types.PLUS)
		case '-':
			return s.makeToken(types.MINUS)
		case '*':
			return s.makeToken(types.STAR)
		case '/':
			if s.match('/') {
				// Single-line comment: discard through end of line,
				// then scan again from the top of the loop.
				for s.peek() != '\n' && !s.isAtEnd() {
					s.advance()
				}
				continue
			}
			if s.match('*') {
				// Multiline comment: discard until the closing */,
				// keeping line/column bookkeeping in sync.
				for !(s.peek() == '*' && s.peekNext() == '/') && !s.isAtEnd() {
					if s.peek() == '\n' {
						s.line++
						s.column = 0
					}
					s.advance()
				}
				if s.isAtEnd() {
					return s.errorToken("Unclosed multiline comment.")
				}
				// Consume the closing */
				s.advance() // *
				s.advance() // /
				continue
			}
			return s.makeToken(types.SLASH)
		case '.':
			if s.match('.') {
				if s.match('.') {
					return s.makeToken(types.ELLIPSIS)
				}
				// Error for '..' without the third '.'
				return s.errorToken("Expected '...' (ellipsis).")
			}
			// A lone '.' is not a valid token (numbers require a leading digit).
			return s.errorToken("Unexpected '.'.")
		case '=':
			if s.match('=') {
				return s.makeToken(types.EQUAL_EQUAL)
			}
			return s.makeToken(types.EQUAL)
		case '!':
			if s.match('=') {
				return s.makeToken(types.BANG_EQUAL)
			}
			// Bare '!' is not an operator in this language.
			return s.errorToken("Unexpected character.")
		case '<':
			if s.match('=') {
				return s.makeToken(types.LESS_EQUAL)
			}
			return s.makeToken(types.LESS)
		case '>':
			if s.match('=') {
				return s.makeToken(types.GREATER_EQUAL)
			}
			return s.makeToken(types.GREATER)
		case '"':
			return s.string()
		}

		return s.errorToken("Unexpected character.")
	}
}
// ScanTokens scans all tokens in the source and returns them
// ScanTokens scans the entire source and returns every token,
// including the terminating EOF token.
func (s *Scanner) ScanTokens() []types.Token {
	var out []types.Token
	for tok := s.NextToken(); ; tok = s.NextToken() {
		out = append(out, tok)
		if tok.Type == types.EOF {
			return out
		}
	}
}
// Helper methods for scanning
// isAtEnd reports whether the cursor has consumed all of the source.
func (s *Scanner) isAtEnd() bool {
	return len(s.source) <= s.current
}
// advance consumes and returns the current byte, moving the cursor and
// column forward. Callers must guard with isAtEnd/peek: indexing past
// the end of source panics.
func (s *Scanner) advance() byte {
	b := s.source[s.current]
	s.current, s.column = s.current+1, s.column+1
	return b
}
// peek returns the current byte without consuming it, or 0 at EOF.
func (s *Scanner) peek() byte {
	if s.current < len(s.source) {
		return s.source[s.current]
	}
	return 0
}
// peekNext returns the byte one past the cursor without consuming
// anything, or 0 when fewer than two bytes remain.
func (s *Scanner) peekNext() byte {
	if next := s.current + 1; next < len(s.source) {
		return s.source[next]
	}
	return 0
}
// match conditionally consumes the current byte: it advances and
// returns true only when that byte equals expected; otherwise the
// cursor is left untouched and it returns false.
func (s *Scanner) match(expected byte) bool {
	if s.current >= len(s.source) {
		return false
	}
	if s.source[s.current] != expected {
		return false
	}
	s.current++
	s.column++
	return true
}
// makeToken builds a token of the given type for the current lexeme
// (source[start:current]) with no literal value.
func (s *Scanner) makeToken(tokenType types.TokenType) types.Token {
	return s.makeTokenWithLiteral(tokenType, nil)
}
// makeTokenWithLiteral builds a token for the current lexeme
// (source[start:current]) carrying the given literal value. The start
// column is back-computed by subtracting the lexeme length from the
// current column.
//
// NOTE(review): that back-computation (and the recorded line) is only
// correct for lexemes that fit on one line; a multiline string literal
// would report its end line and a possibly negative column. Confirm
// whether multiline tokens need a recorded start position.
func (s *Scanner) makeTokenWithLiteral(tokenType types.TokenType, literal any) types.Token {
	text := s.source[s.start:s.current]
	tok := types.Token{
		Type:    tokenType,
		Lexeme:  text,
		Literal: literal,
		Line:    s.line,
		Column:  s.column - len(text),
	}
	return tok
}
// errorToken builds an ERROR token whose lexeme carries the error
// message, stamped with the scanner's current position.
func (s *Scanner) errorToken(message string) types.Token {
	tok := types.Token{Type: types.ERROR, Line: s.line, Column: s.column}
	tok.Lexeme = message
	return tok
}
// skipWhitespace consumes spaces, carriage returns, tabs, and newlines,
// updating line/column bookkeeping for each newline. It stops at the
// first non-whitespace byte (or EOF).
func (s *Scanner) skipWhitespace() {
	for !s.isAtEnd() {
		switch s.peek() {
		case '\n':
			s.line++
			s.column = 0 // advance() below brings this to column 1
			s.advance()
		case ' ', '\r', '\t':
			s.advance()
		default:
			return
		}
	}
}
// string scans a double-quoted string literal; the opening quote has
// already been consumed. Newlines are permitted inside strings and bump
// the line counter. No escape sequences are recognized.
func (s *Scanner) string() types.Token {
	for !s.isAtEnd() && s.peek() != '"' {
		if s.peek() == '\n' {
			s.line++
			s.column = 0
		}
		s.advance()
	}

	if s.isAtEnd() {
		return s.errorToken("Unterminated string.")
	}

	s.advance() // closing quote

	// The literal value excludes the surrounding quotes.
	return s.makeTokenWithLiteral(types.STRING, s.source[s.start+1:s.current-1])
}
// number scans a numeric literal; the first digit has already been
// consumed. A fractional part is accepted only when a digit follows the
// '.'. All numbers are represented as float64.
func (s *Scanner) number() types.Token {
	s.consumeDigits()

	// Fractional part: '.' must be followed by at least one digit,
	// otherwise the '.' is left for the caller to handle.
	if s.peek() == '.' && isDigit(s.peekNext()) {
		s.advance() // consume the '.'
		s.consumeDigits()
	}

	value, err := strconv.ParseFloat(s.source[s.start:s.current], 64)
	if err != nil {
		return s.errorToken("Invalid number.")
	}
	return s.makeTokenWithLiteral(types.NUMBER, value)
}

// consumeDigits advances past a run of ASCII digits.
func (s *Scanner) consumeDigits() {
	for isDigit(s.peek()) {
		s.advance()
	}
}
// identifier scans an identifier or keyword; the first character has
// already been consumed. Boolean keywords carry their value as the
// token literal; every other token type (including NIL) keeps a nil
// literal.
func (s *Scanner) identifier() types.Token {
	for isAlphaNumeric(s.peek()) {
		s.advance()
	}

	word := s.source[s.start:s.current]
	kind := s.keywordType(word)

	var lit any // stays nil for NIL and all non-keyword tokens
	switch kind {
	case types.TRUE:
		lit = true
	case types.FALSE:
		lit = false
	}
	return s.makeTokenWithLiteral(kind, lit)
}
func (s *Scanner) keywordType(text string) types.TokenType {
switch text {
case "and":
return types.AND
case "or":
return types.OR
case "if":
return types.IF
case "elseif":
return types.ELSEIF
case "else":
return types.ELSE
case "then":
return types.THEN
case "end":
return types.END
case "fn":
return types.FN
case "return":
return types.RETURN
case "echo":
return types.ECHO
case "true":
return types.TRUE
case "false":
return types.FALSE
case "nil":
return types.NIL
default:
return types.IDENTIFIER
}
}
// Helper functions
// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	return '0' <= c && c <= '9'
}
// isAlpha reports whether c can start an identifier: an ASCII letter or
// an underscore.
func isAlpha(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	default:
		return c == '_'
	}
}
// isAlphaNumeric reports whether c may appear inside an identifier: an
// ASCII letter, an ASCII digit, or an underscore. The ranges are spelled
// out inline so the predicate is self-contained.
func isAlphaNumeric(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	default:
		return c == '_'
	}
}