336 lines
6.3 KiB
Go
336 lines
6.3 KiB
Go
package scanner
|
|
|
|
import (
|
|
"strconv"
|
|
|
|
"git.sharkk.net/Sharkk/Mako/types"
|
|
)
|
|
|
|
// Scanner carries everything needed to turn a source string into tokens:
// the text itself, the bounds of the lexeme being scanned, and the
// line/column counters used to position tokens.
type Scanner struct {
	// source is the text being scanned; it is indexed byte by byte.
	source string

	// start is the offset in source where the current lexeme begins;
	// current is the offset of the next byte to be read.
	start, current int

	// line and column are the current line and column numbers.
	line, column int
}
|
|
|
|
// New creates a new scanner for the given source
|
|
func New(source string) *Scanner {
|
|
return &Scanner{
|
|
source: source,
|
|
line: 1,
|
|
column: 1,
|
|
}
|
|
}
|
|
|
|
// NextToken returns the next token from the source
|
|
func (s *Scanner) NextToken() types.Token {
|
|
s.skipWhitespace()
|
|
|
|
s.start = s.current
|
|
|
|
if s.isAtEnd() {
|
|
return s.makeToken(types.EOF)
|
|
}
|
|
|
|
c := s.advance()
|
|
|
|
if isAlpha(c) {
|
|
return s.identifier()
|
|
}
|
|
|
|
if isDigit(c) {
|
|
return s.number()
|
|
}
|
|
|
|
switch c {
|
|
case '(':
|
|
return s.makeToken(types.LEFT_PAREN)
|
|
case ')':
|
|
return s.makeToken(types.RIGHT_PAREN)
|
|
case ',':
|
|
return s.makeToken(types.COMMA)
|
|
case '+':
|
|
return s.makeToken(types.PLUS)
|
|
case '-':
|
|
return s.makeToken(types.MINUS)
|
|
case '*':
|
|
return s.makeToken(types.STAR)
|
|
case '/':
|
|
if s.match('/') {
|
|
// Single-line comment
|
|
for s.peek() != '\n' && !s.isAtEnd() {
|
|
s.advance()
|
|
}
|
|
// Recursive call to get the next non-comment token
|
|
return s.NextToken()
|
|
} else if s.match('*') {
|
|
// Multiline comment
|
|
for !(s.peek() == '*' && s.peekNext() == '/') && !s.isAtEnd() {
|
|
if s.peek() == '\n' {
|
|
s.line++
|
|
s.column = 0
|
|
}
|
|
s.advance()
|
|
}
|
|
|
|
if s.isAtEnd() {
|
|
return s.errorToken("Unclosed multiline comment.")
|
|
}
|
|
|
|
// Consume the closing */
|
|
s.advance() // *
|
|
s.advance() // /
|
|
|
|
// Recursive call to get the next non-comment token
|
|
return s.NextToken()
|
|
}
|
|
return s.makeToken(types.SLASH)
|
|
case '.':
|
|
if s.match('.') {
|
|
if s.match('.') {
|
|
return s.makeToken(types.ELLIPSIS)
|
|
}
|
|
// Error for '..' without the third '.'
|
|
return s.errorToken("Expected '...' (ellipsis).")
|
|
}
|
|
// Handle single '.' later (likely part of a number)
|
|
// For now, error
|
|
return s.errorToken("Unexpected '.'.")
|
|
case '=':
|
|
if s.match('=') {
|
|
return s.makeToken(types.EQUAL_EQUAL)
|
|
}
|
|
return s.makeToken(types.EQUAL)
|
|
case '!':
|
|
if s.match('=') {
|
|
return s.makeToken(types.BANG_EQUAL)
|
|
}
|
|
return s.errorToken("Unexpected character.")
|
|
case '<':
|
|
if s.match('=') {
|
|
return s.makeToken(types.LESS_EQUAL)
|
|
}
|
|
return s.makeToken(types.LESS)
|
|
case '>':
|
|
if s.match('=') {
|
|
return s.makeToken(types.GREATER_EQUAL)
|
|
}
|
|
return s.makeToken(types.GREATER)
|
|
case '"':
|
|
return s.string()
|
|
}
|
|
|
|
return s.errorToken("Unexpected character.")
|
|
}
|
|
|
|
// ScanTokens scans all tokens in the source and returns them
|
|
func (s *Scanner) ScanTokens() []types.Token {
|
|
var tokens []types.Token
|
|
|
|
for {
|
|
token := s.NextToken()
|
|
tokens = append(tokens, token)
|
|
|
|
if token.Type == types.EOF {
|
|
break
|
|
}
|
|
}
|
|
|
|
return tokens
|
|
}
|
|
|
|
// Helper methods for scanning
|
|
func (s *Scanner) isAtEnd() bool {
|
|
return s.current >= len(s.source)
|
|
}
|
|
|
|
func (s *Scanner) advance() byte {
|
|
c := s.source[s.current]
|
|
s.current++
|
|
s.column++
|
|
return c
|
|
}
|
|
|
|
func (s *Scanner) peek() byte {
|
|
if s.isAtEnd() {
|
|
return 0
|
|
}
|
|
return s.source[s.current]
|
|
}
|
|
|
|
func (s *Scanner) peekNext() byte {
|
|
if s.current+1 >= len(s.source) {
|
|
return 0
|
|
}
|
|
return s.source[s.current+1]
|
|
}
|
|
|
|
func (s *Scanner) match(expected byte) bool {
|
|
if s.isAtEnd() || s.source[s.current] != expected {
|
|
return false
|
|
}
|
|
|
|
s.current++
|
|
s.column++
|
|
return true
|
|
}
|
|
|
|
func (s *Scanner) makeToken(tokenType types.TokenType) types.Token {
|
|
return s.makeTokenWithLiteral(tokenType, nil)
|
|
}
|
|
|
|
func (s *Scanner) makeTokenWithLiteral(tokenType types.TokenType, literal any) types.Token {
|
|
lexeme := s.source[s.start:s.current]
|
|
return types.Token{
|
|
Type: tokenType,
|
|
Lexeme: lexeme,
|
|
Literal: literal,
|
|
Line: s.line,
|
|
Column: s.column - len(lexeme),
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) errorToken(message string) types.Token {
|
|
return types.Token{
|
|
Type: types.ERROR,
|
|
Lexeme: message,
|
|
Line: s.line,
|
|
Column: s.column,
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) skipWhitespace() {
|
|
for {
|
|
c := s.peek()
|
|
switch c {
|
|
case ' ', '\r', '\t':
|
|
s.advance()
|
|
case '\n':
|
|
s.line++
|
|
s.column = 0 // Reset column for new line
|
|
s.advance()
|
|
default:
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) string() types.Token {
|
|
// Scan until closing quote
|
|
for s.peek() != '"' && !s.isAtEnd() {
|
|
if s.peek() == '\n' {
|
|
s.line++
|
|
s.column = 0
|
|
}
|
|
s.advance()
|
|
}
|
|
|
|
if s.isAtEnd() {
|
|
return s.errorToken("Unterminated string.")
|
|
}
|
|
|
|
// Consume the closing "
|
|
s.advance()
|
|
|
|
// Get the string value (without the quotes)
|
|
value := s.source[s.start+1 : s.current-1]
|
|
return s.makeTokenWithLiteral(types.STRING, value)
|
|
}
|
|
|
|
func (s *Scanner) number() types.Token {
|
|
// Scan integer part
|
|
for isDigit(s.peek()) {
|
|
s.advance()
|
|
}
|
|
|
|
// Look for a decimal part
|
|
if s.peek() == '.' && isDigit(s.peekNext()) {
|
|
// Consume the .
|
|
s.advance()
|
|
|
|
// Consume decimal digits
|
|
for isDigit(s.peek()) {
|
|
s.advance()
|
|
}
|
|
}
|
|
|
|
// Parse the number
|
|
value, err := strconv.ParseFloat(s.source[s.start:s.current], 64)
|
|
if err != nil {
|
|
return s.errorToken("Invalid number.")
|
|
}
|
|
|
|
return s.makeTokenWithLiteral(types.NUMBER, value)
|
|
}
|
|
|
|
func (s *Scanner) identifier() types.Token {
|
|
for isAlphaNumeric(s.peek()) {
|
|
s.advance()
|
|
}
|
|
|
|
// Check if the identifier is actually a keyword
|
|
text := s.source[s.start:s.current]
|
|
tokenType := s.keywordType(text)
|
|
|
|
var literal any
|
|
if tokenType == types.TRUE {
|
|
literal = true
|
|
} else if tokenType == types.FALSE {
|
|
literal = false
|
|
} else if tokenType == types.NIL {
|
|
literal = nil
|
|
}
|
|
|
|
return s.makeTokenWithLiteral(tokenType, literal)
|
|
}
|
|
|
|
func (s *Scanner) keywordType(text string) types.TokenType {
|
|
switch text {
|
|
case "and":
|
|
return types.AND
|
|
case "or":
|
|
return types.OR
|
|
case "if":
|
|
return types.IF
|
|
case "elseif":
|
|
return types.ELSEIF
|
|
case "else":
|
|
return types.ELSE
|
|
case "then":
|
|
return types.THEN
|
|
case "end":
|
|
return types.END
|
|
case "fn":
|
|
return types.FN
|
|
case "return":
|
|
return types.RETURN
|
|
case "echo":
|
|
return types.ECHO
|
|
case "true":
|
|
return types.TRUE
|
|
case "false":
|
|
return types.FALSE
|
|
case "nil":
|
|
return types.NIL
|
|
default:
|
|
return types.IDENTIFIER
|
|
}
|
|
}
|
|
|
|
// Helper functions
|
|
// isDigit reports whether c is an ASCII decimal digit ('0' through '9').
func isDigit(c byte) bool {
	return '0' <= c && c <= '9'
}
|
|
|
|
// isAlpha reports whether c is an ASCII letter or an underscore.
func isAlpha(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z',
		'A' <= c && c <= 'Z',
		c == '_':
		return true
	default:
		return false
	}
}
|
|
|
|
func isAlphaNumeric(c byte) bool {
|
|
return isAlpha(c) || isDigit(c)
|
|
}
|