scanner 1

This commit is contained in:
Sky Johnson 2025-05-07 08:38:33 -05:00
parent d4aa7cc282
commit 9716e88dcb
4 changed files with 553 additions and 2 deletions

2
go.mod
View File

@ -1,3 +1,5 @@
module git.sharkk.net/Sharkk/Mako
go 1.24.1
require git.sharkk.net/Go/Assert v1.1.0

4
go.sum
View File

@ -1,2 +1,2 @@
git.sharkk.net/Go/Assert v0.0.0-20250426205601-1b0e5ea6e7ac h1:B6iLK3nv2ubDfk5Ve9Z2sRPqpTgPWgsm7PyaWlwr3NY=
git.sharkk.net/Go/Assert v0.0.0-20250426205601-1b0e5ea6e7ac/go.mod h1:7AMVm0RCtLlQfWsnKs6h/IdSfzj52/o0nR03rCW68gM=
git.sharkk.net/Go/Assert v1.1.0 h1:1Nbu8C9vmv3gXaLR4S+NBXfQ01gnh3IHHD7PQRIVIe8=
git.sharkk.net/Go/Assert v1.1.0/go.mod h1:7AMVm0RCtLlQfWsnKs6h/IdSfzj52/o0nR03rCW68gM=

315
scanner/scanner.go Normal file
View File

@ -0,0 +1,315 @@
package scanner
import (
"strconv"
"git.sharkk.net/Sharkk/Mako/types"
)
// Scanner holds the state needed for scanning a single source string.
// Positions are byte offsets; line and column are 1-based and updated
// as the scanner advances.
type Scanner struct {
	source  string
	start   int // start of the current lexeme (byte offset)
	current int // current position in the source (byte offset)
	line    int // current line number (1-based)
	column  int // current column number (1-based)
}
// New constructs a Scanner positioned at the start of source, with
// the line and column counters beginning at 1.
func New(source string) *Scanner {
	s := &Scanner{source: source}
	s.line, s.column = 1, 1
	return s
}
// NextToken scans and returns the next token from the source. It
// skips whitespace and // line comments, returns an EOF token once
// the source is exhausted, and an ERROR token on malformed input.
func (s *Scanner) NextToken() types.Token {
	// Loop (rather than recursing after a comment, as before) so
	// that a long run of comment lines cannot grow the call stack —
	// Go does not perform tail-call elimination.
	for {
		s.skipWhitespace()
		s.start = s.current

		if s.isAtEnd() {
			return s.makeToken(types.EOF)
		}

		c := s.advance()

		if isAlpha(c) {
			return s.identifier()
		}
		if isDigit(c) {
			return s.number()
		}

		switch c {
		case '(':
			return s.makeToken(types.LEFT_PAREN)
		case ')':
			return s.makeToken(types.RIGHT_PAREN)
		case ',':
			return s.makeToken(types.COMMA)
		case '+':
			return s.makeToken(types.PLUS)
		case '-':
			return s.makeToken(types.MINUS)
		case '*':
			return s.makeToken(types.STAR)
		case '/':
			if s.match('/') {
				// Comment goes until end of line; discard it and
				// rescan from the top of the loop.
				for s.peek() != '\n' && !s.isAtEnd() {
					s.advance()
				}
				continue
			}
			return s.makeToken(types.SLASH)
		case '.':
			if s.match('.') {
				if s.match('.') {
					return s.makeToken(types.ELLIPSIS)
				}
				// '..' without the third '.' is an error.
				return s.errorToken("Expected '...' (ellipsis).")
			}
			// A lone '.' is not valid yet (leading-dot number
			// forms are not supported).
			return s.errorToken("Unexpected '.'.")
		case '=':
			if s.match('=') {
				return s.makeToken(types.EQUAL_EQUAL)
			}
			return s.makeToken(types.EQUAL)
		case '!':
			if s.match('=') {
				return s.makeToken(types.BANG_EQUAL)
			}
			// '!' is only valid as part of '!='.
			return s.errorToken("Unexpected character.")
		case '<':
			if s.match('=') {
				return s.makeToken(types.LESS_EQUAL)
			}
			return s.makeToken(types.LESS)
		case '>':
			if s.match('=') {
				return s.makeToken(types.GREATER_EQUAL)
			}
			return s.makeToken(types.GREATER)
		case '"':
			return s.string()
		}

		return s.errorToken("Unexpected character.")
	}
}
// ScanTokens consumes the entire source and returns every token in
// order, with an EOF token as the final element.
func (s *Scanner) ScanTokens() []types.Token {
	var out []types.Token
	for {
		tok := s.NextToken()
		out = append(out, tok)
		if tok.Type == types.EOF {
			return out
		}
	}
}
// Helper methods for scanning

// isAtEnd reports whether the cursor has consumed all of the source.
func (s *Scanner) isAtEnd() bool {
	return len(s.source) <= s.current
}
// advance consumes the byte at the cursor, moving the cursor and
// column forward by one, and returns that byte. Callers must ensure
// the scanner is not at the end of the source.
func (s *Scanner) advance() byte {
	s.current++
	s.column++
	return s.source[s.current-1]
}
// peek returns the byte at the cursor without consuming it, or 0
// when the source is exhausted.
func (s *Scanner) peek() byte {
	if s.current < len(s.source) {
		return s.source[s.current]
	}
	return 0
}
// peekNext returns the byte one position past the cursor, or 0 when
// fewer than two bytes remain.
func (s *Scanner) peekNext() byte {
	if next := s.current + 1; next < len(s.source) {
		return s.source[next]
	}
	return 0
}
// match conditionally consumes the next byte: when it equals
// expected, the cursor advances and match reports true; otherwise
// nothing is consumed.
func (s *Scanner) match(expected byte) bool {
	if !s.isAtEnd() && s.source[s.current] == expected {
		s.current++
		s.column++
		return true
	}
	return false
}
// makeToken builds a token for the current lexeme with no literal
// value attached.
func (s *Scanner) makeToken(tokenType types.TokenType) types.Token {
	return s.makeTokenWithLiteral(tokenType, nil)
}
// makeTokenWithLiteral builds a token spanning source[start:current]
// carrying the given literal value. The reported column is backed up
// to point at the first byte of the lexeme.
func (s *Scanner) makeTokenWithLiteral(tokenType types.TokenType, literal any) types.Token {
	text := s.source[s.start:s.current]
	return types.Token{
		Type:    tokenType,
		Lexeme:  text,
		Literal: literal,
		Line:    s.line,
		Column:  s.column - len(text),
	}
}
// errorToken builds an ERROR token whose Lexeme carries the error
// message rather than source text.
// NOTE(review): Column here is the current column (after the
// offending bytes), unlike makeTokenWithLiteral which backs up to the
// lexeme start — confirm this asymmetry is intended; existing tests
// only assert Type for error tokens.
func (s *Scanner) errorToken(message string) types.Token {
	return types.Token{
		Type:   types.ERROR,
		Lexeme: message,
		Line:   s.line,
		Column: s.column,
	}
}
// skipWhitespace advances past spaces, tabs, carriage returns, and
// newlines, updating the line/column counters on each '\n'.
func (s *Scanner) skipWhitespace() {
	for !s.isAtEnd() {
		switch s.peek() {
		case '\n':
			s.line++
			s.column = 0 // advance() bumps this to 1 for the new line
			s.advance()
		case ' ', '\r', '\t':
			s.advance()
		default:
			return
		}
	}
}
// string scans a double-quoted string literal; the opening quote has
// already been consumed. Newlines are permitted inside strings and
// update the line counter. An unclosed string yields an ERROR token.
func (s *Scanner) string() types.Token {
	for !s.isAtEnd() && s.peek() != '"' {
		if s.peek() == '\n' {
			s.line++
			s.column = 0
		}
		s.advance()
	}
	if s.isAtEnd() {
		return s.errorToken("Unterminated string.")
	}
	s.advance() // consume the closing quote
	// The literal value excludes the surrounding quotes.
	return s.makeTokenWithLiteral(types.STRING, s.source[s.start+1:s.current-1])
}
// number scans an integer or decimal literal; the first digit has
// already been consumed. The literal value is stored as a float64.
func (s *Scanner) number() types.Token {
	consumeDigits := func() {
		for isDigit(s.peek()) {
			s.advance()
		}
	}

	consumeDigits() // integer part

	// A fractional part requires a digit after the dot, so a
	// trailing '1.' (or '1..') is not swallowed here.
	if s.peek() == '.' && isDigit(s.peekNext()) {
		s.advance() // the '.'
		consumeDigits()
	}

	value, err := strconv.ParseFloat(s.source[s.start:s.current], 64)
	if err != nil {
		return s.errorToken("Invalid number.")
	}
	return s.makeTokenWithLiteral(types.NUMBER, value)
}
// identifier scans an identifier or keyword; the first character has
// already been consumed. The keywords true/false/nil additionally
// carry their Go values as literals.
func (s *Scanner) identifier() types.Token {
	for isAlphaNumeric(s.peek()) {
		s.advance()
	}

	tokenType := s.keywordType(s.source[s.start:s.current])

	var literal any
	switch tokenType {
	case types.TRUE:
		literal = true
	case types.FALSE:
		literal = false
	case types.NIL:
		literal = nil
	}
	return s.makeTokenWithLiteral(tokenType, literal)
}
func (s *Scanner) keywordType(text string) types.TokenType {
switch text {
case "and":
return types.AND
case "or":
return types.OR
case "if":
return types.IF
case "elseif":
return types.ELSEIF
case "else":
return types.ELSE
case "then":
return types.THEN
case "end":
return types.END
case "fn":
return types.FN
case "return":
return types.RETURN
case "echo":
return types.ECHO
case "true":
return types.TRUE
case "false":
return types.FALSE
case "nil":
return types.NIL
default:
return types.IDENTIFIER
}
}
// Helper functions

// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	return '0' <= c && c <= '9'
}
// isAlpha reports whether c is an ASCII letter or underscore, i.e.
// a valid first character of an identifier.
func isAlpha(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	default:
		return c == '_'
	}
}
// isAlphaNumeric reports whether c may appear inside an identifier
// (letter, underscore, or digit).
func isAlphaNumeric(c byte) bool {
	if isAlpha(c) {
		return true
	}
	return isDigit(c)
}

234
scanner/scanner_test.go Normal file
View File

@ -0,0 +1,234 @@
package scanner_test
import (
"testing"
assert "git.sharkk.net/Go/Assert"
"git.sharkk.net/Sharkk/Mako/scanner"
"git.sharkk.net/Sharkk/Mako/types"
)
// checkToken asserts that token has the expected type, lexeme, line,
// and column.
func checkToken(t *testing.T, token types.Token, expectedType types.TokenType, expectedLexeme string, expectedLine int, expectedColumn int) {
	t.Helper() // report failures at the caller's line, not here
	assert.Equal(t, expectedType, token.Type)
	assert.Equal(t, expectedLexeme, token.Lexeme)
	assert.Equal(t, expectedLine, token.Line)
	assert.Equal(t, expectedColumn, token.Column)
}
// TestSingleTokens verifies that every token kind scans correctly in
// isolation and is immediately followed by EOF at the right column.
func TestSingleTokens(t *testing.T) {
	cases := []struct {
		source  string
		tokType types.TokenType
		lexeme  string
		line    int
		column  int
	}{
		{"(", types.LEFT_PAREN, "(", 1, 1},
		{")", types.RIGHT_PAREN, ")", 1, 1},
		{",", types.COMMA, ",", 1, 1},
		{"+", types.PLUS, "+", 1, 1},
		{"-", types.MINUS, "-", 1, 1},
		{"*", types.STAR, "*", 1, 1},
		{"/", types.SLASH, "/", 1, 1},
		{"=", types.EQUAL, "=", 1, 1},
		{"==", types.EQUAL_EQUAL, "==", 1, 1},
		{"!=", types.BANG_EQUAL, "!=", 1, 1},
		{"<", types.LESS, "<", 1, 1},
		{"<=", types.LESS_EQUAL, "<=", 1, 1},
		{">", types.GREATER, ">", 1, 1},
		{">=", types.GREATER_EQUAL, ">=", 1, 1},
		{"if", types.IF, "if", 1, 1},
		{"then", types.THEN, "then", 1, 1},
		{"elseif", types.ELSEIF, "elseif", 1, 1},
		{"else", types.ELSE, "else", 1, 1},
		{"end", types.END, "end", 1, 1},
		{"fn", types.FN, "fn", 1, 1},
		{"return", types.RETURN, "return", 1, 1},
		{"echo", types.ECHO, "echo", 1, 1},
		{"true", types.TRUE, "true", 1, 1},
		{"false", types.FALSE, "false", 1, 1},
		{"nil", types.NIL, "nil", 1, 1},
		{"and", types.AND, "and", 1, 1},
		{"or", types.OR, "or", 1, 1},
		{"identifier", types.IDENTIFIER, "identifier", 1, 1},
		{"...", types.ELLIPSIS, "...", 1, 1},
	}
	for _, tc := range cases {
		s := scanner.New(tc.source)
		checkToken(t, s.NextToken(), tc.tokType, tc.lexeme, tc.line, tc.column)
		// The scanner must report EOF right after the lexeme.
		checkToken(t, s.NextToken(), types.EOF, "", tc.line, tc.column+len(tc.lexeme))
	}
}
// TestNumbers verifies that integer and decimal literals scan to
// NUMBER tokens carrying float64 literal values.
func TestNumbers(t *testing.T) {
	cases := []struct {
		source string
		lexeme string
		value  float64
	}{
		{"123", "123", 123.0},
		{"123.456", "123.456", 123.456},
		{"0.123", "0.123", 0.123},
		{"0", "0", 0.0},
	}
	for _, tc := range cases {
		tok := scanner.New(tc.source).NextToken()
		assert.Equal(t, types.NUMBER, tok.Type)
		assert.Equal(t, tc.lexeme, tok.Lexeme)
		assert.Equal(t, tc.value, tok.Literal.(float64))
	}
}
// TestStrings verifies that quoted strings scan to STRING tokens
// whose literal value excludes the surrounding quotes.
func TestStrings(t *testing.T) {
	cases := []struct {
		source string
		lexeme string
		value  string
	}{
		{`"hello"`, `"hello"`, "hello"},
		{`""`, `""`, ""},
		{`"hello world"`, `"hello world"`, "hello world"},
	}
	for _, tc := range cases {
		tok := scanner.New(tc.source).NextToken()
		assert.Equal(t, types.STRING, tok.Type)
		assert.Equal(t, tc.lexeme, tok.Lexeme)
		assert.Equal(t, tc.value, tok.Literal.(string))
	}
}
// TestComments verifies that a // comment is skipped entirely and
// that the tokens on the following line report line 2.
func TestComments(t *testing.T) {
	s := scanner.New("// This is a comment\nx = 5")
	expected := []struct {
		tokType      types.TokenType
		lexeme       string
		line, column int
	}{
		{types.IDENTIFIER, "x", 2, 1},
		{types.EQUAL, "=", 2, 3},
		{types.NUMBER, "5", 2, 5},
		{types.EOF, "", 2, 6},
	}
	for _, exp := range expected {
		checkToken(t, s.NextToken(), exp.tokType, exp.lexeme, exp.line, exp.column)
	}
}
// TestMultipleTokens walks a small function definition and checks
// the complete token sequence, ending with EOF.
func TestMultipleTokens(t *testing.T) {
	s := scanner.New("fn add(a, b) return a + b end")
	want := []struct {
		tokType types.TokenType
		lexeme  string
	}{
		{types.FN, "fn"},
		{types.IDENTIFIER, "add"},
		{types.LEFT_PAREN, "("},
		{types.IDENTIFIER, "a"},
		{types.COMMA, ","},
		{types.IDENTIFIER, "b"},
		{types.RIGHT_PAREN, ")"},
		{types.RETURN, "return"},
		{types.IDENTIFIER, "a"},
		{types.PLUS, "+"},
		{types.IDENTIFIER, "b"},
		{types.END, "end"},
		{types.EOF, ""},
	}
	for _, w := range want {
		tok := s.NextToken()
		assert.Equal(t, w.tokType, tok.Type)
		assert.Equal(t, w.lexeme, tok.Lexeme)
	}
}
// TestScanTokens checks that ScanTokens returns the whole token
// stream for a snippet, terminated by a single EOF token.
func TestScanTokens(t *testing.T) {
	tokens := scanner.New("fn add(a, b) return a + b end").ScanTokens()
	assert.Equal(t, 13, len(tokens))
	assert.Equal(t, types.FN, tokens[0].Type)
	assert.Equal(t, types.EOF, tokens[12].Type)
}
// TestLineAndColumn verifies position tracking across a newline.
func TestLineAndColumn(t *testing.T) {
	s := scanner.New("x = 1\ny = 2")
	expected := []struct {
		tokType      types.TokenType
		lexeme       string
		line, column int
	}{
		{types.IDENTIFIER, "x", 1, 1},
		{types.EQUAL, "=", 1, 3},
		{types.NUMBER, "1", 1, 5},
		{types.IDENTIFIER, "y", 2, 1},
		{types.EQUAL, "=", 2, 3},
		{types.NUMBER, "2", 2, 5},
	}
	for _, exp := range expected {
		checkToken(t, s.NextToken(), exp.tokType, exp.lexeme, exp.line, exp.column)
	}
}
// TestErrors checks that malformed input yields an ERROR token.
func TestErrors(t *testing.T) {
	sources := []string{
		"\"unterminated", // string never closed
		"@",              // character outside the grammar
		"!",              // '!' is only valid as part of '!='
	}
	for _, src := range sources {
		tok := scanner.New(src).NextToken()
		assert.Equal(t, types.ERROR, tok.Type)
	}
}
// TestLiterals checks the literal values attached to the keyword
// tokens true, false, and nil.
func TestLiterals(t *testing.T) {
	// true carries the Go value true.
	trueTok := scanner.New("true").NextToken()
	assert.Equal(t, types.TRUE, trueTok.Type)
	assert.Equal(t, true, trueTok.Literal.(bool))

	// false carries the Go value false.
	falseTok := scanner.New("false").NextToken()
	assert.Equal(t, types.FALSE, falseTok.Type)
	assert.Equal(t, false, falseTok.Literal.(bool))

	// nil carries no literal at all.
	nilTok := scanner.New("nil").NextToken()
	assert.Equal(t, types.NIL, nilTok.Type)
	assert.Nil(t, nilTok.Literal)
}
// TestWhitespace checks that leading spaces, tabs, carriage returns,
// and newlines are skipped while line/column tracking stays correct.
func TestWhitespace(t *testing.T) {
	tok := scanner.New(" \t \r\n x").NextToken()
	checkToken(t, tok, types.IDENTIFIER, "x", 2, 3)
}