// Package scanner turns Mako source text into a stream of tokens.
package scanner

import (
	"strconv"

	"git.sharkk.net/Sharkk/Mako/types"
)

// Scanner holds the state needed for scanning.
type Scanner struct {
	source  string
	start   int // start of the current lexeme
	current int // current position in the source
	line    int // current line number (1-based)
	column  int // current column number (1-based)
}

// New creates a new scanner for the given source.
func New(source string) *Scanner {
	return &Scanner{
		source: source,
		line:   1,
		column: 1,
	}
}

// NextToken returns the next token from the source.
//
// Comments are skipped here rather than emitted as tokens. Skipping is
// done iteratively (rescan loop) instead of via self-recursion so that a
// long run of consecutive comments cannot grow the call stack.
func (s *Scanner) NextToken() types.Token {
	for {
		s.skipWhitespace()
		s.start = s.current

		if s.isAtEnd() {
			return s.makeToken(types.EOF)
		}

		c := s.advance()

		if isAlpha(c) {
			return s.identifier()
		}
		if isDigit(c) {
			return s.number()
		}

		switch c {
		case '(':
			return s.makeToken(types.LEFT_PAREN)
		case ')':
			return s.makeToken(types.RIGHT_PAREN)
		case ',':
			return s.makeToken(types.COMMA)
		case '+':
			return s.makeToken(types.PLUS)
		case '-':
			return s.makeToken(types.MINUS)
		case '*':
			return s.makeToken(types.STAR)
		case '/':
			if s.match('/') {
				// Single-line comment: discard to end of line, then
				// rescan for the next real token. The '\n' itself is
				// left for skipWhitespace, which maintains line/column.
				for s.peek() != '\n' && !s.isAtEnd() {
					s.advance()
				}
				continue
			}
			if s.match('*') {
				// Multiline comment: discard until the closing */.
				for !(s.peek() == '*' && s.peekNext() == '/') && !s.isAtEnd() {
					if s.peek() == '\n' {
						s.line++
						s.column = 0 // advance() below brings it to 1
					}
					s.advance()
				}
				if s.isAtEnd() {
					return s.errorToken("Unclosed multiline comment.")
				}
				s.advance() // consume '*'
				s.advance() // consume '/'
				continue
			}
			return s.makeToken(types.SLASH)
		case '.':
			if s.match('.') {
				if s.match('.') {
					return s.makeToken(types.ELLIPSIS)
				}
				// '..' without the third '.' is invalid.
				return s.errorToken("Expected '...' (ellipsis).")
			}
			// A lone '.' is not valid yet; it may later become part of
			// number syntax.
			return s.errorToken("Unexpected '.'.")
		case '=':
			if s.match('=') {
				return s.makeToken(types.EQUAL_EQUAL)
			}
			return s.makeToken(types.EQUAL)
		case '!':
			if s.match('=') {
				return s.makeToken(types.BANG_EQUAL)
			}
			// Bare '!' is not an operator in Mako.
			return s.errorToken("Unexpected character.")
		case '<':
			if s.match('=') {
				return s.makeToken(types.LESS_EQUAL)
			}
			return s.makeToken(types.LESS)
		case '>':
			if s.match('=') {
				return s.makeToken(types.GREATER_EQUAL)
			}
			return s.makeToken(types.GREATER)
		case '"':
			return s.string()
		}

		return s.errorToken("Unexpected character.")
	}
}

// ScanTokens scans all tokens in the source and returns them.
// The returned slice always ends with an EOF token.
func (s *Scanner) ScanTokens() []types.Token {
	var tokens []types.Token
	for {
		token := s.NextToken()
		tokens = append(tokens, token)
		if token.Type == types.EOF {
			break
		}
	}
	return tokens
}

// isAtEnd reports whether the scanner has consumed the whole source.
func (s *Scanner) isAtEnd() bool {
	return s.current >= len(s.source)
}

// advance consumes and returns the current byte, updating position/column.
func (s *Scanner) advance() byte {
	c := s.source[s.current]
	s.current++
	s.column++
	return c
}

// peek returns the current byte without consuming it (0 at EOF).
func (s *Scanner) peek() byte {
	if s.isAtEnd() {
		return 0
	}
	return s.source[s.current]
}

// peekNext returns the byte after the current one without consuming (0 at EOF).
func (s *Scanner) peekNext() byte {
	if s.current+1 >= len(s.source) {
		return 0
	}
	return s.source[s.current+1]
}

// match consumes the current byte only if it equals expected,
// reporting whether it did so.
func (s *Scanner) match(expected byte) bool {
	if s.isAtEnd() || s.source[s.current] != expected {
		return false
	}
	s.current++
	s.column++
	return true
}

// makeToken builds a token of the given type with no literal value.
func (s *Scanner) makeToken(tokenType types.TokenType) types.Token {
	return s.makeTokenWithLiteral(tokenType, nil)
}

// makeTokenWithLiteral builds a token for the current lexeme
// (source[start:current]) carrying the given literal value.
// Column points at the first byte of the lexeme.
func (s *Scanner) makeTokenWithLiteral(tokenType types.TokenType, literal any) types.Token {
	lexeme := s.source[s.start:s.current]
	return types.Token{
		Type:    tokenType,
		Lexeme:  lexeme,
		Literal: literal,
		Line:    s.line,
		Column:  s.column - len(lexeme),
	}
}

// errorToken builds an ERROR token whose Lexeme carries the message.
func (s *Scanner) errorToken(message string) types.Token {
	return types.Token{
		Type:   types.ERROR,
		Lexeme: message,
		Line:   s.line,
		Column: s.column,
	}
}

// skipWhitespace consumes spaces, carriage returns, tabs, and newlines,
// keeping line/column counters in sync.
func (s *Scanner) skipWhitespace() {
	for {
		c := s.peek()
		switch c {
		case ' ', '\r', '\t':
			s.advance()
		case '\n':
			s.line++
			s.column = 0 // advance() below brings it to 1 for the new line
			s.advance()
		default:
			return
		}
	}
}

// string scans a double-quoted string literal. The opening quote has
// already been consumed. Newlines inside the string are allowed and
// update the line counter. The literal value excludes the quotes.
func (s *Scanner) string() types.Token {
	for s.peek() != '"' && !s.isAtEnd() {
		if s.peek() == '\n' {
			s.line++
			s.column = 0
		}
		s.advance()
	}

	if s.isAtEnd() {
		return s.errorToken("Unterminated string.")
	}

	// Consume the closing "
	s.advance()

	// The string value, without the surrounding quotes.
	value := s.source[s.start+1 : s.current-1]
	return s.makeTokenWithLiteral(types.STRING, value)
}

// number scans a numeric literal (integer part plus an optional
// fractional part). The first digit has already been consumed.
// The literal is stored as a float64.
func (s *Scanner) number() types.Token {
	// Integer part.
	for isDigit(s.peek()) {
		s.advance()
	}

	// Fractional part — only if a digit follows the '.', so that
	// trailing dots (e.g. "1." or "1..2") are not swallowed here.
	if s.peek() == '.' && isDigit(s.peekNext()) {
		s.advance() // consume the '.'
		for isDigit(s.peek()) {
			s.advance()
		}
	}

	value, err := strconv.ParseFloat(s.source[s.start:s.current], 64)
	if err != nil {
		return s.errorToken("Invalid number.")
	}
	return s.makeTokenWithLiteral(types.NUMBER, value)
}

// identifier scans an identifier or keyword. The first character has
// already been consumed. Keywords true/false/nil carry their Go values
// as the token literal.
func (s *Scanner) identifier() types.Token {
	for isAlphaNumeric(s.peek()) {
		s.advance()
	}

	text := s.source[s.start:s.current]
	tokenType := s.keywordType(text)

	var literal any
	switch tokenType {
	case types.TRUE:
		literal = true
	case types.FALSE:
		literal = false
	case types.NIL:
		literal = nil
	}
	return s.makeTokenWithLiteral(tokenType, literal)
}

// keywordType maps reserved words to their token types; anything else
// is an IDENTIFIER.
func (s *Scanner) keywordType(text string) types.TokenType {
	switch text {
	case "and":
		return types.AND
	case "or":
		return types.OR
	case "if":
		return types.IF
	case "elseif":
		return types.ELSEIF
	case "else":
		return types.ELSE
	case "then":
		return types.THEN
	case "end":
		return types.END
	case "fn":
		return types.FN
	case "return":
		return types.RETURN
	case "echo":
		return types.ECHO
	case "true":
		return types.TRUE
	case "false":
		return types.FALSE
	case "nil":
		return types.NIL
	default:
		return types.IDENTIFIER
	}
}

// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	return c >= '0' && c <= '9'
}

// isAlpha reports whether c can start an identifier (ASCII letter or '_').
func isAlpha(c byte) bool {
	return (c >= 'a' && c <= 'z') ||
		(c >= 'A' && c <= 'Z') ||
		c == '_'
}

// isAlphaNumeric reports whether c can continue an identifier.
func isAlphaNumeric(c byte) bool {
	return isAlpha(c) || isDigit(c)
}