// Package lexer implements the lexical scanner for the Mako language.
package lexer
// TokenType identifies the lexical category of a Token.
type TokenType byte

// Token categories produced by the lexer. TokenEOF is the zero value, so a
// zero-valued Token reads as end of input.
const (
	TokenEOF TokenType = iota // end of input (also emitted for unsupported characters)
	TokenIdentifier
	TokenString
	TokenNumber
	TokenEqual
	TokenEcho
	TokenSemicolon
	TokenLeftBrace
	TokenRightBrace
	TokenLeftBracket
	TokenRightBracket
	TokenComma
	TokenPlus
	TokenMinus
	TokenStar
	TokenSlash
	TokenLeftParen
	TokenRightParen
	TokenIf
	TokenThen
	TokenElse
	TokenTrue
	TokenFalse
	TokenEqualEqual
	TokenNotEqual
	TokenLessThan
	TokenGreaterThan
	TokenLessEqual
	TokenGreaterEqual
	TokenEnd
)

// Token is a single lexical unit: its category plus the literal text it was
// scanned from (for strings, the contents without the surrounding quotes).
type Token struct {
	Type  TokenType
	Value string
}
// Lexer performs a single forward pass over input, handing out tokens via
// NextToken. It works on raw bytes, not runes, so identifiers and operators
// are effectively ASCII-only.
type Lexer struct {
	input   string // full source text being scanned
	pos     int    // index of the current byte (the one held in ch)
	readPos int    // index of the next byte to read (always pos+1)
	ch      byte   // current byte under examination; 0 signals end of input
}
// New returns a Lexer positioned on the first character of input, ready for
// the first NextToken call.
func New(input string) *Lexer {
	lx := &Lexer{input: input}
	// Prime ch/pos/readPos so NextToken can start scanning immediately.
	lx.readChar()
	return lx
}
// readChar advances the lexer by one byte. Once the input is exhausted, ch
// is pinned to 0, which the rest of the lexer treats as the EOF sentinel.
func (l *Lexer) readChar() {
	l.ch = 0
	if l.readPos < len(l.input) {
		l.ch = l.input[l.readPos]
	}
	l.pos = l.readPos
	l.readPos++
}
// NextToken scans and returns the next token, advancing the lexer past it.
// At end of input it returns TokenEOF; it is safe to call repeatedly after
// that and keep receiving TokenEOF.
func (l *Lexer) NextToken() Token {
	var tok Token
	l.skipWhitespace()
	l.skipComment()
	switch l.ch {
	case '=':
		// One byte of lookahead distinguishes "==" from "=".
		if l.peekChar() == '=' {
			l.readChar() // consume the current '='
			tok = Token{Type: TokenEqualEqual, Value: "=="}
		} else {
			tok = Token{Type: TokenEqual, Value: "="}
		}
	case '!':
		if l.peekChar() == '=' {
			l.readChar() // consume the current '!'
			tok = Token{Type: TokenNotEqual, Value: "!="}
		} else {
			// NOTE(review): a lone '!' silently truncates the token stream by
			// reporting EOF; a dedicated illegal/error token would let the
			// parser surface a diagnostic instead.
			tok = Token{Type: TokenEOF, Value: ""} // Not supported yet
		}
	case '<':
		if l.peekChar() == '=' {
			l.readChar() // consume the current '<'
			tok = Token{Type: TokenLessEqual, Value: "<="}
		} else {
			tok = Token{Type: TokenLessThan, Value: "<"}
		}
	case '>':
		if l.peekChar() == '=' {
			l.readChar() // consume the current '>'
			tok = Token{Type: TokenGreaterEqual, Value: ">="}
		} else {
			tok = Token{Type: TokenGreaterThan, Value: ">"}
		}
	case ';':
		tok = Token{Type: TokenSemicolon, Value: ";"}
	case '"':
		tok = Token{Type: TokenString, Value: l.readString()}
		// readString already consumed the closing quote, so skip the shared
		// readChar below by returning early.
		return tok
	case '{':
		tok = Token{Type: TokenLeftBrace, Value: "{"}
	case '}':
		tok = Token{Type: TokenRightBrace, Value: "}"}
	case '[':
		tok = Token{Type: TokenLeftBracket, Value: "["}
	case ']':
		tok = Token{Type: TokenRightBracket, Value: "]"}
	case ',':
		tok = Token{Type: TokenComma, Value: ","}
	case '+':
		tok = Token{Type: TokenPlus, Value: "+"}
	case '-':
		tok = Token{Type: TokenMinus, Value: "-"}
	case '*':
		tok = Token{Type: TokenStar, Value: "*"}
	case '/':
		// A lone '/' is division; "//" comments were removed by skipComment.
		tok = Token{Type: TokenSlash, Value: "/"}
	case '(':
		tok = Token{Type: TokenLeftParen, Value: "("}
	case ')':
		tok = Token{Type: TokenRightParen, Value: ")"}
	case 0:
		tok = Token{Type: TokenEOF, Value: ""}
	default:
		if isLetter(l.ch) {
			// Identifier or keyword: read the word, then classify it.
			tok.Value = l.readIdentifier()
			switch tok.Value {
			case "echo":
				tok.Type = TokenEcho
			case "if":
				tok.Type = TokenIf
			case "then":
				tok.Type = TokenThen
			case "else":
				tok.Type = TokenElse
			case "true":
				tok.Type = TokenTrue
			case "false":
				tok.Type = TokenFalse
			case "end":
				tok.Type = TokenEnd
			default:
				tok.Type = TokenIdentifier
			}
			// readIdentifier already advanced past the word; skip the final
			// readChar by returning early.
			return tok
		} else if isDigit(l.ch) {
			tok.Type = TokenNumber
			tok.Value = l.readNumber()
			// readNumber already advanced past the digits.
			return tok
		} else {
			// NOTE(review): unknown characters are reported as EOF, which
			// stops scanning without any diagnostic — an illegal token type
			// would be more informative.
			tok = Token{Type: TokenEOF, Value: ""}
		}
	}
	// Single- and two-character operator cases fall through here; step past
	// the last consumed character.
	l.readChar()
	return tok
}
// skipWhitespace consumes spaces, tabs, and line breaks so the lexer rests
// on the next significant character.
func (l *Lexer) skipWhitespace() {
	for {
		switch l.ch {
		case ' ', '\t', '\n', '\r':
			l.readChar()
		default:
			return
		}
	}
}
// readIdentifier consumes a run of letters, underscores, and digits starting
// at the current position and returns the consumed text. The first character
// is known to be a letter by the caller's isLetter check.
func (l *Lexer) readIdentifier() string {
	start := l.pos
	for isLetter(l.ch) || isDigit(l.ch) {
		l.readChar()
	}
	return l.input[start:l.pos]
}
// readNumber consumes a run of decimal digits starting at the current
// position and returns them as a string. Only integers are recognized; a
// '.' terminates the number.
func (l *Lexer) readNumber() string {
	start := l.pos
	for isDigit(l.ch) {
		l.readChar()
	}
	return l.input[start:l.pos]
}
// readString consumes a double-quoted string literal (the lexer is currently
// on the opening quote) and returns its contents without the quotes. There
// is no escape handling; scanning stops at the next '"' or, for an
// unterminated literal, at end of input.
func (l *Lexer) readString() string {
	start := l.pos + 1 // content begins just after the opening quote
	l.readChar()
	for l.ch != '"' && l.ch != 0 {
		l.readChar()
	}
	s := l.input[start:l.pos]
	l.readChar() // step past the closing quote (a no-op at EOF)
	return s
}
// isLetter reports whether ch is an ASCII letter or an underscore — the
// characters permitted in Mako identifiers (after the first, digits are
// allowed too; see readIdentifier).
func isLetter(ch byte) bool {
	switch {
	case ch >= 'a' && ch <= 'z':
		return true
	case ch >= 'A' && ch <= 'Z':
		return true
	default:
		return ch == '_'
	}
}
// isDigit reports whether ch is an ASCII decimal digit.
func isDigit(ch byte) bool {
	return ch >= '0' && ch <= '9'
}
// peekChar returns the byte that readChar would load next, without advancing
// the lexer. Past the end of input it returns 0, the EOF sentinel.
func (l *Lexer) peekChar() byte {
	if l.readPos < len(l.input) {
		return l.input[l.readPos]
	}
	return 0
}
// skipComment consumes "//" line comments, together with any whitespace that
// follows each one, until the lexer rests on a non-comment character.
//
// This loops rather than handling a single comment: previously only the
// first of two back-to-back comment lines was skipped, and the second was
// mis-lexed as a pair of TokenSlash tokens.
func (l *Lexer) skipComment() {
	for l.ch == '/' && l.peekChar() == '/' {
		// Consume through the end of the line (or end of input).
		for l.ch != '\n' && l.ch != 0 {
			l.readChar()
		}
		l.skipWhitespace()
	}
}