// Package lexer implements a small hand-written scanner that splits
// source text into tokens (identifiers, keywords such as echo/if/end,
// string and number literals, and punctuation) for a toy language.
package lexer
// TokenType identifies the lexical category of a Token.
type TokenType byte
// Token types emitted by the lexer. TokenEOF is deliberately first so
// that the zero value of TokenType (and of Token) reads as end of input.
const (
	TokenEOF          TokenType = iota // end of input; also currently returned for unsupported characters (see NextToken)
	TokenIdentifier                    // letter-led word that is not one of the keywords below
	TokenString                        // double-quoted string literal (Value holds the contents, unquoted)
	TokenNumber                        // run of decimal digits (integers only)
	TokenEqual                         // '='
	TokenEcho                          // keyword "echo"
	TokenSemicolon                     // ';'
	TokenLeftBrace                     // '{'
	TokenRightBrace                    // '}'
	TokenLeftBracket                   // '['
	TokenRightBracket                  // ']'
	TokenComma                         // ','
	TokenPlus                          // '+'
	TokenMinus                         // '-'
	TokenStar                          // '*'
	TokenSlash                         // '/'
	TokenLeftParen                     // '('
	TokenRightParen                    // ')'
	TokenIf                            // keyword "if"
	TokenThen                          // keyword "then"
	TokenElse                          // keyword "else"
	TokenTrue                          // keyword "true"
	TokenFalse                         // keyword "false"
	TokenEqualEqual                    // "=="
	TokenNotEqual                      // "!="
	TokenLessThan                      // '<'
	TokenGreaterThan                   // '>'
	TokenLessEqual                     // "<="
	TokenGreaterEqual                  // ">="
	TokenEnd                           // keyword "end"
)
// Token is a single lexical unit: its category plus the literal text it
// was scanned from. For TokenString, Value holds the string contents
// without the surrounding quotes.
type Token struct {
	Type  TokenType
	Value string
}
// Lexer is a byte-oriented scanner over an input string (no UTF-8
// decoding — it indexes individual bytes). It maintains a one-byte
// lookahead window via ch/readPos.
type Lexer struct {
	input   string // source text being tokenized
	pos     int    // index of ch within input
	readPos int    // index of the byte after ch
	ch      byte   // current byte; 0 signals end of input
}
func New(input string) *Lexer {
|
|
l := &Lexer{input: input}
|
|
l.readChar()
|
|
return l
|
|
}
|
|
|
|
func (l *Lexer) readChar() {
|
|
if l.readPos >= len(l.input) {
|
|
l.ch = 0
|
|
} else {
|
|
l.ch = l.input[l.readPos]
|
|
}
|
|
l.pos = l.readPos
|
|
l.readPos++
|
|
}
|
|
|
|
func (l *Lexer) NextToken() Token {
|
|
var tok Token
|
|
|
|
l.skipWhitespace()
|
|
l.skipComment()
|
|
|
|
switch l.ch {
|
|
case '=':
|
|
if l.peekChar() == '=' {
|
|
l.readChar() // consume the current '='
|
|
tok = Token{Type: TokenEqualEqual, Value: "=="}
|
|
} else {
|
|
tok = Token{Type: TokenEqual, Value: "="}
|
|
}
|
|
case '!':
|
|
if l.peekChar() == '=' {
|
|
l.readChar() // consume the current '!'
|
|
tok = Token{Type: TokenNotEqual, Value: "!="}
|
|
} else {
|
|
tok = Token{Type: TokenEOF, Value: ""} // Not supported yet
|
|
}
|
|
case '<':
|
|
if l.peekChar() == '=' {
|
|
l.readChar() // consume the current '<'
|
|
tok = Token{Type: TokenLessEqual, Value: "<="}
|
|
} else {
|
|
tok = Token{Type: TokenLessThan, Value: "<"}
|
|
}
|
|
case '>':
|
|
if l.peekChar() == '=' {
|
|
l.readChar() // consume the current '>'
|
|
tok = Token{Type: TokenGreaterEqual, Value: ">="}
|
|
} else {
|
|
tok = Token{Type: TokenGreaterThan, Value: ">"}
|
|
}
|
|
case ';':
|
|
tok = Token{Type: TokenSemicolon, Value: ";"}
|
|
case '"':
|
|
tok = Token{Type: TokenString, Value: l.readString()}
|
|
return tok
|
|
case '{':
|
|
tok = Token{Type: TokenLeftBrace, Value: "{"}
|
|
case '}':
|
|
tok = Token{Type: TokenRightBrace, Value: "}"}
|
|
case '[':
|
|
tok = Token{Type: TokenLeftBracket, Value: "["}
|
|
case ']':
|
|
tok = Token{Type: TokenRightBracket, Value: "]"}
|
|
case ',':
|
|
tok = Token{Type: TokenComma, Value: ","}
|
|
case '+':
|
|
tok = Token{Type: TokenPlus, Value: "+"}
|
|
case '-':
|
|
tok = Token{Type: TokenMinus, Value: "-"}
|
|
case '*':
|
|
tok = Token{Type: TokenStar, Value: "*"}
|
|
case '/':
|
|
tok = Token{Type: TokenSlash, Value: "/"}
|
|
case '(':
|
|
tok = Token{Type: TokenLeftParen, Value: "("}
|
|
case ')':
|
|
tok = Token{Type: TokenRightParen, Value: ")"}
|
|
case 0:
|
|
tok = Token{Type: TokenEOF, Value: ""}
|
|
default:
|
|
if isLetter(l.ch) {
|
|
tok.Value = l.readIdentifier()
|
|
switch tok.Value {
|
|
case "echo":
|
|
tok.Type = TokenEcho
|
|
case "if":
|
|
tok.Type = TokenIf
|
|
case "then":
|
|
tok.Type = TokenThen
|
|
case "else":
|
|
tok.Type = TokenElse
|
|
case "true":
|
|
tok.Type = TokenTrue
|
|
case "false":
|
|
tok.Type = TokenFalse
|
|
case "end":
|
|
tok.Type = TokenEnd
|
|
default:
|
|
tok.Type = TokenIdentifier
|
|
}
|
|
return tok
|
|
} else if isDigit(l.ch) {
|
|
tok.Type = TokenNumber
|
|
tok.Value = l.readNumber()
|
|
return tok
|
|
} else {
|
|
tok = Token{Type: TokenEOF, Value: ""}
|
|
}
|
|
}
|
|
|
|
l.readChar()
|
|
return tok
|
|
}
|
|
|
|
func (l *Lexer) skipWhitespace() {
|
|
for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
|
|
l.readChar()
|
|
}
|
|
}
|
|
|
|
func (l *Lexer) readIdentifier() string {
|
|
pos := l.pos
|
|
for isLetter(l.ch) || isDigit(l.ch) {
|
|
l.readChar()
|
|
}
|
|
return l.input[pos:l.pos]
|
|
}
|
|
|
|
func (l *Lexer) readNumber() string {
|
|
pos := l.pos
|
|
for isDigit(l.ch) {
|
|
l.readChar()
|
|
}
|
|
return l.input[pos:l.pos]
|
|
}
|
|
|
|
func (l *Lexer) readString() string {
|
|
pos := l.pos + 1
|
|
for {
|
|
l.readChar()
|
|
if l.ch == '"' || l.ch == 0 {
|
|
break
|
|
}
|
|
}
|
|
str := l.input[pos:l.pos]
|
|
l.readChar() // Skip closing quote
|
|
return str
|
|
}
|
|
|
|
// isLetter reports whether ch may start or continue an identifier:
// ASCII letters and the underscore.
func isLetter(ch byte) bool {
	switch {
	case ch >= 'a' && ch <= 'z':
		return true
	case ch >= 'A' && ch <= 'Z':
		return true
	default:
		return ch == '_'
	}
}
// isDigit reports whether ch is an ASCII decimal digit.
func isDigit(ch byte) bool {
	return ch >= '0' && ch <= '9'
}
func (l *Lexer) peekChar() byte {
|
|
if l.readPos >= len(l.input) {
|
|
return 0
|
|
}
|
|
return l.input[l.readPos]
|
|
}
|
|
|
|
func (l *Lexer) skipComment() {
|
|
if l.ch == '/' && l.peekChar() == '/' {
|
|
l.readChar()
|
|
l.readChar()
|
|
|
|
for l.ch != '\n' && l.ch != 0 {
|
|
l.readChar()
|
|
}
|
|
|
|
l.skipWhitespace()
|
|
}
|
|
}
|