package parser // Lexer tokenizes input source code type Lexer struct { input string position int readPosition int ch byte line int column int } // NewLexer creates a new lexer instance func NewLexer(input string) *Lexer { l := &Lexer{ input: input, line: 1, column: 0, } l.readChar() return l } // readChar reads the next character and advances position func (l *Lexer) readChar() { if l.readPosition >= len(l.input) { l.ch = 0 } else { l.ch = l.input[l.readPosition] } l.position = l.readPosition l.readPosition++ if l.ch == '\n' { l.line++ l.column = 0 } else { l.column++ } } // peekChar returns the next character without advancing position func (l *Lexer) peekChar() byte { if l.readPosition >= len(l.input) { return 0 } return l.input[l.readPosition] } // peekCharAt returns the character at offset positions ahead func (l *Lexer) peekCharAt(offset int) byte { pos := l.readPosition + offset - 1 if pos >= len(l.input) { return 0 } return l.input[pos] } // skipWhitespace skips whitespace characters func (l *Lexer) skipWhitespace() { for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' { l.readChar() } } // skipComment skips both line and block comments func (l *Lexer) skipComment() { if l.ch == '/' && l.peekChar() == '/' { // Line comment for l.ch != '\n' && l.ch != 0 { l.readChar() } } else if l.ch == '/' && l.peekChar() == '*' { // Block comment l.readChar() // skip '/' l.readChar() // skip '*' for { if l.ch == 0 { break } if l.ch == '*' && l.peekChar() == '/' { l.readChar() // skip '*' l.readChar() // skip '/' break } l.readChar() } } } // readIdentifier reads an identifier func (l *Lexer) readIdentifier() string { position := l.position for isLetter(l.ch) || isDigit(l.ch) { l.readChar() } return l.input[position:l.position] } // readNumber reads a number (decimal, hex, binary, or scientific notation) func (l *Lexer) readNumber() string { position := l.position // Check for hex (0x/0X) or binary (0b/0B) prefix if l.ch == '0' && (l.peekChar() == 'x' || l.peekChar() == 'X') { return l.readHexNumber() } if l.ch == '0' && (l.peekChar() == 'b' || l.peekChar() == 'B') { return l.readBinaryNumber() } // Read regular decimal number for isDigit(l.ch) { l.readChar() } // Handle decimal point if l.ch == '.' && isDigit(l.peekChar()) { l.readChar() // consume '.' for isDigit(l.ch) { l.readChar() } } // Handle scientific notation (e/E) if l.ch == 'e' || l.ch == 'E' { l.readChar() // consume 'e'/'E' // Optional +/- sign if l.ch == '+' || l.ch == '-' { l.readChar() } // Continue reading digits for the exponent for isDigit(l.ch) { l.readChar() } } return l.input[position:l.position] } // readHexNumber reads a hexadecimal number (0x...) func (l *Lexer) readHexNumber() string { position := l.position l.readChar() // skip '0' l.readChar() // skip 'x'/'X' // Continue reading until we hit a non-hex character for isHexDigit(l.ch) || isLetter(l.ch) || isDigit(l.ch) { l.readChar() } return l.input[position:l.position] } // readBinaryNumber reads a binary number (0b...) func (l *Lexer) readBinaryNumber() string { position := l.position l.readChar() // skip '0' l.readChar() // skip 'b'/'B' // Continue reading until we hit a non-digit character for isDigit(l.ch) || isLetter(l.ch) { l.readChar() } return l.input[position:l.position] } // readString reads a string literal func (l *Lexer) readString() string { position := l.position + 1 for { l.readChar() if l.ch == '"' || l.ch == 0 { break } } return l.input[position:l.position] } // readMultilineString reads a multiline string literal using [[ ]] syntax func (l *Lexer) readMultilineString() string { l.readChar() // skip first '[' l.readChar() // skip second '[' start := l.position for { if l.ch == 0 { break // EOF - return what we have } if l.ch == ']' && l.peekChar() == ']' { content := l.input[start:l.position] l.readChar() // skip first ']', positioned at second ']' return content } l.readChar() } return l.input[start:l.position] } // NextToken returns the next token from the input func (l *Lexer) NextToken() Token { var tok Token l.skipWhitespace() // Handle comments if l.ch == '/' && (l.peekChar() == '/' || l.peekChar() == '*') { l.skipComment() l.skipWhitespace() } tok.Line = l.line tok.Column = l.column switch l.ch { case '=': if l.peekChar() == '=' { ch := l.ch l.readChar() tok = Token{Type: EQ, Literal: string(ch) + string(l.ch), Line: l.line, Column: l.column} } else { tok = Token{Type: ASSIGN, Literal: string(l.ch), Line: l.line, Column: l.column} } case '!': if l.peekChar() == '=' { ch := l.ch l.readChar() tok = Token{Type: NOT_EQ, Literal: string(ch) + string(l.ch), Line: l.line, Column: l.column} } else { tok = Token{Type: ILLEGAL, Literal: string(l.ch), Line: l.line, Column: l.column} } case '<': if l.peekChar() == '=' { ch := l.ch l.readChar() tok = Token{Type: LT_EQ, Literal: string(ch) + string(l.ch), Line: l.line, Column: l.column} } else { tok = Token{Type: LT, Literal: string(l.ch), Line: l.line, Column: l.column} } case '>': if l.peekChar() == '=' { ch := l.ch l.readChar() tok = Token{Type: GT_EQ, Literal: string(ch) + string(l.ch), Line: l.line, Column: l.column} } else { tok = Token{Type: GT, Literal: string(l.ch), Line: l.line, Column: l.column} } case '+': tok = Token{Type: PLUS, Literal: string(l.ch), Line: l.line, Column: l.column} case '-': tok = Token{Type: MINUS, Literal: string(l.ch), Line: l.line, Column: l.column} case '*': tok = Token{Type: STAR, Literal: string(l.ch), Line: l.line, Column: l.column} case '/': tok = Token{Type: SLASH, Literal: string(l.ch), Line: l.line, Column: l.column} case '%': tok = Token{Type: MOD, Literal: string(l.ch), Line: l.line, Column: l.column} case ':': tok = Token{Type: COLON, Literal: string(l.ch), Line: l.line, Column: l.column} case '.': // Check for ellipsis (...) if l.peekChar() == '.' && l.peekCharAt(2) == '.' { l.readChar() // skip first '.' l.readChar() // skip second '.' tok = Token{Type: ELLIPSIS, Literal: "...", Line: l.line, Column: l.column} } else { tok = Token{Type: DOT, Literal: string(l.ch), Line: l.line, Column: l.column} } case '(': tok = Token{Type: LPAREN, Literal: string(l.ch), Line: l.line, Column: l.column} case ')': tok = Token{Type: RPAREN, Literal: string(l.ch), Line: l.line, Column: l.column} case '{': tok = Token{Type: LBRACE, Literal: string(l.ch), Line: l.line, Column: l.column} case '}': tok = Token{Type: RBRACE, Literal: string(l.ch), Line: l.line, Column: l.column} case '[': if l.peekChar() == '[' { tok.Type = STRING tok.Literal = l.readMultilineString() } else { tok = Token{Type: LBRACKET, Literal: string(l.ch), Line: l.line, Column: l.column} } case ']': tok = Token{Type: RBRACKET, Literal: string(l.ch), Line: l.line, Column: l.column} case ',': tok = Token{Type: COMMA, Literal: string(l.ch), Line: l.line, Column: l.column} case '"': tok.Type = STRING tok.Literal = l.readString() case 0: tok.Literal = "" tok.Type = EOF default: if isLetter(l.ch) { tok.Literal = l.readIdentifier() tok.Type = lookupIdent(tok.Literal) return tok } else if isDigit(l.ch) { tok.Type = NUMBER tok.Literal = l.readNumber() return tok } else { tok = Token{Type: ILLEGAL, Literal: string(l.ch), Line: l.line, Column: l.column} } } l.readChar() return tok } // Helper functions func isLetter(ch byte) bool { return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' } func isDigit(ch byte) bool { return '0' <= ch && ch <= '9' } func isHexDigit(ch byte) bool { return isDigit(ch) || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F') } func isBinaryDigit(ch byte) bool { return ch == '0' || ch == '1' }