package parser // Lexer tokenizes input source code type Lexer struct { input string position int readPosition int ch byte line int column int } // NewLexer creates a new lexer instance func NewLexer(input string) *Lexer { l := &Lexer{ input: input, line: 1, column: 0, } l.readChar() return l } // readChar reads the next character and advances position func (l *Lexer) readChar() { if l.readPosition >= len(l.input) { l.ch = 0 } else { l.ch = l.input[l.readPosition] } l.position = l.readPosition l.readPosition++ if l.ch == '\n' { l.line++ l.column = 0 } else { l.column++ } } // peekChar returns the next character without advancing position func (l *Lexer) peekChar() byte { if l.readPosition >= len(l.input) { return 0 } return l.input[l.readPosition] } // skipWhitespace skips whitespace characters func (l *Lexer) skipWhitespace() { for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' { l.readChar() } } // skipComment skips both line and block comments func (l *Lexer) skipComment() { if l.ch == '/' && l.peekChar() == '/' { // Line comment for l.ch != '\n' && l.ch != 0 { l.readChar() } } else if l.ch == '/' && l.peekChar() == '*' { // Block comment l.readChar() // skip '/' l.readChar() // skip '*' for { if l.ch == 0 { break } if l.ch == '*' && l.peekChar() == '/' { l.readChar() // skip '*' l.readChar() // skip '/' break } l.readChar() } } } // readIdentifier reads an identifier func (l *Lexer) readIdentifier() string { position := l.position for isLetter(l.ch) || isDigit(l.ch) { l.readChar() } return l.input[position:l.position] } // readNumber reads a number (including decimals) func (l *Lexer) readNumber() string { position := l.position for isDigit(l.ch) { l.readChar() } // Handle decimal points if l.ch == '.' && isDigit(l.peekChar()) { l.readChar() for isDigit(l.ch) { l.readChar() } } return l.input[position:l.position] } // readString reads a string literal func (l *Lexer) readString() string { position := l.position + 1 for { l.readChar() if l.ch == '"' || l.ch == 0 { break } } return l.input[position:l.position] } // NextToken returns the next token from the input func (l *Lexer) NextToken() Token { var tok Token l.skipWhitespace() // Handle comments if l.ch == '/' && (l.peekChar() == '/' || l.peekChar() == '*') { l.skipComment() l.skipWhitespace() } tok.Line = l.line tok.Column = l.column switch l.ch { case '=': tok = Token{Type: ASSIGN, Literal: string(l.ch), Line: l.line, Column: l.column} case '+': tok = Token{Type: PLUS, Literal: string(l.ch), Line: l.line, Column: l.column} case '-': tok = Token{Type: MINUS, Literal: string(l.ch), Line: l.line, Column: l.column} case '*': tok = Token{Type: STAR, Literal: string(l.ch), Line: l.line, Column: l.column} case '/': tok = Token{Type: SLASH, Literal: string(l.ch), Line: l.line, Column: l.column} case '(': tok = Token{Type: LPAREN, Literal: string(l.ch), Line: l.line, Column: l.column} case ')': tok = Token{Type: RPAREN, Literal: string(l.ch), Line: l.line, Column: l.column} case '"': tok.Type = STRING tok.Literal = l.readString() case 0: tok.Literal = "" tok.Type = EOF default: if isLetter(l.ch) { tok.Literal = l.readIdentifier() tok.Type = lookupIdent(tok.Literal) return tok } else if isDigit(l.ch) { tok.Type = NUMBER tok.Literal = l.readNumber() return tok } else { tok = Token{Type: ILLEGAL, Literal: string(l.ch), Line: l.line, Column: l.column} } } l.readChar() return tok } // Helper functions func isLetter(ch byte) bool { return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' } func isDigit(ch byte) bool { return '0' <= ch && ch <= '9' }