diff --git a/parser/lexer.go b/parser/lexer.go
index 84ebd10..43d2525 100644
--- a/parser/lexer.go
+++ b/parser/lexer.go
@@ -89,21 +89,74 @@ func (l *Lexer) readIdentifier() string {
 	return l.input[position:l.position]
 }
 
-// readNumber reads a number (including decimals)
+// readNumber reads a number (decimal, hex, binary, or scientific notation)
 func (l *Lexer) readNumber() string {
 	position := l.position
+
+	// Check for hex (0x/0X) or binary (0b/0B) prefix
+	if l.ch == '0' && (l.peekChar() == 'x' || l.peekChar() == 'X') {
+		return l.readHexNumber()
+	}
+	if l.ch == '0' && (l.peekChar() == 'b' || l.peekChar() == 'B') {
+		return l.readBinaryNumber()
+	}
+
+	// Read regular decimal number
 	for isDigit(l.ch) {
 		l.readChar()
 	}
 
-	// Handle decimal points
+	// Handle decimal point
 	if l.ch == '.' && isDigit(l.peekChar()) {
-		l.readChar()
+		l.readChar() // consume '.'
 		for isDigit(l.ch) {
 			l.readChar()
 		}
 	}
 
+	// Handle scientific notation (e/E)
+	if l.ch == 'e' || l.ch == 'E' {
+		l.readChar() // consume 'e'/'E'
+
+		// Optional +/- sign
+		if l.ch == '+' || l.ch == '-' {
+			l.readChar()
+		}
+
+		// Read the exponent digits; a missing exponent ("1e", "1e+") is left for the parser to reject
+		for isDigit(l.ch) {
+			l.readChar()
+		}
+	}
+
+	return l.input[position:l.position]
+}
+
+// readHexNumber reads a hexadecimal number (0x...)
+func (l *Lexer) readHexNumber() string {
+	position := l.position
+	l.readChar() // skip '0'
+	l.readChar() // skip 'x'/'X'
+
+	// Consume every letter/digit so malformed input ("0xGHI") stays one token for the parser to reject
+	for isHexDigit(l.ch) || isLetter(l.ch) {
+		l.readChar()
+	}
+
+	return l.input[position:l.position]
+}
+
+// readBinaryNumber reads a binary number (0b...)
+func (l *Lexer) readBinaryNumber() string {
+	position := l.position
+	l.readChar() // skip '0'
+	l.readChar() // skip 'b'/'B'
+
+	// Consume every letter/digit so malformed input ("0b123") stays one token for the parser to reject
+	for isDigit(l.ch) || isLetter(l.ch) {
+		l.readChar()
+	}
+
 	return l.input[position:l.position]
 }
 
@@ -187,3 +240,8 @@ func isLetter(ch byte) bool {
 func isDigit(ch byte) bool {
 	return '0' <= ch && ch <= '9'
 }
+
+// isHexDigit reports whether ch is one of 0-9, a-f or A-F.
+func isHexDigit(ch byte) bool {
+	return isDigit(ch) || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F')
+}
diff --git a/parser/parser.go b/parser/parser.go
index dc45201..6d8052d 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -3,6 +3,7 @@ package parser
 import (
 	"fmt"
 	"strconv"
+	"strings"
 )
 
 // ParseError represents a parsing error with location information
@@ -178,11 +179,38 @@ func (p *Parser) parseIdentifier() Expression {
 
+// parseNumberLiteral builds a NumberLiteral from the current token's literal.
+// It records a parse error and returns nil when the literal is malformed.
 func (p *Parser) parseNumberLiteral() Expression {
 	lit := &NumberLiteral{}
+	literal := p.curToken.Literal
 
-	value, err := strconv.ParseFloat(p.curToken.Literal, 64)
-	if err != nil {
-		p.addError(fmt.Sprintf("could not parse '%s' as number", p.curToken.Literal))
-		return nil
+	// Radix-prefixed literals are parsed as unsigned 64-bit integers so the
+	// whole 0x0..0xFFFFFFFFFFFFFFFF range is accepted. strconv.ParseUint
+	// rejects an empty digit string, digits outside the radix and overflow,
+	// so the lexer's greedy tokens ("0x", "0xGHI", "0b123") fail here.
+	var value float64
+	switch {
+	case strings.HasPrefix(literal, "0x") || strings.HasPrefix(literal, "0X"):
+		intVal, err := strconv.ParseUint(literal[2:], 16, 64)
+		if err != nil {
+			p.addError(fmt.Sprintf("could not parse '%s' as hexadecimal number", literal))
+			return nil
+		}
+		value = float64(intVal)
+	case strings.HasPrefix(literal, "0b") || strings.HasPrefix(literal, "0B"):
+		intVal, err := strconv.ParseUint(literal[2:], 2, 64)
+		if err != nil {
+			p.addError(fmt.Sprintf("could not parse '%s' as binary number", literal))
+			return nil
+		}
+		value = float64(intVal)
+	default:
+		// Decimal literal; ParseFloat also handles scientific notation.
+		floatVal, err := strconv.ParseFloat(literal, 64)
+		if err != nil {
+			p.addError(fmt.Sprintf("could not parse '%s' as number", literal))
+			return nil
+		}
+		value = floatVal
 	}
 
 	lit.Value = value
diff --git a/parser/parser_test.go b/parser/parser_test.go
index 1c0a8ee..1541ae4 100644
--- a/parser/parser_test.go
+++ b/parser/parser_test.go
@@ -5,6 +5,259 @@ import (
 	"testing"
 )
 
+func TestExtendedNumberLiterals(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected float64
+		desc     string
+	}{
+		// Hexadecimal
+		{"0x10", 16.0, "lowercase hex"},
+		{"0X10", 16.0, "uppercase hex"},
+		{"0xff", 255.0, "hex with letters"},
+		{"0XFF", 255.0, "hex with uppercase letters"},
+		{"0x0", 0.0, "hex zero"},
+		{"0xDEADBEEF", 3735928559.0, "large hex"},
+		{"0xFFFFFFFFFFFFFFFF", 18446744073709551615.0, "max uint64 hex"},
+
+		// Binary
+		{"0b1010", 10.0, "lowercase binary"},
+		{"0B1010", 10.0, "uppercase binary"},
+		{"0b0", 0.0, "binary zero"},
+		{"0b1", 1.0, "binary one"},
+		{"0b11111111", 255.0, "8-bit binary"},
+
+		// Scientific notation
+		{"1e2", 100.0, "simple scientific"},
+		{"1E2", 100.0, "uppercase E"},
+		{"1.5e2", 150.0, "decimal with exponent"},
+		{"2e-1", 0.2, "negative exponent"},
+		{"1.23e+4", 12300.0, "positive exponent with +"},
+		{"3.14159e0", 3.14159, "zero exponent"},
+		{"1e10", 1e10, "large exponent"},
+
+		// Regular decimals (should still work)
+		{"42", 42.0, "integer"},
+		{"3.14", 3.14, "decimal"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.desc, func(t *testing.T) {
+			l := NewLexer(tt.input)
+			p := NewParser(l)
+			expr := p.parseExpression(LOWEST)
+			checkParserErrors(t, p)
+
+			testNumberLiteral(t, expr, tt.expected)
+		})
+	}
+}
+
+func TestExtendedNumberAssignments(t *testing.T) {
+	tests := []struct {
+		input      string
+		identifier string
+		expected   float64
+		desc       string
+	}{
+		{"hex = 0xFF", "hex", 255.0, "hex assignment"},
+		{"bin = 0b1111", "bin", 15.0, "binary assignment"},
+		{"sci = 1.5e3", "sci", 1500.0, "scientific assignment"},
+		{"large = 0xDEADBEEF", "large", 3735928559.0, "large hex"},
+		{"small = 2e-5", "small", 0.00002, "small scientific"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.desc, func(t *testing.T) {
+			l := NewLexer(tt.input)
+			p := NewParser(l)
+			program := p.ParseProgram()
+			checkParserErrors(t, p)
+
+			if len(program.Statements) != 1 {
+				t.Fatalf("expected 1 statement, got %d", len(program.Statements))
+			}
+
+			stmt, ok := program.Statements[0].(*AssignStatement)
+			if !ok {
+				t.Fatalf("expected AssignStatement, got %T", program.Statements[0])
+			}
+
+			if stmt.Name.Value != tt.identifier {
+				t.Errorf("expected identifier %s, got %s", tt.identifier, stmt.Name.Value)
+			}
+
+			testNumberLiteral(t, stmt.Value, tt.expected)
+		})
+	}
+}
+
+func TestExtendedNumberExpressions(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected string
+		desc     string
+	}{
+		{"0x10 + 0b1010", "(16.00 + 10.00)", "hex + binary"},
+		{"1e2 * 0xFF", "(100.00 * 255.00)", "scientific * hex"},
+		{"0b11 - 1e1", "(3.00 - 10.00)", "binary - scientific"},
+		{"(0x10 + 0b10) * 1e1", "((16.00 + 2.00) * 10.00)", "mixed with precedence"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.desc, func(t *testing.T) {
+			l := NewLexer(tt.input)
+			p := NewParser(l)
+			expr := p.parseExpression(LOWEST)
+			checkParserErrors(t, p)
+
+			if expr.String() != tt.expected {
+				t.Errorf("expected %s, got %s", tt.expected, expr.String())
+			}
+		})
+	}
+}
+
+func TestExtendedNumberErrors(t *testing.T) {
+	tests := []struct {
+		input         string
+		expectedError string
+		desc          string
+	}{
+		{"0x", "could not parse '0x' as hexadecimal number", "incomplete hex"},
+		{"0b", "could not parse '0b' as binary number", "incomplete binary"},
+		{"0xGHI", "could not parse '0xGHI' as hexadecimal number", "invalid hex digits"},
+		{"0b123", "could not parse '0b123' as binary number", "invalid binary digits"},
+		{"1e", "could not parse '1e' as number", "incomplete scientific"},
+		{"1e+", "could not parse '1e+' as number", "scientific without digits"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.desc, func(t *testing.T) {
+			l := NewLexer(tt.input)
+			p := NewParser(l)
+			p.parseExpression(LOWEST)
+
+			errors := p.Errors()
+			if len(errors) == 0 {
+				t.Fatalf("expected parsing errors, got none")
+			}
+
+			found := false
+			for _, err := range errors {
+				if strings.Contains(err.Message, tt.expectedError) {
+					found = true
+					break
+				}
+			}
+
+			if !found {
+				errorMsgs := make([]string, len(errors))
+				for i, err := range errors {
+					errorMsgs[i] = err.Message
+				}
+				t.Errorf("expected error containing %q, got %v", tt.expectedError, errorMsgs)
+			}
+		})
+	}
+}
+
+func TestExtendedNumberStringRepresentation(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected string
+		desc     string
+	}{
+		{"0xFF", "255.00", "hex string representation"},
+		{"0b1111", "15.00", "binary string representation"},
+		{"1e3", "1000.00", "scientific string representation"},
+		{"1.5e2", "150.00", "decimal scientific string representation"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.desc, func(t *testing.T) {
+			l := NewLexer(tt.input)
+			p := NewParser(l)
+			expr := p.parseExpression(LOWEST)
+			checkParserErrors(t, p)
+
+			if expr.String() != tt.expected {
+				t.Errorf("expected %s, got %s", tt.expected, expr.String())
+			}
+		})
+	}
+}
+
+func TestTableWithExtendedNumbers(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected string
+		desc     string
+	}{
+		{"{0xFF, 0b1010}", "{255.00, 10.00}", "array with hex and binary"},
+		{"{hex = 0xFF, bin = 0b1010}", "{hex = 255.00, bin = 10.00}", "hash with extended numbers"},
+		{"{1e2, 0x10, 0b10}", "{100.00, 16.00, 2.00}", "mixed number formats"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.desc, func(t *testing.T) {
+			l := NewLexer(tt.input)
+			p := NewParser(l)
+			expr := p.parseExpression(LOWEST)
+			checkParserErrors(t, p)
+
+			if expr.String() != tt.expected {
+				t.Errorf("expected %s, got %s", tt.expected, expr.String())
+			}
+		})
+	}
+}
+
+func TestLexerExtendedNumbers(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected []Token
+		desc     string
+	}{
+		{
+			"0xFF + 0b1010",
+			[]Token{
+				{Type: NUMBER, Literal: "0xFF"},
+				{Type: PLUS, Literal: "+"},
+				{Type: NUMBER, Literal: "0b1010"},
+				{Type: EOF, Literal: ""},
+			},
+			"hex and binary tokens",
+		},
+		{
+			"1.5e-3 * 2E+4",
+			[]Token{
+				{Type: NUMBER, Literal: "1.5e-3"},
+				{Type: STAR, Literal: "*"},
+				{Type: NUMBER, Literal: "2E+4"},
+				{Type: EOF, Literal: ""},
+			},
+			"scientific notation tokens",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.desc, func(t *testing.T) {
+			l := NewLexer(tt.input)
+
+			for i, expectedToken := range tt.expected {
+				tok := l.NextToken()
+				if tok.Type != expectedToken.Type {
+					t.Errorf("token %d: expected type %v, got %v", i, expectedToken.Type, tok.Type)
+				}
+				if tok.Literal != expectedToken.Literal {
+					t.Errorf("token %d: expected literal %s, got %s", i, expectedToken.Literal, tok.Literal)
+				}
+			}
+		})
+	}
+}
+
 func TestLiterals(t *testing.T) {
 	tests := []struct {
 		input    string
@@ -20,7 +273,6 @@ func TestLiterals(t *testing.T) {
 
 	for _, tt := range tests {
 		t.Run(tt.input, func(t *testing.T) {
-			// Parse as expression directly - literals are not valid statements
 			l := NewLexer(tt.input)
 			p := NewParser(l)
 			expr := p.parseExpression(LOWEST)
@@ -158,7 +410,7 @@ func TestAssignStatements(t *testing.T) {
 		input              string
 		expectedIdentifier string
 		expectedValue      any
-		isExpression       bool // true if expectedValue is expression string representation
+		isExpression       bool
 	}{
 		{"x = 42", "x", 42.0, false},
 		{"name = \"test\"", "name", "test", false},
@@ -188,12 +440,10 @@ func TestAssignStatements(t *testing.T) {
 			}
 
 			if tt.isExpression {
-				// Test the string representation of the expression
 				if stmt.Value.String() != tt.expectedValue.(string) {
 					t.Errorf("expected expression %s, got %s", tt.expectedValue.(string), stmt.Value.String())
 				}
 			} else {
-				// Test the actual value based on type
 				switch expected := tt.expectedValue.(type) {
 				case float64:
 					testNumberLiteral(t, stmt.Value, expected)
@@ -285,13 +535,10 @@ func TestParsingErrors(t *testing.T) {
 			l := NewLexer(tt.input)
 			p := NewParser(l)
 
-			// Decide parsing strategy based on the type of error we're testing
 			switch tt.input {
 			case "(1 + 2", "+ 5", "1 +", "{1, 2", "{a =", "{a = 1,":
-				// These are expression-level errors
 				p.parseExpression(LOWEST)
 			default:
-				// These are statement-level errors
 				p.ParseProgram()
 			}
 
@@ -331,7 +578,6 @@ y = "hello"`
 	p := NewParser(l)
 	program := p.ParseProgram()
 
-	// Should have errors but still parse valid statements
 	if !p.HasErrors() {
 		t.Fatal("expected parsing errors")
 	}
@@ -352,7 +598,6 @@ y = "hello"`
 		t.Error("expected specific assignment error")
 	}
 
-	// Should still have parsed the valid statements
 	validStatements := 0
 	for _, stmt := range program.Statements {
 		if stmt != nil {