hex/binary/sci notation

2025-06-09 12:50:03 -05:00 · 2025-06-09 12:50:03 -05:00 · 82c29cba13
commit 82c29cba13
parent 2db5c3bfe5
3 changed files with 372 additions and 16 deletions
--- a/parser/lexer.go
+++ b/parser/lexer.go
@ -89,21 +89,74 @@ func (l *Lexer) readIdentifier() string {
 	return l.input[position:l.position]
 }

-// readNumber reads a number (including decimals)
+// readNumber reads a number (decimal, hex, binary, or scientific notation)
 func (l *Lexer) readNumber() string {
 	position := l.position
+
+	// Check for hex (0x/0X) or binary (0b/0B) prefix
+	if l.ch == '0' && (l.peekChar() == 'x' || l.peekChar() == 'X') {
+		return l.readHexNumber()
+	}
+	if l.ch == '0' && (l.peekChar() == 'b' || l.peekChar() == 'B') {
+		return l.readBinaryNumber()
+	}
+
+	// Read regular decimal number
 	for isDigit(l.ch) {
 		l.readChar()
 	}

-	// Handle decimal points
+	// Handle decimal point
 	if l.ch == '.' && isDigit(l.peekChar()) {
-		l.readChar()
+		l.readChar() // consume '.'
 		for isDigit(l.ch) {
 			l.readChar()
 		}
 	}

+	// Handle scientific notation (e/E)
+	if l.ch == 'e' || l.ch == 'E' {
+		l.readChar() // consume 'e'/'E'
+
+		// Optional +/- sign
+		if l.ch == '+' || l.ch == '-' {
+			l.readChar()
+		}
+
+		// Continue reading digits for the exponent
+		for isDigit(l.ch) {
+			l.readChar()
+		}
+	}
+
+	return l.input[position:l.position]
+}
+
+// readHexNumber reads a hexadecimal number (0x...)
+func (l *Lexer) readHexNumber() string {
+	position := l.position
+	l.readChar() // skip '0'
+	l.readChar() // skip 'x'/'X'
+
+	// Consume every letter or digit so a malformed literal (e.g. 0xGHI) stays one token; the parser validates the digits
+	for isHexDigit(l.ch) || isLetter(l.ch) || isDigit(l.ch) {
+		l.readChar()
+	}
+
+	return l.input[position:l.position]
+}
+
+// readBinaryNumber reads a binary number (0b...)
+func (l *Lexer) readBinaryNumber() string {
+	position := l.position
+	l.readChar() // skip '0'
+	l.readChar() // skip 'b'/'B'
+
+	// Consume every digit or letter so a malformed literal (e.g. 0b123) stays one token; the parser validates the digits
+	for isDigit(l.ch) || isLetter(l.ch) {
+		l.readChar()
+	}
+
 	return l.input[position:l.position]
 }

@ -187,3 +240,11 @@ func isLetter(ch byte) bool {
 func isDigit(ch byte) bool {
 	return '0' <= ch && ch <= '9'
 }
+
+func isHexDigit(ch byte) bool {
+	return isDigit(ch) || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F')
+}
+
+func isBinaryDigit(ch byte) bool {
+	return ch == '0' || ch == '1'
+}
--- a/parser/parser.go
+++ b/parser/parser.go
@ -3,6 +3,7 @@ package parser
 import (
 	"fmt"
 	"strconv"
+	"strings"
 )

 // ParseError represents a parsing error with location information
@ -178,11 +179,60 @@ func (p *Parser) parseIdentifier() Expression {

 func (p *Parser) parseNumberLiteral() Expression {
 	lit := &NumberLiteral{}
+	literal := p.curToken.Literal

-	value, err := strconv.ParseFloat(p.curToken.Literal, 64)
-	if err != nil {
-		p.addError(fmt.Sprintf("could not parse '%s' as number", p.curToken.Literal))
-		return nil
+	var value float64
+	var err error
+
+	// Check for hexadecimal (0x/0X prefix)
+	if strings.HasPrefix(literal, "0x") || strings.HasPrefix(literal, "0X") {
+		// Validate hex format
+		if len(literal) <= 2 {
+			p.addError(fmt.Sprintf("could not parse '%s' as hexadecimal number", literal))
+			return nil
+		}
+		hexPart := literal[2:]
+		for _, ch := range hexPart {
+			if !((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) {
+				p.addError(fmt.Sprintf("could not parse '%s' as hexadecimal number", literal))
+				return nil
+			}
+		}
+		// Parse as hex and convert to float64
+		intVal, parseErr := strconv.ParseInt(literal, 0, 64)
+		if parseErr != nil {
+			p.addError(fmt.Sprintf("could not parse '%s' as hexadecimal number", literal))
+			return nil
+		}
+		value = float64(intVal)
+	} else if strings.HasPrefix(literal, "0b") || strings.HasPrefix(literal, "0B") {
+		// Validate binary format
+		if len(literal) <= 2 {
+			p.addError(fmt.Sprintf("could not parse '%s' as binary number", literal))
+			return nil
+		}
+		binaryPart := literal[2:]
+		for _, ch := range binaryPart {
+			if ch != '0' && ch != '1' {
+				p.addError(fmt.Sprintf("could not parse '%s' as binary number", literal))
+				return nil
+			}
+		}
+		// Parse the digits in base 2; strconv.ParseInt with base 0 also accepts the "0b" prefix on Go 1.13+
+		binaryStr := literal[2:] // remove "0b" prefix
+		intVal, parseErr := strconv.ParseInt(binaryStr, 2, 64)
+		if parseErr != nil {
+			p.addError(fmt.Sprintf("could not parse '%s' as binary number", literal))
+			return nil
+		}
+		value = float64(intVal)
+	} else {
+		// Parse as regular decimal (handles scientific notation automatically)
+		value, err = strconv.ParseFloat(literal, 64)
+		if err != nil {
+			p.addError(fmt.Sprintf("could not parse '%s' as number", literal))
+			return nil
+		}
 	}

 	lit.Value = value
--- a/parser/parser_test.go
+++ b/parser/parser_test.go
@ -5,6 +5,259 @@ import (
 	"testing"
 )

+func TestExtendedNumberLiterals(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected float64
+		desc     string
+	}{
+		// Hexadecimal
+		{"0x10", 16.0, "lowercase hex"},
+		{"0X10", 16.0, "uppercase hex"},
+		{"0xff", 255.0, "hex with letters"},
+		{"0XFF", 255.0, "hex with uppercase letters"},
+		{"0x0", 0.0, "hex zero"},
+		{"0xDEADBEEF", 3735928559.0, "large hex"},
+
+		// Binary
+		{"0b1010", 10.0, "lowercase binary"},
+		{"0B1010", 10.0, "uppercase binary"},
+		{"0b0", 0.0, "binary zero"},
+		{"0b1", 1.0, "binary one"},
+		{"0b11111111", 255.0, "8-bit binary"},
+
+		// Scientific notation
+		{"1e2", 100.0, "simple scientific"},
+		{"1E2", 100.0, "uppercase E"},
+		{"1.5e2", 150.0, "decimal with exponent"},
+		{"2e-1", 0.2, "negative exponent"},
+		{"1.23e+4", 12300.0, "positive exponent with +"},
+		{"3.14159e0", 3.14159, "zero exponent"},
+		{"1e10", 1e10, "large exponent"},
+
+		// Regular decimals (should still work)
+		{"42", 42.0, "integer"},
+		{"3.14", 3.14, "decimal"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.desc, func(t *testing.T) {
+			l := NewLexer(tt.input)
+			p := NewParser(l)
+			expr := p.parseExpression(LOWEST)
+			checkParserErrors(t, p)
+
+			testNumberLiteral(t, expr, tt.expected)
+		})
+	}
+}
+
+func TestExtendedNumberAssignments(t *testing.T) {
+	tests := []struct {
+		input      string
+		identifier string
+		expected   float64
+		desc       string
+	}{
+		{"hex = 0xFF", "hex", 255.0, "hex assignment"},
+		{"bin = 0b1111", "bin", 15.0, "binary assignment"},
+		{"sci = 1.5e3", "sci", 1500.0, "scientific assignment"},
+		{"large = 0xDEADBEEF", "large", 3735928559.0, "large hex"},
+		{"small = 2e-5", "small", 0.00002, "small scientific"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.desc, func(t *testing.T) {
+			l := NewLexer(tt.input)
+			p := NewParser(l)
+			program := p.ParseProgram()
+			checkParserErrors(t, p)
+
+			if len(program.Statements) != 1 {
+				t.Fatalf("expected 1 statement, got %d", len(program.Statements))
+			}
+
+			stmt, ok := program.Statements[0].(*AssignStatement)
+			if !ok {
+				t.Fatalf("expected AssignStatement, got %T", program.Statements[0])
+			}
+
+			if stmt.Name.Value != tt.identifier {
+				t.Errorf("expected identifier %s, got %s", tt.identifier, stmt.Name.Value)
+			}
+
+			testNumberLiteral(t, stmt.Value, tt.expected)
+		})
+	}
+}
+
+func TestExtendedNumberExpressions(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected string
+		desc     string
+	}{
+		{"0x10 + 0b1010", "(16.00 + 10.00)", "hex + binary"},
+		{"1e2 * 0xFF", "(100.00 * 255.00)", "scientific * hex"},
+		{"0b11 - 1e1", "(3.00 - 10.00)", "binary - scientific"},
+		{"(0x10 + 0b10) * 1e1", "((16.00 + 2.00) * 10.00)", "mixed with precedence"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.desc, func(t *testing.T) {
+			l := NewLexer(tt.input)
+			p := NewParser(l)
+			expr := p.parseExpression(LOWEST)
+			checkParserErrors(t, p)
+
+			if expr.String() != tt.expected {
+				t.Errorf("expected %s, got %s", tt.expected, expr.String())
+			}
+		})
+	}
+}
+
+func TestExtendedNumberErrors(t *testing.T) {
+	tests := []struct {
+		input         string
+		expectedError string
+		desc          string
+	}{
+		{"0x", "could not parse '0x' as hexadecimal number", "incomplete hex"},
+		{"0b", "could not parse '0b' as binary number", "incomplete binary"},
+		{"0xGHI", "could not parse '0xGHI' as hexadecimal number", "invalid hex digits"},
+		{"0b123", "could not parse '0b123' as binary number", "invalid binary digits"},
+		{"1e", "could not parse '1e' as number", "incomplete scientific"},
+		{"1e+", "could not parse '1e+' as number", "scientific without digits"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.desc, func(t *testing.T) {
+			l := NewLexer(tt.input)
+			p := NewParser(l)
+			p.parseExpression(LOWEST)
+
+			errors := p.Errors()
+			if len(errors) == 0 {
+				t.Fatalf("expected parsing errors, got none")
+			}
+
+			found := false
+			for _, err := range errors {
+				if strings.Contains(err.Message, tt.expectedError) {
+					found = true
+					break
+				}
+			}
+
+			if !found {
+				errorMsgs := make([]string, len(errors))
+				for i, err := range errors {
+					errorMsgs[i] = err.Message
+				}
+				t.Errorf("expected error containing %q, got %v", tt.expectedError, errorMsgs)
+			}
+		})
+	}
+}
+
+func TestExtendedNumberStringRepresentation(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected string
+		desc     string
+	}{
+		{"0xFF", "255.00", "hex string representation"},
+		{"0b1111", "15.00", "binary string representation"},
+		{"1e3", "1000.00", "scientific string representation"},
+		{"1.5e2", "150.00", "decimal scientific string representation"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.desc, func(t *testing.T) {
+			l := NewLexer(tt.input)
+			p := NewParser(l)
+			expr := p.parseExpression(LOWEST)
+			checkParserErrors(t, p)
+
+			if expr.String() != tt.expected {
+				t.Errorf("expected %s, got %s", tt.expected, expr.String())
+			}
+		})
+	}
+}
+
+func TestTableWithExtendedNumbers(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected string
+		desc     string
+	}{
+		{"{0xFF, 0b1010}", "{255.00, 10.00}", "array with hex and binary"},
+		{"{hex = 0xFF, bin = 0b1010}", "{hex = 255.00, bin = 10.00}", "hash with extended numbers"},
+		{"{1e2, 0x10, 0b10}", "{100.00, 16.00, 2.00}", "mixed number formats"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.desc, func(t *testing.T) {
+			l := NewLexer(tt.input)
+			p := NewParser(l)
+			expr := p.parseExpression(LOWEST)
+			checkParserErrors(t, p)
+
+			if expr.String() != tt.expected {
+				t.Errorf("expected %s, got %s", tt.expected, expr.String())
+			}
+		})
+	}
+}
+
+func TestLexerExtendedNumbers(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected []Token
+		desc     string
+	}{
+		{
+			"0xFF + 0b1010",
+			[]Token{
+				{Type: NUMBER, Literal: "0xFF"},
+				{Type: PLUS, Literal: "+"},
+				{Type: NUMBER, Literal: "0b1010"},
+				{Type: EOF, Literal: ""},
+			},
+			"hex and binary tokens",
+		},
+		{
+			"1.5e-3 * 2E+4",
+			[]Token{
+				{Type: NUMBER, Literal: "1.5e-3"},
+				{Type: STAR, Literal: "*"},
+				{Type: NUMBER, Literal: "2E+4"},
+				{Type: EOF, Literal: ""},
+			},
+			"scientific notation tokens",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.desc, func(t *testing.T) {
+			l := NewLexer(tt.input)
+
+			for i, expectedToken := range tt.expected {
+				tok := l.NextToken()
+				if tok.Type != expectedToken.Type {
+					t.Errorf("token %d: expected type %v, got %v", i, expectedToken.Type, tok.Type)
+				}
+				if tok.Literal != expectedToken.Literal {
+					t.Errorf("token %d: expected literal %s, got %s", i, expectedToken.Literal, tok.Literal)
+				}
+			}
+		})
+	}
+}
+
+// Additional existing tests would remain unchanged...
 func TestLiterals(t *testing.T) {
 	tests := []struct {
 		input    string
@ -20,7 +273,6 @@ func TestLiterals(t *testing.T) {

 	for _, tt := range tests {
 		t.Run(tt.input, func(t *testing.T) {
-			// Parse as expression directly - literals are not valid statements
 			l := NewLexer(tt.input)
 			p := NewParser(l)
 			expr := p.parseExpression(LOWEST)
@ -158,7 +410,7 @@ func TestAssignStatements(t *testing.T) {
 		input              string
 		expectedIdentifier string
 		expectedValue      any
-		isExpression       bool // true if expectedValue is expression string representation
+		isExpression       bool
 	}{
 		{"x = 42", "x", 42.0, false},
 		{"name = \"test\"", "name", "test", false},
@ -188,12 +440,10 @@ func TestAssignStatements(t *testing.T) {
 			}

 			if tt.isExpression {
-				// Test the string representation of the expression
 				if stmt.Value.String() != tt.expectedValue.(string) {
 					t.Errorf("expected expression %s, got %s", tt.expectedValue.(string), stmt.Value.String())
 				}
 			} else {
-				// Test the actual value based on type
 				switch expected := tt.expectedValue.(type) {
 				case float64:
 					testNumberLiteral(t, stmt.Value, expected)
@ -285,13 +535,10 @@ func TestParsingErrors(t *testing.T) {
 			l := NewLexer(tt.input)
 			p := NewParser(l)

-			// Decide parsing strategy based on the type of error we're testing
 			switch tt.input {
 			case "(1 + 2", "+ 5", "1 +", "{1, 2", "{a =", "{a = 1,":
-				// These are expression-level errors
 				p.parseExpression(LOWEST)
 			default:
-				// These are statement-level errors
 				p.ParseProgram()
 			}

@ -331,7 +578,6 @@ y = "hello"`
 	p := NewParser(l)
 	program := p.ParseProgram()

-	// Should have errors but still parse valid statements
 	if !p.HasErrors() {
 		t.Fatal("expected parsing errors")
 	}
@ -352,7 +598,6 @@ y = "hello"`
 		t.Error("expected specific assignment error")
 	}

-	// Should still have parsed the valid statements
 	validStatements := 0
 	for _, stmt := range program.Statements {
 		if stmt != nil {