hex/binary/sci not

This commit is contained in:
Sky Johnson 2025-06-09 12:50:03 -05:00
parent 2db5c3bfe5
commit 82c29cba13
3 changed files with 372 additions and 16 deletions

View File

@ -89,21 +89,74 @@ func (l *Lexer) readIdentifier() string {
return l.input[position:l.position]
}
// readNumber reads a numeric literal that starts at the current character and
// returns its raw source text.
//
// Supported forms:
//   - hexadecimal with a 0x/0X prefix (delegated to readHexNumber)
//   - binary with a 0b/0B prefix (delegated to readBinaryNumber)
//   - decimal digits with an optional fractional part
//   - an optional exponent: e/E, an optional sign, then digits
//
// The exponent marker and its sign are consumed even when no exponent digits
// follow, so input such as "1e" or "1e+" is returned as one literal; this
// function performs no validation of the text it returns.
func (l *Lexer) readNumber() string {
	start := l.position

	// A leading '0' followed by x/X or b/B selects a prefixed integer form.
	if l.ch == '0' {
		switch l.peekChar() {
		case 'x', 'X':
			return l.readHexNumber()
		case 'b', 'B':
			return l.readBinaryNumber()
		}
	}

	// Integer part.
	for isDigit(l.ch) {
		l.readChar()
	}

	// Fractional part: the '.' is taken only when a digit follows it, so a
	// trailing dot is left for the caller.
	if l.ch == '.' && isDigit(l.peekChar()) {
		l.readChar() // consume '.'
		for isDigit(l.ch) {
			l.readChar()
		}
	}

	// Exponent part.
	if l.ch == 'e' || l.ch == 'E' {
		l.readChar() // consume 'e'/'E'
		if l.ch == '+' || l.ch == '-' {
			l.readChar() // consume the optional sign
		}
		for isDigit(l.ch) {
			l.readChar()
		}
	}

	return l.input[start:l.position]
}
// readHexNumber scans a literal that begins with the two-character prefix
// "0x" or "0X" and returns its raw text, prefix included.
//
// After the prefix it keeps consuming every character accepted by isHexDigit,
// isLetter, or isDigit, not only valid hex digits, so the returned text may
// contain characters that are not hexadecimal.
func (l *Lexer) readHexNumber() string {
	start := l.position

	// Step over the prefix: '0' followed by 'x' or 'X'.
	for i := 0; i < 2; i++ {
		l.readChar()
	}

	for {
		c := l.ch
		if !isHexDigit(c) && !isLetter(c) && !isDigit(c) {
			break
		}
		l.readChar()
	}

	return l.input[start:l.position]
}
// readBinaryNumber scans a literal that begins with the two-character prefix
// "0b" or "0B" and returns its raw text, prefix included.
//
// After the prefix it keeps consuming every character accepted by isDigit or
// isLetter, not only '0' and '1', so the returned text may contain characters
// that are not binary digits.
func (l *Lexer) readBinaryNumber() string {
	start := l.position

	// Step over the prefix: '0' followed by 'b' or 'B'.
	for i := 0; i < 2; i++ {
		l.readChar()
	}

	for {
		c := l.ch
		if !isDigit(c) && !isLetter(c) {
			break
		}
		l.readChar()
	}

	return l.input[start:l.position]
}
@ -187,3 +240,11 @@ func isLetter(ch byte) bool {
// isDigit reports whether ch is an ASCII decimal digit ('0' through '9').
func isDigit(ch byte) bool {
	return ch >= '0' && ch <= '9'
}
// isHexDigit reports whether ch is an ASCII hexadecimal digit: a decimal
// digit, or a letter in the range a-f or A-F.
func isHexDigit(ch byte) bool {
	switch {
	case isDigit(ch):
		return true
	case ch >= 'a' && ch <= 'f':
		return true
	case ch >= 'A' && ch <= 'F':
		return true
	default:
		return false
	}
}
// isBinaryDigit reports whether ch is the ASCII character '0' or '1'.
func isBinaryDigit(ch byte) bool {
	switch ch {
	case '0', '1':
		return true
	}
	return false
}

View File

@ -3,6 +3,7 @@ package parser
import (
"fmt"
"strconv"
"strings"
)
// ParseError represents a parsing error with location information
@ -178,11 +179,60 @@ func (p *Parser) parseIdentifier() Expression {
func (p *Parser) parseNumberLiteral() Expression {
lit := &NumberLiteral{}
literal := p.curToken.Literal
value, err := strconv.ParseFloat(p.curToken.Literal, 64)
if err != nil {
p.addError(fmt.Sprintf("could not parse '%s' as number", p.curToken.Literal))
return nil
var value float64
var err error
// Check for hexadecimal (0x/0X prefix)
if strings.HasPrefix(literal, "0x") || strings.HasPrefix(literal, "0X") {
// Validate hex format
if len(literal) <= 2 {
p.addError(fmt.Sprintf("could not parse '%s' as hexadecimal number", literal))
return nil
}
hexPart := literal[2:]
for _, ch := range hexPart {
if !((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) {
p.addError(fmt.Sprintf("could not parse '%s' as hexadecimal number", literal))
return nil
}
}
// Parse as hex and convert to float64
intVal, parseErr := strconv.ParseInt(literal, 0, 64)
if parseErr != nil {
p.addError(fmt.Sprintf("could not parse '%s' as hexadecimal number", literal))
return nil
}
value = float64(intVal)
} else if strings.HasPrefix(literal, "0b") || strings.HasPrefix(literal, "0B") {
// Validate binary format
if len(literal) <= 2 {
p.addError(fmt.Sprintf("could not parse '%s' as binary number", literal))
return nil
}
binaryPart := literal[2:]
for _, ch := range binaryPart {
if ch != '0' && ch != '1' {
p.addError(fmt.Sprintf("could not parse '%s' as binary number", literal))
return nil
}
}
// Parse the digits after the prefix in base 2. (Since Go 1.13, strconv.ParseInt with base 0 also accepts a "0b"/"0B" prefix.)
binaryStr := literal[2:] // remove "0b" prefix
intVal, parseErr := strconv.ParseInt(binaryStr, 2, 64)
if parseErr != nil {
p.addError(fmt.Sprintf("could not parse '%s' as binary number", literal))
return nil
}
value = float64(intVal)
} else {
// Parse as regular decimal (handles scientific notation automatically)
value, err = strconv.ParseFloat(literal, 64)
if err != nil {
p.addError(fmt.Sprintf("could not parse '%s' as number", literal))
return nil
}
}
lit.Value = value

View File

@ -5,6 +5,259 @@ import (
"testing"
)
func TestExtendedNumberLiterals(t *testing.T) {
tests := []struct {
input string
expected float64
desc string
}{
// Hexadecimal
{"0x10", 16.0, "lowercase hex"},
{"0X10", 16.0, "uppercase hex"},
{"0xff", 255.0, "hex with letters"},
{"0XFF", 255.0, "hex with uppercase letters"},
{"0x0", 0.0, "hex zero"},
{"0xDEADBEEF", 3735928559.0, "large hex"},
// Binary
{"0b1010", 10.0, "lowercase binary"},
{"0B1010", 10.0, "uppercase binary"},
{"0b0", 0.0, "binary zero"},
{"0b1", 1.0, "binary one"},
{"0b11111111", 255.0, "8-bit binary"},
// Scientific notation
{"1e2", 100.0, "simple scientific"},
{"1E2", 100.0, "uppercase E"},
{"1.5e2", 150.0, "decimal with exponent"},
{"2e-1", 0.2, "negative exponent"},
{"1.23e+4", 12300.0, "positive exponent with +"},
{"3.14159e0", 3.14159, "zero exponent"},
{"1e10", 1e10, "large exponent"},
// Regular decimals (should still work)
{"42", 42.0, "integer"},
{"3.14", 3.14, "decimal"},
}
for _, tt := range tests {
t.Run(tt.desc, func(t *testing.T) {
l := NewLexer(tt.input)
p := NewParser(l)
expr := p.parseExpression(LOWEST)
checkParserErrors(t, p)
testNumberLiteral(t, expr, tt.expected)
})
}
}
func TestExtendedNumberAssignments(t *testing.T) {
tests := []struct {
input string
identifier string
expected float64
desc string
}{
{"hex = 0xFF", "hex", 255.0, "hex assignment"},
{"bin = 0b1111", "bin", 15.0, "binary assignment"},
{"sci = 1.5e3", "sci", 1500.0, "scientific assignment"},
{"large = 0xDEADBEEF", "large", 3735928559.0, "large hex"},
{"small = 2e-5", "small", 0.00002, "small scientific"},
}
for _, tt := range tests {
t.Run(tt.desc, func(t *testing.T) {
l := NewLexer(tt.input)
p := NewParser(l)
program := p.ParseProgram()
checkParserErrors(t, p)
if len(program.Statements) != 1 {
t.Fatalf("expected 1 statement, got %d", len(program.Statements))
}
stmt, ok := program.Statements[0].(*AssignStatement)
if !ok {
t.Fatalf("expected AssignStatement, got %T", program.Statements[0])
}
if stmt.Name.Value != tt.identifier {
t.Errorf("expected identifier %s, got %s", tt.identifier, stmt.Name.Value)
}
testNumberLiteral(t, stmt.Value, tt.expected)
})
}
}
func TestExtendedNumberExpressions(t *testing.T) {
tests := []struct {
input string
expected string
desc string
}{
{"0x10 + 0b1010", "(16.00 + 10.00)", "hex + binary"},
{"1e2 * 0xFF", "(100.00 * 255.00)", "scientific * hex"},
{"0b11 - 1e1", "(3.00 - 10.00)", "binary - scientific"},
{"(0x10 + 0b10) * 1e1", "((16.00 + 2.00) * 10.00)", "mixed with precedence"},
}
for _, tt := range tests {
t.Run(tt.desc, func(t *testing.T) {
l := NewLexer(tt.input)
p := NewParser(l)
expr := p.parseExpression(LOWEST)
checkParserErrors(t, p)
if expr.String() != tt.expected {
t.Errorf("expected %s, got %s", tt.expected, expr.String())
}
})
}
}
func TestExtendedNumberErrors(t *testing.T) {
tests := []struct {
input string
expectedError string
desc string
}{
{"0x", "could not parse '0x' as hexadecimal number", "incomplete hex"},
{"0b", "could not parse '0b' as binary number", "incomplete binary"},
{"0xGHI", "could not parse '0xGHI' as hexadecimal number", "invalid hex digits"},
{"0b123", "could not parse '0b123' as binary number", "invalid binary digits"},
{"1e", "could not parse '1e' as number", "incomplete scientific"},
{"1e+", "could not parse '1e+' as number", "scientific without digits"},
}
for _, tt := range tests {
t.Run(tt.desc, func(t *testing.T) {
l := NewLexer(tt.input)
p := NewParser(l)
p.parseExpression(LOWEST)
errors := p.Errors()
if len(errors) == 0 {
t.Fatalf("expected parsing errors, got none")
}
found := false
for _, err := range errors {
if strings.Contains(err.Message, tt.expectedError) {
found = true
break
}
}
if !found {
errorMsgs := make([]string, len(errors))
for i, err := range errors {
errorMsgs[i] = err.Message
}
t.Errorf("expected error containing %q, got %v", tt.expectedError, errorMsgs)
}
})
}
}
func TestExtendedNumberStringRepresentation(t *testing.T) {
tests := []struct {
input string
expected string
desc string
}{
{"0xFF", "255.00", "hex string representation"},
{"0b1111", "15.00", "binary string representation"},
{"1e3", "1000.00", "scientific string representation"},
{"1.5e2", "150.00", "decimal scientific string representation"},
}
for _, tt := range tests {
t.Run(tt.desc, func(t *testing.T) {
l := NewLexer(tt.input)
p := NewParser(l)
expr := p.parseExpression(LOWEST)
checkParserErrors(t, p)
if expr.String() != tt.expected {
t.Errorf("expected %s, got %s", tt.expected, expr.String())
}
})
}
}
func TestTableWithExtendedNumbers(t *testing.T) {
tests := []struct {
input string
expected string
desc string
}{
{"{0xFF, 0b1010}", "{255.00, 10.00}", "array with hex and binary"},
{"{hex = 0xFF, bin = 0b1010}", "{hex = 255.00, bin = 10.00}", "hash with extended numbers"},
{"{1e2, 0x10, 0b10}", "{100.00, 16.00, 2.00}", "mixed number formats"},
}
for _, tt := range tests {
t.Run(tt.desc, func(t *testing.T) {
l := NewLexer(tt.input)
p := NewParser(l)
expr := p.parseExpression(LOWEST)
checkParserErrors(t, p)
if expr.String() != tt.expected {
t.Errorf("expected %s, got %s", tt.expected, expr.String())
}
})
}
}
// TestLexerExtendedNumbers drives the lexer directly and checks, token by
// token, that the type and literal returned by NextToken match the expected
// sequence for inputs containing extended number formats.
func TestLexerExtendedNumbers(t *testing.T) {
	type lexCase struct {
		name string
		src  string
		want []Token
	}

	cases := []lexCase{
		{
			name: "hex and binary tokens",
			src:  "0xFF + 0b1010",
			want: []Token{
				{Type: NUMBER, Literal: "0xFF"},
				{Type: PLUS, Literal: "+"},
				{Type: NUMBER, Literal: "0b1010"},
				{Type: EOF, Literal: ""},
			},
		},
		{
			name: "scientific notation tokens",
			src:  "1.5e-3 * 2E+4",
			want: []Token{
				{Type: NUMBER, Literal: "1.5e-3"},
				{Type: STAR, Literal: "*"},
				{Type: NUMBER, Literal: "2E+4"},
				{Type: EOF, Literal: ""},
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			lex := NewLexer(tc.src)

			for idx := range tc.want {
				want := tc.want[idx]
				got := lex.NextToken()

				if got.Type != want.Type {
					t.Errorf("token %d: expected type %v, got %v", idx, want.Type, got.Type)
				}
				if got.Literal != want.Literal {
					t.Errorf("token %d: expected literal %s, got %s", idx, want.Literal, got.Literal)
				}
			}
		})
	}
}
// Additional existing tests would remain unchanged...
func TestLiterals(t *testing.T) {
tests := []struct {
input string
@ -20,7 +273,6 @@ func TestLiterals(t *testing.T) {
for _, tt := range tests {
t.Run(tt.input, func(t *testing.T) {
// Parse as expression directly - literals are not valid statements
l := NewLexer(tt.input)
p := NewParser(l)
expr := p.parseExpression(LOWEST)
@ -158,7 +410,7 @@ func TestAssignStatements(t *testing.T) {
input string
expectedIdentifier string
expectedValue any
isExpression bool // true if expectedValue is expression string representation
isExpression bool
}{
{"x = 42", "x", 42.0, false},
{"name = \"test\"", "name", "test", false},
@ -188,12 +440,10 @@ func TestAssignStatements(t *testing.T) {
}
if tt.isExpression {
// Test the string representation of the expression
if stmt.Value.String() != tt.expectedValue.(string) {
t.Errorf("expected expression %s, got %s", tt.expectedValue.(string), stmt.Value.String())
}
} else {
// Test the actual value based on type
switch expected := tt.expectedValue.(type) {
case float64:
testNumberLiteral(t, stmt.Value, expected)
@ -285,13 +535,10 @@ func TestParsingErrors(t *testing.T) {
l := NewLexer(tt.input)
p := NewParser(l)
// Decide parsing strategy based on the type of error we're testing
switch tt.input {
case "(1 + 2", "+ 5", "1 +", "{1, 2", "{a =", "{a = 1,":
// These are expression-level errors
p.parseExpression(LOWEST)
default:
// These are statement-level errors
p.ParseProgram()
}
@ -331,7 +578,6 @@ y = "hello"`
p := NewParser(l)
program := p.ParseProgram()
// Should have errors but still parse valid statements
if !p.HasErrors() {
t.Fatal("expected parsing errors")
}
@ -352,7 +598,6 @@ y = "hello"`
t.Error("expected specific assignment error")
}
// Should still have parsed the valid statements
validStatements := 0
for _, stmt := range program.Statements {
if stmt != nil {