hex/binary/sci notation
This commit is contained in:
parent
2db5c3bfe5
commit
82c29cba13
@ -89,21 +89,74 @@ func (l *Lexer) readIdentifier() string {
|
||||
return l.input[position:l.position]
|
||||
}
|
||||
|
||||
// readNumber reads a number (including decimals)
|
||||
// readNumber reads a number (decimal, hex, binary, or scientific notation)
|
||||
func (l *Lexer) readNumber() string {
|
||||
position := l.position
|
||||
|
||||
// Check for hex (0x/0X) or binary (0b/0B) prefix
|
||||
if l.ch == '0' && (l.peekChar() == 'x' || l.peekChar() == 'X') {
|
||||
return l.readHexNumber()
|
||||
}
|
||||
if l.ch == '0' && (l.peekChar() == 'b' || l.peekChar() == 'B') {
|
||||
return l.readBinaryNumber()
|
||||
}
|
||||
|
||||
// Read regular decimal number
|
||||
for isDigit(l.ch) {
|
||||
l.readChar()
|
||||
}
|
||||
|
||||
// Handle decimal points
|
||||
// Handle decimal point
|
||||
if l.ch == '.' && isDigit(l.peekChar()) {
|
||||
l.readChar()
|
||||
l.readChar() // consume '.'
|
||||
for isDigit(l.ch) {
|
||||
l.readChar()
|
||||
}
|
||||
}
|
||||
|
||||
// Handle scientific notation (e/E)
|
||||
if l.ch == 'e' || l.ch == 'E' {
|
||||
l.readChar() // consume 'e'/'E'
|
||||
|
||||
// Optional +/- sign
|
||||
if l.ch == '+' || l.ch == '-' {
|
||||
l.readChar()
|
||||
}
|
||||
|
||||
// Continue reading digits for the exponent
|
||||
for isDigit(l.ch) {
|
||||
l.readChar()
|
||||
}
|
||||
}
|
||||
|
||||
return l.input[position:l.position]
|
||||
}
|
||||
|
||||
// readHexNumber reads a hexadecimal number (0x...)
|
||||
func (l *Lexer) readHexNumber() string {
|
||||
position := l.position
|
||||
l.readChar() // skip '0'
|
||||
l.readChar() // skip 'x'/'X'
|
||||
|
||||
// Continue reading until we hit a non-hex character
|
||||
for isHexDigit(l.ch) || isLetter(l.ch) || isDigit(l.ch) {
|
||||
l.readChar()
|
||||
}
|
||||
|
||||
return l.input[position:l.position]
|
||||
}
|
||||
|
||||
// readBinaryNumber reads a binary number (0b...)
|
||||
func (l *Lexer) readBinaryNumber() string {
|
||||
position := l.position
|
||||
l.readChar() // skip '0'
|
||||
l.readChar() // skip 'b'/'B'
|
||||
|
||||
// Continue reading until we hit a non-digit character
|
||||
for isDigit(l.ch) || isLetter(l.ch) {
|
||||
l.readChar()
|
||||
}
|
||||
|
||||
return l.input[position:l.position]
|
||||
}
|
||||
|
||||
@ -187,3 +240,11 @@ func isLetter(ch byte) bool {
|
||||
// isDigit reports whether ch is an ASCII decimal digit, '0' through '9'.
func isDigit(ch byte) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}
|
||||
|
||||
func isHexDigit(ch byte) bool {
|
||||
return isDigit(ch) || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F')
|
||||
}
|
||||
|
||||
// isBinaryDigit reports whether ch is one of the two binary digits, '0' or '1'.
func isBinaryDigit(ch byte) bool {
	switch ch {
	case '0', '1':
		return true
	default:
		return false
	}
}
|
||||
|
@ -3,6 +3,7 @@ package parser
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ParseError represents a parsing error with location information
|
||||
@ -178,11 +179,60 @@ func (p *Parser) parseIdentifier() Expression {
|
||||
|
||||
func (p *Parser) parseNumberLiteral() Expression {
|
||||
lit := &NumberLiteral{}
|
||||
literal := p.curToken.Literal
|
||||
|
||||
value, err := strconv.ParseFloat(p.curToken.Literal, 64)
|
||||
if err != nil {
|
||||
p.addError(fmt.Sprintf("could not parse '%s' as number", p.curToken.Literal))
|
||||
return nil
|
||||
var value float64
|
||||
var err error
|
||||
|
||||
// Check for hexadecimal (0x/0X prefix)
|
||||
if strings.HasPrefix(literal, "0x") || strings.HasPrefix(literal, "0X") {
|
||||
// Validate hex format
|
||||
if len(literal) <= 2 {
|
||||
p.addError(fmt.Sprintf("could not parse '%s' as hexadecimal number", literal))
|
||||
return nil
|
||||
}
|
||||
hexPart := literal[2:]
|
||||
for _, ch := range hexPart {
|
||||
if !((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) {
|
||||
p.addError(fmt.Sprintf("could not parse '%s' as hexadecimal number", literal))
|
||||
return nil
|
||||
}
|
||||
}
|
||||
// Parse as hex and convert to float64
|
||||
intVal, parseErr := strconv.ParseInt(literal, 0, 64)
|
||||
if parseErr != nil {
|
||||
p.addError(fmt.Sprintf("could not parse '%s' as hexadecimal number", literal))
|
||||
return nil
|
||||
}
|
||||
value = float64(intVal)
|
||||
} else if strings.HasPrefix(literal, "0b") || strings.HasPrefix(literal, "0B") {
|
||||
// Validate binary format
|
||||
if len(literal) <= 2 {
|
||||
p.addError(fmt.Sprintf("could not parse '%s' as binary number", literal))
|
||||
return nil
|
||||
}
|
||||
binaryPart := literal[2:]
|
||||
for _, ch := range binaryPart {
|
||||
if ch != '0' && ch != '1' {
|
||||
p.addError(fmt.Sprintf("could not parse '%s' as binary number", literal))
|
||||
return nil
|
||||
}
|
||||
}
|
||||
// Strip the prefix and parse the digits explicitly in base 2 (ParseInt with base 0 also accepts a "0b" prefix since Go 1.13)
|
||||
binaryStr := literal[2:] // remove "0b" prefix
|
||||
intVal, parseErr := strconv.ParseInt(binaryStr, 2, 64)
|
||||
if parseErr != nil {
|
||||
p.addError(fmt.Sprintf("could not parse '%s' as binary number", literal))
|
||||
return nil
|
||||
}
|
||||
value = float64(intVal)
|
||||
} else {
|
||||
// Parse as regular decimal (handles scientific notation automatically)
|
||||
value, err = strconv.ParseFloat(literal, 64)
|
||||
if err != nil {
|
||||
p.addError(fmt.Sprintf("could not parse '%s' as number", literal))
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
lit.Value = value
|
||||
|
@ -5,6 +5,259 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestExtendedNumberLiterals(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expected float64
|
||||
desc string
|
||||
}{
|
||||
// Hexadecimal
|
||||
{"0x10", 16.0, "lowercase hex"},
|
||||
{"0X10", 16.0, "uppercase hex"},
|
||||
{"0xff", 255.0, "hex with letters"},
|
||||
{"0XFF", 255.0, "hex with uppercase letters"},
|
||||
{"0x0", 0.0, "hex zero"},
|
||||
{"0xDEADBEEF", 3735928559.0, "large hex"},
|
||||
|
||||
// Binary
|
||||
{"0b1010", 10.0, "lowercase binary"},
|
||||
{"0B1010", 10.0, "uppercase binary"},
|
||||
{"0b0", 0.0, "binary zero"},
|
||||
{"0b1", 1.0, "binary one"},
|
||||
{"0b11111111", 255.0, "8-bit binary"},
|
||||
|
||||
// Scientific notation
|
||||
{"1e2", 100.0, "simple scientific"},
|
||||
{"1E2", 100.0, "uppercase E"},
|
||||
{"1.5e2", 150.0, "decimal with exponent"},
|
||||
{"2e-1", 0.2, "negative exponent"},
|
||||
{"1.23e+4", 12300.0, "positive exponent with +"},
|
||||
{"3.14159e0", 3.14159, "zero exponent"},
|
||||
{"1e10", 1e10, "large exponent"},
|
||||
|
||||
// Regular decimals (should still work)
|
||||
{"42", 42.0, "integer"},
|
||||
{"3.14", 3.14, "decimal"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.desc, func(t *testing.T) {
|
||||
l := NewLexer(tt.input)
|
||||
p := NewParser(l)
|
||||
expr := p.parseExpression(LOWEST)
|
||||
checkParserErrors(t, p)
|
||||
|
||||
testNumberLiteral(t, expr, tt.expected)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtendedNumberAssignments(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
identifier string
|
||||
expected float64
|
||||
desc string
|
||||
}{
|
||||
{"hex = 0xFF", "hex", 255.0, "hex assignment"},
|
||||
{"bin = 0b1111", "bin", 15.0, "binary assignment"},
|
||||
{"sci = 1.5e3", "sci", 1500.0, "scientific assignment"},
|
||||
{"large = 0xDEADBEEF", "large", 3735928559.0, "large hex"},
|
||||
{"small = 2e-5", "small", 0.00002, "small scientific"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.desc, func(t *testing.T) {
|
||||
l := NewLexer(tt.input)
|
||||
p := NewParser(l)
|
||||
program := p.ParseProgram()
|
||||
checkParserErrors(t, p)
|
||||
|
||||
if len(program.Statements) != 1 {
|
||||
t.Fatalf("expected 1 statement, got %d", len(program.Statements))
|
||||
}
|
||||
|
||||
stmt, ok := program.Statements[0].(*AssignStatement)
|
||||
if !ok {
|
||||
t.Fatalf("expected AssignStatement, got %T", program.Statements[0])
|
||||
}
|
||||
|
||||
if stmt.Name.Value != tt.identifier {
|
||||
t.Errorf("expected identifier %s, got %s", tt.identifier, stmt.Name.Value)
|
||||
}
|
||||
|
||||
testNumberLiteral(t, stmt.Value, tt.expected)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtendedNumberExpressions(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expected string
|
||||
desc string
|
||||
}{
|
||||
{"0x10 + 0b1010", "(16.00 + 10.00)", "hex + binary"},
|
||||
{"1e2 * 0xFF", "(100.00 * 255.00)", "scientific * hex"},
|
||||
{"0b11 - 1e1", "(3.00 - 10.00)", "binary - scientific"},
|
||||
{"(0x10 + 0b10) * 1e1", "((16.00 + 2.00) * 10.00)", "mixed with precedence"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.desc, func(t *testing.T) {
|
||||
l := NewLexer(tt.input)
|
||||
p := NewParser(l)
|
||||
expr := p.parseExpression(LOWEST)
|
||||
checkParserErrors(t, p)
|
||||
|
||||
if expr.String() != tt.expected {
|
||||
t.Errorf("expected %s, got %s", tt.expected, expr.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtendedNumberErrors(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expectedError string
|
||||
desc string
|
||||
}{
|
||||
{"0x", "could not parse '0x' as hexadecimal number", "incomplete hex"},
|
||||
{"0b", "could not parse '0b' as binary number", "incomplete binary"},
|
||||
{"0xGHI", "could not parse '0xGHI' as hexadecimal number", "invalid hex digits"},
|
||||
{"0b123", "could not parse '0b123' as binary number", "invalid binary digits"},
|
||||
{"1e", "could not parse '1e' as number", "incomplete scientific"},
|
||||
{"1e+", "could not parse '1e+' as number", "scientific without digits"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.desc, func(t *testing.T) {
|
||||
l := NewLexer(tt.input)
|
||||
p := NewParser(l)
|
||||
p.parseExpression(LOWEST)
|
||||
|
||||
errors := p.Errors()
|
||||
if len(errors) == 0 {
|
||||
t.Fatalf("expected parsing errors, got none")
|
||||
}
|
||||
|
||||
found := false
|
||||
for _, err := range errors {
|
||||
if strings.Contains(err.Message, tt.expectedError) {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
errorMsgs := make([]string, len(errors))
|
||||
for i, err := range errors {
|
||||
errorMsgs[i] = err.Message
|
||||
}
|
||||
t.Errorf("expected error containing %q, got %v", tt.expectedError, errorMsgs)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtendedNumberStringRepresentation(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expected string
|
||||
desc string
|
||||
}{
|
||||
{"0xFF", "255.00", "hex string representation"},
|
||||
{"0b1111", "15.00", "binary string representation"},
|
||||
{"1e3", "1000.00", "scientific string representation"},
|
||||
{"1.5e2", "150.00", "decimal scientific string representation"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.desc, func(t *testing.T) {
|
||||
l := NewLexer(tt.input)
|
||||
p := NewParser(l)
|
||||
expr := p.parseExpression(LOWEST)
|
||||
checkParserErrors(t, p)
|
||||
|
||||
if expr.String() != tt.expected {
|
||||
t.Errorf("expected %s, got %s", tt.expected, expr.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTableWithExtendedNumbers(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expected string
|
||||
desc string
|
||||
}{
|
||||
{"{0xFF, 0b1010}", "{255.00, 10.00}", "array with hex and binary"},
|
||||
{"{hex = 0xFF, bin = 0b1010}", "{hex = 255.00, bin = 10.00}", "hash with extended numbers"},
|
||||
{"{1e2, 0x10, 0b10}", "{100.00, 16.00, 2.00}", "mixed number formats"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.desc, func(t *testing.T) {
|
||||
l := NewLexer(tt.input)
|
||||
p := NewParser(l)
|
||||
expr := p.parseExpression(LOWEST)
|
||||
checkParserErrors(t, p)
|
||||
|
||||
if expr.String() != tt.expected {
|
||||
t.Errorf("expected %s, got %s", tt.expected, expr.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestLexerExtendedNumbers(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expected []Token
|
||||
desc string
|
||||
}{
|
||||
{
|
||||
"0xFF + 0b1010",
|
||||
[]Token{
|
||||
{Type: NUMBER, Literal: "0xFF"},
|
||||
{Type: PLUS, Literal: "+"},
|
||||
{Type: NUMBER, Literal: "0b1010"},
|
||||
{Type: EOF, Literal: ""},
|
||||
},
|
||||
"hex and binary tokens",
|
||||
},
|
||||
{
|
||||
"1.5e-3 * 2E+4",
|
||||
[]Token{
|
||||
{Type: NUMBER, Literal: "1.5e-3"},
|
||||
{Type: STAR, Literal: "*"},
|
||||
{Type: NUMBER, Literal: "2E+4"},
|
||||
{Type: EOF, Literal: ""},
|
||||
},
|
||||
"scientific notation tokens",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.desc, func(t *testing.T) {
|
||||
l := NewLexer(tt.input)
|
||||
|
||||
for i, expectedToken := range tt.expected {
|
||||
tok := l.NextToken()
|
||||
if tok.Type != expectedToken.Type {
|
||||
t.Errorf("token %d: expected type %v, got %v", i, expectedToken.Type, tok.Type)
|
||||
}
|
||||
if tok.Literal != expectedToken.Literal {
|
||||
t.Errorf("token %d: expected literal %s, got %s", i, expectedToken.Literal, tok.Literal)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Additional existing tests would remain unchanged...
|
||||
func TestLiterals(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
@ -20,7 +273,6 @@ func TestLiterals(t *testing.T) {
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.input, func(t *testing.T) {
|
||||
// Parse as expression directly - literals are not valid statements
|
||||
l := NewLexer(tt.input)
|
||||
p := NewParser(l)
|
||||
expr := p.parseExpression(LOWEST)
|
||||
@ -158,7 +410,7 @@ func TestAssignStatements(t *testing.T) {
|
||||
input string
|
||||
expectedIdentifier string
|
||||
expectedValue any
|
||||
isExpression bool // true if expectedValue is expression string representation
|
||||
isExpression bool
|
||||
}{
|
||||
{"x = 42", "x", 42.0, false},
|
||||
{"name = \"test\"", "name", "test", false},
|
||||
@ -188,12 +440,10 @@ func TestAssignStatements(t *testing.T) {
|
||||
}
|
||||
|
||||
if tt.isExpression {
|
||||
// Test the string representation of the expression
|
||||
if stmt.Value.String() != tt.expectedValue.(string) {
|
||||
t.Errorf("expected expression %s, got %s", tt.expectedValue.(string), stmt.Value.String())
|
||||
}
|
||||
} else {
|
||||
// Test the actual value based on type
|
||||
switch expected := tt.expectedValue.(type) {
|
||||
case float64:
|
||||
testNumberLiteral(t, stmt.Value, expected)
|
||||
@ -285,13 +535,10 @@ func TestParsingErrors(t *testing.T) {
|
||||
l := NewLexer(tt.input)
|
||||
p := NewParser(l)
|
||||
|
||||
// Decide parsing strategy based on the type of error we're testing
|
||||
switch tt.input {
|
||||
case "(1 + 2", "+ 5", "1 +", "{1, 2", "{a =", "{a = 1,":
|
||||
// These are expression-level errors
|
||||
p.parseExpression(LOWEST)
|
||||
default:
|
||||
// These are statement-level errors
|
||||
p.ParseProgram()
|
||||
}
|
||||
|
||||
@ -331,7 +578,6 @@ y = "hello"`
|
||||
p := NewParser(l)
|
||||
program := p.ParseProgram()
|
||||
|
||||
// Should have errors but still parse valid statements
|
||||
if !p.HasErrors() {
|
||||
t.Fatal("expected parsing errors")
|
||||
}
|
||||
@ -352,7 +598,6 @@ y = "hello"`
|
||||
t.Error("expected specific assignment error")
|
||||
}
|
||||
|
||||
// Should still have parsed the valid statements
|
||||
validStatements := 0
|
||||
for _, stmt := range program.Statements {
|
||||
if stmt != nil {
|
||||
|
Loading…
x
Reference in New Issue
Block a user