rewrite packet parser as string-based recursive descent

This commit is contained in:
Sky Johnson 2025-08-07 15:13:56 -05:00
parent 46121dcfc6
commit d24ec376a8
9 changed files with 1202 additions and 1145 deletions

View File

@ -78,7 +78,7 @@ func processDirectory(dirPath string, packets map[string]*parser.PacketDef) erro
err := processXMLFile(entryPath, packets)
if err != nil {
log.Printf("Warning: failed to process %s: %v", entryPath, err)
log.Printf("Warning: %s: %v", entryPath, err)
}
}
@ -93,7 +93,7 @@ func processXMLFile(filePath string, packets map[string]*parser.PacketDef) error
parsedPackets, err := parser.Parse(string(content))
if err != nil {
return fmt.Errorf("failed to parse XML: %w", err)
return fmt.Errorf("failed to parse packet def: %w", err)
}
for name, packet := range parsedPackets {

View File

@ -1,358 +0,0 @@
package parser
import (
"fmt"
"sync"
"unicode"
)
// Object pools for heavy reuse
//
// tokenPool recycles Token values between NextToken and ReleaseToken so
// steady-state lexing does not allocate one Token per token scanned.
// Index fields start at -1, the "unset range" sentinel used by Token.Tag
// and Token.Text; the attribute map is pre-sized for a typical tag.
var tokenPool = sync.Pool{
	New: func() any {
		return &Token{
			Attributes: make(map[string]string, 8), // typical attribute count per tag
			TagStart:   -1,
			TagEnd:     -1,
			TextStart:  -1,
			TextEnd:    -1,
		}
	},
}
// More efficient lexer using byte operations and minimal allocations.
//
// Lexer scans PML input byte-by-byte and hands out index ranges into the
// input rather than substring copies. line/col are 1-based and maintained
// purely for error reporting.
type Lexer struct {
	input []byte // raw input; []byte for cheap indexing and range slicing
	pos   int    // current offset into input
	line  int    // 1-based line number of pos
	col   int    // 1-based column number of pos
}
// NewLexer constructs a lexer over input, positioned at line 1, column 1.
func NewLexer(input string) *Lexer {
	l := &Lexer{input: []byte(input)}
	l.line, l.col = 1, 1
	return l
}
// peek returns the byte at the current position without consuming it,
// or 0 when the input is exhausted.
func (l *Lexer) peek() byte {
	if l.pos < len(l.input) {
		return l.input[l.pos]
	}
	return 0
}
// next consumes and returns the current byte, updating the line/column
// counters; it returns 0 at end of input.
func (l *Lexer) next() byte {
	if l.pos >= len(l.input) {
		return 0
	}
	b := l.input[l.pos]
	l.pos++
	if b != '\n' {
		l.col++
	} else {
		l.line++
		l.col = 1
	}
	return b
}
// isSelfClosingTag reports whether the tag name at input[start:end] is one
// of the primitive PML field types that never carry a closing tag.
//
// The string(l.input[start:end]) conversion inside a switch is a pattern the
// Go compiler recognizes and compiles without allocating, so this is as
// cheap as the original hand-rolled per-byte comparisons while being far
// easier to audit and extend. The accepted set is identical to the original.
func (l *Lexer) isSelfClosingTag(start, end int) bool {
	switch string(l.input[start:end]) {
	case "i8", "i16", "i32", "i64",
		"si8", "si16", "si32", "si64",
		"f2", "f32", "f64", "double",
		"char", "str8", "str16", "str32",
		"color", "equip":
		return true
	}
	return false
}
// skipWhitespace advances past spaces, tabs, carriage returns, and
// newlines, keeping the line/column counters in sync.
func (l *Lexer) skipWhitespace() {
	for l.pos < len(l.input) {
		switch l.input[l.pos] {
		case '\n':
			l.line++
			l.col = 1
		case ' ', '\t', '\r':
			l.col++
		default:
			return
		}
		l.pos++
	}
}
// parseAttributes reads name="value" pairs into attrs until it reaches '>'
// or '/>'. attrs is cleared in place first so pooled tokens never carry
// attributes over from a previous tag. Values must be quoted with '"' or
// '\''; unquoted values and a missing '=' are reported as errors.
//
// Fix over the previous version: the name-terminator set now includes
// '\r', so input with CRLF line endings no longer ends up with a stray
// carriage return inside attribute names.
func (l *Lexer) parseAttributes(attrs map[string]string) error {
	// Reset without reallocating; the compiler lowers this to a map clear.
	for k := range attrs {
		delete(attrs, k)
	}
	for {
		l.skipWhitespace()
		if l.pos >= len(l.input) || l.peek() == '>' ||
			(l.peek() == '/' && l.pos+1 < len(l.input) && l.input[l.pos+1] == '>') {
			break
		}
		// Attribute name: run of bytes up to '=', whitespace, or '>'.
		nameStart := l.pos
		for l.pos < len(l.input) {
			ch := l.input[l.pos]
			if ch == '=' || ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '>' {
				break
			}
			// ch can never be '\n' here (it breaks above), so col always advances.
			l.pos++
			l.col++
		}
		nameEnd := l.pos
		if nameStart == nameEnd {
			break
		}
		l.skipWhitespace()
		if l.peek() != '=' {
			return fmt.Errorf("expected '=' after attribute name")
		}
		l.next() // consume '='
		l.skipWhitespace()
		// Attribute value: everything between a matched pair of quotes.
		quote := l.peek()
		if quote != '"' && quote != '\'' {
			return fmt.Errorf("attribute value must be quoted")
		}
		l.next() // consume opening quote
		valueStart := l.pos
		for l.pos < len(l.input) && l.input[l.pos] != quote {
			if l.input[l.pos] == '\n' {
				l.line++
				l.col = 1
			} else {
				l.col++
			}
			l.pos++
		}
		if l.pos >= len(l.input) {
			return fmt.Errorf("unclosed attribute value")
		}
		attrs[string(l.input[nameStart:nameEnd])] = string(l.input[valueStart:l.pos])
		l.next() // consume closing quote
	}
	return nil
}
// NextToken scans and returns the next token. Tokens come from tokenPool
// and should be handed back via ReleaseToken when the caller is done.
//
// Fix over the previous version: runs of whitespace-only text used to be
// skipped by tail-recursing into l.NextToken(), which abandoned the current
// pooled token (a pool leak that defeats the pooling) and could recurse
// deeply on pathological input. The skip is now a loop that reuses the same
// token; all scanning behavior is otherwise unchanged.
func (l *Lexer) NextToken() *Token {
	token := tokenPool.Get().(*Token)
	for {
		// Reset pooled state for this scan.
		token.Type = TokenError
		token.TagStart, token.TagEnd = -1, -1
		token.TextStart, token.TextEnd = -1, -1
		// Position is recorded before leading whitespace is skipped,
		// matching the original behavior.
		token.Line = l.line
		token.Col = l.col

		l.skipWhitespace()
		if l.pos >= len(l.input) {
			token.Type = TokenEOF
			return token
		}

		if l.peek() == '<' {
			l.next() // consume '<'

			// Comment: <!-- ... -->
			if l.pos+2 < len(l.input) &&
				l.input[l.pos] == '!' && l.input[l.pos+1] == '-' && l.input[l.pos+2] == '-' {
				l.pos += 3
				start := l.pos
				for l.pos+2 < len(l.input) {
					if l.input[l.pos] == '-' && l.input[l.pos+1] == '-' && l.input[l.pos+2] == '>' {
						token.Type = TokenComment
						token.TextStart = start
						token.TextEnd = l.pos
						l.pos += 3
						return token
					}
					if l.input[l.pos] == '\n' {
						l.line++
						l.col = 1
					} else {
						l.col++
					}
					l.pos++
				}
				token.Type = TokenError // unterminated comment
				return token
			}

			// Closing tag: </name>
			if l.peek() == '/' {
				l.next() // consume '/'
				start := l.pos
				for l.pos < len(l.input) && l.input[l.pos] != '>' {
					l.pos++
					l.col++
				}
				if l.pos >= len(l.input) {
					token.Type = TokenError // unterminated closing tag
					return token
				}
				token.Type = TokenCloseTag
				token.TagStart = start
				token.TagEnd = l.pos
				l.next() // consume '>'
				return token
			}

			// Opening or self-closing tag: read the name.
			start := l.pos
			for l.pos < len(l.input) {
				ch := l.input[l.pos]
				if ch == '>' || ch == '/' || ch == ' ' || ch == '\t' || ch == '\n' {
					break
				}
				l.pos++
				l.col++
			}
			if start == l.pos {
				token.Type = TokenError // empty tag name
				return token
			}
			token.TagStart = start
			token.TagEnd = l.pos

			if err := l.parseAttributes(token.Attributes); err != nil {
				token.Type = TokenError
				return token
			}
			l.skipWhitespace()
			if l.pos >= len(l.input) {
				token.Type = TokenError // tag never closed
				return token
			}

			if l.peek() == '/' && l.pos+1 < len(l.input) && l.input[l.pos+1] == '>' {
				token.Type = TokenSelfCloseTag
				l.pos += 2
			} else {
				// Primitive field types count as self-closing even without "/>".
				if l.isSelfClosingTag(token.TagStart, token.TagEnd) {
					token.Type = TokenSelfCloseTag
				} else {
					token.Type = TokenOpenTag
				}
				if l.peek() != '>' {
					token.Type = TokenError
					return token
				}
				l.next()
			}
			return token
		}

		// Text content: record the range without copying.
		start := l.pos
		for l.pos < len(l.input) && l.input[l.pos] != '<' {
			if l.input[l.pos] == '\n' {
				l.line++
				l.col = 1
			} else {
				l.col++
			}
			l.pos++
		}
		// Trim surrounding whitespace by shrinking the range.
		end := l.pos
		for start < end && unicode.IsSpace(rune(l.input[start])) {
			start++
		}
		for end > start && unicode.IsSpace(rune(l.input[end-1])) {
			end--
		}
		if start < end {
			token.Type = TokenText
			token.TextStart = start
			token.TextEnd = end
			return token
		}
		// Whitespace-only text: reuse this token and scan again.
	}
}
// ReleaseToken hands a token back to the shared pool for reuse.
// Passing nil is a no-op.
func (l *Lexer) ReleaseToken(token *Token) {
	if token == nil {
		return
	}
	tokenPool.Put(token)
}

File diff suppressed because it is too large Load Diff

View File

@ -377,8 +377,7 @@ func TestSubstructReference(t *testing.T) {
</version>
</packet>`
parser := NewParser(pml)
packets, err := parser.Parse()
packets, err := Parse(pml)
if err != nil {
t.Fatalf("Parse failed: %v", err)
}

View File

@ -1,42 +0,0 @@
package parser
// TokenType identifies the kind of a lexed PML token.
type TokenType int

const (
	TokenError TokenType = iota
	TokenOpenTag
	TokenCloseTag
	TokenSelfCloseTag
	TokenText
	TokenComment
	TokenEOF
)

// Token is a parsed token. Instead of copying substrings out of the input,
// it records [start, end) index ranges; Tag and Text slice the original
// input on demand, so no string is allocated until a caller needs it.
// A value of -1 marks an unset range.
type Token struct {
	Type       TokenType
	TagStart   int // start index of the tag name in the input, -1 if none
	TagEnd     int // end index (exclusive) of the tag name, -1 if none
	TextStart  int // start index of the text content, -1 if none
	TextEnd    int // end index (exclusive) of the text content, -1 if none
	Attributes map[string]string
	Line       int // 1-based line where the token begins
	Col        int // 1-based column where the token begins
}

// Tag returns the tag name sliced from input, or "" when the range is unset
// or does not fit within input. The upper-bound check is new: it guards
// against a panic when a stale token is read against a different (shorter)
// input string than the one it was lexed from.
func (t *Token) Tag(input string) string {
	if t.TagStart >= 0 && t.TagEnd > t.TagStart && t.TagEnd <= len(input) {
		return input[t.TagStart:t.TagEnd]
	}
	return ""
}

// Text returns the text content sliced from input, with the same lazy
// slicing and bounds guarding as Tag.
func (t *Token) Text(input string) string {
	if t.TextStart >= 0 && t.TextEnd > t.TextStart && t.TextEnd <= len(input) {
		return input[t.TextStart:t.TextEnd]
	}
	return ""
}

View File

@ -10,7 +10,7 @@
<u32 name="unknown2" size="2">
<u32 name="technique">
<u32 name="knowledge">
<u8 name="level" size="1 ">
<u8 name="level" size="1">
<u32 name="unknown3">
<char name="recipe_book" size="200">
<char name="device" size="40">

View File

@ -19,13 +19,13 @@
<i32 name="item_id2">
<u16 name="stack_size2">
<u8 name="unknown7" size="4">
<u8 name="num_tokens" size=" 1">
<u8 name="num_tokens" size="1">
<array name="token_array" count="var:num_tokens">
<u16 name="token_icon" size=" 1">
<u16 name="token_qty" size=" 1">
<i32 name="token_id" size=" 1">
<i32 name="token_id2" size=" 1">
<str16 name="token_name" size=" 1">
<u16 name="token_icon" size="1">
<u16 name="token_qty" size="1">
<i32 name="token_id" size="1">
<i32 name="token_id2" size="1">
<str16 name="token_name" size="1">
</array>
<str16 name="description">
</array>
@ -122,13 +122,13 @@
<i32 name="item_id2">
<u16 name="stack_size2">
<u8 name="unknown7" size="4">
<u8 name="num_tokens" size=" 1">
<u8 name="num_tokens" size="1">
<array name="token_array" count="var:num_tokens">
<u16 name="token_icon" size=" 1">
<u16 name="token_qty" size=" 1">
<i32 name="token_id" size=" 1">
<i32 name="token_id2" size=" 1">
<str16 name="token_name" size=" 1">
<u16 name="token_icon" size="1">
<u16 name="token_qty" size="1">
<i32 name="token_id" size="1">
<i32 name="token_id2" size="1">
<str16 name="token_name" size="1">
</array>
<str16 name="description">
</array>
@ -156,13 +156,13 @@
<u32 name="status2">
<u32 name="station_cash">
<u8 name="unknown7" size="2">
<u8 name="num_tokens" size=" 1">
<u8 name="num_tokens" size="1">
<array name="token_array" count="var:num_tokens">
<u16 name="token_icon" size=" 1">
<u16 name="token_qty" size=" 1">
<i32 name="token_id" size=" 1">
<i32 name="token_id2" size=" 1">
<str16 name="token_name" size=" 1">
<u16 name="token_icon" size="1">
<u16 name="token_qty" size="1">
<i32 name="token_id" size="1">
<i32 name="token_id2" size="1">
<str16 name="token_name" size="1">
</array>
<str16 name="description">
</array>
@ -191,13 +191,13 @@
<u32 name="status2">
<u32 name="station_cash">
<u8 name="unknown7" size="2">
<u8 name="num_tokens" size=" 1">
<u8 name="num_tokens" size="1">
<array name="token_array" count="var:num_tokens">
<u16 name="token_icon" size=" 1">
<u16 name="token_qty" size=" 1">
<i32 name="token_id" size=" 1">
<i32 name="token_id2" size=" 1">
<str16 name="token_name" size=" 1">
<u16 name="token_icon" size="1">
<u16 name="token_qty" size="1">
<i32 name="token_id" size="1">
<i32 name="token_id2" size="1">
<str16 name="token_name" size="1">
</array>
<str16 name="description">
</array>
@ -226,13 +226,13 @@
<u32 name="status2">
<u32 name="station_cash">
<u8 name="unknown7" size="2">
<u8 name="num_tokens" size=" 1">
<u8 name="num_tokens" size="1">
<array name="token_array" count="var:num_tokens">
<u16 name="token_icon" size=" 1">
<u16 name="token_qty" size=" 1">
<i32 name="token_id" size=" 1">
<i32 name="token_id2" size=" 1">
<str16 name="token_name" size=" 1">
<u16 name="token_icon" size="1">
<u16 name="token_qty" size="1">
<i32 name="token_id" size="1">
<i32 name="token_id2" size="1">
<str16 name="token_name" size="1">
</array>
<str8 name="description">
<u8 name="unknown" size="3">
@ -263,13 +263,13 @@
<u32 name="status">
<u32 name="station_cash">
<u8 name="unknown7" size="4">
<u8 name="num_tokens" size=" 1">
<u8 name="num_tokens" size="1">
<array name="token_array" count="var:num_tokens">
<u16 name="token_icon" size=" 1">
<u16 name="token_qty" size=" 1">
<i32 name="token_id" size=" 1">
<i32 name="token_id2" size=" 1">
<str16 name="token_name" size=" 1">
<u16 name="token_icon" size="1">
<u16 name="token_qty" size="1">
<i32 name="token_id" size="1">
<i32 name="token_id2" size="1">
<str16 name="token_name" size="1">
</array>
<str16 name="description">
</array>
@ -300,13 +300,13 @@
<u32 name="status2">
<u32 name="station_cash">
<u8 name="unknown7" size="2">
<u8 name="num_tokens" size=" 1">
<u8 name="num_tokens" size="1">
<array name="token_array" count="var:num_tokens">
<u16 name="token_icon" size=" 1">
<u16 name="token_qty" size=" 1">
<i32 name="token_id" size=" 1">
<i32 name="token_id2" size=" 1">
<str16 name="token_name" size=" 1">
<u16 name="token_icon" size="1">
<u16 name="token_qty" size="1">
<i32 name="token_id" size="1">
<i32 name="token_id2" size="1">
<str16 name="token_name" size="1">
</array>
<str16 name="description">
</array>

33
test_empty_packets.go Normal file
View File

@ -0,0 +1,33 @@
package main
import (
"fmt"
"log"
"eq2emu/internal/packets"
)
// main loads every packet definition (the packets package does this in its
// init) and reports any definition that parsed to zero fields.
func main() {
	// Include file/line in log output so loader warnings are traceable.
	log.SetFlags(log.LstdFlags | log.Lshortfile)

	// Referencing the packets package triggers its init() loader.
	total := packets.GetPacketCount()
	fmt.Printf("Loaded %d packet definitions\n", total)

	names := packets.GetPacketNames()
	if len(names) == 0 {
		fmt.Println("No packets loaded!")
		return
	}

	fmt.Println("Checking for potentially problematic packets...")
	for _, name := range names {
		packet, ok := packets.GetPacket(name)
		if !ok {
			continue
		}
		if len(packet.Fields) == 0 {
			fmt.Printf("Empty packet found: %s\n", name)
		}
	}
}

39
test_specific_empty.go Normal file
View File

@ -0,0 +1,39 @@
package main
import (
"fmt"
"eq2emu/internal/packets/parser"
)
// main probes the parser with two degenerate packet definitions: one with
// an empty <version> body and one that is entirely self-closing.
func main() {
	emptyPML := `<packet name="EmptyTest">
<version number="1">
</version>
</packet>`

	fmt.Println("Testing empty packet parsing...")
	if parsed, err := parser.Parse(emptyPML); err != nil {
		fmt.Printf("ERROR parsing empty packet: %v\n", err)
	} else {
		fmt.Printf("SUCCESS: Parsed %d packets\n", len(parsed))
		if def, ok := parsed["EmptyTest"]; ok {
			fmt.Printf("EmptyTest packet has %d fields\n", len(def.Fields))
		}
	}

	selfClosingPML := `<packet name="SelfClosingTest" />`

	fmt.Println("\nTesting self-closing packet parsing...")
	if parsed, err := parser.Parse(selfClosingPML); err != nil {
		fmt.Printf("ERROR parsing self-closing packet: %v\n", err)
	} else {
		fmt.Printf("SUCCESS: Parsed %d packets\n", len(parsed))
		if def, ok := parsed["SelfClosingTest"]; ok {
			fmt.Printf("SelfClosingTest packet has %d fields\n", len(def.Fields))
		}
	}
}