From d24ec376a8205f3d9a98910722a91b22e09ffe4f Mon Sep 17 00:00:00 2001 From: Sky Johnson Date: Thu, 7 Aug 2025 15:13:56 -0500 Subject: [PATCH] rewrite packet parser as string-based recursive descent --- internal/packets/loader.go | 4 +- internal/packets/parser/lexer.go | 358 ---- internal/packets/parser/parser.go | 1782 ++++++++++------- internal/packets/parser/parser_test.go | 3 +- internal/packets/parser/tokens.go | 42 - internal/packets/xml/world/RecipeDetails.xml | 2 +- internal/packets/xml/world/UpdateMerchant.xml | 84 +- test_empty_packets.go | 33 + test_specific_empty.go | 39 + 9 files changed, 1202 insertions(+), 1145 deletions(-) delete mode 100644 internal/packets/parser/lexer.go delete mode 100644 internal/packets/parser/tokens.go create mode 100644 test_empty_packets.go create mode 100644 test_specific_empty.go diff --git a/internal/packets/loader.go b/internal/packets/loader.go index b7a7101..acb86b1 100644 --- a/internal/packets/loader.go +++ b/internal/packets/loader.go @@ -78,7 +78,7 @@ func processDirectory(dirPath string, packets map[string]*parser.PacketDef) erro err := processXMLFile(entryPath, packets) if err != nil { - log.Printf("Warning: failed to process %s: %v", entryPath, err) + log.Printf("Warning: %s: %v", entryPath, err) } } @@ -93,7 +93,7 @@ func processXMLFile(filePath string, packets map[string]*parser.PacketDef) error parsedPackets, err := parser.Parse(string(content)) if err != nil { - return fmt.Errorf("failed to parse XML: %w", err) + return fmt.Errorf("failed to parse packet def: %w", err) } for name, packet := range parsedPackets { diff --git a/internal/packets/parser/lexer.go b/internal/packets/parser/lexer.go deleted file mode 100644 index 8a992c7..0000000 --- a/internal/packets/parser/lexer.go +++ /dev/null @@ -1,358 +0,0 @@ -package parser - -import ( - "fmt" - "sync" - "unicode" -) - -// Object pools for heavy reuse -var tokenPool = sync.Pool{ - New: func() any { - return &Token{ - Attributes: make(map[string]string, 8), - TagStart: -1, - TagEnd: -1, - TextStart: -1, - TextEnd: -1, - } - }, -} - -// More efficient lexer using byte operations and minimal allocations -type Lexer struct { - input []byte // Use byte slice for faster operations - pos int - line int - col int -} - -// Creates a new lexer -func NewLexer(input string) *Lexer { - return &Lexer{ - input: []byte(input), - line: 1, - col: 1, - } -} - -// Returns next byte without advancing -func (l *Lexer) peek() byte { - if l.pos >= len(l.input) { - return 0 - } - return l.input[l.pos] -} - -// Advances and returns next byte -func (l *Lexer) next() byte { - if l.pos >= len(l.input) { - return 0 - } - ch := l.input[l.pos] - l.pos++ - if ch == '\n' { - l.line++ - l.col = 1 - } else { - l.col++ - } - return ch -} - -// Checks if a tag should be treated as self-closing (using byte comparison) -func (l *Lexer) isSelfClosingTag(start, end int) bool { - length := end - start - if length < 2 || length > 6 { - return false - } - - // Fast byte-based comparison - switch length { - case 2: - return (l.input[start] == 'i' && l.input[start+1] == '8') || - (l.input[start] == 'f' && l.input[start+1] == '2') - case 3: - return (l.input[start] == 'i' && l.input[start+1] == '1' && l.input[start+2] == '6') || - (l.input[start] == 'i' && l.input[start+1] == '3' && l.input[start+2] == '2') || - (l.input[start] == 'i' && l.input[start+1] == '6' && l.input[start+2] == '4') || - (l.input[start] == 's' && l.input[start+1] == 'i' && l.input[start+2] == '8') || - (l.input[start] == 'f' && l.input[start+1] == '3' && l.input[start+2] == '2') || - (l.input[start] == 'f' && l.input[start+1] == '6' && l.input[start+2] == '4') - case 4: - return (l.input[start] == 's' && l.input[start+1] == 'i' && - l.input[start+2] == '1' && l.input[start+3] == '6') || - (l.input[start] == 's' && l.input[start+1] == 'i' && - l.input[start+2] == '3' && l.input[start+3] == '2') || - (l.input[start] == 's' && l.input[start+1] == 'i' && - l.input[start+2] == '6' && l.input[start+3] == '4') || - (l.input[start] == 'c' && l.input[start+1] == 'h' && - l.input[start+2] == 'a' && l.input[start+3] == 'r') || - (l.input[start] == 's' && l.input[start+1] == 't' && - l.input[start+2] == 'r' && l.input[start+3] == '8') - case 5: - return (l.input[start] == 'c' && l.input[start+1] == 'o' && - l.input[start+2] == 'l' && l.input[start+3] == 'o' && - l.input[start+4] == 'r') || - (l.input[start] == 'e' && l.input[start+1] == 'q' && - l.input[start+2] == 'u' && l.input[start+3] == 'i' && - l.input[start+4] == 'p') || - (l.input[start] == 's' && l.input[start+1] == 't' && - l.input[start+2] == 'r' && l.input[start+3] == '1' && - l.input[start+4] == '6') || - (l.input[start] == 's' && l.input[start+1] == 't' && - l.input[start+2] == 'r' && l.input[start+3] == '3' && - l.input[start+4] == '2') - case 6: - return (l.input[start] == 'd' && l.input[start+1] == 'o' && - l.input[start+2] == 'u' && l.input[start+3] == 'b' && - l.input[start+4] == 'l' && l.input[start+5] == 'e') - } - return false -} - -// Skips whitespace using byte operations -func (l *Lexer) skipWhitespace() { - for l.pos < len(l.input) { - ch := l.input[l.pos] - if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' { - if ch == '\n' { - l.line++ - l.col = 1 - } else { - l.col++ - } - l.pos++ - } else { - break - } - } -} - -// Optimized attribute parsing with minimal allocations - FIXED BUG -func (l *Lexer) parseAttributes(attrs map[string]string) error { - // Clear existing attributes without deallocating - for k := range attrs { - delete(attrs, k) - } - - for { - l.skipWhitespace() - if l.pos >= len(l.input) || l.peek() == '>' || - (l.peek() == '/' && l.pos+1 < len(l.input) && l.input[l.pos+1] == '>') { - break - } - - // Read attribute name using byte operations - nameStart := l.pos - for l.pos < len(l.input) { - ch := l.input[l.pos] - if ch == '=' || ch == ' ' || ch == '\t' || ch == '\n' || ch == '>' { - break - } - l.pos++ - if ch != '\n' { - l.col++ - } - } - - nameEnd := l.pos // FIXED: Store end of name here - - if nameStart == nameEnd { - break - } - - l.skipWhitespace() - if l.peek() != '=' { - return fmt.Errorf("expected '=' after attribute name") - } - l.next() // skip '=' - l.skipWhitespace() - - // Read attribute value - quote := l.peek() - if quote != '"' && quote != '\'' { - return fmt.Errorf("attribute value must be quoted") - } - l.next() // skip opening quote - - valueStart := l.pos - for l.pos < len(l.input) && l.input[l.pos] != quote { - if l.input[l.pos] == '\n' { - l.line++ - l.col = 1 - } else { - l.col++ - } - l.pos++ - } - - if l.pos >= len(l.input) { - return fmt.Errorf("unclosed attribute value") - } - - // FIXED: Correct name and value extraction - name := string(l.input[nameStart:nameEnd]) - value := string(l.input[valueStart:l.pos]) - attrs[name] = value - - l.next() // skip closing quote - } - - return nil -} - -// Optimized token generation with pooling -func (l *Lexer) NextToken() *Token { - token := tokenPool.Get().(*Token) - token.Type = TokenError - token.TagStart = -1 - token.TagEnd = -1 - token.TextStart = -1 - token.TextEnd = -1 - token.Line = l.line - token.Col = l.col - - l.skipWhitespace() - if l.pos >= len(l.input) { - token.Type = TokenEOF - return token - } - - if l.peek() == '<' { - l.next() // skip '<' - - // Check for comment using byte comparison - if l.pos+2 < len(l.input) && - l.input[l.pos] == '!' && l.input[l.pos+1] == '-' && l.input[l.pos+2] == '-' { - l.pos += 3 - start := l.pos - // Find end of comment efficiently - for l.pos+2 < len(l.input) { - if l.input[l.pos] == '-' && l.input[l.pos+1] == '-' && l.input[l.pos+2] == '>' { - token.Type = TokenComment - token.TextStart = start - token.TextEnd = l.pos - l.pos += 3 - return token - } - if l.input[l.pos] == '\n' { - l.line++ - l.col = 1 - } else { - l.col++ - } - l.pos++ - } - token.Type = TokenError - return token - } - - // Check for closing tag - if l.peek() == '/' { - l.next() // skip '/' - start := l.pos - for l.pos < len(l.input) && l.input[l.pos] != '>' { - l.pos++ - l.col++ - } - if l.pos >= len(l.input) { - token.Type = TokenError - return token - } - token.Type = TokenCloseTag - token.TagStart = start - token.TagEnd = l.pos - l.next() // skip '>' - return token - } - - // Opening or self-closing tag - start := l.pos - for l.pos < len(l.input) { - ch := l.input[l.pos] - if ch == '>' || ch == '/' || ch == ' ' || ch == '\t' || ch == '\n' { - break - } - l.pos++ - l.col++ - } - - if start == l.pos { - token.Type = TokenError - return token - } - - token.TagStart = start - token.TagEnd = l.pos - - if err := l.parseAttributes(token.Attributes); err != nil { - token.Type = TokenError - return token - } - - l.skipWhitespace() - if l.pos >= len(l.input) { - token.Type = TokenError - return token - } - - if l.peek() == '/' && l.pos+1 < len(l.input) && l.input[l.pos+1] == '>' { - token.Type = TokenSelfCloseTag - l.pos += 2 - } else { - // Check if this is a self-closing field type - if l.isSelfClosingTag(token.TagStart, token.TagEnd) { - token.Type = TokenSelfCloseTag - } else { - token.Type = TokenOpenTag - } - if l.peek() == '>' { - l.next() - } else { - token.Type = TokenError - return token - } - } - - return token - } - - // Text content - find range without copying - start := l.pos - for l.pos < len(l.input) && l.input[l.pos] != '<' { - if l.input[l.pos] == '\n' { - l.line++ - l.col = 1 - } else { - l.col++ - } - l.pos++ - } - - // Trim whitespace from range - for start < l.pos && unicode.IsSpace(rune(l.input[start])) { - start++ - } - end := l.pos - for end > start && unicode.IsSpace(rune(l.input[end-1])) { - end-- - } - - if start < end { - token.Type = TokenText - token.TextStart = start - token.TextEnd = end - return token - } - - // Skip empty text, get next token - return l.NextToken() -} - -// Returns token to pool -func (l *Lexer) ReleaseToken(token *Token) { - if token != nil { - tokenPool.Put(token) - } -} diff --git a/internal/packets/parser/parser.go b/internal/packets/parser/parser.go index 67d301c..8b54f34 100644 --- a/internal/packets/parser/parser.go +++ b/internal/packets/parser/parser.go @@ -5,54 +5,1090 @@ import ( "fmt" "strconv" "strings" - "sync" + "unicode" ) -var ( - fieldOrderPool = sync.Pool{ - New: func() any { - slice := make([]string, 0, 32) - return &slice - }, - } - conditionBuilder = sync.Pool{ - New: func() any { - buf := make([]byte, 0, 128) - return &buf - }, - } - stringBuilderPool = sync.Pool{ - New: func() any { - return &stringBuilder{buf: make([]byte, 0, 64)} - }, - } -) - -type stringBuilder struct { - buf []byte -} - -func (sb *stringBuilder) reset() { - sb.buf = sb.buf[:0] -} - -func (sb *stringBuilder) writeString(s string) { - sb.buf = append(sb.buf, s...) -} - -func (sb *stringBuilder) string() string { - return string(sb.buf) -} - -// Parses PML into PacketDef structures +// Parser is a single-pass recursive descent parser for packet definitions type Parser struct { - lexer *Lexer - current *Token input string + pos int + line int + col int substructs map[string]*PacketDef templates map[string]*PacketDef tagStack []string - fieldNames []string +} + +// NewParser creates a new string-based parser +func NewParser(input string) *Parser { + return &Parser{ + input: input, + line: 1, + col: 1, + substructs: make(map[string]*PacketDef), + templates: make(map[string]*PacketDef), + tagStack: make([]string, 0, 16), + } +} + +// peek returns the current character without advancing +func (p *Parser) peek() rune { + if p.pos >= len(p.input) { + return 0 + } + return rune(p.input[p.pos]) +} + +// peekN looks ahead n characters +func (p *Parser) peekN(n int) rune { + pos := p.pos + n + if pos >= len(p.input) { + return 0 + } + return rune(p.input[pos]) +} + +// advance moves to the next character and returns it +func (p *Parser) advance() rune { + if p.pos >= len(p.input) { + return 0 + } + + ch := rune(p.input[p.pos]) + p.pos++ + + if ch == '\n' { + p.line++ + p.col = 1 + } else { + p.col++ + } + + return ch +} + +// skipWhitespace skips whitespace characters +func (p *Parser) skipWhitespace() { + for p.pos < len(p.input) && unicode.IsSpace(p.peek()) { + p.advance() + } +} + +// consumeChar consumes the expected character, returns true if found +func (p *Parser) consumeChar(expected rune) bool { + if p.peek() == expected { + p.advance() + return true + } + return false +} + +// consumeString consumes the expected string, returns true if found +func (p *Parser) consumeString(expected string) bool { + if p.pos+len(expected) > len(p.input) { + return false + } + + if p.input[p.pos:p.pos+len(expected)] == expected { + for range expected { + p.advance() + } + return true + } + return false +} + +// parseIdentifier parses an identifier (alphanumeric + underscore) +func (p *Parser) parseIdentifier() (string, error) { + start := p.pos + + if p.pos >= len(p.input) || (!unicode.IsLetter(p.peek()) && p.peek() != '_') { + return "", fmt.Errorf("expected identifier at line %d, col %d", p.line, p.col) + } + + for p.pos < len(p.input) { + ch := p.peek() + if unicode.IsLetter(ch) || unicode.IsDigit(ch) || ch == '_' || ch == '-' { + p.advance() + } else { + break + } + } + + return p.input[start:p.pos], nil +} + +// parseQuotedString parses a quoted string value +func (p *Parser) parseQuotedString() (string, error) { + quote := p.peek() + if quote != '"' && quote != '\'' { + return "", fmt.Errorf("expected quoted string at line %d, col %d", p.line, p.col) + } + + p.advance() // consume opening quote + start := p.pos + + for p.pos < len(p.input) && p.peek() != quote { + if p.peek() == '\\' { + p.advance() // consume backslash + if p.pos < len(p.input) { + p.advance() // consume escaped character + } + } else { + p.advance() + } + } + + if p.pos >= len(p.input) { + return "", fmt.Errorf("unclosed quoted string at line %d", p.line) + } + + value := p.input[start:p.pos] + p.advance() // consume closing quote + return value, nil +} + +// parseAttributes parses tag attributes +func (p *Parser) parseAttributes() (map[string]string, error) { + attrs := make(map[string]string) + + for { + p.skipWhitespace() + + // Check for end of tag + if p.pos >= len(p.input) || p.peek() == '>' || (p.peek() == '/' && p.peekN(1) == '>') { + break + } + + // Parse attribute name + name, err := p.parseIdentifier() + if err != nil { + return nil, err + } + + p.skipWhitespace() + if !p.consumeChar('=') { + return nil, fmt.Errorf("expected '=' after attribute name '%s' at line %d", name, p.line) + } + + p.skipWhitespace() + + // Parse attribute value + value, err := p.parseQuotedString() + if err != nil { + return nil, err + } + + attrs[name] = strings.TrimSpace(value) + } + + return attrs, nil +} + +// parseComment skips over comment content +func (p *Parser) parseComment() error { + // We're at the start of + for p.pos+2 < len(p.input) { + if p.consumeString("-->") { + return nil + } + p.advance() + } + + return fmt.Errorf("unclosed comment at line %d", p.line) +} + +// Tag represents a parsed XML tag +type Tag struct { + Name string + Attributes map[string]string + SelfClosing bool + IsClosing bool +} + +// parseTag parses an opening, closing, or self-closing tag +func (p *Parser) parseTag() (*Tag, error) { + if !p.consumeChar('<') { + return nil, fmt.Errorf("expected '<' at line %d", p.line) + } + + // Check for comment + if p.peek() == '!' && p.peekN(1) == '-' && p.peekN(2) == '-' { + // We already consumed the '<', so parse comment from here + if !p.consumeString("!--") { + return nil, fmt.Errorf("expected ' + for p.pos+2 < len(p.input) { + if p.consumeString("-->") { + return p.parseTag() // recursively get the next tag + } + p.advance() + } + + return nil, fmt.Errorf("unclosed comment at line %d", p.line) + } + + tag := &Tag{} + + // Check for closing tag + if p.consumeChar('/') { + tag.IsClosing = true + name, err := p.parseIdentifier() + if err != nil { + return nil, err + } + tag.Name = name + + p.skipWhitespace() + if !p.consumeChar('>') { + return nil, fmt.Errorf("expected '>' after closing tag '%s' at line %d", name, p.line) + } + return tag, nil + } + + // Parse tag name + name, err := p.parseIdentifier() + if err != nil { + return nil, err + } + tag.Name = name + + // Parse attributes + attrs, err := p.parseAttributes() + if err != nil { + return nil, err + } + tag.Attributes = attrs + + p.skipWhitespace() + + // Check for self-closing + if p.consumeString("/>") { + tag.SelfClosing = true + } else if p.consumeChar('>') { + // Regular opening tag + } else { + return nil, fmt.Errorf("expected '>' or '/>' after tag '%s' at line %d", name, p.line) + } + + return tag, nil +} + +// pushTag pushes a tag name onto the stack for validation +func (p *Parser) pushTag(tag string) { + p.tagStack = append(p.tagStack, tag) +} + +// popTag pops and validates the closing tag +func (p *Parser) popTag(expectedTag string) error { + if len(p.tagStack) == 0 { + return fmt.Errorf("unexpected closing tag '%s' at line %d", expectedTag, p.line) + } + + lastTag := p.tagStack[len(p.tagStack)-1] + if lastTag != expectedTag { + return fmt.Errorf("mismatched closing tag: expected '%s', got '%s' at line %d", lastTag, expectedTag, p.line) + } + + p.tagStack = p.tagStack[:len(p.tagStack)-1] + return nil +} + +// validateAllTagsClosed checks for unclosed tags +func (p *Parser) validateAllTagsClosed() error { + if len(p.tagStack) > 0 { + return fmt.Errorf("unclosed tag '%s'", p.tagStack[len(p.tagStack)-1]) + } + return nil +} + +// Helper functions for robust attribute parsing +func (p *Parser) parseIntAttribute(value string, attributeName string) (int, error) { + if value == "" { + return 0, fmt.Errorf("empty %s attribute at line %d", attributeName, p.line) + } + + cleanValue := strings.TrimSpace(value) + if cleanValue == "" { + return 0, fmt.Errorf("empty %s attribute value '%s' at line %d", attributeName, value, p.line) + } + + result, err := strconv.Atoi(cleanValue) + if err != nil { + return 0, fmt.Errorf("invalid %s value '%s' at line %d: %v", attributeName, value, p.line, err) + } + + return result, nil +} + +func (p *Parser) parseUintAttribute(value string, attributeName string) (uint32, error) { + if value == "" { + return 0, fmt.Errorf("empty %s attribute at line %d", attributeName, p.line) + } + + cleanValue := strings.TrimSpace(value) + if cleanValue == "" { + return 0, fmt.Errorf("empty %s attribute value '%s' at line %d", attributeName, value, p.line) + } + + result, err := strconv.ParseUint(cleanValue, 10, 32) + if err != nil { + return 0, fmt.Errorf("invalid %s value '%s' at line %d: %v", attributeName, value, p.line, err) + } + + return uint32(result), nil +} + +func (p *Parser) parseInt8Attribute(value string, attributeName string) (int8, error) { + if value == "" { + return 0, fmt.Errorf("empty %s attribute at line %d", attributeName, p.line) + } + + cleanValue := strings.TrimSpace(value) + if cleanValue == "" { + return 0, fmt.Errorf("empty %s attribute value '%s' at line %d", attributeName, value, p.line) + } + + result, err := strconv.ParseInt(cleanValue, 10, 8) + if err != nil { + return 0, fmt.Errorf("invalid %s value '%s' at line %d: %v", attributeName, value, p.line, err) + } + + return int8(result), nil +} + +// parseFieldNames parses comma-separated field names +func (p *Parser) parseFieldNames(nameAttr string) []string { + if nameAttr == "" { + return nil + } + + // Fast path for single name + if !strings.Contains(nameAttr, ",") { + name := strings.TrimSpace(nameAttr) + if name != "" { + return []string{name} + } + return nil + } + + // Parse multiple names + names := strings.Split(nameAttr, ",") + result := make([]string, 0, len(names)) + for _, name := range names { + if trimmed := strings.TrimSpace(name); trimmed != "" { + result = append(result, trimmed) + } + } + + return result +} + +// Parse parses the entire packet definition document +func (p *Parser) Parse() (map[string]*PacketDef, error) { + packets := make(map[string]*PacketDef) + + for p.pos < len(p.input) { + p.skipWhitespace() + if p.pos >= len(p.input) { + break + } + + if p.peek() != '<' { + return nil, fmt.Errorf("expected '<' at line %d", p.line) + } + + // Handle comments at the top level + if p.peek() == '<' && p.peekN(1) == '!' && p.peekN(2) == '-' && p.peekN(3) == '-' { + err := p.parseComment() + if err != nil { + return nil, err + } + continue + } + + tag, err := p.parseTag() + if err != nil { + return nil, err + } + + switch tag.Name { + case "packet": + name := tag.Attributes["name"] + packet, err := p.parsePacket(tag) + if err != nil { + return nil, err + } + if name != "" { + packets[name] = packet + } + + case "substruct": + name := tag.Attributes["name"] + substruct, err := p.parseSubstruct(tag) + if err != nil { + return nil, err + } + if name != "" { + p.substructs[name] = substruct + } + + case "template": + name := tag.Attributes["name"] + if name != "" { + err := p.parseTemplateDefinition(tag, name) + if err != nil { + return nil, err + } + } + + default: + return nil, fmt.Errorf("unexpected top-level tag '%s' at line %d", tag.Name, p.line) + } + } + + if err := p.validateAllTagsClosed(); err != nil { + return nil, err + } + + return packets, nil +} + +// parsePacket parses a packet element +func (p *Parser) parsePacket(openTag *Tag) (*PacketDef, error) { + packetDef := NewPacketDef(16) + + if openTag.SelfClosing { + return packetDef, nil + } + + p.pushTag("packet") + + for { + p.skipWhitespace() + if p.pos >= len(p.input) { + return nil, fmt.Errorf("unexpected end of input in packet") + } + + if p.peek() != '<' { + return nil, fmt.Errorf("expected '<' at line %d", p.line) + } + + // Handle comments + if p.peek() == '<' && p.peekN(1) == '!' && p.peekN(2) == '-' && p.peekN(3) == '-' { + err := p.parseComment() + if err != nil { + return nil, err + } + continue + } + + tag, err := p.parseTag() + if err != nil { + return nil, err + } + + if tag.IsClosing { + if tag.Name != "packet" { + return nil, fmt.Errorf("expected closing tag 'packet', got '%s' at line %d", tag.Name, p.line) + } + if err := p.popTag("packet"); err != nil { + return nil, err + } + break + } + + if tag.Name == "version" { + err := p.parseVersion(tag, packetDef) + if err != nil { + return nil, err + } + } else { + return nil, fmt.Errorf("unexpected tag '%s' in packet at line %d", tag.Name, p.line) + } + } + + return packetDef, nil +} + +// parseSubstruct parses a substruct element +func (p *Parser) parseSubstruct(openTag *Tag) (*PacketDef, error) { + packetDef := NewPacketDef(16) + + if openTag.SelfClosing { + return packetDef, nil + } + + p.pushTag("substruct") + + // Check if this substruct contains version elements + hasVersions := false + for { + p.skipWhitespace() + if p.pos >= len(p.input) { + return nil, fmt.Errorf("unexpected end of input in substruct") + } + + if p.peek() != '<' { + return nil, fmt.Errorf("expected '<' at line %d", p.line) + } + + tag, err := p.parseTag() + if err != nil { + return nil, err + } + + if tag.IsClosing { + if tag.Name != "substruct" { + return nil, fmt.Errorf("expected closing tag 'substruct', got '%s' at line %d", tag.Name, p.line) + } + if err := p.popTag("substruct"); err != nil { + return nil, err + } + break + } + + if tag.Name == "version" { + hasVersions = true + err := p.parseVersion(tag, packetDef) + if err != nil { + return nil, err + } + } else if hasVersions { + return nil, fmt.Errorf("unexpected tag '%s' after version in substruct at line %d", tag.Name, p.line) + } else { + // No versions found, parse as direct elements + fieldOrder := make([]string, 0) + err := p.parseElement(tag, packetDef, &fieldOrder, "") + if err != nil { + return nil, err + } + + // Continue parsing remaining elements + for { + p.skipWhitespace() + if p.pos >= len(p.input) || p.peek() != '<' { + break + } + + nextTag, err := p.parseTag() + if err != nil { + return nil, err + } + + if nextTag.IsClosing { + if nextTag.Name != "substruct" { + return nil, fmt.Errorf("expected closing tag 'substruct', got '%s' at line %d", nextTag.Name, p.line) + } + if err := p.popTag("substruct"); err != nil { + return nil, err + } + break + } + + err = p.parseElement(nextTag, packetDef, &fieldOrder, "") + if err != nil { + return nil, err + } + } + + // Set field order for version 1 + packetDef.Orders[1] = make([]string, len(fieldOrder)) + copy(packetDef.Orders[1], fieldOrder) + break + } + } + + return packetDef, nil +} + +// parseVersion parses a version element +func (p *Parser) parseVersion(openTag *Tag, packetDef *PacketDef) error { + version := uint32(1) + if v := openTag.Attributes["number"]; v != "" { + if parsed, err := p.parseUintAttribute(v, "number"); err == nil { + version = parsed + } + // Don't fail on invalid version numbers, just use default + } + + fieldOrder := make([]string, 0) + + if openTag.SelfClosing { + packetDef.Orders[version] = fieldOrder + return nil + } + + p.pushTag("version") + + for { + p.skipWhitespace() + if p.pos >= len(p.input) { + return fmt.Errorf("unexpected end of input in version") + } + + if p.peek() != '<' { + return fmt.Errorf("expected '<' at line %d", p.line) + } + + // Handle comments + if p.peek() == '<' && p.peekN(1) == '!' && p.peekN(2) == '-' && p.peekN(3) == '-' { + err := p.parseComment() + if err != nil { + return err + } + continue + } + + tag, err := p.parseTag() + if err != nil { + return err + } + + if tag.IsClosing { + if tag.Name != "version" { + return fmt.Errorf("expected closing tag 'version', got '%s' at line %d", tag.Name, p.line) + } + if err := p.popTag("version"); err != nil { + return err + } + break + } + + err = p.parseElement(tag, packetDef, &fieldOrder, "") + if err != nil { + return err + } + } + + packetDef.Orders[version] = make([]string, len(fieldOrder)) + copy(packetDef.Orders[version], fieldOrder) + return nil +} + +// parseElement parses any element (field, array, group, template, substruct reference) +func (p *Parser) parseElement(tag *Tag, packetDef *PacketDef, fieldOrder *[]string, prefix string) error { + switch tag.Name { + case "group": + return p.parseGroup(tag, packetDef, fieldOrder, prefix) + case "array": + return p.parseArray(tag, packetDef, fieldOrder, prefix) + case "template": + return p.parseTemplateUsage(tag, packetDef, fieldOrder, prefix) + case "substruct": + return p.parseSubstructReference(tag, packetDef, fieldOrder, prefix) + case "item": + return p.parseItemField(tag, packetDef, fieldOrder, prefix) + default: + // Try to parse as a field + return p.parseField(tag, packetDef, fieldOrder, prefix) + } +} + +// parseGroup parses a group element +func (p *Parser) parseGroup(openTag *Tag, packetDef *PacketDef, fieldOrder *[]string, prefix string) error { + groupPrefix := prefix + if name := openTag.Attributes["name"]; name != "" { + if prefix == "" { + groupPrefix = name + "_" + } else { + groupPrefix = prefix + name + "_" + } + } + + if openTag.SelfClosing { + return nil + } + + p.pushTag("group") + + for { + p.skipWhitespace() + if p.pos >= len(p.input) { + return fmt.Errorf("unexpected end of input in group") + } + + if p.peek() != '<' { + return fmt.Errorf("expected '<' at line %d", p.line) + } + + tag, err := p.parseTag() + if err != nil { + return err + } + + if tag.IsClosing { + if tag.Name != "group" { + return fmt.Errorf("expected closing tag 'group', got '%s' at line %d", tag.Name, p.line) + } + if err := p.popTag("group"); err != nil { + return err + } + break + } + + err = p.parseElement(tag, packetDef, fieldOrder, groupPrefix) + if err != nil { + return err + } + } + + return nil +} + +// parseArray parses an array element +func (p *Parser) parseArray(openTag *Tag, packetDef *PacketDef, fieldOrder *[]string, prefix string) error { + var arrayName string + if prefix == "" { + arrayName = openTag.Attributes["name"] + } else { + arrayName = prefix + openTag.Attributes["name"] + } + + fieldDesc := FieldDesc{ + Type: common.TypeArray, + Condition: openTag.Attributes["count"], + AddToStruct: true, // Default to true + } + + if ifCond := openTag.Attributes["if"]; ifCond != "" { + fieldDesc.Condition = combineConditions(fieldDesc.Condition, ifCond) + } + + // Parse additional attributes + if maxSize := openTag.Attributes["max_size"]; maxSize != "" { + if m, err := p.parseIntAttribute(maxSize, "max_size"); err == nil { + fieldDesc.MaxArraySize = m + } else { + return err + } + } + + if optional := openTag.Attributes["optional"]; optional == "true" { + fieldDesc.Optional = true + } + + if addToStruct := openTag.Attributes["add_to_struct"]; addToStruct == "false" { + fieldDesc.AddToStruct = false + } + + // Handle substruct reference + if substruct := openTag.Attributes["substruct"]; substruct != "" { + if subDef, exists := p.substructs[substruct]; exists { + fieldDesc.SubDef = subDef + } + } + + // Arrays with substruct references or explicit self-closing syntax are self-closing + if openTag.SelfClosing || fieldDesc.SubDef != nil { + packetDef.Fields[arrayName] = fieldDesc + *fieldOrder = append(*fieldOrder, arrayName) + return nil + } + + p.pushTag("array") + + // Handle direct child elements as substruct fields + if fieldDesc.SubDef == nil { + subDef := NewPacketDef(16) + subOrder := make([]string, 0) + + for { + p.skipWhitespace() + if p.pos >= len(p.input) { + return fmt.Errorf("unexpected end of input in array") + } + + if p.peek() != '<' { + return fmt.Errorf("expected '<' at line %d", p.line) + } + + tag, err := p.parseTag() + if err != nil { + return err + } + + if tag.IsClosing { + if tag.Name != "array" { + return fmt.Errorf("expected closing tag 'array', got '%s' at line %d", tag.Name, p.line) + } + if err := p.popTag("array"); err != nil { + return err + } + break + } + + err = p.parseElement(tag, subDef, &subOrder, "") + if err != nil { + return err + } + } + + // Only create substruct if we actually have fields + if len(subOrder) > 0 { + subDef.Orders[1] = make([]string, len(subOrder)) + copy(subDef.Orders[1], subOrder) + fieldDesc.SubDef = subDef + } + } + + packetDef.Fields[arrayName] = fieldDesc + *fieldOrder = append(*fieldOrder, arrayName) + return nil +} + +// combineConditions combines two conditions with AND logic - using existing function + +// parseField parses a field element +func (p *Parser) parseField(openTag *Tag, packetDef *PacketDef, fieldOrder *[]string, prefix string) error { + dataType, exists := getDataType(openTag.Name) + if !exists { + return fmt.Errorf("unknown field type '%s' at line %d", openTag.Name, p.line) + } + + nameAttr := openTag.Attributes["name"] + if nameAttr == "" { + return fmt.Errorf("field missing name attribute at line %d", p.line) + } + + names := p.parseFieldNames(nameAttr) + for _, name := range names { + var fullName string + if prefix == "" { + fullName = name + } else { + fullName = prefix + name + } + + fieldDesc := FieldDesc{ + Type: dataType, + Condition: openTag.Attributes["if"], + AddToStruct: true, // Default to true + AddType: dataType, + } + + // Parse size attribute + if size := openTag.Attributes["size"]; size != "" { + if s, err := p.parseIntAttribute(size, "size"); err == nil { + fieldDesc.Length = s + } else { + return err + } + } + + // Parse oversized attribute + if oversized := openTag.Attributes["oversized"]; oversized != "" { + if o, err := p.parseIntAttribute(oversized, "oversized"); err == nil { + fieldDesc.Oversized = o + } else { + return err + } + } + + // Parse type2 attributes + if type2 := openTag.Attributes["type2"]; type2 != "" { + if t2, exists := getDataType(type2); exists { + fieldDesc.Type2 = t2 + fieldDesc.Type2Cond = openTag.Attributes["type2_if"] + } + } + + // Parse default value + if defaultVal := openTag.Attributes["default"]; defaultVal != "" { + if d, err := p.parseInt8Attribute(defaultVal, "default"); err == nil { + fieldDesc.DefaultValue = d + } else { + return err + } + } + + // Parse max_size + if maxSize := openTag.Attributes["max_size"]; maxSize != "" { + if m, err := p.parseIntAttribute(maxSize, "max_size"); err == nil { + fieldDesc.MaxArraySize = m + } else { + return err + } + } + + // Parse optional + if optional := openTag.Attributes["optional"]; optional == "true" { + fieldDesc.Optional = true + } + + // Parse add_to_struct + if addToStruct := openTag.Attributes["add_to_struct"]; addToStruct == "false" { + fieldDesc.AddToStruct = false + } + + // Parse add_type + if addType := openTag.Attributes["add_type"]; addType != "" { + if at, exists := getDataType(addType); exists { + fieldDesc.AddType = at + } + } + + packetDef.Fields[fullName] = fieldDesc + *fieldOrder = append(*fieldOrder, fullName) + } + + return nil +} + +// parseTemplateDefinition parses a template definition +func (p *Parser) parseTemplateDefinition(openTag *Tag, templateName string) error { + templateDef := NewPacketDef(16) + fieldOrder := make([]string, 0) + + if openTag.SelfClosing { + templateDef.Orders[1] = fieldOrder + p.templates[templateName] = templateDef + return nil + } + + p.pushTag("template") + + for { + p.skipWhitespace() + if p.pos >= len(p.input) { + return fmt.Errorf("unexpected end of input in template") + } + + if p.peek() != '<' { + return fmt.Errorf("expected '<' at line %d", p.line) + } + + tag, err := p.parseTag() + if err != nil { + return err + } + + if tag.IsClosing { + if tag.Name != "template" { + return fmt.Errorf("expected closing tag 'template', got '%s' at line %d", tag.Name, p.line) + } + if err := p.popTag("template"); err != nil { + return err + } + break + } + + err = p.parseElement(tag, templateDef, &fieldOrder, "") + if err != nil { + return err + } + } + + templateDef.Orders[1] = make([]string, len(fieldOrder)) + copy(templateDef.Orders[1], fieldOrder) + p.templates[templateName] = templateDef + return nil +} + +// parseTemplateUsage parses template usage +func (p *Parser) parseTemplateUsage(tag *Tag, packetDef *PacketDef, fieldOrder *[]string, prefix string) error { + // Template usage: