package parser

import (
	"fmt"
	"strconv"
	"strings"
	"unicode"

	"eq2emu/internal/common"
)

// Parser is a single-pass recursive descent parser for packet definitions.
//
// It walks the input one byte at a time while tracking the current line and
// column for error messages. Substruct and template definitions met during
// parsing are stored by name, and tagStack holds the names of the tags that
// are currently open.
type Parser struct {
	input      string
	pos        int
	line       int
	col        int
	substructs map[string]*PacketDef
	templates  map[string]*PacketDef
	tagStack   []string
}

// NewParser creates a new string-based parser positioned at the start of
// input (line 1, column 1) with empty substruct and template tables.
func NewParser(input string) *Parser {
	return &Parser{
		input:      input,
		line:       1,
		col:        1,
		substructs: make(map[string]*PacketDef),
		templates:  make(map[string]*PacketDef),
		tagStack:   make([]string, 0, 16),
	}
}

// peek returns the current character without advancing, or 0 at end of input.
// The input is read byte-wise; no UTF-8 decoding is performed.
func (p *Parser) peek() rune {
	if p.pos >= len(p.input) {
		return 0
	}
	return rune(p.input[p.pos])
}

// peekN looks ahead n characters (bytes) without advancing. It returns 0 when
// the requested position lies outside the input.
func (p *Parser) peekN(n int) rune {
	pos := p.pos + n
	if pos < 0 || pos >= len(p.input) {
		return 0
	}
	return rune(p.input[pos])
}

// advance moves to the next character and returns the one it stepped over,
// or 0 at end of input. A newline bumps the line counter and resets the
// column; any other byte bumps the column.
func (p *Parser) advance() rune {
	if p.pos >= len(p.input) {
		return 0
	}
	ch := rune(p.input[p.pos])
	p.pos++
	if ch == '\n' {
		p.line++
		p.col = 1
	} else {
		p.col++
	}
	return ch
}

// skipWhitespace skips whitespace characters.
func (p *Parser) skipWhitespace() {
	for p.pos < len(p.input) && unicode.IsSpace(p.peek()) {
		p.advance()
	}
}

// consumeChar consumes the expected character, returns true if found.
// Nothing is consumed when the current character differs.
func (p *Parser) consumeChar(expected rune) bool {
	if p.peek() != expected {
		return false
	}
	p.advance()
	return true
}

// consumeString consumes the expected string, returns true if found.
// Nothing is consumed when the input at the current position does not start
// with expected.
func (p *Parser) consumeString(expected string) bool {
	if !strings.HasPrefix(p.input[p.pos:], expected) {
		return false
	}
	// advance steps one byte at a time, so step once per byte of expected
	// (not once per rune) to keep pos, line and col in sync.
	for i := 0; i < len(expected); i++ {
		p.advance()
	}
	return true
}

// parseIdentifier parses an identifier. The first character must be a letter
// or underscore; the following characters may be letters, digits,
// underscores or hyphens. It returns an error, without consuming anything,
// when no identifier starts at the current position.
func (p *Parser) parseIdentifier() (string, error) {
	start := p.pos
	if p.pos >= len(p.input) || (!unicode.IsLetter(p.peek()) && p.peek() != '_') {
		return "", fmt.Errorf("expected identifier at line %d, col %d", p.line, p.col)
	}
	for p.pos < len(p.input) {
		ch := p.peek()
		if !unicode.IsLetter(ch) && !unicode.IsDigit(ch) && ch != '_' && ch != '-' {
			break
		}
		p.advance()
	}
	return p.input[start:p.pos], nil
}

// parseQuotedString parses a quoted string value delimited by a matching pair
// of single or double quotes and returns the raw text between them.
//
// A backslash makes the scanner step over the following character, so an
// escaped quote does not end the string; the backslash itself is kept in the
// returned value. An error is returned when the current character is not a
// quote or when the input ends before the closing quote.
func (p *Parser) parseQuotedString() (string, error) {
	quote := p.peek()
	if quote != '"' && quote != '\'' {
		return "", fmt.Errorf("expected quoted string at line %d, col %d", p.line, p.col)
	}
	// Remember where the string opened: by the time a missing closing quote
	// is detected the scanner is at end of input, which is not a useful
	// location to report.
	startLine := p.line
	p.advance() // consume opening quote
	start := p.pos
	for p.pos < len(p.input) && p.peek() != quote {
		if p.peek() == '\\' {
			p.advance() // consume backslash
			if p.pos < len(p.input) {
				p.advance() // consume escaped character
			}
		} else {
			p.advance()
		}
	}
	if p.pos >= len(p.input) {
		return "", fmt.Errorf("unclosed quoted string at line %d", startLine)
	}
	value := p.input[start:p.pos]
	p.advance() // consume closing quote
	return value, nil
}

// parseAttributes parses tag attributes of the form name="value" until the
// end of the tag ('>' or "/>") or the end of input, leaving the terminator
// unconsumed. Values are stored with surrounding whitespace trimmed; a
// repeated attribute name overwrites the earlier value.
func (p *Parser) parseAttributes() (map[string]string, error) {
	attrs := make(map[string]string)
	for {
		p.skipWhitespace()
		// Check for end of tag
		if p.pos >= len(p.input) || p.peek() == '>' || (p.peek() == '/' && p.peekN(1) == '>') {
			return attrs, nil
		}

		// Parse attribute name
		name, err := p.parseIdentifier()
		if err != nil {
			return nil, err
		}
		p.skipWhitespace()
		if !p.consumeChar('=') {
			return nil, fmt.Errorf("expected '=' after attribute name '%s' at line %d", name, p.line)
		}
		p.skipWhitespace()

		// Parse attribute value
		value, err := p.parseQuotedString()
		if err != nil {
			return nil, err
		}
		attrs[name] = strings.TrimSpace(value)
	}
}

// parseComment skips over comment content, up to and including the "-->"
// terminator. It returns an error when the input ends before the terminator.
//
// Callers in this file invoke it while positioned on the four-byte comment
// opener, which they detect with peek/peekN beforehand.
func (p *Parser) parseComment() error {
	// Step over the opener first so that its trailing dashes cannot be taken
	// for the beginning of the terminator.
	if p.peek() == '<' && p.peekN(1) == '!' && p.peekN(2) == '-' && p.peekN(3) == '-' {
		for i := 0; i < 4; i++ {
			p.advance()
		}
	}
	// The terminator is three bytes long, so stop once fewer than three remain.
	for p.pos+2 < len(p.input) {
		if p.consumeString("-->") {
			return nil
		}
		p.advance()
	}
	return fmt.Errorf("unclosed comment at line %d", p.line)
}

// Tag represents a parsed XML tag.
type Tag struct {
	Name        string            // tag name as written in the input
	Attributes  map[string]string // attribute values by name; left nil for closing tags
	SelfClosing bool              // the tag ended with "/>"
	IsClosing   bool              // the tag started with "</"
}

// parseTag parses an opening, closing, or
self-closing tag func (p *Parser) parseTag() (*Tag, error) { if !p.consumeChar('<') { return nil, fmt.Errorf("expected '<' at line %d", p.line) } // Check for comment if p.peek() == '!' && p.peekN(1) == '-' && p.peekN(2) == '-' { // We already consumed the '<', so parse comment from here if !p.consumeString("!--") { return nil, fmt.Errorf("expected ' for p.pos+2 < len(p.input) { if p.consumeString("-->") { return p.parseTag() // recursively get the next tag } p.advance() } return nil, fmt.Errorf("unclosed comment at line %d", p.line) } tag := &Tag{} // Check for closing tag if p.consumeChar('/') { tag.IsClosing = true name, err := p.parseIdentifier() if err != nil { return nil, err } tag.Name = name p.skipWhitespace() if !p.consumeChar('>') { return nil, fmt.Errorf("expected '>' after closing tag '%s' at line %d", name, p.line) } return tag, nil } // Parse tag name name, err := p.parseIdentifier() if err != nil { return nil, err } tag.Name = name // Parse attributes attrs, err := p.parseAttributes() if err != nil { return nil, err } tag.Attributes = attrs p.skipWhitespace() // Check for self-closing if p.consumeString("/>") { tag.SelfClosing = true } else if p.consumeChar('>') { // Regular opening tag } else { return nil, fmt.Errorf("expected '>' or '/>' after tag '%s' at line %d", name, p.line) } return tag, nil } // pushTag pushes a tag name onto the stack for validation func (p *Parser) pushTag(tag string) { p.tagStack = append(p.tagStack, tag) } // popTag pops and validates the closing tag func (p *Parser) popTag(expectedTag string) error { if len(p.tagStack) == 0 { return fmt.Errorf("unexpected closing tag '%s' at line %d", expectedTag, p.line) } lastTag := p.tagStack[len(p.tagStack)-1] if lastTag != expectedTag { return fmt.Errorf("mismatched closing tag: expected '%s', got '%s' at line %d", lastTag, expectedTag, p.line) } p.tagStack = p.tagStack[:len(p.tagStack)-1] return nil } // validateAllTagsClosed checks for unclosed tags func (p *Parser) 
validateAllTagsClosed() error { if len(p.tagStack) > 0 { return fmt.Errorf("unclosed tag '%s'", p.tagStack[len(p.tagStack)-1]) } return nil } // Helper functions for robust attribute parsing func (p *Parser) parseIntAttribute(value string, attributeName string) (int, error) { if value == "" { return 0, fmt.Errorf("empty %s attribute at line %d", attributeName, p.line) } cleanValue := strings.TrimSpace(value) if cleanValue == "" { return 0, fmt.Errorf("empty %s attribute value '%s' at line %d", attributeName, value, p.line) } result, err := strconv.Atoi(cleanValue) if err != nil { return 0, fmt.Errorf("invalid %s value '%s' at line %d: %v", attributeName, value, p.line, err) } return result, nil } func (p *Parser) parseUintAttribute(value string, attributeName string) (uint32, error) { if value == "" { return 0, fmt.Errorf("empty %s attribute at line %d", attributeName, p.line) } cleanValue := strings.TrimSpace(value) if cleanValue == "" { return 0, fmt.Errorf("empty %s attribute value '%s' at line %d", attributeName, value, p.line) } result, err := strconv.ParseUint(cleanValue, 10, 32) if err != nil { return 0, fmt.Errorf("invalid %s value '%s' at line %d: %v", attributeName, value, p.line, err) } return uint32(result), nil } func (p *Parser) parseInt8Attribute(value string, attributeName string) (int8, error) { if value == "" { return 0, fmt.Errorf("empty %s attribute at line %d", attributeName, p.line) } cleanValue := strings.TrimSpace(value) if cleanValue == "" { return 0, fmt.Errorf("empty %s attribute value '%s' at line %d", attributeName, value, p.line) } result, err := strconv.ParseInt(cleanValue, 10, 8) if err != nil { return 0, fmt.Errorf("invalid %s value '%s' at line %d: %v", attributeName, value, p.line, err) } return int8(result), nil } // parseFieldNames parses comma-separated field names func (p *Parser) parseFieldNames(nameAttr string) []string { if nameAttr == "" { return nil } // Fast path for single name if !strings.Contains(nameAttr, ",") { name 
:= strings.TrimSpace(nameAttr) if name != "" { return []string{name} } return nil } // Parse multiple names names := strings.Split(nameAttr, ",") result := make([]string, 0, len(names)) for _, name := range names { if trimmed := strings.TrimSpace(name); trimmed != "" { result = append(result, trimmed) } } return result } // Parse parses the entire packet definition document func (p *Parser) Parse() (map[string]*PacketDef, error) { packets := make(map[string]*PacketDef) for p.pos < len(p.input) { p.skipWhitespace() if p.pos >= len(p.input) { break } if p.peek() != '<' { return nil, fmt.Errorf("expected '<' at line %d", p.line) } // Handle comments at the top level if p.peek() == '<' && p.peekN(1) == '!' && p.peekN(2) == '-' && p.peekN(3) == '-' { err := p.parseComment() if err != nil { return nil, err } continue } tag, err := p.parseTag() if err != nil { return nil, err } switch tag.Name { case "packet": name := tag.Attributes["name"] packet, err := p.parsePacket(tag) if err != nil { return nil, err } if name != "" { packets[name] = packet } case "substruct": name := tag.Attributes["name"] substruct, err := p.parseSubstruct(tag) if err != nil { return nil, err } if name != "" { p.substructs[name] = substruct } case "template": name := tag.Attributes["name"] if name != "" { err := p.parseTemplateDefinition(tag, name) if err != nil { return nil, err } } default: return nil, fmt.Errorf("unexpected top-level tag '%s' at line %d", tag.Name, p.line) } } if err := p.validateAllTagsClosed(); err != nil { return nil, err } return packets, nil } // parsePacket parses a packet element func (p *Parser) parsePacket(openTag *Tag) (*PacketDef, error) { packetDef := NewPacketDef(16) if openTag.SelfClosing { return packetDef, nil } p.pushTag("packet") for { p.skipWhitespace() if p.pos >= len(p.input) { return nil, fmt.Errorf("unexpected end of input in packet") } if p.peek() != '<' { return nil, fmt.Errorf("expected '<' at line %d", p.line) } // Handle comments if p.peek() == '<' && 
p.peekN(1) == '!' && p.peekN(2) == '-' && p.peekN(3) == '-' { err := p.parseComment() if err != nil { return nil, err } continue } tag, err := p.parseTag() if err != nil { return nil, err } if tag.IsClosing { if tag.Name != "packet" { return nil, fmt.Errorf("expected closing tag 'packet', got '%s' at line %d", tag.Name, p.line) } if err := p.popTag("packet"); err != nil { return nil, err } break } if tag.Name == "version" { err := p.parseVersion(tag, packetDef) if err != nil { return nil, err } } else { return nil, fmt.Errorf("unexpected tag '%s' in packet at line %d", tag.Name, p.line) } } return packetDef, nil } // parseSubstruct parses a substruct element func (p *Parser) parseSubstruct(openTag *Tag) (*PacketDef, error) { packetDef := NewPacketDef(16) if openTag.SelfClosing { return packetDef, nil } p.pushTag("substruct") // Check if this substruct contains version elements hasVersions := false for { p.skipWhitespace() if p.pos >= len(p.input) { return nil, fmt.Errorf("unexpected end of input in substruct") } if p.peek() != '<' { return nil, fmt.Errorf("expected '<' at line %d", p.line) } tag, err := p.parseTag() if err != nil { return nil, err } if tag.IsClosing { if tag.Name != "substruct" { return nil, fmt.Errorf("expected closing tag 'substruct', got '%s' at line %d", tag.Name, p.line) } if err := p.popTag("substruct"); err != nil { return nil, err } break } if tag.Name == "version" { hasVersions = true err := p.parseVersion(tag, packetDef) if err != nil { return nil, err } } else if hasVersions { return nil, fmt.Errorf("unexpected tag '%s' after version in substruct at line %d", tag.Name, p.line) } else { // No versions found, parse as direct elements fieldOrder := make([]string, 0) err := p.parseElement(tag, packetDef, &fieldOrder, "") if err != nil { return nil, err } // Continue parsing remaining elements for { p.skipWhitespace() if p.pos >= len(p.input) || p.peek() != '<' { break } nextTag, err := p.parseTag() if err != nil { return nil, err } if 
nextTag.IsClosing { if nextTag.Name != "substruct" { return nil, fmt.Errorf("expected closing tag 'substruct', got '%s' at line %d", nextTag.Name, p.line) } if err := p.popTag("substruct"); err != nil { return nil, err } break } err = p.parseElement(nextTag, packetDef, &fieldOrder, "") if err != nil { return nil, err } } // Set field order for version 1 packetDef.Orders[1] = make([]string, len(fieldOrder)) copy(packetDef.Orders[1], fieldOrder) break } } return packetDef, nil } // parseVersion parses a version element func (p *Parser) parseVersion(openTag *Tag, packetDef *PacketDef) error { version := uint32(1) if v := openTag.Attributes["number"]; v != "" { if parsed, err := p.parseUintAttribute(v, "number"); err == nil { version = parsed } // Don't fail on invalid version numbers, just use default } fieldOrder := make([]string, 0) if openTag.SelfClosing { packetDef.Orders[version] = fieldOrder return nil } p.pushTag("version") for { p.skipWhitespace() if p.pos >= len(p.input) { return fmt.Errorf("unexpected end of input in version") } if p.peek() != '<' { return fmt.Errorf("expected '<' at line %d", p.line) } // Handle comments if p.peek() == '<' && p.peekN(1) == '!' 
&& p.peekN(2) == '-' && p.peekN(3) == '-' { err := p.parseComment() if err != nil { return err } continue } tag, err := p.parseTag() if err != nil { return err } if tag.IsClosing { if tag.Name != "version" { return fmt.Errorf("expected closing tag 'version', got '%s' at line %d", tag.Name, p.line) } if err := p.popTag("version"); err != nil { return err } break } err = p.parseElement(tag, packetDef, &fieldOrder, "") if err != nil { return err } } packetDef.Orders[version] = make([]string, len(fieldOrder)) copy(packetDef.Orders[version], fieldOrder) return nil } // parseElement parses any element (field, array, group, template, substruct reference) func (p *Parser) parseElement(tag *Tag, packetDef *PacketDef, fieldOrder *[]string, prefix string) error { switch tag.Name { case "group": return p.parseGroup(tag, packetDef, fieldOrder, prefix) case "array": return p.parseArray(tag, packetDef, fieldOrder, prefix) case "template": return p.parseTemplateUsage(tag, packetDef, fieldOrder, prefix) case "substruct": return p.parseSubstructReference(tag, packetDef, fieldOrder, prefix) case "item": return p.parseItemField(tag, packetDef, fieldOrder, prefix) default: // Try to parse as a field return p.parseField(tag, packetDef, fieldOrder, prefix) } } // parseGroup parses a group element func (p *Parser) parseGroup(openTag *Tag, packetDef *PacketDef, fieldOrder *[]string, prefix string) error { groupPrefix := prefix if name := openTag.Attributes["name"]; name != "" { if prefix == "" { groupPrefix = name + "_" } else { groupPrefix = prefix + name + "_" } } if openTag.SelfClosing { return nil } p.pushTag("group") for { p.skipWhitespace() if p.pos >= len(p.input) { return fmt.Errorf("unexpected end of input in group") } if p.peek() != '<' { return fmt.Errorf("expected '<' at line %d", p.line) } tag, err := p.parseTag() if err != nil { return err } if tag.IsClosing { if tag.Name != "group" { return fmt.Errorf("expected closing tag 'group', got '%s' at line %d", tag.Name, p.line) } 
if err := p.popTag("group"); err != nil { return err } break } err = p.parseElement(tag, packetDef, fieldOrder, groupPrefix) if err != nil { return err } } return nil } // parseArray parses an array element func (p *Parser) parseArray(openTag *Tag, packetDef *PacketDef, fieldOrder *[]string, prefix string) error { var arrayName string if prefix == "" { arrayName = openTag.Attributes["name"] } else { arrayName = prefix + openTag.Attributes["name"] } fieldDesc := FieldDesc{ Type: common.TypeArray, Condition: openTag.Attributes["count"], AddToStruct: true, // Default to true } if ifCond := openTag.Attributes["if"]; ifCond != "" { fieldDesc.Condition = combineConditions(fieldDesc.Condition, ifCond) } // Parse additional attributes if maxSize := openTag.Attributes["max_size"]; maxSize != "" { if m, err := p.parseIntAttribute(maxSize, "max_size"); err == nil { fieldDesc.MaxArraySize = m } else { return err } } if optional := openTag.Attributes["optional"]; optional == "true" { fieldDesc.Optional = true } if addToStruct := openTag.Attributes["add_to_struct"]; addToStruct == "false" { fieldDesc.AddToStruct = false } // Handle substruct reference if substruct := openTag.Attributes["substruct"]; substruct != "" { if subDef, exists := p.substructs[substruct]; exists { fieldDesc.SubDef = subDef } } // Arrays with substruct references or explicit self-closing syntax are self-closing if openTag.SelfClosing || fieldDesc.SubDef != nil { packetDef.Fields[arrayName] = fieldDesc *fieldOrder = append(*fieldOrder, arrayName) return nil } p.pushTag("array") // Handle direct child elements as substruct fields if fieldDesc.SubDef == nil { subDef := NewPacketDef(16) subOrder := make([]string, 0) for { p.skipWhitespace() if p.pos >= len(p.input) { return fmt.Errorf("unexpected end of input in array") } if p.peek() != '<' { return fmt.Errorf("expected '<' at line %d", p.line) } tag, err := p.parseTag() if err != nil { return err } if tag.IsClosing { if tag.Name != "array" { return 
fmt.Errorf("expected closing tag 'array', got '%s' at line %d", tag.Name, p.line) } if err := p.popTag("array"); err != nil { return err } break } err = p.parseElement(tag, subDef, &subOrder, "") if err != nil { return err } } // Only create substruct if we actually have fields if len(subOrder) > 0 { subDef.Orders[1] = make([]string, len(subOrder)) copy(subDef.Orders[1], subOrder) fieldDesc.SubDef = subDef } } packetDef.Fields[arrayName] = fieldDesc *fieldOrder = append(*fieldOrder, arrayName) return nil } // combineConditions combines two conditions with AND logic - using existing function // parseField parses a field element func (p *Parser) parseField(openTag *Tag, packetDef *PacketDef, fieldOrder *[]string, prefix string) error { dataType, exists := getDataType(openTag.Name) if !exists { return fmt.Errorf("unknown field type '%s' at line %d", openTag.Name, p.line) } nameAttr := openTag.Attributes["name"] if nameAttr == "" { return fmt.Errorf("field missing name attribute at line %d", p.line) } names := p.parseFieldNames(nameAttr) for _, name := range names { var fullName string if prefix == "" { fullName = name } else { fullName = prefix + name } fieldDesc := FieldDesc{ Type: dataType, Condition: openTag.Attributes["if"], AddToStruct: true, // Default to true AddType: dataType, } // Parse size attribute if size := openTag.Attributes["size"]; size != "" { if s, err := p.parseIntAttribute(size, "size"); err == nil { fieldDesc.Length = s } else { return err } } // Parse oversized attribute if oversized := openTag.Attributes["oversized"]; oversized != "" { if o, err := p.parseIntAttribute(oversized, "oversized"); err == nil { fieldDesc.Oversized = o } else { return err } } // Parse type2 attributes if type2 := openTag.Attributes["type2"]; type2 != "" { if t2, exists := getDataType(type2); exists { fieldDesc.Type2 = t2 fieldDesc.Type2Cond = openTag.Attributes["type2_if"] } } // Parse default value if defaultVal := openTag.Attributes["default"]; defaultVal != "" { if 
d, err := p.parseInt8Attribute(defaultVal, "default"); err == nil { fieldDesc.DefaultValue = d } else { return err } } // Parse max_size if maxSize := openTag.Attributes["max_size"]; maxSize != "" { if m, err := p.parseIntAttribute(maxSize, "max_size"); err == nil { fieldDesc.MaxArraySize = m } else { return err } } // Parse optional if optional := openTag.Attributes["optional"]; optional == "true" { fieldDesc.Optional = true } // Parse add_to_struct if addToStruct := openTag.Attributes["add_to_struct"]; addToStruct == "false" { fieldDesc.AddToStruct = false } // Parse add_type if addType := openTag.Attributes["add_type"]; addType != "" { if at, exists := getDataType(addType); exists { fieldDesc.AddType = at } } packetDef.Fields[fullName] = fieldDesc *fieldOrder = append(*fieldOrder, fullName) } return nil } // parseTemplateDefinition parses a template definition func (p *Parser) parseTemplateDefinition(openTag *Tag, templateName string) error { templateDef := NewPacketDef(16) fieldOrder := make([]string, 0) if openTag.SelfClosing { templateDef.Orders[1] = fieldOrder p.templates[templateName] = templateDef return nil } p.pushTag("template") for { p.skipWhitespace() if p.pos >= len(p.input) { return fmt.Errorf("unexpected end of input in template") } if p.peek() != '<' { return fmt.Errorf("expected '<' at line %d", p.line) } tag, err := p.parseTag() if err != nil { return err } if tag.IsClosing { if tag.Name != "template" { return fmt.Errorf("expected closing tag 'template', got '%s' at line %d", tag.Name, p.line) } if err := p.popTag("template"); err != nil { return err } break } err = p.parseElement(tag, templateDef, &fieldOrder, "") if err != nil { return err } } templateDef.Orders[1] = make([]string, len(fieldOrder)) copy(templateDef.Orders[1], fieldOrder) p.templates[templateName] = templateDef return nil } // parseTemplateUsage parses template usage func (p *Parser) parseTemplateUsage(tag *Tag, packetDef *PacketDef, fieldOrder *[]string, prefix string) error { 
// Template usage: