eq2go/old/common/separator.hpp
2025-08-06 19:00:30 -05:00

329 lines
9.0 KiB
C++

// Copyright (C) 2007 EQ2EMulator Development Team - GPL v3 License
#pragma once
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>
/**
 * String separator class that splits a C string into arguments at delimiter
 * characters. Supports up to three delimiters, optional quote handling, and
 * optional skipping of empty arguments.
 *
 * The input is copied on construction; every accessor (including
 * GetArgPlus) refers to that private copy, so the caller's buffer may be
 * modified or released once the constructor returns.
 *
 * Written by Quagmire, modernized for C++20.
 */
class Separator
{
public:
    /**
     * Constructs a separator that splits the input message at specified delimiters.
     * @param message The string to split (nullptr is treated as an empty string)
     * @param div Primary delimiter character (default: space)
     * @param maxArgNum Highest argument index that may be parsed; parsing stops
     *                  once more than this many arguments were terminated by a
     *                  delimiter (default: 10)
     * @param argLen Maximum number of characters examined per argument; longer
     *               arguments are truncated (default: 100)
     * @param obeyQuotes Whether delimiters inside quoted strings are ignored.
     *                   When skipEmpty is false this only affects an argument
     *                   that starts at the first character (default: false)
     * @param div2 Secondary delimiter character (default: tab)
     * @param div3 Tertiary delimiter character, 0 disables it (default: none)
     * @param skipEmpty Whether runs of delimiters are collapsed instead of
     *                  producing empty arguments (default: true)
     */
    Separator(const char* message, char div = ' ', int16_t maxArgNum = 10,
              int16_t argLen = 100, bool obeyQuotes = false, char div2 = '\t',
              char div3 = 0, bool skipEmpty = true)
        : maxArgNum_(maxArgNum), argNum_(0), originalMessage_(message ? message : "")
    {
        parseMessage(div, argLen, obeyQuotes, div2, div3, skipEmpty);
    }

    /**
     * Default destructor - all members are STL containers.
     */
    ~Separator() = default;

    /**
     * Copy constructor. Argument positions are stored as offsets, so the copy
     * refers to its own message buffer.
     */
    Separator(const Separator& other) = default;

    /**
     * Move constructor.
     */
    Separator(Separator&& other) noexcept = default;

    /**
     * Copy assignment operator.
     */
    Separator& operator=(const Separator& other) = default;

    /**
     * Move assignment operator.
     */
    Separator& operator=(Separator&& other) noexcept = default;

    /**
     * Checks if the argument at the specified index is set (non-empty).
     * @param num Index of the argument to check
     * @return True if argument exists and is non-empty
     */
    bool IsSet(int num) const
    {
        return hasArgument(num) && IsSet(arguments_[num].c_str());
    }

    /**
     * Checks if the argument at the specified index is a valid number.
     * @param num Index of the argument to check
     * @return True if argument exists and passes IsNumber(const char*)
     */
    bool IsNumber(int num) const
    {
        return hasArgument(num) && IsNumber(arguments_[num].c_str());
    }

    /**
     * Checks if the argument at the specified index is a valid hexadecimal number.
     * @param num Index of the argument to check
     * @return True if argument exists and passes IsHexNumber(const char*)
     */
    bool IsHexNumber(int num) const
    {
        return hasArgument(num) && IsHexNumber(arguments_[num].c_str());
    }

    /**
     * Static utility to check if a string is set (non-empty).
     * @param check The string to check
     * @return True if string is non-null and non-empty
     */
    static bool IsSet(const char* check)
    {
        return check && check[0] != '\0';
    }

    /**
     * Static utility to validate if a string represents a decimal number.
     * Accepts an optional leading '+' or '-', at most one '.', and requires
     * at least one digit (so ".", "-" and "-." are rejected).
     * @param check The string to validate
     * @return True if string represents a valid number
     */
    static bool IsNumber(const char* check)
    {
        if (!check || !check[0])
            return false;

        bool seenDecimal = false;
        bool seenDigit = false;
        for (std::size_t i = 0; check[i] != '\0'; ++i) {
            const char c = check[i];
            if (c >= '0' && c <= '9') {
                seenDigit = true;
            }
            else if (c == '.' && !seenDecimal) {
                seenDecimal = true;
            }
            else if (i == 0 && (c == '-' || c == '+')) {
                // Sign prefix; a lone sign is rejected by the digit check below.
            }
            else {
                return false;
            }
        }
        return seenDigit;
    }

    /**
     * Static utility to validate hexadecimal number format.
     * Must start with 0x or 0X followed by at least one hex digit.
     * @param check The string to validate
     * @return True if string is valid hexadecimal format
     */
    static bool IsHexNumber(const char* check)
    {
        // Each test only reads a character after the previous one has been
        // shown not to be the terminator.
        if (!check || check[0] != '0')
            return false;
        if (check[1] != 'x' && check[1] != 'X')
            return false;
        if (check[2] == '\0')
            return false;

        for (std::size_t i = 2; check[i] != '\0'; ++i) {
            // std::isxdigit requires a value representable as unsigned char.
            if (!std::isxdigit(static_cast<unsigned char>(check[i])))
                return false;
        }
        return true;
    }

    /**
     * Gets the maximum argument index this separator was configured with.
     * @return The maxArgNum value passed to the constructor
     */
    int16_t GetMaxArgNum() const { return maxArgNum_; }

    /**
     * Gets the number of arguments that were terminated by a delimiter (or by
     * a closing quote). A final argument that runs to the end of the message
     * is stored but not counted, so for input such as "a b c" this returns 2,
     * i.e. the index of the last argument.
     * @return Count of delimiter-terminated arguments
     */
    int16_t GetArgNumber() const { return argNum_; }

    /**
     * Gets the argument at the specified index as a string.
     * @param index Index of the argument to retrieve
     * @return Argument string, or empty string if index is invalid
     */
    std::string GetArg(int index) const
    {
        if (hasArgument(index))
            return arguments_[index];
        return "";
    }

    /**
     * Gets the remainder of the message starting at the given argument,
     * without any splitting, truncation or quote removal applied.
     * The pointer refers to this object's own copy of the message and stays
     * valid until the object is destroyed, assigned to, or moved from.
     * @param index Index of the argument
     * @return Pointer to the argument's position in the stored message, or an
     *         empty string if no position was recorded for that index
     */
    const char* GetArgPlus(int index) const
    {
        if (index >= 0 && static_cast<std::size_t>(index) < argOffsets_.size())
            return originalMessage_.c_str() + argOffsets_[index];
        return "";
    }

    /**
     * Gets the original message that was parsed.
     * @return Reference to the stored copy of the input message
     */
    const std::string& GetOriginalMessage() const { return originalMessage_; }

private:
    /**
     * True when index refers to a stored argument string.
     */
    bool hasArgument(int index) const
    {
        return index >= 0 && static_cast<std::size_t>(index) < arguments_.size();
    }

    /**
     * True for the two characters recognised as quotes.
     */
    static bool isQuote(char c)
    {
        return c == '\'' || c == '\"';
    }

    /**
     * Helper function to check if a character is one of the specified delimiters.
     * @param c Character to check
     * @param div Primary delimiter
     * @param div2 Secondary delimiter
     * @param div3 Tertiary delimiter (0 means unused)
     * @return True if character matches any delimiter
     */
    static bool isDelimiter(char c, char div, char div2, char div3)
    {
        return c == div || c == div2 || (div3 != 0 && c == div3);
    }

    /**
     * Copies the characters [start, end) of the stored message, truncated to
     * at most limit characters.
     * @param stripLeadingQuote When true and the (truncated) span is longer
     *        than one character and begins with a quote, that quote is dropped.
     */
    std::string extractArg(std::size_t start, std::size_t end, std::size_t limit,
                           bool stripLeadingQuote) const
    {
        std::size_t length = end - start;
        if (length > limit)
            length = limit;
        if (stripLeadingQuote && length > 1 && isQuote(originalMessage_[start]))
            return originalMessage_.substr(start + 1, length - 1);
        return originalMessage_.substr(start, length);
    }

    /**
     * Core parsing logic that splits the stored message according to the
     * specified rules, filling arguments_, argOffsets_ and argNum_.
     */
    void parseMessage(char div, int16_t argLen, bool obeyQuotes,
                      char div2, char div3, bool skipEmpty)
    {
        const std::string& msg = originalMessage_;
        if (msg.empty())
            return;

        const std::size_t len = msg.size();
        // A non-positive argLen leaves every argument empty.
        const std::size_t limit = argLen > 0 ? static_cast<std::size_t>(argLen) : 0;

        bool inArg = (!skipEmpty || !isDelimiter(msg[0], div, div2, div3));
        bool inQuote = (obeyQuotes && isQuote(msg[0]));

        // Reserve space for efficiency; a negative maximum must not be
        // converted into a huge unsigned reservation.
        if (maxArgNum_ >= 0) {
            arguments_.reserve(static_cast<std::size_t>(maxArgNum_) + 1);
            argOffsets_.reserve(static_cast<std::size_t>(maxArgNum_) + 1);
        }

        if (inArg)
            argOffsets_.push_back(0);

        for (std::size_t i = 0; i < len && argNum_ <= maxArgNum_; ++i) {
            const char currentChar = msg[i];

            if (inArg) {
                // While inside an argument its start offset is always the
                // most recently recorded one, at index argNum_.
                const std::size_t argStart = argOffsets_[static_cast<std::size_t>(argNum_)];
                bool shouldEndArg = false;

                if (!inQuote) {
                    shouldEndArg = isDelimiter(currentChar, div, div2, div3);
                }
                else if (i > argStart && isQuote(currentChar)) {
                    // The opening quote itself (i == argStart) can never close
                    // the argument. A later quote closes it only when followed
                    // by a delimiter or the end of the message.
                    if (i + 1 >= len || isDelimiter(msg[i + 1], div, div2, div3)) {
                        inQuote = false;
                        shouldEndArg = true;
                    }
                }

                if (shouldEndArg) {
                    arguments_.push_back(extractArg(argStart, i, limit, true));
                    argNum_++;
                    if (skipEmpty) {
                        inArg = false;
                    }
                    else if (i + 1 < len && argNum_ <= maxArgNum_) {
                        // Without skipping, the next argument starts right
                        // after this terminator, even if it turns out empty.
                        argOffsets_.push_back(i + 1);
                    }
                }
            }
            else if (obeyQuotes && isQuote(currentChar)) {
                inQuote = true;
                argOffsets_.push_back(i);
                inArg = true;
            }
            else if (!isDelimiter(currentChar, div, div2, div3)) {
                argOffsets_.push_back(i);
                inArg = true;
            }
        }

        // Handle a final argument that runs to the end of the message. No
        // start offset exists when the message ended on a delimiter with
        // skipEmpty disabled, hence the size check. argNum_ is intentionally
        // left unchanged and no quote is stripped here.
        if (inArg && argNum_ <= maxArgNum_ &&
            static_cast<std::size_t>(argNum_) < argOffsets_.size()) {
            const std::size_t argStart = argOffsets_[static_cast<std::size_t>(argNum_)];
            std::string arg = extractArg(argStart, len, limit, false);
            if (!arg.empty())
                arguments_.push_back(arg);
        }
    }

    int16_t maxArgNum_;                   // Maximum argument index to parse
    int16_t argNum_;                      // Number of delimiter-terminated arguments
    std::string originalMessage_;         // Copy of original input message
    std::vector<std::string> arguments_;  // Parsed arguments as strings
    std::vector<std::size_t> argOffsets_; // Start offset of each argument in originalMessage_
};