-- Moonshark/modules/string/string.lua

-- 769 lines
-- 20 KiB
-- Lua

-- modules/string.lua - Enhanced string manipulation utilities
-- Module table: every public function in this file is attached to `str`,
-- and the table itself is returned at the end of the file.
local str = {}
-- Invoke `func` with the given arguments under pcall.
-- On success the first value produced by `func` is returned (any further
-- return values are dropped). On failure the captured error value is
-- re-raised with level 2, i.e. attributed to the caller of safe_call.
local function safe_call(func, ...)
    local ok, value = pcall(func, ...)
    if ok then
        return value
    end
    error(value, 2)
end
-- Argument validator: raise unless `s` is a string.
-- `func_name` is the public function name used in the message and `arg_num`
-- is the argument position (defaults to 1). The error is raised with level 3
-- so it is attributed to the caller of the public function.
local function validate_string(s, func_name, arg_num)
    local actual = type(s)
    if actual == "string" then
        return
    end
    local message = string.format("%s: argument %d must be a string, got %s",
        func_name, arg_num or 1, actual)
    error(message, 3)
end
-- Argument validator: raise unless `n` is a number.
-- `func_name` and `arg_num` (default 1) are used in the message; the error
-- is raised with level 3 so it points at the public function's caller.
local function validate_number(n, func_name, arg_num)
    local actual = type(n)
    if actual == "number" then
        return
    end
    local message = string.format("%s: argument %d must be a number, got %s",
        func_name, arg_num or 1, actual)
    error(message, 3)
end
-- Argument validator: raise unless `t` is a table.
-- `func_name` and `arg_num` (default 1) are used in the message; the error
-- is raised with level 3 so it points at the public function's caller.
local function validate_table(t, func_name, arg_num)
    local actual = type(t)
    if actual == "table" then
        return
    end
    local message = string.format("%s: argument %d must be a table, got %s",
        func_name, arg_num or 1, actual)
    error(message, 3)
end
-- ======================================================================
-- BASIC STRING OPERATIONS
-- ======================================================================
-- Split `s` on `delimiter`.
-- Both arguments must be strings. The work is delegated to the host function
-- moonshark.string_split and its result is returned as-is
-- (presumably a table of pieces -- confirm against the host implementation).
function str.split(s, delimiter)
    local fn = "str.split"
    validate_string(s, fn, 1)
    validate_string(delimiter, fn, 2)
    return safe_call(moonshark.string_split, s, delimiter)
end
-- Join the elements of `arr` using `separator`.
-- `arr` must be a table and `separator` a string. Delegates to the host
-- function moonshark.string_join and returns its result.
function str.join(arr, separator)
    local fn = "str.join"
    validate_table(arr, fn, 1)
    validate_string(separator, fn, 2)
    return safe_call(moonshark.string_join, arr, separator)
end
-- Trim `s` via the host function moonshark.string_trim.
-- `s` must be a string; the host result is returned unchanged.
function str.trim(s)
    validate_string(s, "str.trim", 1)
    return safe_call(moonshark.string_trim, s)
end
-- Trim the left side of `s` via moonshark.string_trim_left.
-- `cutset` is optional; when given it must be a string. It is forwarded to
-- the host as-is (nil when omitted).
function str.trim_left(s, cutset)
    local fn = "str.trim_left"
    validate_string(s, fn, 1)
    if cutset ~= nil then
        validate_string(cutset, fn, 2)
    end
    return safe_call(moonshark.string_trim_left, s, cutset)
end
-- Trim the right side of `s` via moonshark.string_trim_right.
-- `cutset` is optional; when given it must be a string. It is forwarded to
-- the host as-is (nil when omitted).
function str.trim_right(s, cutset)
    local fn = "str.trim_right"
    validate_string(s, fn, 1)
    if cutset ~= nil then
        validate_string(cutset, fn, 2)
    end
    return safe_call(moonshark.string_trim_right, s, cutset)
end
-- Upper-case `s` using the host function moonshark.string_upper.
-- `s` must be a string.
function str.upper(s)
    validate_string(s, "str.upper", 1)
    return safe_call(moonshark.string_upper, s)
end
-- Lower-case `s` using the host function moonshark.string_lower.
-- `s` must be a string.
function str.lower(s)
    validate_string(s, "str.lower", 1)
    return safe_call(moonshark.string_lower, s)
end
-- Title-case `s` using the host function moonshark.string_title.
-- `s` must be a string.
function str.title(s)
    validate_string(s, "str.title", 1)
    return safe_call(moonshark.string_title, s)
end
-- Test whether `s` contains `substr`.
-- Both arguments must be strings. Delegates to moonshark.string_contains and
-- returns its result (presumably a boolean -- confirm against the host).
function str.contains(s, substr)
    local fn = "str.contains"
    validate_string(s, fn, 1)
    validate_string(substr, fn, 2)
    return safe_call(moonshark.string_contains, s, substr)
end
-- Test whether `s` starts with `prefix`.
-- Both arguments must be strings. Delegates to moonshark.string_starts_with.
function str.starts_with(s, prefix)
    local fn = "str.starts_with"
    validate_string(s, fn, 1)
    validate_string(prefix, fn, 2)
    return safe_call(moonshark.string_starts_with, s, prefix)
end
-- Test whether `s` ends with `suffix`.
-- Both arguments must be strings. Delegates to moonshark.string_ends_with.
function str.ends_with(s, suffix)
    local fn = "str.ends_with"
    validate_string(s, fn, 1)
    validate_string(suffix, fn, 2)
    return safe_call(moonshark.string_ends_with, s, suffix)
end
-- Replace occurrences of `old` with `new` in `s`.
-- All three arguments must be strings. Delegates to
-- moonshark.string_replace and returns its result.
function str.replace(s, old, new)
    local fn = "str.replace"
    validate_string(s, fn, 1)
    validate_string(old, fn, 2)
    validate_string(new, fn, 3)
    return safe_call(moonshark.string_replace, s, old, new)
end
-- Replace `old` with `new` in `s`, passing the count `n` to the host.
-- `s`, `old`, `new` must be strings; `n` must be a non-negative whole number.
-- Delegates to moonshark.string_replace_n.
function str.replace_n(s, old, new, n)
    local fn = "str.replace_n"
    validate_string(s, fn, 1)
    validate_string(old, fn, 2)
    validate_string(new, fn, 3)
    validate_number(n, fn, 4)
    -- Reject negatives, fractions and NaN in one test.
    if not (n >= 0 and n == math.floor(n)) then
        error("str.replace_n: count must be a non-negative integer", 2)
    end
    return safe_call(moonshark.string_replace_n, s, old, new, n)
end
-- Locate `substr` in `s` via moonshark.string_index.
-- Returns the host-reported index when it is greater than zero, otherwise
-- nil. Both arguments must be strings.
function str.index(s, substr)
    local fn = "str.index"
    validate_string(s, fn, 1)
    validate_string(substr, fn, 2)
    local position = safe_call(moonshark.string_index, s, substr)
    if position > 0 then
        return position
    end
    return nil
end
-- Locate the last `substr` in `s` via moonshark.string_last_index.
-- Returns the host-reported index when it is greater than zero, otherwise
-- nil. Both arguments must be strings.
function str.last_index(s, substr)
    local fn = "str.last_index"
    validate_string(s, fn, 1)
    validate_string(substr, fn, 2)
    local position = safe_call(moonshark.string_last_index, s, substr)
    if position > 0 then
        return position
    end
    return nil
end
-- Count occurrences of `substr` in `s` via moonshark.string_count.
-- Both arguments must be strings.
function str.count(s, substr)
    local fn = "str.count"
    validate_string(s, fn, 1)
    validate_string(substr, fn, 2)
    return safe_call(moonshark.string_count, s, substr)
end
-- Repeat `s` `n` times via moonshark.string_repeat.
-- `s` must be a string and `n` a non-negative whole number.
-- (The trailing underscore avoids the reserved word `repeat`.)
function str.repeat_(s, n)
    local fn = "str.repeat_"
    validate_string(s, fn, 1)
    validate_number(n, fn, 2)
    -- Reject negatives, fractions and NaN in one test.
    if not (n >= 0 and n == math.floor(n)) then
        error("str.repeat_: count must be a non-negative integer", 2)
    end
    return safe_call(moonshark.string_repeat, s, n)
end
-- Reverse `s` using the host function moonshark.string_reverse.
-- `s` must be a string.
function str.reverse(s)
    validate_string(s, "str.reverse", 1)
    return safe_call(moonshark.string_reverse, s)
end
-- Length of `s` as reported by moonshark.string_length.
-- `s` must be a string. NOTE(review): presumably a character count as opposed
-- to str.byte_length -- confirm against the host implementation.
function str.length(s)
    validate_string(s, "str.length", 1)
    return safe_call(moonshark.string_length, s)
end
-- Byte length of `s` as reported by moonshark.string_byte_length.
-- `s` must be a string.
function str.byte_length(s)
    validate_string(s, "str.byte_length", 1)
    return safe_call(moonshark.string_byte_length, s)
end
-- Break `s` into lines using the host function moonshark.string_lines.
-- `s` must be a string; the host result is returned unchanged.
function str.lines(s)
    validate_string(s, "str.lines", 1)
    return safe_call(moonshark.string_lines, s)
end
-- Break `s` into words using the host function moonshark.string_words.
-- `s` must be a string; the host result is returned unchanged.
function str.words(s)
    validate_string(s, "str.words", 1)
    return safe_call(moonshark.string_words, s)
end
-- Left-pad `s` to `width` via moonshark.string_pad_left.
-- `width` must be a non-negative whole number. `pad_char` is optional; when
-- given it must be a string and is forwarded to the host unchanged.
function str.pad_left(s, width, pad_char)
    local fn = "str.pad_left"
    validate_string(s, fn, 1)
    validate_number(width, fn, 2)
    -- Reject negatives, fractions and NaN in one test.
    if not (width >= 0 and width == math.floor(width)) then
        error("str.pad_left: width must be a non-negative integer", 2)
    end
    if pad_char ~= nil then
        validate_string(pad_char, fn, 3)
    end
    return safe_call(moonshark.string_pad_left, s, width, pad_char)
end
-- Right-pad `s` to `width` via moonshark.string_pad_right.
-- `width` must be a non-negative whole number. `pad_char` is optional; when
-- given it must be a string and is forwarded to the host unchanged.
function str.pad_right(s, width, pad_char)
    local fn = "str.pad_right"
    validate_string(s, fn, 1)
    validate_number(width, fn, 2)
    -- Reject negatives, fractions and NaN in one test.
    if not (width >= 0 and width == math.floor(width)) then
        error("str.pad_right: width must be a non-negative integer", 2)
    end
    if pad_char ~= nil then
        validate_string(pad_char, fn, 3)
    end
    return safe_call(moonshark.string_pad_right, s, width, pad_char)
end
-- Extract a slice of `s` via moonshark.string_slice.
-- `start` must be a whole number. `end_pos` is optional; when given it must
-- also be a whole number. Index semantics (base, inclusivity, negatives) are
-- defined by the host function and are not visible here.
function str.slice(s, start, end_pos)
    local fn = "str.slice"
    validate_string(s, fn, 1)
    validate_number(start, fn, 2)
    if math.floor(start) ~= start then
        error("str.slice: start must be an integer", 2)
    end
    if end_pos ~= nil then
        validate_number(end_pos, fn, 3)
        if math.floor(end_pos) ~= end_pos then
            error("str.slice: end position must be an integer", 2)
        end
    end
    return safe_call(moonshark.string_slice, s, start, end_pos)
end
-- ======================================================================
-- REGULAR EXPRESSIONS
-- ======================================================================
-- Regex match of `pattern` against `s` via moonshark.regex_match.
-- Note the argument order: pattern first, subject second. Both must be
-- strings. The host result is returned unchanged.
function str.match(pattern, s)
    local fn = "str.match"
    validate_string(pattern, fn, 1)
    validate_string(s, fn, 2)
    return safe_call(moonshark.regex_match, pattern, s)
end
-- Regex search for `pattern` in `s` via moonshark.regex_find.
-- Pattern first, subject second; both must be strings. Only the first value
-- returned by the host function is passed back.
function str.find(pattern, s)
    local fn = "str.find"
    validate_string(pattern, fn, 1)
    validate_string(s, fn, 2)
    return safe_call(moonshark.regex_find, pattern, s)
end
-- Regex search for every match of `pattern` in `s` via
-- moonshark.regex_find_all. Pattern first, subject second; both must be
-- strings. The host result is returned unchanged.
function str.find_all(pattern, s)
    local fn = "str.find_all"
    validate_string(pattern, fn, 1)
    validate_string(s, fn, 2)
    return safe_call(moonshark.regex_find_all, pattern, s)
end
-- Regex replacement via moonshark.regex_replace.
-- Argument order is (pattern, subject, replacement); all three must be
-- strings. The host result is returned unchanged.
function str.gsub(pattern, s, replacement)
    local fn = "str.gsub"
    validate_string(pattern, fn, 1)
    validate_string(s, fn, 2)
    validate_string(replacement, fn, 3)
    return safe_call(moonshark.regex_replace, pattern, s, replacement)
end
-- ======================================================================
-- TYPE CONVERSION & VALIDATION
-- ======================================================================
-- Convert `s` to a number via moonshark.string_to_number.
-- `s` must be a string. Behaviour on unparsable input is defined by the host.
function str.to_number(s)
    validate_string(s, "str.to_number", 1)
    return safe_call(moonshark.string_to_number, s)
end
-- Numeric check delegated to moonshark.string_is_numeric.
-- `s` must be a string.
function str.is_numeric(s)
    validate_string(s, "str.is_numeric", 1)
    return safe_call(moonshark.string_is_numeric, s)
end
-- Alphabetic check delegated to moonshark.string_is_alpha.
-- `s` must be a string.
function str.is_alpha(s)
    validate_string(s, "str.is_alpha", 1)
    return safe_call(moonshark.string_is_alpha, s)
end
-- Alphanumeric check delegated to moonshark.string_is_alphanumeric.
-- `s` must be a string.
function str.is_alphanumeric(s)
    validate_string(s, "str.is_alphanumeric", 1)
    return safe_call(moonshark.string_is_alphanumeric, s)
end
-- True when `s` is nil or the empty string; false for anything else.
-- No type validation is performed, so non-string values simply yield false.
function str.is_empty(s)
    if s == nil then
        return true
    end
    return s == ""
end
-- True when `s` is nil, empty, or trims down to the empty string.
-- Non-empty values are passed to str.trim, which raises for non-strings.
function str.is_blank(s)
    if str.is_empty(s) then
        return true
    end
    return str.trim(s) == ""
end
-- UTF-8 validity check delegated to moonshark.string_is_valid_utf8.
-- Uses the same host function as str.is_valid_utf8; only the name reported
-- in validation errors differs.
function str.is_utf8(s)
    validate_string(s, "str.is_utf8", 1)
    return safe_call(moonshark.string_is_valid_utf8, s)
end
-- UTF-8 validity check delegated to moonshark.string_is_valid_utf8.
-- `s` must be a string.
function str.is_valid_utf8(s)
    validate_string(s, "str.is_valid_utf8", 1)
    return safe_call(moonshark.string_is_valid_utf8, s)
end
-- ======================================================================
-- ADVANCED STRING OPERATIONS
-- ======================================================================
-- Capitalize the first letter of each word (pure Lua).
-- A word is a letter followed by any run of alphanumerics, underscores or
-- apostrophes; the first letter is upper-cased and the rest lower-cased
-- using string.upper / string.lower.
-- @param s string to transform
-- @return the transformed string (exactly one value)
function str.capitalize(s)
    validate_string(s, "str.capitalize")
    -- string.gsub returns (result, count); capture only the result so the
    -- substitution count does not leak out as a second return value.
    local capitalized = s:gsub("(%a)([%w_']*)", function(first, rest)
        return first:upper() .. rest:lower()
    end)
    return capitalized
end
-- Convert whitespace-separated words to camelCase (pure Lua).
-- Every word is lower-cased; each word after the first then has a leading
-- lowercase letter upper-cased. If `s` contains no words (empty or only
-- whitespace) it is returned unchanged.
function str.camel_case(s)
    validate_string(s, "str.camel_case")
    local pieces = {}
    for token in s:gmatch("%S+") do
        local piece = token:lower()
        if #pieces > 0 then
            -- Single-target assignment keeps only gsub's first result.
            piece = piece:gsub("^%l", string.upper)
        end
        pieces[#pieces + 1] = piece
    end
    if #pieces == 0 then
        return s
    end
    return table.concat(pieces)
end
-- Convert whitespace-separated words to PascalCase (pure Lua).
-- Every word is lower-cased and then has a leading lowercase letter
-- upper-cased. A string with no words yields "".
function str.pascal_case(s)
    validate_string(s, "str.pascal_case")
    local pieces = {}
    for token in s:gmatch("%S+") do
        -- Single-target assignment keeps only gsub's first result.
        local piece = token:lower():gsub("^%l", string.upper)
        pieces[#pieces + 1] = piece
    end
    return table.concat(pieces)
end
-- Convert whitespace-separated words to snake_case (pure Lua).
-- Words are lower-cased and joined with "_". Only whitespace acts as a word
-- boundary; existing punctuation or case changes are not split.
function str.snake_case(s)
    validate_string(s, "str.snake_case")
    local pieces, count = {}, 0
    for token in s:gmatch("%S+") do
        count = count + 1
        pieces[count] = token:lower()
    end
    return table.concat(pieces, "_")
end
-- Convert whitespace-separated words to kebab-case (pure Lua).
-- Words are lower-cased and joined with "-". Only whitespace acts as a word
-- boundary.
function str.kebab_case(s)
    validate_string(s, "str.kebab_case")
    local pieces, count = {}, 0
    for token in s:gmatch("%S+") do
        count = count + 1
        pieces[count] = token:lower()
    end
    return table.concat(pieces, "-")
end
-- Convert whitespace-separated words to SCREAMING_SNAKE_CASE (pure Lua).
-- Words are upper-cased and joined with "_". Only whitespace acts as a word
-- boundary.
function str.screaming_snake_case(s)
    validate_string(s, "str.screaming_snake_case")
    local pieces, count = {}, 0
    for token in s:gmatch("%S+") do
        count = count + 1
        pieces[count] = token:upper()
    end
    return table.concat(pieces, "_")
end
-- Center `s` within `width` (pure Lua).
-- `width` must be a non-negative whole number. `fill_char` defaults to a
-- single space; only its first byte is used, and an empty string is rejected.
-- Lengths are measured with `#`, i.e. in bytes. When `s` is already at least
-- `width` long it is returned unchanged; an odd amount of padding puts the
-- extra fill on the right.
function str.center(s, width, fill_char)
    validate_string(s, "str.center", 1)
    validate_number(width, "str.center", 2)
    if not (width >= 0 and width == math.floor(width)) then
        error("str.center: width must be a non-negative integer", 2)
    end

    -- After defaulting, fill_char is never nil, so it is always validated.
    fill_char = fill_char or " "
    validate_string(fill_char, "str.center", 3)
    if #fill_char == 0 then
        error("str.center: fill character cannot be empty", 2)
    end
    fill_char = fill_char:sub(1, 1)

    local missing = width - #s
    if missing <= 0 then
        return s
    end
    local before = math.floor(missing / 2)
    local after = missing - before
    return string.rep(fill_char, before) .. s .. string.rep(fill_char, after)
end
-- Truncate `s` to at most `max_length` bytes (pure Lua).
-- `max_length` must be a non-negative whole number; `suffix` defaults to
-- "...". Strings that already fit are returned unchanged. When there is no
-- room for anything but the suffix, the suffix itself is cut to
-- `max_length`. Otherwise the kept head has trailing whitespace removed
-- before the suffix is appended.
function str.truncate(s, max_length, suffix)
    validate_string(s, "str.truncate", 1)
    validate_number(max_length, "str.truncate", 2)
    if not (max_length >= 0 and max_length == math.floor(max_length)) then
        error("str.truncate: max_length must be a non-negative integer", 2)
    end
    suffix = suffix or "..."
    validate_string(suffix, "str.truncate", 3)

    if #s <= max_length then
        return s
    end

    -- Number of bytes of `s` that can be kept alongside the suffix.
    local room = max_length - #suffix
    if room <= 0 then
        return suffix:sub(1, max_length)
    end

    -- Single-target assignment keeps only gsub's first result.
    local head = s:sub(1, room):gsub("%s+$", "")
    return head .. suffix
end
-- Wrap text to `width` columns (pure Lua).
-- `width` must be a positive whole number. The input is split on whitespace
-- and words are packed greedily, separated by single spaces. A word longer
-- than `width` is cut into `width`-byte chunks, and its final chunk starts
-- the next line.
-- @return a table (array) of lines
function str.wrap(s, width)
    validate_string(s, "str.wrap", 1)
    validate_number(width, "str.wrap", 2)
    if not (width > 0 and width == math.floor(width)) then
        error("str.wrap: width must be a positive integer", 2)
    end

    local out = {}
    local pending = ""

    -- Emit the line under construction, if any, and start a fresh one.
    local function flush()
        if pending ~= "" then
            out[#out + 1] = pending
            pending = ""
        end
    end

    for token in s:gmatch("%S+") do
        if #token > width then
            flush()
            -- Hard-break the oversized word; the remainder seeds the next line.
            local rest = token
            while #rest > width do
                out[#out + 1] = rest:sub(1, width)
                rest = rest:sub(width + 1)
            end
            if #rest > 0 then
                pending = rest
            end
        elseif #pending + #token + 1 <= width then
            if pending == "" then
                pending = token
            else
                pending = pending .. " " .. token
            end
        else
            flush()
            pending = token
        end
    end

    flush()
    return out
end
-- Remove the common leading whitespace from every non-blank line (pure Lua).
-- The indent is measured as the number of leading whitespace bytes; lines
-- with no non-whitespace character are ignored when computing it and are
-- left untouched. Single-line input, input with no non-blank lines, and
-- input whose common indent is zero are returned unchanged.
function str.dedent(s)
    validate_string(s, "str.dedent")

    local rows = {}
    for row in (s .. "\n"):gmatch("([^\n]*)\n") do
        rows[#rows + 1] = row
    end
    if #rows <= 1 then
        return s
    end

    -- Smallest indent among rows that hold at least one non-space character.
    local shared = nil
    for i = 1, #rows do
        local row = rows[i]
        if row:find("%S") then
            local lead = #(row:match("^%s*"))
            if shared == nil or lead < shared then
                shared = lead
            end
        end
    end
    if shared == nil or shared == 0 then
        return s
    end

    for i = 1, #rows do
        local row = rows[i]
        if row:find("%S") then
            rows[i] = row:sub(shared + 1)
        end
    end
    return table.concat(rows, "\n")
end
-- Escape regex metacharacters in `s` by prefixing each with a backslash.
-- Escaped characters: . + * ? [ ] ^ $ ( ) { } | \
-- @param s string to escape
-- @return the escaped string (exactly one value)
function str.escape_regex(s)
    validate_string(s, "str.escape_regex")
    -- string.gsub returns (result, count); keep only the result so callers
    -- such as table.insert(t, str.escape_regex(x)) are not handed a stray count.
    local escaped = s:gsub("([%.%+%*%?%[%]%^%$%(%)%{%}%|%\\])", "\\%1")
    return escaped
end
-- Quote `s` for use as a single shell word.
-- The value is wrapped in single quotes; each embedded single quote is
-- rewritten as '"'"' (close quote, double-quoted quote, reopen quote).
function str.shell_quote(s)
    validate_string(s, "str.shell_quote")
    -- Single-target assignment keeps only gsub's first result.
    local escaped = s:gsub("'", "'\"'\"'")
    return "'" .. escaped .. "'"
end
-- Percent-encode `s`.
-- Every byte except ASCII alphanumerics and - . _ ~ is replaced by %XX with
-- upper-case hex digits (so a space becomes %20, not '+').
-- @param s string to encode
-- @return the encoded string (exactly one value)
function str.url_encode(s)
    validate_string(s, "str.url_encode")
    -- string.gsub returns (result, count); keep only the result so the
    -- substitution count does not leak out as a second return value.
    local encoded = s:gsub("([^%w%-%.%_%~])", function(c)
        return string.format("%%%02X", string.byte(c))
    end)
    return encoded
end
-- Decode a percent-encoded string.
-- Literal '+' characters are turned into spaces and every %XX sequence into
-- the corresponding byte. The decoded result must be valid UTF-8 (checked
-- with str.is_valid_utf8); otherwise an error is raised.
-- @param s string to decode
-- @return the decoded string
function str.url_decode(s)
    validate_string(s, "str.url_decode")
    -- '+' must be translated BEFORE percent-decoding: doing it afterwards
    -- would turn an encoded plus sign (%2B) into '+' and then into a space.
    -- '+' is a pattern quantifier, so it is escaped as "%+".
    local spaced = s:gsub("%+", " ")
    -- %x%x guarantees two hex digits, so tonumber cannot fail here.
    local result = spaced:gsub("%%(%x%x)", function(hex)
        return string.char(tonumber(hex, 16))
    end)
    -- Validate result is UTF-8
    if not str.is_valid_utf8(result) then
        error("str.url_decode: result is not valid UTF-8", 2)
    end
    return result
end
-- ======================================================================
-- STRING COMPARISON
-- ======================================================================
-- Case-insensitive equality.
-- Both arguments must be strings; each is lower-cased with str.lower and the
-- results are compared.
function str.iequals(a, b)
    validate_string(a, "str.iequals", 1)
    validate_string(b, "str.iequals", 2)
    local folded_a = str.lower(a)
    local folded_b = str.lower(b)
    return folded_a == folded_b
end
-- Levenshtein edit distance between `a` and `b` (pure Lua).
-- The comparison is byte-wise. If either string is empty the other's length
-- is returned immediately; otherwise strings longer than 1000 bytes are
-- rejected with an error. Uses a full (#a + 1) x (#b + 1) table.
function str.distance(a, b)
    validate_string(a, "str.distance", 1)
    validate_string(b, "str.distance", 2)

    local rows, cols = #a, #b
    if rows == 0 then return cols end
    if cols == 0 then return rows end
    if rows > 1000 or cols > 1000 then
        error("str.distance: strings too long for distance calculation", 2)
    end

    -- grid[i][j] = distance between the first i bytes of a and first j of b.
    local grid = {}
    for i = 0, rows do
        grid[i] = {[0] = i}
    end
    local top = grid[0]
    for j = 0, cols do
        top[j] = j
    end

    for i = 1, rows do
        local above, here = grid[i - 1], grid[i]
        local byte_a = a:byte(i)
        for j = 1, cols do
            local cost = 1
            if byte_a == b:byte(j) then
                cost = 0
            end
            here[j] = math.min(
                above[j] + 1,        -- deletion
                here[j - 1] + 1,     -- insertion
                above[j - 1] + cost  -- substitution
            )
        end
    end

    return grid[rows][cols]
end
-- Similarity score derived from str.distance (pure Lua).
-- Returns 1.0 when both strings are empty, otherwise
-- 1 - distance / max(#a, #b), with lengths measured in bytes.
function str.similarity(a, b)
    validate_string(a, "str.similarity", 1)
    validate_string(b, "str.similarity", 2)
    local longest = math.max(#a, #b)
    if longest == 0 then
        return 1.0
    end
    local edits = str.distance(a, b)
    return 1.0 - (edits / longest)
end
-- ======================================================================
-- TEMPLATE FUNCTIONS
-- ======================================================================
-- Simple ${name} template substitution (pure Lua).
-- Each ${name} placeholder (name made of alphanumerics and underscores) is
-- replaced by tostring(vars[name]); placeholders whose value is nil are
-- replaced by the empty string.
-- @param template string containing placeholders
-- @param vars optional table of values (defaults to an empty table)
-- @return the rendered string (exactly one value)
function str.template(template, vars)
    validate_string(template, "str.template", 1)
    if vars ~= nil then
        validate_table(vars, "str.template", 2)
    else
        vars = {}
    end
    -- string.gsub returns (result, count); keep only the result so the
    -- substitution count does not leak out as a second return value.
    local rendered = template:gsub("%${([%w_]+)}", function(name)
        local value = vars[name]
        if value == nil then
            return ""
        end
        return tostring(value)
    end)
    return rendered
end
-- ${expr} template substitution with dotted-path lookup (pure Lua).
-- For each placeholder the whole expression is first tried as a direct key
-- of `context`; if that yields a truthy value it is used. Otherwise the
-- expression is split on "." and the parts are followed through nested
-- tables. A missing step produces the empty string.
-- @param template string containing placeholders
-- @param context optional table of values (defaults to an empty table)
-- @return the rendered string (exactly one value)
function str.template_advanced(template, context)
    validate_string(template, "str.template_advanced", 1)
    if context ~= nil then
        validate_table(context, "str.template_advanced", 2)
    else
        context = {}
    end
    -- string.gsub returns (result, count); keep only the result so the
    -- substitution count does not leak out as a second return value.
    local rendered = template:gsub("%${([^}]+)}", function(expr)
        -- Simple variable substitution
        if context[expr] then
            return tostring(context[expr])
        end
        -- Handle simple expressions like ${var.prop}
        local value = context
        for part in expr:gmatch("[^%.]+") do
            if type(value) == "table" and value[part] ~= nil then
                value = value[part]
            else
                return ""
            end
        end
        return tostring(value)
    end)
    return rendered
end
-- ======================================================================
-- UTILITY FUNCTIONS
-- ======================================================================
-- True when `s` consists solely of whitespace characters.
-- The empty string also counts, since the pattern allows zero characters.
function str.is_whitespace(s)
    validate_string(s, "str.is_whitespace")
    local matched = s:match("^%s*$")
    return matched ~= nil
end
-- Remove every whitespace character (Lua class %s) from `s`.
-- @param s string to strip
-- @return the stripped string (exactly one value)
function str.strip_whitespace(s)
    validate_string(s, "str.strip_whitespace")
    -- string.gsub returns (result, count); keep only the result so the
    -- substitution count does not leak out as a second return value.
    local stripped = s:gsub("%s", "")
    return stripped
end
-- Collapse each run of whitespace into one space, then trim with str.trim.
function str.normalize_whitespace(s)
    validate_string(s, "str.normalize_whitespace")
    -- Single-target assignment keeps only gsub's first result.
    local collapsed = s:gsub("%s+", " ")
    return str.trim(collapsed)
end
-- Collect the numbers that appear in `s`.
-- Candidates are an optional minus sign, digits, an optional dot and more
-- digits; each candidate that tonumber() accepts is appended to the result.
-- @return a table (array) of numbers, in order of appearance
function str.extract_numbers(s)
    validate_string(s, "str.extract_numbers")
    local found = {}
    for candidate in s:gmatch("%-?%d+%.?%d*") do
        local value = tonumber(candidate)
        if value then
            found[#found + 1] = value
        end
    end
    return found
end
-- Replace accented Latin letters with their unaccented ASCII counterparts.
-- Only the characters listed in the table below are handled; everything
-- else is left as-is. Each entry is applied with its own gsub pass.
function str.remove_accents(s)
    validate_string(s, "str.remove_accents")
    local replacements = {
        -- Lowercase
        ["à"] = "a", ["á"] = "a", ["â"] = "a", ["ã"] = "a", ["ä"] = "a", ["å"] = "a",
        ["è"] = "e", ["é"] = "e", ["ê"] = "e", ["ë"] = "e",
        ["ì"] = "i", ["í"] = "i", ["î"] = "i", ["ï"] = "i",
        ["ò"] = "o", ["ó"] = "o", ["ô"] = "o", ["õ"] = "o", ["ö"] = "o",
        ["ù"] = "u", ["ú"] = "u", ["û"] = "u", ["ü"] = "u",
        ["ñ"] = "n", ["ç"] = "c", ["ý"] = "y", ["ÿ"] = "y",
        -- Uppercase
        ["À"] = "A", ["Á"] = "A", ["Â"] = "A", ["Ã"] = "A", ["Ä"] = "A", ["Å"] = "A",
        ["È"] = "E", ["É"] = "E", ["Ê"] = "E", ["Ë"] = "E",
        ["Ì"] = "I", ["Í"] = "I", ["Î"] = "I", ["Ï"] = "I",
        ["Ò"] = "O", ["Ó"] = "O", ["Ô"] = "O", ["Õ"] = "O", ["Ö"] = "O",
        ["Ù"] = "U", ["Ú"] = "U", ["Û"] = "U", ["Ü"] = "U",
        ["Ñ"] = "N", ["Ç"] = "C", ["Ý"] = "Y", ["Ÿ"] = "Y"
    }
    local text = s
    for from, to in pairs(replacements) do
        -- Single-target assignment keeps only gsub's first result.
        text = text:gsub(from, to)
    end
    return text
end
-- Generate a random string via the host function moonshark.random_string.
-- `length` must be a non-negative whole number. `charset` is optional; when
-- given it must be a string and is forwarded to the host unchanged.
function str.random(length, charset)
    local fn = "str.random"
    validate_number(length, fn, 1)
    -- Reject negatives, fractions and NaN in one test.
    if not (length >= 0 and length == math.floor(length)) then
        error("str.random: length must be a non-negative integer", 2)
    end
    if charset ~= nil then
        validate_string(charset, fn, 2)
    end
    return safe_call(moonshark.random_string, length, charset)
end
-- Build a slug from `s` (pure Lua).
-- Steps: lower-case with string.lower, replace the accented letters listed
-- below with plain ASCII, drop every character that is neither alphanumeric
-- nor whitespace, turn whitespace runs into single hyphens, and finally
-- strip leading and trailing hyphens.
function str.slug(s)
    validate_string(s, "str.slug")
    local replacements = {
        -- Lowercase
        ["à"] = "a", ["á"] = "a", ["â"] = "a", ["ã"] = "a", ["ä"] = "a", ["å"] = "a",
        ["è"] = "e", ["é"] = "e", ["ê"] = "e", ["ë"] = "e",
        ["ì"] = "i", ["í"] = "i", ["î"] = "i", ["ï"] = "i",
        ["ò"] = "o", ["ó"] = "o", ["ô"] = "o", ["õ"] = "o", ["ö"] = "o",
        ["ù"] = "u", ["ú"] = "u", ["û"] = "u", ["ü"] = "u",
        ["ñ"] = "n", ["ç"] = "c", ["ý"] = "y", ["ÿ"] = "y",
        -- Uppercase
        ["À"] = "A", ["Á"] = "A", ["Â"] = "A", ["Ã"] = "A", ["Ä"] = "A", ["Å"] = "A",
        ["È"] = "E", ["É"] = "E", ["Ê"] = "E", ["Ë"] = "E",
        ["Ì"] = "I", ["Í"] = "I", ["Î"] = "I", ["Ï"] = "I",
        ["Ò"] = "O", ["Ó"] = "O", ["Ô"] = "O", ["Õ"] = "O", ["Ö"] = "O",
        ["Ù"] = "U", ["Ú"] = "U", ["Û"] = "U", ["Ü"] = "U",
        ["Ñ"] = "N", ["Ç"] = "C", ["Ý"] = "Y", ["Ÿ"] = "Y"
    }

    local slugged = s:lower()
    for from, to in pairs(replacements) do
        -- Keys and values go through string.lower as well, so the uppercase
        -- entries map to lowercase ASCII letters.
        slugged = slugged:gsub(from:lower(), to:lower())
    end

    -- Each single-target assignment keeps only gsub's first result.
    slugged = slugged:gsub("[^%w%s]", "")
    slugged = slugged:gsub("%s+", "-")
    slugged = slugged:gsub("^%-+", "")
    slugged = slugged:gsub("%-+$", "")
    return slugged
end
-- Export the module table so callers receive it from require().
return str