-- modules/string.lua - Comprehensive string manipulation utilities
-- (listing metadata: 464 lines, 11 KiB, Lua)
|
|
|
|
-- Module table: every public function in this file is attached to it, and it
-- is the value returned at the end of the file.
local str = {}
|
|
|
|
-- ======================================================================
|
|
-- BASIC STRING OPERATIONS
|
|
-- ======================================================================
|
|
|
|
-- Split a string around a delimiter.
-- Thin wrapper: forwards both arguments to `moonshark.string_split` (the
-- `moonshark` global is not defined in this file) and returns its result as-is.
-- @param s          string to split
-- @param delimiter  separator handed to the underlying function
-- @return whatever `moonshark.string_split` returns (other functions in this
--         module index the result with `#` and `ipairs`, i.e. as a list)
function str.split(s, delimiter)
  return moonshark.string_split(s, delimiter)
end
|
|
|
|
-- Join the elements of a list into one string.
-- Thin wrapper around `moonshark.string_join`; arguments and result are
-- passed through untouched.
-- @param arr        list of pieces
-- @param separator  text placed between pieces
-- @return result of `moonshark.string_join`
function str.join(arr, separator)
  return moonshark.string_join(arr, separator)
end
|
|
|
|
-- Trim a string on both ends.
-- Delegates to `moonshark.string_trim`; which characters count as trimmable
-- is decided there, not in this file.
-- @param s  string to trim
-- @return result of `moonshark.string_trim`
function str.trim(s)
  return moonshark.string_trim(s)
end
|
|
|
|
-- Trim the left side of a string.
-- Delegates to `moonshark.string_trim_left`.
-- @param s       string to trim
-- @param cutset  forwarded unchanged (may be nil; `str.truncate` in this
--                module calls the right-hand variant without it)
-- @return result of `moonshark.string_trim_left`
function str.trim_left(s, cutset)
  return moonshark.string_trim_left(s, cutset)
end
|
|
|
|
-- Trim the right side of a string.
-- Delegates to `moonshark.string_trim_right`.
-- @param s       string to trim
-- @param cutset  forwarded unchanged (may be nil; `str.truncate` calls this
--                function with only `s`)
-- @return result of `moonshark.string_trim_right`
function str.trim_right(s, cutset)
  return moonshark.string_trim_right(s, cutset)
end
|
|
|
|
-- Upper-case a string.
-- Delegates to `moonshark.string_upper` rather than Lua's `string.upper`.
-- @param s  input string
-- @return result of `moonshark.string_upper`
function str.upper(s)
  return moonshark.string_upper(s)
end
|
|
|
|
-- Lower-case a string.
-- Delegates to `moonshark.string_lower` rather than Lua's `string.lower`.
-- @param s  input string
-- @return result of `moonshark.string_lower`
function str.lower(s)
  return moonshark.string_lower(s)
end
|
|
|
|
-- Title-case a string.
-- Delegates to `moonshark.string_title`; the exact casing rules live there.
-- @param s  input string
-- @return result of `moonshark.string_title`
function str.title(s)
  return moonshark.string_title(s)
end
|
|
|
|
-- Test whether `substr` occurs inside `s`.
-- Delegates to `moonshark.string_contains`.
-- @param s       string to search in
-- @param substr  text to look for
-- @return result of `moonshark.string_contains`
function str.contains(s, substr)
  return moonshark.string_contains(s, substr)
end
|
|
|
|
-- Test whether `s` begins with `prefix`.
-- Delegates to `moonshark.string_starts_with`.
-- @param s       string to inspect
-- @param prefix  expected leading text
-- @return result of `moonshark.string_starts_with`
function str.starts_with(s, prefix)
  return moonshark.string_starts_with(s, prefix)
end
|
|
|
|
-- Test whether `s` finishes with `suffix`.
-- Delegates to `moonshark.string_ends_with`.
-- @param s       string to inspect
-- @param suffix  expected trailing text
-- @return result of `moonshark.string_ends_with`
function str.ends_with(s, suffix)
  return moonshark.string_ends_with(s, suffix)
end
|
|
|
|
-- Replace occurrences of `old` with `new`.
-- Delegates to `moonshark.string_replace`; see `str.replace_n` for the
-- variant that also forwards a count.
-- @param s    input string
-- @param old  text to be replaced
-- @param new  replacement text
-- @return result of `moonshark.string_replace`
function str.replace(s, old, new)
  return moonshark.string_replace(s, old, new)
end
|
|
|
|
-- Replace occurrences of `old` with `new`, forwarding a count `n`.
-- Delegates to `moonshark.string_replace_n`.
-- @param s    input string
-- @param old  text to be replaced
-- @param new  replacement text
-- @param n    count forwarded to the underlying function (presumably the
--             maximum number of replacements; confirm against the host API)
-- @return result of `moonshark.string_replace_n`
function str.replace_n(s, old, new, n)
  return moonshark.string_replace_n(s, old, new, n)
end
|
|
|
|
-- Locate `substr` inside `s`.
-- Calls `moonshark.string_index` and converts any non-positive result into
-- nil so callers can use the usual `if pos then ... end` idiom.
-- @param s       string to search in
-- @param substr  text to look for
-- @return the positive index reported by `moonshark.string_index`, or nil
function str.index(s, substr)
  local pos = moonshark.string_index(s, substr)
  if pos > 0 then
    return pos
  end
  return nil
end
|
|
|
|
-- Locate `substr` inside `s` using `moonshark.string_last_index`.
-- Any non-positive result from the underlying call is mapped to nil.
-- @param s       string to search in
-- @param substr  text to look for
-- @return the positive index reported by `moonshark.string_last_index`, or nil
function str.last_index(s, substr)
  local pos = moonshark.string_last_index(s, substr)
  if pos > 0 then
    return pos
  end
  return nil
end
|
|
|
|
-- Count occurrences of `substr` in `s`.
-- Delegates to `moonshark.string_count`.
-- @param s       string to search in
-- @param substr  text to count
-- @return result of `moonshark.string_count`
function str.count(s, substr)
  return moonshark.string_count(s, substr)
end
|
|
|
|
-- Repeat a string.
-- Delegates to `moonshark.string_repeat`. The trailing underscore in the
-- name is required because `repeat` is a reserved word in Lua.
-- @param s  string to repeat
-- @param n  repetition count forwarded to the underlying function
-- @return result of `moonshark.string_repeat`
function str.repeat_(s, n)
  return moonshark.string_repeat(s, n)
end
|
|
|
|
-- Reverse a string.
-- Delegates to `moonshark.string_reverse` rather than Lua's `string.reverse`.
-- @param s  input string
-- @return result of `moonshark.string_reverse`
function str.reverse(s)
  return moonshark.string_reverse(s)
end
|
|
|
|
-- Length of a string as reported by `moonshark.string_length`.
-- NOTE(review): a separate `str.byte_length` exists, so this presumably counts
-- characters rather than bytes; confirm against the host API.
-- @param s  input string
-- @return result of `moonshark.string_length`
function str.length(s)
  return moonshark.string_length(s)
end
|
|
|
|
-- Byte length of a string as reported by `moonshark.string_byte_length`.
-- @param s  input string
-- @return result of `moonshark.string_byte_length`
function str.byte_length(s)
  return moonshark.string_byte_length(s)
end
|
|
|
|
-- Break a string into lines.
-- Delegates to `moonshark.string_lines`; line-ending handling is defined there.
-- @param s  input string
-- @return result of `moonshark.string_lines`
function str.lines(s)
  return moonshark.string_lines(s)
end
|
|
|
|
-- Break a string into words.
-- Delegates to `moonshark.string_words`. The case-conversion helpers and
-- `str.wrap` in this module iterate the result with `#` / `ipairs`.
-- @param s  input string
-- @return result of `moonshark.string_words`
function str.words(s)
  return moonshark.string_words(s)
end
|
|
|
|
-- Pad a string on the left.
-- Delegates to `moonshark.string_pad_left`; all three arguments are forwarded
-- unchanged (no default for `pad_char` is applied here).
-- @param s         input string
-- @param width     target width
-- @param pad_char  padding character
-- @return result of `moonshark.string_pad_left`
function str.pad_left(s, width, pad_char)
  return moonshark.string_pad_left(s, width, pad_char)
end
|
|
|
|
-- Pad a string on the right.
-- Delegates to `moonshark.string_pad_right`; all three arguments are
-- forwarded unchanged (no default for `pad_char` is applied here).
-- @param s         input string
-- @param width     target width
-- @param pad_char  padding character
-- @return result of `moonshark.string_pad_right`
function str.pad_right(s, width, pad_char)
  return moonshark.string_pad_right(s, width, pad_char)
end
|
|
|
|
-- Extract a sub-range of a string.
-- Delegates to `moonshark.string_slice`. Other functions in this module call
-- it as `slice(s, i, i)` to fetch the i-th character and `slice(s, 1, n)` to
-- take a prefix, i.e. with 1-based, inclusive bounds.
-- @param s        input string
-- @param start    first position
-- @param end_pos  last position
-- @return result of `moonshark.string_slice`
function str.slice(s, start, end_pos)
  return moonshark.string_slice(s, start, end_pos)
end
|
|
|
|
-- ======================================================================
|
|
-- REGULAR EXPRESSIONS
|
|
-- ======================================================================
|
|
|
|
-- Match a regular expression against a string.
-- Delegates to `moonshark.regex_match`. Note the argument order: the pattern
-- comes FIRST here, unlike Lua's `string.match(s, pattern)`.
-- @param pattern  regular expression (host regex syntax, not Lua patterns)
-- @param s        subject string
-- @return result of `moonshark.regex_match`
function str.match(pattern, s)
  return moonshark.regex_match(pattern, s)
end
|
|
|
|
-- Find a regular-expression match in a string.
-- Delegates to `moonshark.regex_find`. The pattern is the FIRST argument,
-- unlike Lua's `string.find(s, pattern)`.
-- @param pattern  regular expression (host regex syntax, not Lua patterns)
-- @param s        subject string
-- @return result of `moonshark.regex_find`
function str.find(pattern, s)
  return moonshark.regex_find(pattern, s)
end
|
|
|
|
-- Find all regular-expression matches in a string.
-- Delegates to `moonshark.regex_find_all`; pattern first, subject second.
-- @param pattern  regular expression (host regex syntax, not Lua patterns)
-- @param s        subject string
-- @return result of `moonshark.regex_find_all`
function str.find_all(pattern, s)
  return moonshark.regex_find_all(pattern, s)
end
|
|
|
|
-- Regular-expression replace.
-- Delegates to `moonshark.regex_replace`. Despite the name, this is NOT Lua's
-- `string.gsub`: the pattern is the first argument and is forwarded to the
-- host regex engine.
-- @param pattern      regular expression
-- @param s            subject string
-- @param replacement  replacement forwarded to the underlying function
-- @return result of `moonshark.regex_replace`
function str.gsub(pattern, s, replacement)
  return moonshark.regex_replace(pattern, s, replacement)
end
|
|
|
|
-- ======================================================================
|
|
-- TYPE CONVERSION & VALIDATION
|
|
-- ======================================================================
|
|
|
|
-- Convert a string to a number.
-- Delegates to `moonshark.string_to_number` (not Lua's `tonumber`); the
-- behaviour for unparsable input is defined there.
-- @param s  input string
-- @return result of `moonshark.string_to_number`
function str.to_number(s)
  return moonshark.string_to_number(s)
end
|
|
|
|
-- Check whether a string is numeric.
-- Delegates to `moonshark.string_is_numeric`.
-- @param s  input string
-- @return result of `moonshark.string_is_numeric`
function str.is_numeric(s)
  return moonshark.string_is_numeric(s)
end
|
|
|
|
-- Check whether a string is alphabetic.
-- Delegates to `moonshark.string_is_alpha`.
-- @param s  input string
-- @return result of `moonshark.string_is_alpha`
function str.is_alpha(s)
  return moonshark.string_is_alpha(s)
end
|
|
|
|
-- Check whether a string is alphanumeric.
-- Delegates to `moonshark.string_is_alphanumeric`.
-- @param s  input string
-- @return result of `moonshark.string_is_alphanumeric`
function str.is_alphanumeric(s)
  return moonshark.string_is_alphanumeric(s)
end
|
|
|
|
-- Report whether a value is "empty": either nil or the zero-length string.
-- Any other value (including whitespace-only strings) is not empty.
-- @param s  value to test
-- @return boolean
function str.is_empty(s)
  if s == nil then
    return true
  end
  return s == ""
end
|
|
|
|
-- Report whether a value is blank: nil, the empty string, or a string that
-- `str.trim` reduces to the empty string.
-- @param s  value to test
-- @return boolean
function str.is_blank(s)
  -- Check emptiness first so `str.trim` is never called with nil.
  if str.is_empty(s) then
    return true
  end
  return str.trim(s) == ""
end
|
|
|
|
-- ======================================================================
|
|
-- ADVANCED STRING OPERATIONS
|
|
-- ======================================================================
|
|
|
|
-- Capitalize the first letter of each word and lower-case the rest of it.
-- A "word" is a letter followed by any run of alphanumerics, underscores or
-- apostrophes (Lua pattern "(%a)([%w_']*)"), so "don't" stays one word.
-- Casing is done through `str.upper` / `str.lower`.
-- @param s  input string
-- @return the capitalized string (exactly one value; the substitution count
--         produced by `gsub` is deliberately not returned)
function str.capitalize(s)
  local result = s:gsub("(%a)([%w_']*)", function(first, rest)
    return str.upper(first) .. str.lower(rest)
  end)
  -- Assigning to a single local drops gsub's second result (the match count),
  -- which would otherwise leak into callers' argument lists and tables.
  return result
end
|
|
|
|
-- Convert a string to camelCase.
-- The input is lower-cased, split with `str.words`, and every word after the
-- first is passed through `str.capitalize` before the pieces are glued
-- together without a separator.
-- @param s  input string
-- @return camelCase string; `s` itself when `str.words` yields no words
function str.camel_case(s)
  local words = str.words(str.lower(s))
  if #words == 0 then
    return s
  end

  -- The first word stays lower-case; the rest get a leading capital.
  local pieces = { words[1] }
  for i = 2, #words do
    pieces[i] = str.capitalize(words[i])
  end
  return table.concat(pieces)
end
|
|
|
|
-- Convert a string to PascalCase.
-- The input is lower-cased, split with `str.words`, and every word is passed
-- through `str.capitalize` before concatenation.
-- @param s  input string
-- @return PascalCase string ("" when there are no words)
function str.pascal_case(s)
  local pieces = {}
  for i, word in ipairs(str.words(str.lower(s))) do
    pieces[i] = str.capitalize(word)
  end
  return table.concat(pieces)
end
|
|
|
|
-- Convert a string to snake_case: lower-case it, split it with `str.words`,
-- and join the words with "_".
-- @param s  input string
-- @return result of `str.join`
function str.snake_case(s)
  local lowered = str.lower(s)
  return str.join(str.words(lowered), "_")
end
|
|
|
|
-- Convert a string to kebab-case: lower-case it, split it with `str.words`,
-- and join the words with "-".
-- @param s  input string
-- @return result of `str.join`
function str.kebab_case(s)
  local lowered = str.lower(s)
  return str.join(str.words(lowered), "-")
end
|
|
|
|
-- Convert a string to SCREAMING_SNAKE_CASE by upper-casing the output of
-- `str.snake_case`.
-- @param s  input string
-- @return result of `str.upper`
function str.screaming_snake_case(s)
  local snake = str.snake_case(s)
  return str.upper(snake)
end
|
|
|
|
-- Center text within a field of the given width.
-- The current width is measured with `str.length`. When the padding cannot
-- be split evenly, the extra fill goes on the right.
-- @param s          text to center
-- @param width      target width
-- @param fill_char  padding string, defaults to a single space; it is repeated
--                   with `string.rep`, so a multi-character fill over-pads
-- @return padded string, or `s` unchanged when it is already at least `width`
function str.center(s, width, fill_char)
  local fill = fill_char or " "
  local missing = width - str.length(s)
  if missing <= 0 then
    return s
  end

  local left = math.floor(missing / 2)
  local right = missing - left
  return string.rep(fill, left) .. s .. string.rep(fill, right)
end
|
|
|
|
-- Truncate a string to at most `max_length` characters (as measured by
-- `str.length`), appending `suffix` when something was cut off.
-- @param s           input string
-- @param max_length  maximum length of the result, suffix included
-- @param suffix      marker appended after truncation, defaults to "..."
-- @return `s` unchanged when it already fits; otherwise the leading part of
--         `s` (right-trimmed via `str.trim_right`) followed by `suffix`.
--         When `max_length` is too small to hold the suffix at all, the result
--         is a hard cut of `s` to `max_length` characters with no suffix
--         ("" when `max_length` <= 0), so the length bound always holds.
function str.truncate(s, max_length, suffix)
  suffix = suffix or "..."
  if str.length(s) <= max_length then
    return s
  end

  -- Number of characters of `s` that fit in front of the suffix.
  local keep = max_length - str.length(suffix)
  if keep < 0 then
    -- The suffix alone exceeds the budget. Slicing with a negative end would
    -- produce a result longer than `max_length` (or worse), so cut hard.
    if max_length <= 0 then
      return ""
    end
    return str.slice(s, 1, max_length)
  end

  -- Right-trim so the suffix never follows dangling whitespace.
  local main_part = str.trim_right(str.slice(s, 1, keep))
  return main_part .. suffix
end
|
|
|
|
-- Greedily wrap text to the given width.
-- The text is split with `str.words`; words are packed onto a line, separated
-- by single spaces, for as long as the line (measured with `str.length`)
-- stays within `width`. A word longer than `width` is placed on a line of its
-- own and is not broken.
-- @param s      text to wrap
-- @param width  maximum line width
-- @return list of line strings (empty list when there are no words)
function str.wrap(s, width)
  local lines = {}
  local current = ""

  for _, word in ipairs(str.words(s)) do
    -- The +1 accounts for the separating space.
    local fits = str.length(current) + str.length(word) + 1 <= width
    if fits and current ~= "" then
      current = current .. " " .. word
    else
      -- Either the line is still empty or the word overflows it: flush the
      -- pending line (if any) and start a new one with this word.
      if current ~= "" and not fits then
        lines[#lines + 1] = current
      end
      current = word
    end
  end

  if current ~= "" then
    lines[#lines + 1] = current
  end
  return lines
end
|
|
|
|
-- Remove the leading whitespace shared by all non-blank lines.
-- The text is split on "\n" with `str.split`. Lines that `str.trim` reduces to
-- "" are ignored when measuring the common indent and are left untouched in
-- the output. Indent width is counted in bytes of leading `%s` characters.
-- @param s  input text
-- @return dedented text; `s` itself when it has at most one line, when every
--         line is blank, or when the common indent is already zero
function str.dedent(s)
  local lines = str.split(s, "\n")
  if #lines <= 1 then
    return s
  end

  -- Pass 1: smallest indent among lines that carry content.
  local smallest = math.huge
  for _, line in ipairs(lines) do
    if str.trim(line) ~= "" then
      local width = #line:match("^%s*")
      if width < smallest then
        smallest = width
      end
    end
  end

  if smallest == math.huge or smallest == 0 then
    return s
  end

  -- Pass 2: strip that many leading bytes from every content line.
  for i, line in ipairs(lines) do
    if str.trim(line) ~= "" then
      lines[i] = line:sub(smallest + 1)
    end
  end

  return str.join(lines, "\n")
end
|
|
|
|
-- Escape regular-expression metacharacters so the text matches literally.
-- Each of . + * ? [ ] ^ $ ( ) { } | \ is prefixed with a backslash. This
-- targets regex syntax (as used by the `moonshark.regex_*` wrappers), not Lua
-- patterns, which escape with `%`.
-- @param s  text to escape
-- @return the escaped string (exactly one value; gsub's match count is
--         intentionally dropped)
function str.escape_regex(s)
  local escaped = s:gsub("([%.%+%*%?%[%]%^%$%(%)%{%}%|%\\])", "\\%1")
  -- Single-local assignment discards the substitution count, which would
  -- otherwise be returned as a surprising second value.
  return escaped
end
|
|
|
|
-- Quote a string for use as a single shell word.
-- The text is wrapped in single quotes; each embedded single quote is
-- rewritten as '"'"' (close quote, double-quoted quote, reopen quote).
-- @param s  text to quote
-- @return the quoted string
function str.shell_quote(s)
  local escaped = s:gsub("'", "'\"'\"'")
  return "'" .. escaped .. "'"
end
|
|
|
|
-- Percent-encode a string for use in a URL.
-- Every byte that is not alphanumeric (`%w`) or one of - . _ ~ is replaced by
-- "%XX" with upper-case hex digits. Spaces therefore become "%20", not "+".
-- @param s  text to encode
-- @return the encoded string (exactly one value; gsub's match count is
--         intentionally dropped)
function str.url_encode(s)
  local encoded = s:gsub("([^%w%-%.%_%~])", function(c)
    return string.format("%%%02X", string.byte(c))
  end)
  -- Returning the local (not the gsub call) keeps the count from leaking out.
  return encoded
end
|
|
|
|
-- Decode a percent-encoded (form-style) string.
-- "+" is turned into a space and every "%XX" hex escape into the byte it
-- denotes.
-- @param s  text to decode
-- @return the decoded string (exactly one value)
function str.url_decode(s)
  -- Order matters: "+" must be converted BEFORE the percent escapes are
  -- expanded, otherwise an encoded literal plus ("%2B") would first become
  -- "+" and then be wrongly turned into a space.
  local spaced = s:gsub("%+", " ")
  local decoded = spaced:gsub("%%(%x%x)", function(hex)
    return string.char(tonumber(hex, 16))
  end)
  return decoded
end
|
|
|
|
-- ======================================================================
|
|
-- STRING COMPARISON
|
|
-- ======================================================================
|
|
|
|
-- Case-insensitive equality: both operands are lower-cased with `str.lower`
-- and the results compared with `==`.
-- @param a  first string
-- @param b  second string
-- @return boolean
function str.iequals(a, b)
  local left, right = str.lower(a), str.lower(b)
  return left == right
end
|
|
|
|
-- Levenshtein (edit) distance between two strings: the minimum number of
-- single-character insertions, deletions and substitutions turning `a` into
-- `b`. Characters are obtained through `str.length` / `str.slice`.
-- @param a  first string
-- @param b  second string
-- @return non-negative integer distance
function str.distance(a, b)
  local len_a, len_b = str.length(a), str.length(b)

  -- Slice b into characters once up front instead of once per matrix cell;
  -- this cuts the number of `str.slice` calls from 2*len_a*len_b to
  -- len_a + len_b.
  local chars_b = {}
  for j = 1, len_b do
    chars_b[j] = str.slice(b, j, j)
  end

  -- Only the previous row of the DP matrix is ever read, so keep two rows
  -- rather than the full (len_a+1) x (len_b+1) table.
  -- prev[j] = distance between the first i-1 chars of a and first j chars of b.
  local prev = {}
  for j = 0, len_b do
    prev[j] = j
  end

  for i = 1, len_a do
    local char_a = str.slice(a, i, i)
    local curr = { [0] = i }
    for j = 1, len_b do
      local cost = (char_a == chars_b[j]) and 0 or 1
      curr[j] = math.min(
        prev[j] + 1,        -- deletion
        curr[j - 1] + 1,    -- insertion
        prev[j - 1] + cost  -- substitution
      )
    end
    prev = curr
  end

  return prev[len_b]
end
|
|
|
|
-- Similarity score between two strings in the range 0..1, computed as
-- 1 - distance / max(length(a), length(b)), where distance is `str.distance`
-- and lengths come from `str.length`.
-- @param a  first string
-- @param b  second string
-- @return 1 for identical strings (including two empty strings), smaller
--         values the more edits separate them
function str.similarity(a, b)
  local longest = math.max(str.length(a), str.length(b))
  if longest == 0 then
    -- Two empty strings: avoid dividing by zero and call them identical.
    return 1
  end
  local edits = str.distance(a, b)
  return 1 - (edits / longest)
end
|
|
|
|
-- ======================================================================
|
|
-- TEMPLATE FUNCTIONS
|
|
-- ======================================================================
|
|
|
|
-- Simple template substitution.
-- Every "${name}" placeholder (name made of letters, digits and underscores)
-- is replaced by `tostring(vars[name])`.
-- @param template  template text
-- @param vars      table of values, defaults to an empty table
-- @return the rendered string (exactly one value; gsub's match count is
--         intentionally dropped). Placeholders whose value is nil or false
--         render as the empty string.
function str.template(template, vars)
  vars = vars or {}
  local rendered = template:gsub("%${([%w_]+)}", function(var)
    return tostring(vars[var] or "")
  end)
  -- Return the local so the substitution count is not exposed to callers.
  return rendered
end
|
|
|
|
-- Template substitution with dotted-path lookup.
-- Each "${expr}" placeholder (expr = anything up to the next "}") is resolved
-- in two steps:
--   1. if `context[expr]` is truthy, it is used directly;
--   2. otherwise expr is split on "." with `str.split` and the parts are
--      followed through nested tables, e.g. "${user.name}".
-- @param template  template text
-- @param context   table of values, defaults to an empty table
-- @return the rendered string (exactly one value; gsub's match count is
--         intentionally dropped). A placeholder whose path hits a missing,
--         false, or non-table step renders as the empty string.
function str.template_advanced(template, context)
  context = context or {}

  local rendered = template:gsub("%${([^}]+)}", function(expr)
    -- Fast path: the whole expression is itself a key.
    if context[expr] then
      return tostring(context[expr])
    end

    -- Walk the dotted path one segment at a time.
    local value = context
    for _, part in ipairs(str.split(expr, ".")) do
      if type(value) == "table" and value[part] then
        value = value[part]
      else
        return ""
      end
    end

    return tostring(value)
  end)

  -- Return the local so the substitution count is not exposed to callers.
  return rendered
end
|
|
|
|
-- ======================================================================
|
|
-- UTILITY FUNCTIONS
|
|
-- ======================================================================
|
|
|
|
-- Report whether a string consists solely of whitespace (Lua's `%s` class).
-- The empty string also counts as whitespace-only.
-- @param s  string to test
-- @return boolean
function str.is_whitespace(s)
  local start = s:find("^%s*$")
  return start ~= nil
end
|
|
|
|
-- Remove every whitespace character (Lua's `%s` class) from a string.
-- @param s  input string
-- @return the stripped string (exactly one value; gsub's match count is
--         intentionally dropped)
function str.strip_whitespace(s)
  local stripped = s:gsub("%s", "")
  -- Return the local so the substitution count is not exposed to callers.
  return stripped
end
|
|
|
|
-- Normalize whitespace: collapse every run of whitespace (Lua's `%s` class)
-- into a single space, then trim the result with `str.trim`.
-- @param s  input string
-- @return result of `str.trim` on the collapsed string
function str.normalize_whitespace(s)
  local collapsed = s:gsub("%s+", " ")
  return str.trim(collapsed)
end
|
|
|
|
-- Extract the numbers embedded in a string.
-- Candidates are matched with the Lua pattern "%-?%d+%.?%d*" (optional minus,
-- digits, optional decimal point and more digits) and converted with
-- `tonumber`; candidates that do not convert are skipped.
-- @param s  input string
-- @return list of numbers in order of appearance (empty list when none)
function str.extract_numbers(s)
  local found = {}
  for token in s:gmatch("%-?%d+%.?%d*") do
    local value = tonumber(token)
    if value then
      found[#found + 1] = value
    end
  end
  return found
end
|
|
|
|
-- Accent lookup table, built once at load time instead of on every call.
-- Keys are UTF-8 encoded accented letters (this file must be saved as UTF-8);
-- values are their unaccented ASCII counterparts. Both lower- and upper-case
-- forms are covered so that mixed-case input is handled uniformly.
local ACCENT_MAP = {
  ["à"] = "a", ["á"] = "a", ["â"] = "a", ["ã"] = "a", ["ä"] = "a",
  ["è"] = "e", ["é"] = "e", ["ê"] = "e", ["ë"] = "e",
  ["ì"] = "i", ["í"] = "i", ["î"] = "i", ["ï"] = "i",
  ["ò"] = "o", ["ó"] = "o", ["ô"] = "o", ["õ"] = "o", ["ö"] = "o",
  ["ù"] = "u", ["ú"] = "u", ["û"] = "u", ["ü"] = "u",
  ["ñ"] = "n", ["ç"] = "c",
  ["À"] = "A", ["Á"] = "A", ["Â"] = "A", ["Ã"] = "A", ["Ä"] = "A",
  ["È"] = "E", ["É"] = "E", ["Ê"] = "E", ["Ë"] = "E",
  ["Ì"] = "I", ["Í"] = "I", ["Î"] = "I", ["Ï"] = "I",
  ["Ò"] = "O", ["Ó"] = "O", ["Ô"] = "O", ["Õ"] = "O", ["Ö"] = "O",
  ["Ù"] = "U", ["Ú"] = "U", ["Û"] = "U", ["Ü"] = "U",
  ["Ñ"] = "N", ["Ç"] = "C",
}

-- Remove diacritics/accents from the Latin letters listed in ACCENT_MAP.
-- Characters not present in the map are left untouched.
-- @param s  UTF-8 input string
-- @return the string with mapped accented letters replaced by plain ASCII
--         letters (exactly one value)
function str.remove_accents(s)
  -- Every key in ACCENT_MAP lies in U+00C0..U+00FF, which UTF-8 encodes as
  -- the lead byte 0xC3 (195) followed by one continuation byte 0x80..0xBF
  -- (128..191). A single gsub pass over such two-byte sequences with the
  -- table as replacement substitutes the mapped ones and, because a nil
  -- lookup means "keep the match", leaves the others as they are.
  local result = s:gsub("\195[\128-\191]", ACCENT_MAP)
  return result
end
|
|
|
|
-- Generate a random string.
-- Delegates to `moonshark.random_string`; both arguments are forwarded
-- unchanged, so any default character set is decided by that function.
-- @param length   desired length
-- @param charset  characters to draw from
-- @return result of `moonshark.random_string`
function str.random(length, charset)
  return moonshark.random_string(length, charset)
end
|
|
|
|
-- Check whether a value is a string holding well-formed UTF-8.
-- The bytes are validated directly, so the result does not depend on whether
-- the running Lua version ships a `utf8` library. Overlong encodings,
-- UTF-16 surrogates (U+D800..U+DFFF) and code points above U+10FFFF are
-- rejected, as are truncated sequences and stray continuation bytes.
-- @param s  value to test
-- @return true for a valid UTF-8 string (including ""), false otherwise
--         (non-string values yield false)
function str.is_utf8(s)
  if type(s) ~= "string" then
    return false
  end

  local i, n = 1, #s
  while i <= n do
    local lead = s:byte(i)
    -- `extra` = number of continuation bytes; `lo`/`hi` = allowed range for
    -- the FIRST continuation byte (narrowed for some lead bytes, see below).
    local extra, lo, hi = 0, 0x80, 0xBF

    if lead < 0x80 then
      extra = 0                                  -- ASCII
    elseif lead >= 0xC2 and lead <= 0xDF then
      extra = 1                                  -- 0xC0/0xC1 would be overlong
    elseif lead >= 0xE0 and lead <= 0xEF then
      extra = 2
      if lead == 0xE0 then lo = 0xA0 end         -- reject overlong 3-byte forms
      if lead == 0xED then hi = 0x9F end         -- reject surrogates
    elseif lead >= 0xF0 and lead <= 0xF4 then
      extra = 3
      if lead == 0xF0 then lo = 0x90 end         -- reject overlong 4-byte forms
      if lead == 0xF4 then hi = 0x8F end         -- reject > U+10FFFF
    else
      return false                               -- stray continuation / invalid lead
    end

    for k = 1, extra do
      local cont = s:byte(i + k)
      if not cont or cont < lo or cont > hi then
        return false
      end
      -- Only the first continuation byte has a restricted range.
      lo, hi = 0x80, 0xBF
    end

    i = i + extra + 1
  end

  return true
end
|
|
|
|
-- Generate a URL-style slug from a string.
-- Steps: strip accents (`str.remove_accents`), convert to kebab-case
-- (`str.kebab_case`), drop every character that is neither alphanumeric nor
-- "-", then collapse runs of "-" into one.
-- @param s  input string
-- @return the slug (exactly one value)
function str.slug(s)
  local plain = str.remove_accents(s)
  local slug = str.kebab_case(plain)
  slug = slug:gsub("[^%w%-]", "")
  slug = slug:gsub("%-+", "-")
  return slug
end
|
|
|
|
-- Export the module table.
return str