496 lines
17 KiB
Lua
496 lines
17 KiB
Lua
-- modules/string.lua - Enhanced string manipulation utilities
|
|
|
|
-- Module table: every public function in this file is attached to it, and it
-- is the value returned at the end of the file.
local str = {}
|
|
|
|
-- ======================================================================
|
|
-- BASIC STRING OPERATIONS (Lua, plus thin wrappers over moonshark host functions)
|
|
-- ======================================================================
|
|
|
|
--- Split a string around a delimiter.
-- Checks both arguments, then hands the work to the host function
-- `moonshark.string_split` and passes its result(s) straight through.
-- @param s          string to split
-- @param delimiter  separator string
-- @return whatever moonshark.string_split(s, delimiter) returns
function str.split(s, delimiter)
  if type(s) ~= "string" then
    error("str.split: first argument must be a string", 2)
  end
  if type(delimiter) ~= "string" then
    error("str.split: second argument must be a string", 2)
  end

  return moonshark.string_split(s, delimiter)
end
|
|
|
|
--- Join the elements of a table with a separator.
-- Checks both arguments, then hands the work to the host function
-- `moonshark.string_join` and passes its result(s) straight through.
-- @param arr        table of items to join
-- @param separator  string placed between items
-- @return whatever moonshark.string_join(arr, separator) returns
function str.join(arr, separator)
  if type(arr) ~= "table" then
    error("str.join: first argument must be a table", 2)
  end
  if type(separator) ~= "string" then
    error("str.join: second argument must be a string", 2)
  end

  return moonshark.string_join(arr, separator)
end
|
|
|
|
--- Remove leading and trailing whitespace (Lua's `%s` class).
-- @param s  string to trim
-- @return the trimmed string
function str.trim(s)
  if type(s) ~= "string" then
    error("str.trim: argument must be a string", 2)
  end

  -- The lazy capture keeps interior whitespace and drops both edges.
  local core = string.match(s, "^%s*(.-)%s*$")
  return core
end
|
|
|
|
--- Strip characters from the start of a string.
-- With no cutset, leading whitespace (`%s`) is removed. With a cutset, every
-- leading character that appears in `cutset` is removed; the cutset is taken
-- literally (pattern magic characters are escaped).
-- Always returns exactly one value.
-- @param s       string to trim
-- @param cutset  optional string of characters to strip
-- @return the trimmed string
function str.trim_left(s, cutset)
  if type(s) ~= "string" then error("str.trim_left: first argument must be a string", 2) end

  if cutset then
    if type(cutset) ~= "string" then error("str.trim_left: second argument must be a string", 2) end

    -- An empty cutset would build the malformed pattern "^[]*"; there is
    -- nothing to strip, so return the input unchanged.
    if cutset == "" then return s end

    local escaped = cutset:gsub("([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1")
    -- Parentheses discard gsub's second result (the substitution count).
    return (s:gsub("^[" .. escaped .. "]*", ""))
  end

  return (s:match("^%s*(.*)"))
end
|
|
|
|
--- Strip characters from the end of a string.
-- With no cutset, trailing whitespace (`%s`) is removed. With a cutset, every
-- trailing character that appears in `cutset` is removed; the cutset is taken
-- literally (pattern magic characters are escaped).
-- Always returns exactly one value.
-- @param s       string to trim
-- @param cutset  optional string of characters to strip
-- @return the trimmed string
function str.trim_right(s, cutset)
  if type(s) ~= "string" then error("str.trim_right: first argument must be a string", 2) end

  if cutset then
    if type(cutset) ~= "string" then error("str.trim_right: second argument must be a string", 2) end

    -- An empty cutset would build the malformed pattern "[]*$"; there is
    -- nothing to strip, so return the input unchanged.
    if cutset == "" then return s end

    local escaped = cutset:gsub("([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1")
    -- Parentheses discard gsub's second result (the substitution count).
    return (s:gsub("[" .. escaped .. "]*$", ""))
  end

  return (s:match("(.-)%s*$"))
end
|
|
|
|
--- Convert a string to upper case via Lua's `string.upper`.
-- @param s  input string
-- @return upper-cased copy of `s`
function str.upper(s)
  if type(s) ~= "string" then
    error("str.upper: argument must be a string", 2)
  end
  return string.upper(s)
end
|
|
|
|
--- Convert a string to lower case via Lua's `string.lower`.
-- @param s  input string
-- @return lower-cased copy of `s`
function str.lower(s)
  if type(s) ~= "string" then
    error("str.lower: argument must be a string", 2)
  end
  return string.lower(s)
end
|
|
|
|
--- Title-case a string: each word starts upper-case, the rest is lowered.
-- A "word" is a letter followed by any run of alphanumerics, underscores or
-- apostrophes, so "it's" stays one word.
-- Always returns exactly one value.
-- @param s  input string
-- @return the title-cased string
function str.title(s)
  if type(s) ~= "string" then error("str.title: argument must be a string", 2) end

  -- Assigning to a single local drops gsub's second result (the match count),
  -- which would otherwise leak to callers as an extra return value.
  local titled = s:gsub("(%a)([%w_']*)", function(first, rest)
    return first:upper() .. rest:lower()
  end)
  return titled
end
|
|
|
|
--- Report whether `substr` occurs anywhere in `s` (plain search, no patterns).
-- @param s       string to search
-- @param substr  literal text to look for
-- @return true if found, false otherwise
function str.contains(s, substr)
  if type(s) ~= "string" then
    error("str.contains: first argument must be a string", 2)
  end
  if type(substr) ~= "string" then
    error("str.contains: second argument must be a string", 2)
  end

  local at = string.find(s, substr, 1, true)
  if at == nil then
    return false
  end
  return true
end
|
|
|
|
--- Report whether `s` begins with `prefix` (byte-wise comparison).
-- @param s       string to test
-- @param prefix  expected leading text
-- @return true or false
function str.starts_with(s, prefix)
  if type(s) ~= "string" then
    error("str.starts_with: first argument must be a string", 2)
  end
  if type(prefix) ~= "string" then
    error("str.starts_with: second argument must be a string", 2)
  end

  local head = string.sub(s, 1, #prefix)
  return head == prefix
end
|
|
|
|
--- Report whether `s` ends with `suffix` (byte-wise comparison).
-- An empty suffix matches every string, mirroring str.starts_with with an
-- empty prefix.
-- @param s       string to test
-- @param suffix  expected trailing text
-- @return true or false
function str.ends_with(s, suffix)
  if type(s) ~= "string" then error("str.ends_with: first argument must be a string", 2) end
  if type(suffix) ~= "string" then error("str.ends_with: second argument must be a string", 2) end

  -- s:sub(-0) is s:sub(0), i.e. the whole string, so the empty suffix must be
  -- handled before the negative-index slice.
  if suffix == "" then return true end

  return s:sub(-#suffix) == suffix
end
|
|
|
|
--- Replace every occurrence of the literal text `old` with the literal `new`.
-- Both `old` and `new` are taken literally: pattern magic in `old` and `%`
-- sequences in `new` are escaped before calling gsub.
-- Returns exactly one value, consistent with str.replace_n.
-- @param s    string to search
-- @param old  literal text to find (must be non-empty)
-- @param new  literal replacement text
-- @return the string with all replacements applied
function str.replace(s, old, new)
  if type(s) ~= "string" then error("str.replace: first argument must be a string", 2) end
  if type(old) ~= "string" then error("str.replace: second argument must be a string", 2) end
  if type(new) ~= "string" then error("str.replace: third argument must be a string", 2) end
  if old == "" then error("str.replace: cannot replace empty string", 2) end

  local pattern = old:gsub("([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1")
  -- '%' is special in a gsub replacement string; double it so text such as
  -- "100%" is inserted verbatim instead of raising or being misread.
  local replacement = new:gsub("%%", "%%%%")

  -- Parentheses discard gsub's second result (the substitution count).
  return (s:gsub(pattern, replacement))
end
|
|
|
|
--- Replace at most `n` occurrences of the literal text `old` with `new`.
-- Both `old` and `new` are taken literally: pattern magic in `old` and `%`
-- sequences in `new` are escaped before calling gsub.
-- @param s    string to search
-- @param old  literal text to find (must be non-empty)
-- @param new  literal replacement text
-- @param n    maximum number of replacements (non-negative integer)
-- @return the string with up to `n` replacements applied
function str.replace_n(s, old, new, n)
  if type(s) ~= "string" then error("str.replace_n: first argument must be a string", 2) end
  if type(old) ~= "string" then error("str.replace_n: second argument must be a string", 2) end
  if type(new) ~= "string" then error("str.replace_n: third argument must be a string", 2) end
  if type(n) ~= "number" or n < 0 or n ~= math.floor(n) then
    error("str.replace_n: fourth argument must be a non-negative integer", 2)
  end
  if old == "" then error("str.replace_n: cannot replace empty string", 2) end

  local escaped = old:gsub("([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1")
  -- '%' is special in a gsub replacement string; double it so the
  -- replacement is inserted verbatim.
  local replacement = new:gsub("%%", "%%%%")

  return (s:gsub(escaped, replacement, n))
end
|
|
|
|
--- Find the first occurrence of a literal substring.
-- @param s       string to search
-- @param substr  literal text to look for (no pattern matching)
-- @return 1-based start position of the first match, or nil if absent
function str.index(s, substr)
  if type(s) ~= "string" then
    error("str.index: first argument must be a string", 2)
  end
  if type(substr) ~= "string" then
    error("str.index: second argument must be a string", 2)
  end

  -- Parentheses keep only the start position (find also returns the end).
  return (string.find(s, substr, 1, true))
end
|
|
|
|
--- Find the last occurrence of a literal substring.
-- Scans forward, restarting one byte after each hit, and remembers the most
-- recent start position.
-- @param s       string to search
-- @param substr  literal text to look for (no pattern matching)
-- @return 1-based start position of the last match, or nil if absent
function str.last_index(s, substr)
  if type(s) ~= "string" then
    error("str.last_index: first argument must be a string", 2)
  end
  if type(substr) ~= "string" then
    error("str.last_index: second argument must be a string", 2)
  end

  local latest = nil
  local hit = string.find(s, substr, 1, true)
  while hit do
    latest = hit
    hit = string.find(s, substr, hit + 1, true)
  end
  return latest
end
|
|
|
|
--- Count non-overlapping occurrences of a literal substring.
-- An empty `substr` yields `#s + 1`.
-- @param s       string to search
-- @param substr  literal text to count (no pattern matching)
-- @return number of occurrences
function str.count(s, substr)
  if type(s) ~= "string" then
    error("str.count: first argument must be a string", 2)
  end
  if type(substr) ~= "string" then
    error("str.count: second argument must be a string", 2)
  end
  if substr == "" then
    return #s + 1
  end

  local step = #substr
  local total = 0
  local hit = string.find(s, substr, 1, true)
  while hit do
    total = total + 1
    -- Jump past the whole match so occurrences never overlap.
    hit = string.find(s, substr, hit + step, true)
  end
  return total
end
|
|
|
|
--- Repeat a string `n` times.
-- @param s  string to repeat
-- @param n  repeat count (non-negative integer)
-- @return `s` concatenated with itself `n` times
function str.repeat_(s, n)
  if type(s) ~= "string" then
    error("str.repeat_: first argument must be a string", 2)
  end

  local is_count = type(n) == "number" and n >= 0 and n == math.floor(n)
  if not is_count then
    error("str.repeat_: second argument must be a non-negative integer", 2)
  end

  return s:rep(n)
end
|
|
|
|
--- Reverse a string using the host function `moonshark.string_reverse`.
-- The host call is expected to return `result` or a falsy value plus an
-- error message; a failure is re-raised with a "str.reverse: " prefix.
-- @param s  string to reverse
-- @return the reversed string as produced by the host
function str.reverse(s)
  if type(s) ~= "string" then
    error("str.reverse: argument must be a string", 2)
  end

  local reversed, failure = moonshark.string_reverse(s)
  if reversed then
    return reversed
  end
  error("str.reverse: " .. failure, 2)
end
|
|
|
|
--- Length of a string as reported by the host `moonshark.string_length`.
-- NOTE(review): presumably a character count rather than a byte count, since
-- str.byte_length exists separately -- confirm against the host implementation.
-- @param s  input string
-- @return whatever moonshark.string_length(s) returns
function str.length(s)
  if type(s) ~= "string" then
    error("str.length: argument must be a string", 2)
  end
  return moonshark.string_length(s)
end
|
|
|
|
--- Length of a string in bytes (Lua's native string length).
-- @param s  input string
-- @return byte count of `s`
function str.byte_length(s)
  if type(s) ~= "string" then
    error("str.byte_length: argument must be a string", 2)
  end
  return string.len(s)
end
|
|
|
|
--- Split text into lines.
-- "\r\n" and lone "\r" are normalised to "\n" first. One trailing empty
-- line (e.g. produced by a final newline) is dropped, so "a\n" gives {"a"}.
-- @param s  input text
-- @return array of line strings without terminators
function str.lines(s)
  if type(s) ~= "string" then
    error("str.lines: argument must be a string", 2)
  end

  local normalized = s:gsub("\r\n", "\n")
  normalized = normalized:gsub("\r", "\n")

  local out, n = {}, 0
  -- The appended "\n" guarantees the last line is terminated and captured.
  for line in (normalized .. "\n"):gmatch("([^\n]*)\n") do
    n = n + 1
    out[n] = line
  end

  if n > 0 and out[n] == "" then
    out[n] = nil
  end
  return out
end
|
|
|
|
--- Split a string into whitespace-separated words.
-- @param s  input string
-- @return array of maximal runs of non-whitespace characters
function str.words(s)
  if type(s) ~= "string" then
    error("str.words: argument must be a string", 2)
  end

  local found = {}
  for token in string.gmatch(s, "%S+") do
    found[#found + 1] = token
  end
  return found
end
|
|
|
|
--- Left-pad a string to a target width.
-- Width is compared against str.length(s). Only the first byte of
-- `pad_char` is used; a missing or empty `pad_char` falls back to a space.
-- @param s         string to pad
-- @param width     target width (non-negative integer)
-- @param pad_char  optional padding string (default " ")
-- @return `s` unchanged if already wide enough, otherwise the padded string
function str.pad_left(s, width, pad_char)
  if type(s) ~= "string" then
    error("str.pad_left: first argument must be a string", 2)
  end

  local width_ok = type(width) == "number" and width >= 0 and width == math.floor(width)
  if not width_ok then
    error("str.pad_left: second argument must be a non-negative integer", 2)
  end

  pad_char = pad_char or " "
  if type(pad_char) ~= "string" then
    error("str.pad_left: third argument must be a string", 2)
  end
  pad_char = (#pad_char == 0) and " " or pad_char:sub(1, 1)

  local have = str.length(s)
  if width <= have then
    return s
  end
  return string.rep(pad_char, width - have) .. s
end
|
|
|
|
--- Right-pad a string to a target width.
-- Width is compared against str.length(s). Only the first byte of
-- `pad_char` is used; a missing or empty `pad_char` falls back to a space.
-- @param s         string to pad
-- @param width     target width (non-negative integer)
-- @param pad_char  optional padding string (default " ")
-- @return `s` unchanged if already wide enough, otherwise the padded string
function str.pad_right(s, width, pad_char)
  if type(s) ~= "string" then
    error("str.pad_right: first argument must be a string", 2)
  end

  local width_ok = type(width) == "number" and width >= 0 and width == math.floor(width)
  if not width_ok then
    error("str.pad_right: second argument must be a non-negative integer", 2)
  end

  pad_char = pad_char or " "
  if type(pad_char) ~= "string" then
    error("str.pad_right: third argument must be a string", 2)
  end
  pad_char = (#pad_char == 0) and " " or pad_char:sub(1, 1)

  local have = str.length(s)
  if width <= have then
    return s
  end
  return s .. string.rep(pad_char, width - have)
end
|
|
|
|
--- Extract part of a string via the host `moonshark.string_slice`.
-- Index semantics are defined by the host; this wrapper only validates that
-- `start` (and `end_pos`, when given) are integers and re-raises host
-- failures with a "str.slice: " prefix.
-- @param s        source string
-- @param start    integer start index
-- @param end_pos  optional integer end index
-- @return the slice produced by the host
function str.slice(s, start, end_pos)
  if type(s) ~= "string" then
    error("str.slice: first argument must be a string", 2)
  end
  if type(start) ~= "number" or start ~= math.floor(start) then
    error("str.slice: second argument must be an integer", 2)
  end
  if end_pos ~= nil then
    if type(end_pos) ~= "number" or end_pos ~= math.floor(end_pos) then
      error("str.slice: third argument must be an integer", 2)
    end
  end

  local piece, failure = moonshark.string_slice(s, start, end_pos)
  if piece then
    return piece
  end
  error("str.slice: " .. failure, 2)
end
|
|
|
|
-- ======================================================================
|
|
-- REGULAR EXPRESSIONS (Go Functions)
|
|
-- ======================================================================
|
|
|
|
--- Run the host regex matcher `moonshark.regex_match`.
-- Note the argument order: pattern first, subject second.
-- @param pattern  regular expression source
-- @param s        subject string
-- @return whatever moonshark.regex_match(pattern, s) returns
function str.match(pattern, s)
  if type(pattern) ~= "string" then
    error("str.match: first argument must be a string", 2)
  end
  if type(s) ~= "string" then
    error("str.match: second argument must be a string", 2)
  end

  return moonshark.regex_match(pattern, s)
end
|
|
|
|
--- Run the host regex search `moonshark.regex_find`.
-- Note the argument order: pattern first, subject second.
-- @param pattern  regular expression source
-- @param s        subject string
-- @return whatever moonshark.regex_find(pattern, s) returns
function str.find(pattern, s)
  if type(pattern) ~= "string" then
    error("str.find: first argument must be a string", 2)
  end
  if type(s) ~= "string" then
    error("str.find: second argument must be a string", 2)
  end

  return moonshark.regex_find(pattern, s)
end
|
|
|
|
--- Run the host regex search `moonshark.regex_find_all`.
-- Note the argument order: pattern first, subject second.
-- @param pattern  regular expression source
-- @param s        subject string
-- @return whatever moonshark.regex_find_all(pattern, s) returns
function str.find_all(pattern, s)
  if type(pattern) ~= "string" then
    error("str.find_all: first argument must be a string", 2)
  end
  if type(s) ~= "string" then
    error("str.find_all: second argument must be a string", 2)
  end

  return moonshark.regex_find_all(pattern, s)
end
|
|
|
|
--- Regex replace via the host `moonshark.regex_replace`.
-- Note the argument order: pattern, subject, replacement.
-- @param pattern      regular expression source
-- @param s            subject string
-- @param replacement  replacement string
-- @return whatever moonshark.regex_replace(pattern, s, replacement) returns
function str.gsub(pattern, s, replacement)
  if type(pattern) ~= "string" then
    error("str.gsub: first argument must be a string", 2)
  end
  if type(s) ~= "string" then
    error("str.gsub: second argument must be a string", 2)
  end
  if type(replacement) ~= "string" then
    error("str.gsub: third argument must be a string", 2)
  end

  return moonshark.regex_replace(pattern, s, replacement)
end
|
|
|
|
-- ======================================================================
|
|
-- TYPE CONVERSION & VALIDATION
|
|
-- ======================================================================
|
|
|
|
--- Parse a string as a number after trimming surrounding whitespace.
-- @param s  input string
-- @return the number parsed by `tonumber`, or nil if `s` is not numeric
function str.to_number(s)
  if type(s) ~= "string" then
    error("str.to_number: argument must be a string", 2)
  end

  local trimmed = str.trim(s)
  return tonumber(trimmed)
end
|
|
|
|
--- Report whether a string (after trimming) is accepted by `tonumber`.
-- @param s  input string
-- @return true if `tonumber` can parse the trimmed string, false otherwise
function str.is_numeric(s)
  if type(s) ~= "string" then
    error("str.is_numeric: argument must be a string", 2)
  end

  local trimmed = str.trim(s)
  local parsed = tonumber(trimmed)
  return parsed ~= nil
end
|
|
|
|
--- Report whether a string is non-empty and made only of letters (`%a`).
-- @param s  input string
-- @return true or false
function str.is_alpha(s)
  if type(s) ~= "string" then
    error("str.is_alpha: argument must be a string", 2)
  end

  if s == "" then
    return false
  end
  return string.find(s, "^%a+$") ~= nil
end
|
|
|
|
--- Report whether a string is non-empty and made only of letters and
-- digits (`%w`).
-- @param s  input string
-- @return true or false
function str.is_alphanumeric(s)
  if type(s) ~= "string" then
    error("str.is_alphanumeric: argument must be a string", 2)
  end

  if s == "" then
    return false
  end
  return string.find(s, "^%w+$") ~= nil
end
|
|
|
|
--- Report whether a value is nil or the empty string.
-- No type check is performed; any other value yields false.
-- @param s  value to test
-- @return true or false
function str.is_empty(s)
  if s == nil then
    return true
  end
  return s == ""
end
|
|
|
|
--- Report whether a value is nil, empty, or whitespace-only.
-- Non-empty values are passed to str.trim, which raises for non-strings.
-- @param s  value to test
-- @return true or false
function str.is_blank(s)
  if str.is_empty(s) then
    return true
  end
  return str.trim(s) == ""
end
|
|
|
|
--- Check UTF-8 validity via the host `moonshark.string_is_valid_utf8`.
-- @param s  input string
-- @return whatever moonshark.string_is_valid_utf8(s) returns
function str.is_utf8(s)
  if type(s) ~= "string" then
    error("str.is_utf8: argument must be a string", 2)
  end
  return moonshark.string_is_valid_utf8(s)
end
|
|
|
|
-- ======================================================================
|
|
-- ADVANCED STRING OPERATIONS (mostly Lua; some rely on moonshark-backed helpers)
|
|
-- ======================================================================
|
|
|
|
--- Capitalise every word: first letter upper-case, remainder lower-case.
-- As written this behaves the same as str.title (same pattern and callback).
-- Always returns exactly one value.
-- @param s  input string
-- @return the capitalised string
function str.capitalize(s)
  if type(s) ~= "string" then error("str.capitalize: argument must be a string", 2) end

  -- Assigning to a single local drops gsub's second result (the match count),
  -- which would otherwise leak to callers as an extra return value.
  local capitalized = s:gsub("(%a)([%w_']*)", function(first, rest)
    return first:upper() .. rest:lower()
  end)
  return capitalized
end
|
|
|
|
--- Convert whitespace-separated words to camelCase.
-- The first word is lower-cased; each later word gets an upper-case first
-- byte and a lower-case remainder. If there are no words, `s` is returned
-- unchanged.
-- @param s  input string
-- @return the camelCase string
function str.camel_case(s)
  if type(s) ~= "string" then
    error("str.camel_case: argument must be a string", 2)
  end

  local words = str.words(s)
  local n = #words
  if n == 0 then
    return s
  end

  local parts = { words[1]:lower() }
  for i = 2, n do
    local w = words[i]
    parts[i] = w:sub(1, 1):upper() .. w:sub(2):lower()
  end
  return table.concat(parts)
end
|
|
|
|
--- Convert whitespace-separated words to PascalCase.
-- Every word gets an upper-case first byte and a lower-case remainder.
-- @param s  input string
-- @return the PascalCase string ("" when there are no words)
function str.pascal_case(s)
  if type(s) ~= "string" then
    error("str.pascal_case: argument must be a string", 2)
  end

  local words = str.words(s)
  local parts = {}
  for i = 1, #words do
    local w = words[i]
    parts[i] = w:sub(1, 1):upper() .. w:sub(2):lower()
  end
  return table.concat(parts)
end
|
|
|
|
--- Convert whitespace-separated words to snake_case.
-- Words are lower-cased and joined with "_".
-- @param s  input string
-- @return the snake_case string ("" when there are no words)
function str.snake_case(s)
  if type(s) ~= "string" then
    error("str.snake_case: argument must be a string", 2)
  end

  local words = str.words(s)
  local lowered = {}
  for i = 1, #words do
    lowered[i] = words[i]:lower()
  end
  return table.concat(lowered, "_")
end
|
|
|
|
--- Convert whitespace-separated words to kebab-case.
-- Words are lower-cased and joined with "-".
-- @param s  input string
-- @return the kebab-case string ("" when there are no words)
function str.kebab_case(s)
  if type(s) ~= "string" then
    error("str.kebab_case: argument must be a string", 2)
  end

  local words = str.words(s)
  local lowered = {}
  for i = 1, #words do
    lowered[i] = words[i]:lower()
  end
  return table.concat(lowered, "-")
end
|
|
|
|
--- Centre a string within a given width.
-- Width is compared against str.length(s). When the padding is odd, the
-- extra fill goes on the right. Only the first byte of `fill_char` is used;
-- unlike the pad functions, an empty `fill_char` is an error.
-- @param s          string to centre
-- @param width      target width (non-negative integer)
-- @param fill_char  optional non-empty fill string (default " ")
-- @return `s` unchanged if already wide enough, otherwise the centred string
function str.center(s, width, fill_char)
  if type(s) ~= "string" then
    error("str.center: first argument must be a string", 2)
  end

  local width_ok = type(width) == "number" and width >= 0 and width == math.floor(width)
  if not width_ok then
    error("str.center: second argument must be a non-negative integer", 2)
  end

  fill_char = fill_char or " "
  if type(fill_char) ~= "string" or #fill_char == 0 then
    error("str.center: fill character must be a non-empty string", 2)
  end
  local fill = fill_char:sub(1, 1)

  local have = str.length(s)
  if width <= have then
    return s
  end

  local total = width - have
  local left = math.floor(total / 2)
  local right = total - left
  return string.rep(fill, left) .. s .. string.rep(fill, right)
end
|
|
|
|
--- Shorten a string to at most `max_length`, appending a suffix.
-- Lengths come from str.length and cuts from str.slice. If the suffix alone
-- does not fit, a slice of the suffix is returned. Whitespace left at the
-- cut point is trimmed before the suffix is appended.
-- @param s           string to truncate
-- @param max_length  maximum length (non-negative integer)
-- @param suffix      optional marker for truncated text (default "...")
-- @return `s` unchanged if it fits, otherwise the truncated string
function str.truncate(s, max_length, suffix)
  if type(s) ~= "string" then
    error("str.truncate: first argument must be a string", 2)
  end

  local limit_ok = type(max_length) == "number"
    and max_length >= 0
    and max_length == math.floor(max_length)
  if not limit_ok then
    error("str.truncate: second argument must be a non-negative integer", 2)
  end

  suffix = suffix or "..."
  if type(suffix) ~= "string" then
    error("str.truncate: third argument must be a string", 2)
  end

  local total = str.length(s)
  if total <= max_length then
    return s
  end

  local marker_len = str.length(suffix)
  if max_length <= marker_len then
    -- No room for any of the original text: return a cut-down suffix.
    return str.slice(suffix, 1, max_length)
  end

  local kept = str.slice(s, 1, max_length - marker_len)
  kept = str.trim_right(kept)
  return kept .. suffix
end
|
|
|
|
--- Backslash-escape regex metacharacters in a string.
-- Escapes . + * ? [ ] ^ $ ( ) { } | and backslash by prefixing each with "\".
-- Always returns exactly one value.
-- @param s  literal text
-- @return the text with each metacharacter preceded by a backslash
function str.escape_regex(s)
  if type(s) ~= "string" then error("str.escape_regex: argument must be a string", 2) end

  -- Parentheses discard gsub's second result (the substitution count).
  return (s:gsub("([%.%+%*%?%[%]%^%$%(%)%{%}%|%\\])", "\\%1"))
end
|
|
|
|
--- Percent-encode a string for use in a URL.
-- Every byte outside `%w`, "-", ".", "_" and "~" becomes "%XX" (upper-case
-- hex); spaces therefore become "%20".
-- Always returns exactly one value.
-- @param s  input string
-- @return the percent-encoded string
function str.url_encode(s)
  if type(s) ~= "string" then error("str.url_encode: argument must be a string", 2) end

  -- Assigning to a single local drops gsub's second result (the match count),
  -- which would otherwise leak to callers as an extra return value.
  local encoded = s:gsub("([^%w%-%.%_%~])", function(c)
    return string.format("%%%02X", string.byte(c))
  end)
  return encoded
end
|
|
|
|
--- Decode a percent-encoded (form-style) string.
-- "+" is turned into a space first, then each "%XX" pair is decoded, so an
-- encoded plus ("%2B") survives as "+". A "%" not followed by two hex digits
-- is left as is. The result must pass str.is_utf8.
-- @param s  encoded string
-- @return the decoded string
-- Raises an error if the decoded bytes are not valid UTF-8.
function str.url_decode(s)
  if type(s) ~= "string" then error("str.url_decode: argument must be a string", 2) end

  -- Order matters: translating "+" after percent-decoding would also turn a
  -- decoded "%2B" into a space.
  local spaced = s:gsub("%+", " ")
  local result = spaced:gsub("%%(%x%x)", function(hex)
    return string.char(tonumber(hex, 16))
  end)

  if not str.is_utf8(result) then
    error("str.url_decode: result is not valid UTF-8", 2)
  end

  return result
end
|
|
|
|
--- Levenshtein edit distance between two strings.
-- Characters are obtained with str.slice(x, i, i) and lengths with
-- str.length, so units are whatever those helpers define. Inputs longer
-- than 1000 are rejected, unless the other string is empty.
-- @param a  first string
-- @param b  second string
-- @return the minimum number of single-character insertions, deletions and
--         substitutions turning `a` into `b`
function str.distance(a, b)
  if type(a) ~= "string" then error("str.distance: first argument must be a string", 2) end
  if type(b) ~= "string" then error("str.distance: second argument must be a string", 2) end

  local len_a, len_b = str.length(a), str.length(b)

  if len_a == 0 then return len_b end
  if len_b == 0 then return len_a end

  if len_a > 1000 or len_b > 1000 then
    error("str.distance: strings too long for distance calculation", 2)
  end

  -- Slice b's characters once rather than on every cell of the table.
  local chars_b = {}
  for j = 1, len_b do
    chars_b[j] = str.slice(b, j, j)
  end

  -- Only the previous and current rows are needed, so keep two rows and swap
  -- them instead of storing the full (len_a+1) x (len_b+1) matrix.
  local prev, cur = {}, {}
  for j = 0, len_b do
    prev[j] = j
  end

  for i = 1, len_a do
    local char_a = str.slice(a, i, i)
    cur[0] = i
    for j = 1, len_b do
      local cost = (char_a == chars_b[j]) and 0 or 1
      cur[j] = math.min(
        prev[j] + 1,        -- deletion
        cur[j - 1] + 1,     -- insertion
        prev[j - 1] + cost  -- substitution (or match)
      )
    end
    prev, cur = cur, prev
  end

  -- After the final swap `prev` holds the last computed row.
  return prev[len_b]
end
|
|
|
|
--- Similarity score derived from edit distance.
-- Computed as 1 - distance / max(length(a), length(b)); two empty strings
-- score 1.0.
-- @param a  first string
-- @param b  second string
-- @return a number where 1.0 means identical
function str.similarity(a, b)
  if type(a) ~= "string" then
    error("str.similarity: first argument must be a string", 2)
  end
  if type(b) ~= "string" then
    error("str.similarity: second argument must be a string", 2)
  end

  local longest = math.max(str.length(a), str.length(b))
  if longest == 0 then
    return 1.0
  end

  local edits = str.distance(a, b)
  return 1.0 - (edits / longest)
end
|
|
|
|
--- Substitute `${name}` placeholders from a table.
-- Names consist of letters, digits and underscores. Values are converted
-- with `tostring`; placeholders whose key is absent (nil) are replaced with
-- an empty string. Always returns exactly one value.
-- @param template  text containing `${name}` placeholders
-- @param vars      optional table mapping names to values (default {})
-- @return the rendered string
function str.template(template, vars)
  if type(template) ~= "string" then error("str.template: first argument must be a string", 2) end
  vars = vars or {}
  if type(vars) ~= "table" then error("str.template: second argument must be a table", 2) end

  -- Assigning to a single local drops gsub's second result (the match count),
  -- which would otherwise leak to callers as an extra return value.
  local rendered = template:gsub("%${([%w_]+)}", function(var)
    local value = vars[var]
    if value == nil then
      return ""
    end
    return tostring(value)
  end)
  return rendered
end
|
|
|
|
--- Generate a random string via the host `moonshark.random_string`.
-- Host failures are re-raised with a "str.random: " prefix.
-- @param length   desired length (non-negative integer)
-- @param charset  optional string passed to the host as the character set
-- @return the string produced by the host
function str.random(length, charset)
  local length_ok = type(length) == "number" and length >= 0 and length == math.floor(length)
  if not length_ok then
    error("str.random: first argument must be a non-negative integer", 2)
  end
  if charset ~= nil then
    if type(charset) ~= "string" then
      error("str.random: second argument must be a string", 2)
    end
  end

  local generated, failure = moonshark.random_string(length, charset)
  if generated then
    return generated
  end
  error("str.random: " .. failure, 2)
end
|
|
|
|
--- Build a URL-style slug from a string.
-- Steps: lower-case; delete every character that is neither alphanumeric
-- (`%w`) nor whitespace (this also deletes existing hyphens); collapse each
-- whitespace run to a single "-"; strip leading and trailing hyphens.
-- @param s  input string
-- @return the slug
function str.slug(s)
  if type(s) ~= "string" then
    error("str.slug: argument must be a string", 2)
  end

  local lowered = string.lower(s)
  local stripped = lowered:gsub("[^%w%s]", "")
  local dashed = stripped:gsub("%s+", "-")
  local no_lead = dashed:gsub("^%-+", "")
  local slug = no_lead:gsub("%-+$", "")
  return slug
end
|
|
|
|
-- Export the module table (the stray trailing "|" was a syntax error).
return str