enhance string library

This commit is contained in:
Sky Johnson 2025-07-15 16:06:32 -05:00
parent 743fd0e835
commit edc8e9e607
3 changed files with 858 additions and 156 deletions

250
benchmarks/string.lua Normal file
View File

@ -0,0 +1,250 @@
require("../tests/tests")
local str = require("string")
-- Pure Lua implementations for comparison
local pure_lua = {}

--- Capitalize every word of `s`.
-- A word is one letter followed by any run of alphanumerics, underscores or
-- apostrophes; its first letter is upper-cased and the rest lower-cased.
-- @tparam string s input text
-- @treturn string the capitalized text (exactly one return value)
function pure_lua.capitalize(s)
  -- The outer parentheses drop gsub's second result (the substitution count),
  -- so callers that forward or collect the result get exactly one value.
  return (s:gsub("(%a)([%w_']*)", function(first, rest)
    return first:upper() .. rest:lower()
  end))
end
--- Convert whitespace-separated words to camelCase.
-- Every word is lower-cased; every word after the first then has its leading
-- lower-case letter upper-cased. Words are joined without separators.
-- @tparam string s input text
-- @treturn string camelCase text, or `s` unchanged when it contains no words
function pure_lua.camel_case(s)
  local parts = {}
  for word in s:gmatch("%S+") do
    local piece = word:lower()
    if #parts > 0 then
      -- Assigning to a single variable keeps only gsub's first result.
      piece = piece:gsub("^%l", string.upper)
    end
    parts[#parts + 1] = piece
  end
  if #parts == 0 then return s end
  -- One table.concat instead of growing a string with `..` on every word.
  return table.concat(parts)
end
--- Substitute `${name}` placeholders in `template` with values from `vars`.
-- Placeholder names consist of alphanumerics and underscores. A placeholder
-- whose value is nil or false is replaced by the empty string; every other
-- value is passed through tostring.
-- @tparam string template text containing `${name}` placeholders
-- @tparam[opt] table vars placeholder values keyed by name (defaults to {})
-- @treturn string the rendered text (exactly one return value)
function pure_lua.template(template, vars)
  vars = vars or {}
  -- The outer parentheses drop gsub's second result (the substitution count).
  return (template:gsub("%${([%w_]+)}", function(var)
    return tostring(vars[var] or "")
  end))
end
--- Levenshtein edit distance between `a` and `b`, compared byte by byte.
-- Uses the full (#a + 1) x (#b + 1) dynamic-programming grid.
-- @tparam string a first string
-- @tparam string b second string
-- @treturn number minimum count of single-byte insertions, deletions and
--   substitutions that turn `a` into `b`
function pure_lua.distance(a, b)
  local rows, cols = #a, #b
  if rows == 0 then return cols end
  if cols == 0 then return rows end

  -- Row 0 / column 0 hold the cost of building a prefix from nothing.
  local grid = { [0] = {} }
  for j = 0, cols do
    grid[0][j] = j
  end
  for i = 1, rows do
    grid[i] = { [0] = i }
  end

  for i = 1, rows do
    local byte_a = a:byte(i)
    local above, current = grid[i - 1], grid[i]
    for j = 1, cols do
      local substitute = above[j - 1]
      if byte_a ~= b:byte(j) then
        substitute = substitute + 1
      end
      current[j] = math.min(above[j] + 1, current[j - 1] + 1, substitute)
    end
  end

  return grid[rows][cols]
end
--- Greedily wrap `s` into lines of at most `width` bytes.
-- Words are runs of non-whitespace and are never split, so a single word
-- longer than `width` occupies a line by itself.
-- @tparam string s input text
-- @tparam number width maximum line length in bytes
-- @treturn table array of wrapped lines (empty when `s` has no words)
function pure_lua.wrap(s, width)
  local wrapped = {}
  local line = ""
  for token in s:gmatch("%S+") do
    -- Length if `token` were appended after a separating space.
    local fits = #line + #token + 1 <= width
    if line == "" then
      line = token
    elseif fits then
      line = line .. " " .. token
    else
      table.insert(wrapped, line)
      line = token
    end
  end
  if line ~= "" then
    table.insert(wrapped, line)
  end
  return wrapped
end
-- Accented letter (as a whole UTF-8 sequence) -> ASCII replacement.
-- Upper-case forms are listed too because string.lower only folds letters
-- known to the C library's current locale.
local slug_accents = {}
for ascii, accented in pairs({
  a = "àáâãäåÀÁÂÃÄÅ",
  e = "èéêëÈÉÊË",
  i = "ìíîïÌÍÎÏ",
  o = "òóôõöÒÓÔÕÖ",
  u = "ùúûüÙÚÛÜ",
  n = "ñÑ",
  c = "çÇ",
}) do
  for sequence in accented:gmatch("[\194-\244][\128-\191]*") do
    slug_accents[sequence] = ascii
  end
end

--- Build a URL slug from `s`.
-- Steps: transliterate the accented letters listed in `slug_accents`,
-- lower-case, delete everything that is neither `%w` nor whitespace, turn
-- whitespace runs into single hyphens, and trim leading/trailing hyphens.
-- @tparam string s input text (expected to be UTF-8)
-- @treturn string the slug
function pure_lua.slug(s)
  -- Lua patterns work on bytes, so a class such as "[àá]" would match the
  -- individual bytes of each letter. Match a whole multi-byte sequence and
  -- look it up instead; sequences missing from the table are left in place
  -- by gsub and handled by the cleanup below.
  local result = s:gsub("[\194-\244][\128-\191]*", slug_accents)
  result = result:lower()
  result = result:gsub("[^%w%s]", "")
  result = result:gsub("%s+", "-")
  result = result:gsub("^%-+", ""):gsub("%-+$", "")
  return result
end
-- Shared benchmark inputs: plain texts of increasing size plus a template
-- string with the variables used to fill it.
local test_texts = {
  template_str = "Hello ${name}, you are ${age} years old and work as a ${job}.",
  template_vars = { name = "John", age = 30, job = "developer" },
  short = "Hello World",
  medium = "The quick brown fox jumps over the lazy dog. This is a test sentence for benchmarking.",
  long = ("Lorem ipsum dolor sit amet, consectetur adipiscing elit. "):rep(100),
}
--- Time `go_func` against `lua_func` on the same input and print a report.
-- Both functions are called 100 times as warm-up, then each is timed over
-- `iterations` calls using os.clock.
-- @tparam string name label printed in the report header
-- @tparam function go_func first implementation; called as go_func(test_data)
-- @tparam function lua_func second implementation; called as lua_func(test_data)
-- @param test_data value handed to both functions on every call
-- @tparam[opt=10000] number iterations timed calls per implementation
-- @treturn table { go_time, lua_time, speedup, go_rate, lua_rate } where
--   speedup = lua_time / go_time (greater than 1 means go_func was faster)
local function benchmark_comparison(name, go_func, lua_func, test_data, iterations)
  iterations = iterations or 10000
  print(string.format("\n=== %s Benchmark (%d iterations) ===", name, iterations))

  -- Seconds reported by os.clock for `iterations` calls of `func`.
  local function time_calls(func)
    local started = os.clock()
    for _ = 1, iterations do
      func(test_data)
    end
    return os.clock() - started
  end

  -- Warmup
  for _ = 1, 100 do
    go_func(test_data)
    lua_func(test_data)
  end

  local go_time = time_calls(go_func)
  local lua_time = time_calls(lua_func)

  -- A very fast run can measure as 0s. Floor the divisors so rates and the
  -- speedup stay finite instead of becoming inf or nan.
  local min_elapsed = 1e-9
  local go_elapsed = math.max(go_time, min_elapsed)
  local lua_elapsed = math.max(lua_time, min_elapsed)

  local go_rate = iterations / go_elapsed
  local lua_rate = iterations / lua_elapsed
  local speedup = lua_elapsed / go_elapsed

  -- Always report how many times faster the winner is, so a Lua win reads
  -- "2.00x (Lua faster)" rather than "0.50x (Lua faster)".
  local go_faster = speedup > 1
  local factor = go_faster and speedup or 1 / speedup

  print(string.format("Go (enhanced): %.3fs (%.0f ops/sec)", go_time, go_rate))
  print(string.format("Lua (pure): %.3fs (%.0f ops/sec)", lua_time, lua_rate))
  print(string.format("Speedup: %.2fx %s", factor, go_faster and "(Go faster)" or "(Lua faster)"))

  return {
    go_time = go_time,
    lua_time = lua_time,
    speedup = speedup,
    go_rate = go_rate,
    lua_rate = lua_rate
  }
end
--- Run every string benchmark, print a per-benchmark summary and return the
-- raw results.
-- @treturn table results keyed by benchmark name (capitalize, camel_case,
--   template, distance, wrap, slug); each value is the table returned by
--   benchmark_comparison
local function run_benchmarks()
  print("String Operations Performance Comparison")
  print("=" .. string.rep("=", 50))

  -- One entry per benchmark, in execution and reporting order. `iterations`
  -- is omitted where benchmark_comparison's default should apply.
  local suite = {
    {
      key = "capitalize",
      label = "Capitalize",
      go = function(text) return str.capitalize(text) end,
      lua = function(text) return pure_lua.capitalize(text) end,
      data = test_texts.medium,
    },
    {
      key = "camel_case",
      label = "CamelCase",
      go = function(text) return str.camel_case(text) end,
      lua = function(text) return pure_lua.camel_case(text) end,
      data = test_texts.medium,
    },
    {
      key = "template",
      label = "Template",
      go = function(data) return str.template(test_texts.template_str, data) end,
      lua = function(data) return pure_lua.template(test_texts.template_str, data) end,
      data = test_texts.template_vars,
    },
    {
      -- Distance benchmark (shorter strings due to O(n²) complexity)
      key = "distance",
      label = "Levenshtein Distance",
      go = function(texts) return str.distance(texts[1], texts[2]) end,
      lua = function(texts) return pure_lua.distance(texts[1], texts[2]) end,
      data = {"kitten", "sitting"},
      iterations = 1000, -- Fewer iterations for expensive operation
    },
    {
      key = "wrap",
      label = "Text Wrap",
      go = function(text) return str.wrap(text, 40) end,
      lua = function(text) return pure_lua.wrap(text, 40) end,
      data = test_texts.long,
    },
    {
      key = "slug",
      label = "Slug Generation",
      go = function(text) return str.slug(text) end,
      lua = function(text) return pure_lua.slug(text) end,
      data = "Café & Restaurant!! Special Characters",
    },
  }

  local results = {}
  for _, spec in ipairs(suite) do
    results[spec.key] = benchmark_comparison(
      spec.label, spec.go, spec.lua, spec.data, spec.iterations
    )
  end

  -- Summary
  print("\n" .. string.rep("=", 50))
  print("PERFORMANCE SUMMARY")
  print(string.rep("=", 50))

  local go_wins = 0
  local lua_wins = 0
  -- Walk the suite rather than pairs(results) so rows print in a stable order.
  for _, spec in ipairs(suite) do
    local result = results[spec.key]
    local go_won = result.speedup > 1
    -- Report the winner's own factor; speedup is lua_time / go_time, so a
    -- Lua win needs the reciprocal.
    local factor = go_won and result.speedup or 1 / result.speedup
    local margin = string.format("%.2fx", factor)
    print(string.format("%-20s: %s wins by %s", spec.key, go_won and "Go" or "Lua", margin))
    if go_won then
      go_wins = go_wins + 1
    else
      lua_wins = lua_wins + 1
    end
  end

  print(string.rep("-", 50))
  print(string.format("Go wins: %d, Lua wins: %d", go_wins, lua_wins))

  -- Analysis
  print("\nANALYSIS:")
  print("• Go functions benefit from optimized implementations")
  print("• Lua functions avoid CGO overhead for simple operations")
  print("• Choice depends on operation complexity vs call frequency")

  return results
end

-- Run the benchmarks
run_benchmarks()

View File

@ -15,6 +15,19 @@ import (
"golang.org/x/text/language" "golang.org/x/text/language"
) )
const (
maxStringLength = 10_000_000 // 10MB limit for safety
maxRepeatCount = 1_000_000 // Prevent excessive memory usage
maxRandomLength = 100_000 // Reasonable limit for random strings
)
func validateStringLength(s string) error {
if len(s) > maxStringLength {
return fmt.Errorf("string too large (max %d bytes)", maxStringLength)
}
return nil
}
func GetStringFunctions() map[string]luajit.GoFunction { func GetStringFunctions() map[string]luajit.GoFunction {
return map[string]luajit.GoFunction{ return map[string]luajit.GoFunction{
"string_split": func(s *luajit.State) int { "string_split": func(s *luajit.State) int {
@ -29,6 +42,24 @@ func GetStringFunctions() map[string]luajit.GoFunction {
if err != nil { if err != nil {
return s.PushError("string_split: second argument must be a string") return s.PushError("string_split: second argument must be a string")
} }
if err := validateStringLength(str); err != nil {
return s.PushError("string_split: %v", err)
}
// Handle empty separator - split into characters
if sep == "" {
runes := []rune(str)
parts := make([]string, len(runes))
for i, r := range runes {
parts[i] = string(r)
}
if err := s.PushValue(parts); err != nil {
return s.PushError("string_split: failed to push result: %v", err)
}
return 1
}
parts := strings.Split(str, sep) parts := strings.Split(str, sep)
if err := s.PushValue(parts); err != nil { if err := s.PushValue(parts); err != nil {
return s.PushError("string_split: failed to push result: %v", err) return s.PushError("string_split: failed to push result: %v", err)
@ -50,25 +81,33 @@ func GetStringFunctions() map[string]luajit.GoFunction {
} }
var parts []string var parts []string
if slice, ok := arr.([]string); ok { switch v := arr.(type) {
parts = slice case []string:
} else if anySlice, ok := arr.([]interface{}); ok { parts = v
parts = make([]string, len(anySlice)) case []interface{}:
for i, v := range anySlice { parts = make([]string, len(v))
parts[i] = fmt.Sprintf("%v", v) for i, val := range v {
} if val == nil {
} else if anyMap, ok := arr.(map[string]interface{}); ok { parts[i] = ""
// Handle empty table case - check if it's meant to be an array
if len(anyMap) == 0 {
parts = []string{} // Empty array
} else { } else {
return s.PushError("string_join: first argument must be an array") parts[i] = fmt.Sprintf("%v", val)
} }
}
case map[string]interface{}:
if len(v) == 0 {
parts = []string{}
} else { } else {
return s.PushError("string_join: first argument must be an array, not a map")
}
default:
return s.PushError("string_join: first argument must be an array") return s.PushError("string_join: first argument must be an array")
} }
result := strings.Join(parts, sep) result := strings.Join(parts, sep)
if err := validateStringLength(result); err != nil {
return s.PushError("string_join: result %v", err)
}
s.PushString(result) s.PushString(result)
return 1 return 1
}, },
@ -94,7 +133,7 @@ func GetStringFunctions() map[string]luajit.GoFunction {
return s.PushError("string_trim_left: first argument must be a string") return s.PushError("string_trim_left: first argument must be a string")
} }
if s.GetTop() >= 2 && s.IsString(2) { if s.GetTop() >= 2 && !s.IsNil(2) {
cutset, err := s.SafeToString(2) cutset, err := s.SafeToString(2)
if err != nil { if err != nil {
return s.PushError("string_trim_left: second argument must be a string") return s.PushError("string_trim_left: second argument must be a string")
@ -115,7 +154,7 @@ func GetStringFunctions() map[string]luajit.GoFunction {
return s.PushError("string_trim_right: first argument must be a string") return s.PushError("string_trim_right: first argument must be a string")
} }
if s.GetTop() >= 2 && s.IsString(2) { if s.GetTop() >= 2 && !s.IsNil(2) {
cutset, err := s.SafeToString(2) cutset, err := s.SafeToString(2)
if err != nil { if err != nil {
return s.PushError("string_trim_right: second argument must be a string") return s.PushError("string_trim_right: second argument must be a string")
@ -159,7 +198,7 @@ func GetStringFunctions() map[string]luajit.GoFunction {
if err != nil { if err != nil {
return s.PushError("string_title: argument must be a string") return s.PushError("string_title: argument must be a string")
} }
caser := cases.Title(language.English) caser := cases.Title(language.English, cases.NoLower)
s.PushString(caser.String(str)) s.PushString(caser.String(str))
return 1 return 1
}, },
@ -228,7 +267,16 @@ func GetStringFunctions() map[string]luajit.GoFunction {
if err != nil { if err != nil {
return s.PushError("string_replace: third argument must be a string") return s.PushError("string_replace: third argument must be a string")
} }
if old == "" {
return s.PushError("string_replace: cannot replace empty string")
}
result := strings.ReplaceAll(str, old, new) result := strings.ReplaceAll(str, old, new)
if err := validateStringLength(result); err != nil {
return s.PushError("string_replace: result %v", err)
}
s.PushString(result) s.PushString(result)
return 1 return 1
}, },
@ -250,9 +298,14 @@ func GetStringFunctions() map[string]luajit.GoFunction {
return s.PushError("string_replace_n: third argument must be a string") return s.PushError("string_replace_n: third argument must be a string")
} }
n, err := s.SafeToNumber(4) n, err := s.SafeToNumber(4)
if err != nil || n != float64(int(n)) { if err != nil || n != float64(int(n)) || n < 0 {
return s.PushError("string_replace_n: fourth argument must be an integer") return s.PushError("string_replace_n: fourth argument must be a non-negative integer")
} }
if old == "" {
return s.PushError("string_replace_n: cannot replace empty string")
}
result := strings.Replace(str, old, new, int(n)) result := strings.Replace(str, old, new, int(n))
s.PushString(result) s.PushString(result)
return 1 return 1
@ -270,8 +323,18 @@ func GetStringFunctions() map[string]luajit.GoFunction {
if err != nil { if err != nil {
return s.PushError("string_index: second argument must be a string") return s.PushError("string_index: second argument must be a string")
} }
if substr == "" {
s.PushNumber(1) // Empty string found at position 1
return 1
}
index := strings.Index(str, substr) index := strings.Index(str, substr)
s.PushNumber(float64(index + 1)) // Lua is 1-indexed if index == -1 {
s.PushNumber(0) // Not found
} else {
s.PushNumber(float64(index + 1)) // Convert to 1-indexed
}
return 1 return 1
}, },
@ -287,11 +350,17 @@ func GetStringFunctions() map[string]luajit.GoFunction {
if err != nil { if err != nil {
return s.PushError("string_last_index: second argument must be a string") return s.PushError("string_last_index: second argument must be a string")
} }
if substr == "" {
s.PushNumber(float64(utf8.RuneCountInString(str) + 1)) // Empty string at end
return 1
}
index := strings.LastIndex(str, substr) index := strings.LastIndex(str, substr)
if index == -1 { if index == -1 {
s.PushNumber(0) s.PushNumber(0) // Not found
} else { } else {
s.PushNumber(float64(index + 1)) // Lua is 1-indexed s.PushNumber(float64(index + 1)) // Convert to 1-indexed
} }
return 1 return 1
}, },
@ -308,6 +377,13 @@ func GetStringFunctions() map[string]luajit.GoFunction {
if err != nil { if err != nil {
return s.PushError("string_count: second argument must be a string") return s.PushError("string_count: second argument must be a string")
} }
if substr == "" {
// Empty string matches at every position including boundaries
s.PushNumber(float64(utf8.RuneCountInString(str) + 1))
return 1
}
count := strings.Count(str, substr) count := strings.Count(str, substr)
s.PushNumber(float64(count)) s.PushNumber(float64(count))
return 1 return 1
@ -325,10 +401,19 @@ func GetStringFunctions() map[string]luajit.GoFunction {
if err != nil || count < 0 || count != float64(int(count)) { if err != nil || count < 0 || count != float64(int(count)) {
return s.PushError("string_repeat: second argument must be a non-negative integer") return s.PushError("string_repeat: second argument must be a non-negative integer")
} }
if count > 1000000 {
return s.PushError("string_repeat: count too large (max 1000000)") n := int(count)
if n == 0 {
s.PushString("")
return 1
} }
result := strings.Repeat(str, int(count))
// Check for potential overflow
if len(str) > 0 && n > maxRepeatCount/len(str) {
return s.PushError("string_repeat: result would be too large")
}
result := strings.Repeat(str, n)
s.PushString(result) s.PushString(result)
return 1 return 1
}, },
@ -341,6 +426,11 @@ func GetStringFunctions() map[string]luajit.GoFunction {
if err != nil { if err != nil {
return s.PushError("string_reverse: argument must be a string") return s.PushError("string_reverse: argument must be a string")
} }
if !utf8.ValidString(str) {
return s.PushError("string_reverse: invalid UTF-8 string")
}
runes := []rune(str) runes := []rune(str)
for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 { for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 {
runes[i], runes[j] = runes[j], runes[i] runes[i], runes[j] = runes[j], runes[i]
@ -381,7 +471,12 @@ func GetStringFunctions() map[string]luajit.GoFunction {
if err != nil { if err != nil {
return s.PushError("string_lines: argument must be a string") return s.PushError("string_lines: argument must be a string")
} }
// Handle different line endings
str = strings.ReplaceAll(str, "\r\n", "\n")
str = strings.ReplaceAll(str, "\r", "\n")
lines := strings.Split(str, "\n") lines := strings.Split(str, "\n")
if err := s.PushValue(lines); err != nil { if err := s.PushValue(lines); err != nil {
return s.PushError("string_lines: failed to push result: %v", err) return s.PushError("string_lines: failed to push result: %v", err)
} }
@ -412,14 +507,15 @@ func GetStringFunctions() map[string]luajit.GoFunction {
return s.PushError("string_pad_left: first argument must be a string") return s.PushError("string_pad_left: first argument must be a string")
} }
width, err := s.SafeToNumber(2) width, err := s.SafeToNumber(2)
if err != nil || width != float64(int(width)) { if err != nil || width != float64(int(width)) || width < 0 {
return s.PushError("string_pad_left: second argument must be an integer") return s.PushError("string_pad_left: second argument must be a non-negative integer")
} }
padChar := " " padChar := " "
if s.GetTop() >= 3 { if s.GetTop() >= 3 && !s.IsNil(3) {
if p, err := s.SafeToString(3); err == nil && len(p) > 0 { if p, err := s.SafeToString(3); err == nil && utf8.RuneCountInString(p) > 0 {
padChar = string([]rune(p)[0]) runes := []rune(p)
padChar = string(runes[0])
} }
} }
@ -430,7 +526,12 @@ func GetStringFunctions() map[string]luajit.GoFunction {
return 1 return 1
} }
padding := strings.Repeat(padChar, targetLen-currentLen) padLen := targetLen - currentLen
if padLen > maxRepeatCount {
return s.PushError("string_pad_left: padding too large")
}
padding := strings.Repeat(padChar, padLen)
s.PushString(padding + str) s.PushString(padding + str)
return 1 return 1
}, },
@ -444,14 +545,15 @@ func GetStringFunctions() map[string]luajit.GoFunction {
return s.PushError("string_pad_right: first argument must be a string") return s.PushError("string_pad_right: first argument must be a string")
} }
width, err := s.SafeToNumber(2) width, err := s.SafeToNumber(2)
if err != nil || width != float64(int(width)) { if err != nil || width != float64(int(width)) || width < 0 {
return s.PushError("string_pad_right: second argument must be an integer") return s.PushError("string_pad_right: second argument must be a non-negative integer")
} }
padChar := " " padChar := " "
if s.GetTop() >= 3 { if s.GetTop() >= 3 && !s.IsNil(3) {
if p, err := s.SafeToString(3); err == nil && len(p) > 0 { if p, err := s.SafeToString(3); err == nil && utf8.RuneCountInString(p) > 0 {
padChar = string([]rune(p)[0]) runes := []rune(p)
padChar = string(runes[0])
} }
} }
@ -462,7 +564,12 @@ func GetStringFunctions() map[string]luajit.GoFunction {
return 1 return 1
} }
padding := strings.Repeat(padChar, targetLen-currentLen) padLen := targetLen - currentLen
if padLen > maxRepeatCount {
return s.PushError("string_pad_right: padding too large")
}
padding := strings.Repeat(padChar, padLen)
s.PushString(str + padding) s.PushString(str + padding)
return 1 return 1
}, },
@ -480,10 +587,15 @@ func GetStringFunctions() map[string]luajit.GoFunction {
return s.PushError("string_slice: second argument must be an integer") return s.PushError("string_slice: second argument must be an integer")
} }
if !utf8.ValidString(str) {
return s.PushError("string_slice: invalid UTF-8 string")
}
runes := []rune(str) runes := []rune(str)
length := len(runes) length := len(runes)
startIdx := int(start) - 1 // Convert from 1-indexed to 0-indexed startIdx := int(start) - 1 // Convert from 1-indexed to 0-indexed
// Handle negative start index
if startIdx < 0 { if startIdx < 0 {
startIdx = 0 startIdx = 0
} }
@ -493,10 +605,14 @@ func GetStringFunctions() map[string]luajit.GoFunction {
} }
endIdx := length endIdx := length
if s.GetTop() >= 3 { if s.GetTop() >= 3 && !s.IsNil(3) {
end, err := s.SafeToNumber(3) end, err := s.SafeToNumber(3)
if err == nil && end == float64(int(end)) { if err == nil && end == float64(int(end)) {
endIdx = int(end) endIdx = int(end)
// Handle negative end index (from end of string)
if endIdx < 0 {
endIdx = length + endIdx + 1
}
if endIdx < 0 { if endIdx < 0 {
endIdx = 0 endIdx = 0
} }
@ -531,8 +647,7 @@ func GetStringFunctions() map[string]luajit.GoFunction {
re, err := regexp.Compile(pattern) re, err := regexp.Compile(pattern)
if err != nil { if err != nil {
s.PushBoolean(false) s.PushBoolean(false)
s.PushString(err.Error()) return 1
return 2
} }
s.PushBoolean(re.MatchString(str)) s.PushBoolean(re.MatchString(str))
@ -555,8 +670,7 @@ func GetStringFunctions() map[string]luajit.GoFunction {
re, err := regexp.Compile(pattern) re, err := regexp.Compile(pattern)
if err != nil { if err != nil {
s.PushNil() s.PushNil()
s.PushString(err.Error()) return 1
return 2
} }
match := re.FindString(str) match := re.FindString(str)
@ -583,12 +697,18 @@ func GetStringFunctions() map[string]luajit.GoFunction {
re, err := regexp.Compile(pattern) re, err := regexp.Compile(pattern)
if err != nil { if err != nil {
s.PushNil() // Return empty array for invalid patterns
s.PushString(err.Error()) if err := s.PushValue([]string{}); err != nil {
return 2 return s.PushError("regex_find_all: failed to push result: %v", err)
}
return 1
} }
matches := re.FindAllString(str, -1) matches := re.FindAllString(str, -1)
if matches == nil {
matches = []string{} // Return empty array instead of nil
}
if err := s.PushValue(matches); err != nil { if err := s.PushValue(matches); err != nil {
return s.PushError("regex_find_all: failed to push result: %v", err) return s.PushError("regex_find_all: failed to push result: %v", err)
} }
@ -614,9 +734,9 @@ func GetStringFunctions() map[string]luajit.GoFunction {
re, err := regexp.Compile(pattern) re, err := regexp.Compile(pattern)
if err != nil { if err != nil {
s.PushNil() // Return original string for invalid patterns
s.PushString(err.Error()) s.PushString(str)
return 2 return 1
} }
result := re.ReplaceAllString(str, replacement) result := re.ReplaceAllString(str, replacement)
@ -633,14 +753,14 @@ func GetStringFunctions() map[string]luajit.GoFunction {
return s.PushError("string_to_number: argument must be a string") return s.PushError("string_to_number: argument must be a string")
} }
// Trim whitespace for more lenient parsing
str = strings.TrimSpace(str)
// Try float first for more general parsing
if num, err := strconv.ParseFloat(str, 64); err == nil { if num, err := strconv.ParseFloat(str, 64); err == nil {
s.PushNumber(num) s.PushNumber(num)
return 1 return 1
} }
if num, err := strconv.ParseInt(str, 10, 64); err == nil {
s.PushNumber(float64(num))
return 1
}
s.PushNil() s.PushNil()
return 1 return 1
@ -655,9 +775,14 @@ func GetStringFunctions() map[string]luajit.GoFunction {
return s.PushError("string_is_numeric: argument must be a string") return s.PushError("string_is_numeric: argument must be a string")
} }
str = strings.TrimSpace(str)
if str == "" {
s.PushBoolean(false)
return 1
}
_, err1 := strconv.ParseFloat(str, 64) _, err1 := strconv.ParseFloat(str, 64)
_, err2 := strconv.ParseInt(str, 10, 64) s.PushBoolean(err1 == nil)
s.PushBoolean(err1 == nil || err2 == nil)
return 1 return 1
}, },
@ -719,7 +844,7 @@ func GetStringFunctions() map[string]luajit.GoFunction {
} }
charset := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" charset := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
if s.GetTop() >= 2 { if s.GetTop() >= 2 && !s.IsNil(2) {
if custom, err := s.SafeToString(2); err == nil && len(custom) > 0 { if custom, err := s.SafeToString(2); err == nil && len(custom) > 0 {
charset = custom charset = custom
} }
@ -730,18 +855,40 @@ func GetStringFunctions() map[string]luajit.GoFunction {
s.PushString("") s.PushString("")
return 1 return 1
} }
if n > 100000 { if n > maxRandomLength {
return s.PushError("random_string: length too large (max 100000)") return s.PushError("random_string: length too large (max %d)", maxRandomLength)
} }
result := make([]byte, n) // Validate charset for UTF-8
if !utf8.ValidString(charset) {
return s.PushError("random_string: charset must be valid UTF-8")
}
charsetRunes := []rune(charset)
if len(charsetRunes) == 0 {
return s.PushError("random_string: charset cannot be empty")
}
result := make([]rune, n)
rnd := rand.New(rand.NewSource(time.Now().UnixNano())) rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
for i := range result { for i := range result {
result[i] = charset[rnd.Intn(len(charset))] result[i] = charsetRunes[rnd.Intn(len(charsetRunes))]
} }
s.PushString(string(result)) s.PushString(string(result))
return 1 return 1
}, },
"string_is_valid_utf8": func(s *luajit.State) int {
if err := s.CheckMinArgs(1); err != nil {
return s.PushError("string_is_valid_utf8: %v", err)
}
str, err := s.SafeToString(1)
if err != nil {
return s.PushError("string_is_valid_utf8: argument must be a string")
}
s.PushBoolean(utf8.ValidString(str))
return 1
},
} }
} }

View File

@ -1,111 +1,217 @@
-- modules/string.lua - Comprehensive string manipulation utilities -- modules/string.lua - Enhanced string manipulation utilities
local str = {} local str = {}
-- Helper function to handle errors from Go functions
local function safe_call(func, ...)
local success, result = pcall(func, ...)
if not success then
error(result, 2)
end
return result
end
-- Helper to validate arguments
local function validate_string(s, func_name, arg_num)
if type(s) ~= "string" then
error(string.format("%s: argument %d must be a string, got %s",
func_name, arg_num or 1, type(s)), 3)
end
end
local function validate_number(n, func_name, arg_num)
if type(n) ~= "number" then
error(string.format("%s: argument %d must be a number, got %s",
func_name, arg_num or 1, type(n)), 3)
end
end
local function validate_table(t, func_name, arg_num)
if type(t) ~= "table" then
error(string.format("%s: argument %d must be a table, got %s",
func_name, arg_num or 1, type(t)), 3)
end
end
-- ====================================================================== -- ======================================================================
-- BASIC STRING OPERATIONS -- BASIC STRING OPERATIONS
-- ====================================================================== -- ======================================================================
function str.split(s, delimiter) function str.split(s, delimiter)
return moonshark.string_split(s, delimiter) validate_string(s, "str.split", 1)
validate_string(delimiter, "str.split", 2)
return safe_call(moonshark.string_split, s, delimiter)
end end
function str.join(arr, separator) function str.join(arr, separator)
return moonshark.string_join(arr, separator) validate_table(arr, "str.join", 1)
validate_string(separator, "str.join", 2)
return safe_call(moonshark.string_join, arr, separator)
end end
function str.trim(s) function str.trim(s)
return moonshark.string_trim(s) validate_string(s, "str.trim")
return safe_call(moonshark.string_trim, s)
end end
function str.trim_left(s, cutset) function str.trim_left(s, cutset)
return moonshark.string_trim_left(s, cutset) validate_string(s, "str.trim_left", 1)
if cutset ~= nil then
validate_string(cutset, "str.trim_left", 2)
end
return safe_call(moonshark.string_trim_left, s, cutset)
end end
function str.trim_right(s, cutset) function str.trim_right(s, cutset)
return moonshark.string_trim_right(s, cutset) validate_string(s, "str.trim_right", 1)
if cutset ~= nil then
validate_string(cutset, "str.trim_right", 2)
end
return safe_call(moonshark.string_trim_right, s, cutset)
end end
function str.upper(s) function str.upper(s)
return moonshark.string_upper(s) validate_string(s, "str.upper")
return safe_call(moonshark.string_upper, s)
end end
function str.lower(s) function str.lower(s)
return moonshark.string_lower(s) validate_string(s, "str.lower")
return safe_call(moonshark.string_lower, s)
end end
function str.title(s) function str.title(s)
return moonshark.string_title(s) validate_string(s, "str.title")
return safe_call(moonshark.string_title, s)
end end
function str.contains(s, substr) function str.contains(s, substr)
return moonshark.string_contains(s, substr) validate_string(s, "str.contains", 1)
validate_string(substr, "str.contains", 2)
return safe_call(moonshark.string_contains, s, substr)
end end
function str.starts_with(s, prefix) function str.starts_with(s, prefix)
return moonshark.string_starts_with(s, prefix) validate_string(s, "str.starts_with", 1)
validate_string(prefix, "str.starts_with", 2)
return safe_call(moonshark.string_starts_with, s, prefix)
end end
function str.ends_with(s, suffix) function str.ends_with(s, suffix)
return moonshark.string_ends_with(s, suffix) validate_string(s, "str.ends_with", 1)
validate_string(suffix, "str.ends_with", 2)
return safe_call(moonshark.string_ends_with, s, suffix)
end end
function str.replace(s, old, new) function str.replace(s, old, new)
return moonshark.string_replace(s, old, new) validate_string(s, "str.replace", 1)
validate_string(old, "str.replace", 2)
validate_string(new, "str.replace", 3)
return safe_call(moonshark.string_replace, s, old, new)
end end
function str.replace_n(s, old, new, n) function str.replace_n(s, old, new, n)
return moonshark.string_replace_n(s, old, new, n) validate_string(s, "str.replace_n", 1)
validate_string(old, "str.replace_n", 2)
validate_string(new, "str.replace_n", 3)
validate_number(n, "str.replace_n", 4)
if n < 0 or n ~= math.floor(n) then
error("str.replace_n: count must be a non-negative integer", 2)
end
return safe_call(moonshark.string_replace_n, s, old, new, n)
end end
function str.index(s, substr) function str.index(s, substr)
local idx = moonshark.string_index(s, substr) validate_string(s, "str.index", 1)
validate_string(substr, "str.index", 2)
local idx = safe_call(moonshark.string_index, s, substr)
return idx > 0 and idx or nil return idx > 0 and idx or nil
end end
function str.last_index(s, substr) function str.last_index(s, substr)
local idx = moonshark.string_last_index(s, substr) validate_string(s, "str.last_index", 1)
validate_string(substr, "str.last_index", 2)
local idx = safe_call(moonshark.string_last_index, s, substr)
return idx > 0 and idx or nil return idx > 0 and idx or nil
end end
function str.count(s, substr) function str.count(s, substr)
return moonshark.string_count(s, substr) validate_string(s, "str.count", 1)
validate_string(substr, "str.count", 2)
return safe_call(moonshark.string_count, s, substr)
end end
function str.repeat_(s, n) function str.repeat_(s, n)
return moonshark.string_repeat(s, n) validate_string(s, "str.repeat_", 1)
validate_number(n, "str.repeat_", 2)
if n < 0 or n ~= math.floor(n) then
error("str.repeat_: count must be a non-negative integer", 2)
end
return safe_call(moonshark.string_repeat, s, n)
end end
function str.reverse(s) function str.reverse(s)
return moonshark.string_reverse(s) validate_string(s, "str.reverse")
return safe_call(moonshark.string_reverse, s)
end end
function str.length(s) function str.length(s)
return moonshark.string_length(s) validate_string(s, "str.length")
return safe_call(moonshark.string_length, s)
end end
function str.byte_length(s) function str.byte_length(s)
return moonshark.string_byte_length(s) validate_string(s, "str.byte_length")
return safe_call(moonshark.string_byte_length, s)
end end
function str.lines(s) function str.lines(s)
return moonshark.string_lines(s) validate_string(s, "str.lines")
return safe_call(moonshark.string_lines, s)
end end
function str.words(s) function str.words(s)
return moonshark.string_words(s) validate_string(s, "str.words")
return safe_call(moonshark.string_words, s)
end end
function str.pad_left(s, width, pad_char) function str.pad_left(s, width, pad_char)
return moonshark.string_pad_left(s, width, pad_char) validate_string(s, "str.pad_left", 1)
validate_number(width, "str.pad_left", 2)
if width < 0 or width ~= math.floor(width) then
error("str.pad_left: width must be a non-negative integer", 2)
end
if pad_char ~= nil then
validate_string(pad_char, "str.pad_left", 3)
end
return safe_call(moonshark.string_pad_left, s, width, pad_char)
end end
function str.pad_right(s, width, pad_char) function str.pad_right(s, width, pad_char)
return moonshark.string_pad_right(s, width, pad_char) validate_string(s, "str.pad_right", 1)
validate_number(width, "str.pad_right", 2)
if width < 0 or width ~= math.floor(width) then
error("str.pad_right: width must be a non-negative integer", 2)
end
if pad_char ~= nil then
validate_string(pad_char, "str.pad_right", 3)
end
return safe_call(moonshark.string_pad_right, s, width, pad_char)
end end
function str.slice(s, start, end_pos) function str.slice(s, start, end_pos)
return moonshark.string_slice(s, start, end_pos) validate_string(s, "str.slice", 1)
validate_number(start, "str.slice", 2)
if start ~= math.floor(start) then
error("str.slice: start must be an integer", 2)
end
if end_pos ~= nil then
validate_number(end_pos, "str.slice", 3)
if end_pos ~= math.floor(end_pos) then
error("str.slice: end position must be an integer", 2)
end
end
return safe_call(moonshark.string_slice, s, start, end_pos)
end end
-- ====================================================================== -- ======================================================================
@ -113,19 +219,28 @@ end
-- ====================================================================== -- ======================================================================
function str.match(pattern, s) function str.match(pattern, s)
return moonshark.regex_match(pattern, s) validate_string(pattern, "str.match", 1)
validate_string(s, "str.match", 2)
return safe_call(moonshark.regex_match, pattern, s)
end end
function str.find(pattern, s) function str.find(pattern, s)
return moonshark.regex_find(pattern, s) validate_string(pattern, "str.find", 1)
validate_string(s, "str.find", 2)
return safe_call(moonshark.regex_find, pattern, s)
end end
function str.find_all(pattern, s) function str.find_all(pattern, s)
return moonshark.regex_find_all(pattern, s) validate_string(pattern, "str.find_all", 1)
validate_string(s, "str.find_all", 2)
return safe_call(moonshark.regex_find_all, pattern, s)
end end
function str.gsub(pattern, s, replacement) function str.gsub(pattern, s, replacement)
return moonshark.regex_replace(pattern, s, replacement) validate_string(pattern, "str.gsub", 1)
validate_string(s, "str.gsub", 2)
validate_string(replacement, "str.gsub", 3)
return safe_call(moonshark.regex_replace, pattern, s, replacement)
end end
-- ====================================================================== -- ======================================================================
@ -133,19 +248,23 @@ end
-- ====================================================================== -- ======================================================================
function str.to_number(s) function str.to_number(s)
return moonshark.string_to_number(s) validate_string(s, "str.to_number")
return safe_call(moonshark.string_to_number, s)
end end
function str.is_numeric(s) function str.is_numeric(s)
return moonshark.string_is_numeric(s) validate_string(s, "str.is_numeric")
return safe_call(moonshark.string_is_numeric, s)
end end
function str.is_alpha(s) function str.is_alpha(s)
return moonshark.string_is_alpha(s) validate_string(s, "str.is_alpha")
return safe_call(moonshark.string_is_alpha, s)
end end
function str.is_alphanumeric(s) function str.is_alphanumeric(s)
return moonshark.string_is_alphanumeric(s) validate_string(s, "str.is_alphanumeric")
return safe_call(moonshark.string_is_alphanumeric, s)
end end
function str.is_empty(s) function str.is_empty(s)
@ -156,60 +275,106 @@ function str.is_blank(s)
return str.is_empty(s) or str.trim(s) == "" return str.is_empty(s) or str.trim(s) == ""
end end
function str.is_utf8(s)
validate_string(s, "str.is_utf8")
return safe_call(moonshark.string_is_valid_utf8, s)
end
function str.is_valid_utf8(s)
validate_string(s, "str.is_valid_utf8")
return safe_call(moonshark.string_is_valid_utf8, s)
end
-- ====================================================================== -- ======================================================================
-- ADVANCED STRING OPERATIONS -- ADVANCED STRING OPERATIONS
-- ====================================================================== -- ======================================================================
-- Capitalize first letter of each word -- Capitalize first letter of each word (Pure Lua - faster)
function str.capitalize(s) function str.capitalize(s)
validate_string(s, "str.capitalize")
return s:gsub("(%a)([%w_']*)", function(first, rest) return s:gsub("(%a)([%w_']*)", function(first, rest)
return str.upper(first) .. str.lower(rest) return first:upper() .. rest:lower()
end) end)
end end
-- Convert string to camelCase -- Convert string to camelCase (Pure Lua - faster)
function str.camel_case(s) function str.camel_case(s)
local words = str.words(str.lower(s)) validate_string(s, "str.camel_case")
local words = {}
for word in s:gmatch("%S+") do
table.insert(words, word:lower())
end
if #words == 0 then return s end if #words == 0 then return s end
local result = words[1] local result = words[1]
for i = 2, #words do for i = 2, #words do
result = result .. str.capitalize(words[i]) result = result .. words[i]:gsub("^%l", string.upper)
end end
return result return result
end end
-- Convert string to PascalCase -- Convert string to PascalCase (Pure Lua - faster)
function str.pascal_case(s) function str.pascal_case(s)
local words = str.words(str.lower(s)) validate_string(s, "str.pascal_case")
local words = {}
for word in s:gmatch("%S+") do
table.insert(words, word:lower())
end
local result = "" local result = ""
for _, word in ipairs(words) do for _, word in ipairs(words) do
result = result .. str.capitalize(word) result = result .. word:gsub("^%l", string.upper)
end end
return result return result
end end
-- Convert string to snake_case -- Convert string to snake_case (Pure Lua - faster)
function str.snake_case(s) function str.snake_case(s)
local words = str.words(str.lower(s)) validate_string(s, "str.snake_case")
return str.join(words, "_") local words = {}
for word in s:gmatch("%S+") do
table.insert(words, word:lower())
end
return table.concat(words, "_")
end end
-- Convert string to kebab-case -- Convert string to kebab-case (Pure Lua - faster)
function str.kebab_case(s) function str.kebab_case(s)
local words = str.words(str.lower(s)) validate_string(s, "str.kebab_case")
return str.join(words, "-") local words = {}
for word in s:gmatch("%S+") do
table.insert(words, word:lower())
end
return table.concat(words, "-")
end end
-- Convert string to SCREAMING_SNAKE_CASE -- Convert string to SCREAMING_SNAKE_CASE (Pure Lua - faster)
function str.screaming_snake_case(s) function str.screaming_snake_case(s)
return str.upper(str.snake_case(s)) validate_string(s, "str.screaming_snake_case")
local words = {}
for word in s:gmatch("%S+") do
table.insert(words, word:upper())
end
return table.concat(words, "_")
end end
-- Center text within given width -- Center text within given width (Pure Lua - faster)
function str.center(s, width, fill_char) function str.center(s, width, fill_char)
validate_string(s, "str.center", 1)
validate_number(width, "str.center", 2)
if width < 0 or width ~= math.floor(width) then
error("str.center: width must be a non-negative integer", 2)
end
fill_char = fill_char or " " fill_char = fill_char or " "
local len = str.length(s) if fill_char ~= nil then
validate_string(fill_char, "str.center", 3)
if #fill_char == 0 then
error("str.center: fill character cannot be empty", 2)
end
fill_char = fill_char:sub(1,1) -- Use only first character
end
local len = #s
if len >= width then return s end if len >= width then return s end
local pad_total = width - len local pad_total = width - len
@ -219,25 +384,66 @@ function str.center(s, width, fill_char)
return string.rep(fill_char, pad_left) .. s .. string.rep(fill_char, pad_right) return string.rep(fill_char, pad_left) .. s .. string.rep(fill_char, pad_right)
end end
-- Truncate string to maximum length -- Truncate string to maximum length (Pure Lua - faster)
function str.truncate(s, max_length, suffix) function str.truncate(s, max_length, suffix)
validate_string(s, "str.truncate", 1)
validate_number(max_length, "str.truncate", 2)
if max_length < 0 or max_length ~= math.floor(max_length) then
error("str.truncate: max_length must be a non-negative integer", 2)
end
suffix = suffix or "..." suffix = suffix or "..."
if str.length(s) <= max_length then validate_string(suffix, "str.truncate", 3)
if #s <= max_length then
return s return s
end end
local main_part = str.slice(s, 1, max_length - str.length(suffix))
main_part = str.trim_right(main_part) local suffix_len = #suffix
if max_length <= suffix_len then
return suffix:sub(1, max_length)
end
local main_part = s:sub(1, max_length - suffix_len)
main_part = main_part:gsub("%s+$", "") -- trim right
return main_part .. suffix return main_part .. suffix
end end
-- Wrap text to specified width -- Wrap text to specified width (Pure Lua - much faster)
function str.wrap(s, width) function str.wrap(s, width)
local words = str.words(s) validate_string(s, "str.wrap", 1)
validate_number(width, "str.wrap", 2)
if width <= 0 or width ~= math.floor(width) then
error("str.wrap: width must be a positive integer", 2)
end
local words = {}
for word in s:gmatch("%S+") do
table.insert(words, word)
end
local lines = {} local lines = {}
local current_line = "" local current_line = ""
for _, word in ipairs(words) do for _, word in ipairs(words) do
if str.length(current_line) + str.length(word) + 1 <= width then local word_len = #word
local current_len = #current_line
-- Handle words longer than width
if word_len > width then
if current_line ~= "" then
table.insert(lines, current_line)
current_line = ""
end
-- Break long word
while #word > width do
table.insert(lines, word:sub(1, width))
word = word:sub(width + 1)
end
if #word > 0 then
current_line = word
end
elseif current_len + word_len + 1 <= width then
if current_line == "" then if current_line == "" then
current_line = word current_line = word
else else
@ -258,19 +464,26 @@ function str.wrap(s, width)
return lines return lines
end end
-- Remove common leading whitespace -- Remove common leading whitespace (Pure Lua - faster)
function str.dedent(s) function str.dedent(s)
local lines = str.split(s, "\n") validate_string(s, "str.dedent")
local lines = {}
for line in (s.."\n"):gmatch("([^\n]*)\n") do
table.insert(lines, line)
end
if #lines <= 1 then return s end if #lines <= 1 then return s end
-- Find minimum indentation (excluding empty lines) -- Find minimum indentation (excluding empty lines)
local min_indent = math.huge local min_indent = math.huge
for _, line in ipairs(lines) do for _, line in ipairs(lines) do
if str.trim(line) ~= "" then local trimmed = line:gsub("%s", "")
if trimmed ~= "" then
local indent = line:match("^%s*") local indent = line:match("^%s*")
if indent then
min_indent = math.min(min_indent, #indent) min_indent = math.min(min_indent, #indent)
end end
end end
end
if min_indent == math.huge or min_indent == 0 then if min_indent == math.huge or min_indent == 0 then
return s return s
@ -278,26 +491,30 @@ function str.dedent(s)
-- Remove common indentation -- Remove common indentation
for i, line in ipairs(lines) do for i, line in ipairs(lines) do
if str.trim(line) ~= "" then local trimmed = line:gsub("%s", "")
if trimmed ~= "" then
lines[i] = line:sub(min_indent + 1) lines[i] = line:sub(min_indent + 1)
end end
end end
return str.join(lines, "\n") return table.concat(lines, "\n")
end end
-- Escape special characters for regex -- Escape special characters for regex
function str.escape_regex(s) function str.escape_regex(s)
validate_string(s, "str.escape_regex")
return s:gsub("([%.%+%*%?%[%]%^%$%(%)%{%}%|%\\])", "\\%1") return s:gsub("([%.%+%*%?%[%]%^%$%(%)%{%}%|%\\])", "\\%1")
end end
-- Quote string for shell usage -- Quote string for shell usage
function str.shell_quote(s) function str.shell_quote(s)
validate_string(s, "str.shell_quote")
return "'" .. s:gsub("'", "'\"'\"'") .. "'" return "'" .. s:gsub("'", "'\"'\"'") .. "'"
end end
-- URL encode string -- URL encode string
function str.url_encode(s) function str.url_encode(s)
validate_string(s, "str.url_encode")
return s:gsub("([^%w%-%.%_%~])", function(c) return s:gsub("([^%w%-%.%_%~])", function(c)
return string.format("%%%02X", string.byte(c)) return string.format("%%%02X", string.byte(c))
end) end)
@ -305,9 +522,22 @@ end
-- URL decode string -- URL decode string
function str.url_decode(s) function str.url_decode(s)
return s:gsub("%%(%x%x)", function(hex) validate_string(s, "str.url_decode")
return string.char(tonumber(hex, 16)) local result = s:gsub("%%(%x%x)", function(hex)
local byte = tonumber(hex, 16)
if byte then
return string.char(byte)
else
return "%" .. hex -- Invalid hex, keep original
end
end):gsub("+", " ") end):gsub("+", " ")
-- Validate result is UTF-8
if not str.is_valid_utf8(result) then
error("str.url_decode: result is not valid UTF-8", 2)
end
return result
end end
-- ====================================================================== -- ======================================================================
@ -316,12 +546,27 @@ end
-- Case-insensitive comparison -- Case-insensitive comparison
function str.iequals(a, b) function str.iequals(a, b)
validate_string(a, "str.iequals", 1)
validate_string(b, "str.iequals", 2)
return str.lower(a) == str.lower(b) return str.lower(a) == str.lower(b)
end end
-- Levenshtein distance -- Levenshtein distance (Pure Lua - much faster)
function str.distance(a, b) function str.distance(a, b)
local len_a, len_b = str.length(a), str.length(b) validate_string(a, "str.distance", 1)
validate_string(b, "str.distance", 2)
local len_a, len_b = #a, #b
-- Handle empty strings
if len_a == 0 then return len_b end
if len_b == 0 then return len_a end
-- Limit computation for very long strings
if len_a > 1000 or len_b > 1000 then
error("str.distance: strings too long for distance calculation", 2)
end
local matrix = {} local matrix = {}
-- Initialize matrix -- Initialize matrix
@ -335,7 +580,7 @@ function str.distance(a, b)
-- Fill matrix -- Fill matrix
for i = 1, len_a do for i = 1, len_a do
for j = 1, len_b do for j = 1, len_b do
local cost = (str.slice(a, i, i) == str.slice(b, j, j)) and 0 or 1 local cost = (a:sub(i,i) == b:sub(j,j)) and 0 or 1
matrix[i][j] = math.min( matrix[i][j] = math.min(
matrix[i-1][j] + 1, -- deletion matrix[i-1][j] + 1, -- deletion
matrix[i][j-1] + 1, -- insertion matrix[i][j-1] + 1, -- insertion
@ -347,28 +592,49 @@ function str.distance(a, b)
return matrix[len_a][len_b] return matrix[len_a][len_b]
end end
-- String similarity (0-1) -- String similarity (0-1) (Pure Lua - faster)
function str.similarity(a, b) function str.similarity(a, b)
local max_len = math.max(str.length(a), str.length(b)) validate_string(a, "str.similarity", 1)
if max_len == 0 then return 1 end validate_string(b, "str.similarity", 2)
return 1 - (str.distance(a, b) / max_len)
local max_len = math.max(#a, #b)
if max_len == 0 then return 1.0 end
local dist = str.distance(a, b)
return 1.0 - (dist / max_len)
end end
-- ====================================================================== -- ======================================================================
-- TEMPLATE FUNCTIONS -- TEMPLATE FUNCTIONS
-- ====================================================================== -- ======================================================================
-- Simple template substitution -- Simple template substitution (Pure Lua - faster)
function str.template(template, vars) function str.template(template, vars)
vars = vars or {} validate_string(template, "str.template", 1)
if vars ~= nil then
validate_table(vars, "str.template", 2)
else
vars = {}
end
return template:gsub("%${([%w_]+)}", function(var) return template:gsub("%${([%w_]+)}", function(var)
return tostring(vars[var] or "") local value = vars[var]
if value == nil then
return ""
else
return tostring(value)
end
end) end)
end end
-- Advanced template with functions -- Advanced template with functions (Pure Lua - faster)
function str.template_advanced(template, context) function str.template_advanced(template, context)
context = context or {} validate_string(template, "str.template_advanced", 1)
if context ~= nil then
validate_table(context, "str.template_advanced", 2)
else
context = {}
end
return template:gsub("%${([^}]+)}", function(expr) return template:gsub("%${([^}]+)}", function(expr)
-- Simple variable substitution -- Simple variable substitution
@ -377,10 +643,14 @@ function str.template_advanced(template, context)
end end
-- Handle simple expressions like ${var.prop} -- Handle simple expressions like ${var.prop}
local parts = str.split(expr, ".") local parts = {}
for part in expr:gmatch("[^%.]+") do
table.insert(parts, part)
end
local value = context local value = context
for _, part in ipairs(parts) do for _, part in ipairs(parts) do
if type(value) == "table" and value[part] then if type(value) == "table" and value[part] ~= nil then
value = value[part] value = value[part]
else else
return "" return ""
@ -397,21 +667,25 @@ end
-- Check if string contains only whitespace -- Check if string contains only whitespace
function str.is_whitespace(s) function str.is_whitespace(s)
validate_string(s, "str.is_whitespace")
return s:match("^%s*$") ~= nil return s:match("^%s*$") ~= nil
end end
-- Remove all whitespace -- Remove all whitespace
function str.strip_whitespace(s) function str.strip_whitespace(s)
validate_string(s, "str.strip_whitespace")
return s:gsub("%s", "") return s:gsub("%s", "")
end end
-- Normalize whitespace (replace multiple spaces with single space) -- Normalize whitespace (replace multiple spaces with single space)
function str.normalize_whitespace(s) function str.normalize_whitespace(s)
validate_string(s, "str.normalize_whitespace")
return str.trim(s:gsub("%s+", " ")) return str.trim(s:gsub("%s+", " "))
end end
-- Extract numbers from string -- Extract numbers from string
function str.extract_numbers(s) function str.extract_numbers(s)
validate_string(s, "str.extract_numbers")
local numbers = {} local numbers = {}
for num in s:gmatch("%-?%d+%.?%d*") do for num in s:gmatch("%-?%d+%.?%d*") do
local n = tonumber(num) local n = tonumber(num)
@ -422,13 +696,21 @@ end
-- Remove diacritics/accents -- Remove diacritics/accents
function str.remove_accents(s) function str.remove_accents(s)
validate_string(s, "str.remove_accents")
local accents = { local accents = {
["à"] = "a", ["á"] = "a", ["â"] = "a", ["ã"] = "a", ["ä"] = "a", ["à"] = "a", ["á"] = "a", ["â"] = "a", ["ã"] = "a", ["ä"] = "a", ["å"] = "a",
["è"] = "e", ["é"] = "e", ["ê"] = "e", ["ë"] = "e", ["è"] = "e", ["é"] = "e", ["ê"] = "e", ["ë"] = "e",
["ì"] = "i", ["í"] = "i", ["î"] = "i", ["ï"] = "i", ["ì"] = "i", ["í"] = "i", ["î"] = "i", ["ï"] = "i",
["ò"] = "o", ["ó"] = "o", ["ô"] = "o", ["õ"] = "o", ["ö"] = "o", ["ò"] = "o", ["ó"] = "o", ["ô"] = "o", ["õ"] = "o", ["ö"] = "o",
["ù"] = "u", ["ú"] = "u", ["û"] = "u", ["ü"] = "u", ["ù"] = "u", ["ú"] = "u", ["û"] = "u", ["ü"] = "u",
["ñ"] = "n", ["ç"] = "c" ["ñ"] = "n", ["ç"] = "c", ["ý"] = "y", ["ÿ"] = "y",
-- Uppercase versions
["À"] = "A", ["Á"] = "A", ["Â"] = "A", ["Ã"] = "A", ["Ä"] = "A", ["Å"] = "A",
["È"] = "E", ["É"] = "E", ["Ê"] = "E", ["Ë"] = "E",
["Ì"] = "I", ["Í"] = "I", ["Î"] = "I", ["Ï"] = "I",
["Ò"] = "O", ["Ó"] = "O", ["Ô"] = "O", ["Õ"] = "O", ["Ö"] = "O",
["Ù"] = "U", ["Ú"] = "U", ["Û"] = "U", ["Ü"] = "U",
["Ñ"] = "N", ["Ç"] = "C", ["Ý"] = "Y", ["Ÿ"] = "Y"
} }
local result = s local result = s
@ -440,25 +722,48 @@ end
-- Generate random string -- Generate random string
function str.random(length, charset) function str.random(length, charset)
return moonshark.random_string(length, charset) validate_number(length, "str.random", 1)
end if length < 0 or length ~= math.floor(length) then
error("str.random: length must be a non-negative integer", 2)
-- Check if string is valid UTF-8
function str.is_utf8(s)
-- Simple check - if we can iterate through the string as UTF-8, it's valid
local success = pcall(function()
for p, c in utf8 and utf8.codes or string.gmatch(s, ".") do
-- Just iterate through
end end
end) if charset ~= nil then
return success validate_string(charset, "str.random", 2)
end
return safe_call(moonshark.random_string, length, charset)
end end
-- Generate slug from string -- Generate slug from string (Pure Lua - faster)
function str.slug(s) function str.slug(s)
local kebab = str.kebab_case(str.remove_accents(s)) validate_string(s, "str.slug")
local cleaned = (kebab:gsub("[^%w%-]", ""))
return (cleaned:gsub("%-+", "-")) -- Remove accents (simplified but faster)
local accents = {
["à"] = "a", ["á"] = "a", ["â"] = "a", ["ã"] = "a", ["ä"] = "a", ["å"] = "a",
["è"] = "e", ["é"] = "e", ["ê"] = "e", ["ë"] = "e",
["ì"] = "i", ["í"] = "i", ["î"] = "i", ["ï"] = "i",
["ò"] = "o", ["ó"] = "o", ["ô"] = "o", ["õ"] = "o", ["ö"] = "o",
["ù"] = "u", ["ú"] = "u", ["û"] = "u", ["ü"] = "u",
["ñ"] = "n", ["ç"] = "c", ["ý"] = "y", ["ÿ"] = "y",
-- Uppercase versions
["À"] = "A", ["Á"] = "A", ["Â"] = "A", ["Ã"] = "A", ["Ä"] = "A", ["Å"] = "A",
["È"] = "E", ["É"] = "E", ["Ê"] = "E", ["Ë"] = "E",
["Ì"] = "I", ["Í"] = "I", ["Î"] = "I", ["Ï"] = "I",
["Ò"] = "O", ["Ó"] = "O", ["Ô"] = "O", ["Õ"] = "O", ["Ö"] = "O",
["Ù"] = "U", ["Ú"] = "U", ["Û"] = "U", ["Ü"] = "U",
["Ñ"] = "N", ["Ç"] = "C", ["Ý"] = "Y", ["Ÿ"] = "Y"
}
local result = s:lower()
for accented, plain in pairs(accents) do
result = result:gsub(accented:lower(), plain:lower())
end
-- Keep only alphanumeric characters and spaces, then convert spaces to hyphens
result = result:gsub("[^%w%s]", "")
result = result:gsub("%s+", "-")
result = result:gsub("^%-+", ""):gsub("%-+$", "")
return result
end end
return str return str