enhance string library
This commit is contained in:
parent
743fd0e835
commit
edc8e9e607
250
benchmarks/string.lua
Normal file
250
benchmarks/string.lua
Normal file
@ -0,0 +1,250 @@
|
||||
require("../tests/tests")
|
||||
local str = require("string")
|
||||
|
||||
-- Pure Lua implementations for comparison
|
||||
local pure_lua = {}
|
||||
|
||||
-- Capitalize every word of s: the first letter is upper-cased and the rest
-- of the word is lower-cased. A word starts at a letter and continues through
-- alphanumerics, underscores and apostrophes, so "don't" stays one word.
-- Returns exactly one value, the transformed string.
function pure_lua.capitalize(s)
	-- The outer parentheses truncate gsub's (string, count) pair to the
	-- string alone so the substitution count never leaks to the caller.
	return (s:gsub("(%a)([%w_']*)", function(first, rest)
		return first:upper() .. rest:lower()
	end))
end
|
||||
|
||||
-- Convert whitespace-separated words to camelCase: every word is lower-cased,
-- and each word after the first gets its leading lowercase letter upper-cased.
-- Returns s unchanged when it contains no words (empty or all whitespace).
function pure_lua.camel_case(s)
	-- Collect the pieces and join once; concatenating inside the loop would
	-- copy the growing result on every iteration.
	local parts, count = {}, 0
	for word in s:gmatch("%S+") do
		count = count + 1
		word = word:lower()
		if count > 1 then
			-- Assigning to a single variable drops gsub's match count.
			word = word:gsub("^%l", string.upper)
		end
		parts[count] = word
	end

	if count == 0 then
		return s
	end
	return table.concat(parts)
end
|
||||
|
||||
-- Expand ${name} placeholders in template using the vars table.
-- Placeholder names consist of alphanumerics and underscores. A name that is
-- missing from vars expands to the empty string; any other value, including
-- false, is rendered with tostring. vars may be omitted.
-- Returns exactly one value, the expanded string.
function pure_lua.template(template, vars)
	vars = vars or {}
	-- Parentheses keep gsub's substitution count from leaking to the caller.
	return (template:gsub("%${([%w_]+)}", function(var)
		local value = vars[var]
		-- Compare against nil explicitly: `value or ""` would also turn a
		-- legitimate false into an empty string.
		if value == nil then
			return ""
		end
		return tostring(value)
	end))
end
|
||||
|
||||
-- Levenshtein edit distance between strings a and b, compared byte by byte.
-- Builds the full (#a + 1) x (#b + 1) dynamic-programming grid, where
-- grid[i][j] is the distance between the first i bytes of a and the first
-- j bytes of b.
function pure_lua.distance(a, b)
	local rows, cols = #a, #b
	if rows == 0 then return cols end
	if cols == 0 then return rows end

	local grid = {}
	for i = 0, rows do
		grid[i] = { [0] = i }
	end
	local top = grid[0]
	for j = 0, cols do
		top[j] = j
	end

	for i = 1, rows do
		local above, current = grid[i - 1], grid[i]
		local ch = a:sub(i, i)
		for j = 1, cols do
			local substitution = above[j - 1]
			if ch ~= b:sub(j, j) then
				substitution = substitution + 1
			end
			local deletion = above[j] + 1
			local insertion = current[j - 1] + 1
			current[j] = math.min(deletion, insertion, substitution)
		end
	end

	return grid[rows][cols]
end
|
||||
|
||||
-- Greedy word wrap. Splits s on whitespace and packs words into lines whose
-- length, counting one separating space per word, does not exceed width.
-- A word longer than width is placed on a line of its own, never split.
-- Returns an array of lines (empty when s contains no words).
function pure_lua.wrap(s, width)
	local lines = {}
	local current = ""

	for word in s:gmatch("%S+") do
		local fits = #current + #word + 1 <= width
		if fits and current ~= "" then
			current = current .. " " .. word
		else
			-- Either the line is still empty or the word does not fit:
			-- flush a non-empty line that overflowed, then start with word.
			if not fits and current ~= "" then
				lines[#lines + 1] = current
			end
			current = word
		end
	end

	if current ~= "" then
		lines[#lines + 1] = current
	end
	return lines
end
|
||||
|
||||
-- Maps one accented UTF-8 character to its ASCII base letter. Built from
-- per-letter lists so each multi-byte character becomes a single table key.
local slug_accents = {}
for ascii, accented in pairs({
	a = "àáâãäåÀÁÂÃÄÅ",
	e = "èéêëÈÉÊË",
	i = "ìíîïÌÍÎÏ",
	o = "òóôõöÒÓÔÕÖ",
	u = "ùúûüÙÚÛÜ",
	n = "ñÑ",
	c = "çÇ",
}) do
	-- A lead byte followed by its continuation bytes is one UTF-8 character.
	for ch in accented:gmatch("[\192-\255][\128-\191]*") do
		slug_accents[ch] = ascii
	end
end

-- Build a URL slug from s: lower-case it, fold the accented letters listed
-- above to ASCII, drop every remaining character that is not alphanumeric or
-- whitespace, turn whitespace runs into single hyphens, and trim leading and
-- trailing hyphens.
function pure_lua.slug(s)
	local result = s:lower()
	-- Lua patterns are byte-oriented, so a class like "[àáâ]" would match the
	-- individual bytes of those characters and rewrite each one separately.
	-- Match whole UTF-8 sequences and look them up instead; sequences that
	-- are not in the table are left as they are by gsub.
	result = result:gsub("[\192-\255][\128-\191]*", slug_accents)
	result = result:gsub("[^%w%s]", "")
	result = result:gsub("%s+", "-")
	result = result:gsub("^%-+", ""):gsub("%-+$", "")
	return result
end
|
||||
|
||||
-- Test data
|
||||
-- Inputs shared by the benchmark cases: three plain texts of increasing size
-- plus a template string and the variables substituted into it.
local test_texts = {}
test_texts.short = "Hello World"
test_texts.medium = "The quick brown fox jumps over the lazy dog. This is a test sentence for benchmarking."
test_texts.long = ("Lorem ipsum dolor sit amet, consectetur adipiscing elit. "):rep(100)
test_texts.template_str = "Hello ${name}, you are ${age} years old and work as a ${job}."
test_texts.template_vars = { name = "John", age = 30, job = "developer" }
|
||||
|
||||
-- Benchmark function
|
||||
-- Floor applied to measured durations. os.clock can report no elapsed time
-- for a short run, which would turn the rates and the speedup below into
-- inf or nan.
local MIN_ELAPSED = 1e-9

-- Call fn(arg) `iterations` times and return the CPU seconds consumed,
-- never less than MIN_ELAPSED.
local function time_iterations(fn, arg, iterations)
	local started = os.clock()
	for _ = 1, iterations do
		fn(arg)
	end
	return math.max(os.clock() - started, MIN_ELAPSED)
end

-- Time go_func against lua_func on the same input and print a short report.
--   name       label used in the printed header
--   go_func    function(test_data) under test (reported as "Go (enhanced)")
--   lua_func   function(test_data) reference (reported as "Lua (pure)")
--   test_data  value passed to both functions on every call
--   iterations timed calls per function (default 10000)
-- Returns a table with go_time, lua_time (CPU seconds), speedup
-- (lua_time / go_time, so > 1 means the Go version was faster), go_rate and
-- lua_rate (calls per second).
local function benchmark_comparison(name, go_func, lua_func, test_data, iterations)
	iterations = iterations or 10000

	print(string.format("\n=== %s Benchmark (%d iterations) ===", name, iterations))

	-- Warmup: run both functions before anything is timed.
	for _ = 1, 100 do
		go_func(test_data)
		lua_func(test_data)
	end

	local go_time = time_iterations(go_func, test_data, iterations)
	local lua_time = time_iterations(lua_func, test_data, iterations)

	local go_rate = iterations / go_time
	local lua_rate = iterations / lua_time
	local speedup = lua_time / go_time

	print(string.format("Go (enhanced): %.3fs (%.0f ops/sec)", go_time, go_rate))
	print(string.format("Lua (pure): %.3fs (%.0f ops/sec)", lua_time, lua_rate))
	print(string.format("Speedup: %.2fx %s", speedup, speedup > 1 and "(Go faster)" or "(Lua faster)"))

	return {
		go_time = go_time,
		lua_time = lua_time,
		speedup = speedup,
		go_rate = go_rate,
		lua_rate = lua_rate
	}
end
|
||||
|
||||
-- Benchmark wrapper functions
|
||||
-- Run every Go-vs-Lua comparison, print a per-benchmark summary and return
-- the results keyed by benchmark name (capitalize, camel_case, template,
-- distance, wrap, slug). Each value is the table returned by
-- benchmark_comparison.
local function run_benchmarks()
	print("String Operations Performance Comparison")
	print("=" .. string.rep("=", 50))

	local results = {}
	local order = {} -- result keys in the order the benchmarks were run

	-- Run one comparison and remember its key so the summary can be printed
	-- in a stable order (pairs() gives no ordering guarantee).
	local function run(key, ...)
		results[key] = benchmark_comparison(...)
		order[#order + 1] = key
	end

	run("capitalize", "Capitalize",
		function(text) return str.capitalize(text) end,
		function(text) return pure_lua.capitalize(text) end,
		test_texts.medium)

	run("camel_case", "CamelCase",
		function(text) return str.camel_case(text) end,
		function(text) return pure_lua.camel_case(text) end,
		test_texts.medium)

	run("template", "Template",
		function(data) return str.template(test_texts.template_str, data) end,
		function(data) return pure_lua.template(test_texts.template_str, data) end,
		test_texts.template_vars)

	-- Short strings and fewer iterations: the algorithm is O(n^2).
	run("distance", "Levenshtein Distance",
		function(texts) return str.distance(texts[1], texts[2]) end,
		function(texts) return pure_lua.distance(texts[1], texts[2]) end,
		{"kitten", "sitting"},
		1000)

	run("wrap", "Text Wrap",
		function(text) return str.wrap(text, 40) end,
		function(text) return pure_lua.wrap(text, 40) end,
		test_texts.long)

	run("slug", "Slug Generation",
		function(text) return str.slug(text) end,
		function(text) return pure_lua.slug(text) end,
		"Café & Restaurant!! Special Characters")

	-- Summary
	print("\n" .. string.rep("=", 50))
	print("PERFORMANCE SUMMARY")
	print(string.rep("=", 50))

	local go_wins = 0
	local lua_wins = 0

	for _, name in ipairs(order) do
		local result = results[name]
		local go_won = result.speedup > 1
		local winner = go_won and "Go" or "Lua"
		-- speedup is lua_time / go_time; invert it when Lua wins so the
		-- printed margin is always the winner's advantage (>= 1x).
		local ratio = result.speedup
		if not go_won then
			ratio = 1 / ratio
		end
		print(string.format("%-20s: %s wins by %s", name, winner, string.format("%.2fx", ratio)))

		if go_won then
			go_wins = go_wins + 1
		else
			lua_wins = lua_wins + 1
		end
	end

	print(string.rep("-", 50))
	print(string.format("Go wins: %d, Lua wins: %d", go_wins, lua_wins))

	-- Analysis
	print("\nANALYSIS:")
	print("• Go functions benefit from optimized implementations")
	print("• Lua functions avoid CGO overhead for simple operations")
	print("• Choice depends on operation complexity vs call frequency")

	return results
end

-- Run the benchmarks
run_benchmarks()
|
@ -15,6 +15,19 @@ import (
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// Size limits used by the string helpers.
const (
	maxStringLength = 10_000_000 // 10MB limit for safety
	maxRepeatCount  = 1_000_000  // Prevent excessive memory usage
	maxRandomLength = 100_000    // Reasonable limit for random strings
)

// validateStringLength returns nil when s is at most maxStringLength bytes
// long, and an error naming the limit otherwise.
func validateStringLength(s string) error {
	if len(s) <= maxStringLength {
		return nil
	}
	return fmt.Errorf("string too large (max %d bytes)", maxStringLength)
}
|
||||
|
||||
func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
return map[string]luajit.GoFunction{
|
||||
"string_split": func(s *luajit.State) int {
|
||||
@ -29,6 +42,24 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
if err != nil {
|
||||
return s.PushError("string_split: second argument must be a string")
|
||||
}
|
||||
|
||||
if err := validateStringLength(str); err != nil {
|
||||
return s.PushError("string_split: %v", err)
|
||||
}
|
||||
|
||||
// Handle empty separator - split into characters
|
||||
if sep == "" {
|
||||
runes := []rune(str)
|
||||
parts := make([]string, len(runes))
|
||||
for i, r := range runes {
|
||||
parts[i] = string(r)
|
||||
}
|
||||
if err := s.PushValue(parts); err != nil {
|
||||
return s.PushError("string_split: failed to push result: %v", err)
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
||||
parts := strings.Split(str, sep)
|
||||
if err := s.PushValue(parts); err != nil {
|
||||
return s.PushError("string_split: failed to push result: %v", err)
|
||||
@ -50,25 +81,33 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
}
|
||||
|
||||
var parts []string
|
||||
if slice, ok := arr.([]string); ok {
|
||||
parts = slice
|
||||
} else if anySlice, ok := arr.([]interface{}); ok {
|
||||
parts = make([]string, len(anySlice))
|
||||
for i, v := range anySlice {
|
||||
parts[i] = fmt.Sprintf("%v", v)
|
||||
}
|
||||
} else if anyMap, ok := arr.(map[string]interface{}); ok {
|
||||
// Handle empty table case - check if it's meant to be an array
|
||||
if len(anyMap) == 0 {
|
||||
parts = []string{} // Empty array
|
||||
switch v := arr.(type) {
|
||||
case []string:
|
||||
parts = v
|
||||
case []interface{}:
|
||||
parts = make([]string, len(v))
|
||||
for i, val := range v {
|
||||
if val == nil {
|
||||
parts[i] = ""
|
||||
} else {
|
||||
return s.PushError("string_join: first argument must be an array")
|
||||
parts[i] = fmt.Sprintf("%v", val)
|
||||
}
|
||||
}
|
||||
case map[string]interface{}:
|
||||
if len(v) == 0 {
|
||||
parts = []string{}
|
||||
} else {
|
||||
return s.PushError("string_join: first argument must be an array, not a map")
|
||||
}
|
||||
default:
|
||||
return s.PushError("string_join: first argument must be an array")
|
||||
}
|
||||
|
||||
result := strings.Join(parts, sep)
|
||||
if err := validateStringLength(result); err != nil {
|
||||
return s.PushError("string_join: result %v", err)
|
||||
}
|
||||
|
||||
s.PushString(result)
|
||||
return 1
|
||||
},
|
||||
@ -94,7 +133,7 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
return s.PushError("string_trim_left: first argument must be a string")
|
||||
}
|
||||
|
||||
if s.GetTop() >= 2 && s.IsString(2) {
|
||||
if s.GetTop() >= 2 && !s.IsNil(2) {
|
||||
cutset, err := s.SafeToString(2)
|
||||
if err != nil {
|
||||
return s.PushError("string_trim_left: second argument must be a string")
|
||||
@ -115,7 +154,7 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
return s.PushError("string_trim_right: first argument must be a string")
|
||||
}
|
||||
|
||||
if s.GetTop() >= 2 && s.IsString(2) {
|
||||
if s.GetTop() >= 2 && !s.IsNil(2) {
|
||||
cutset, err := s.SafeToString(2)
|
||||
if err != nil {
|
||||
return s.PushError("string_trim_right: second argument must be a string")
|
||||
@ -159,7 +198,7 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
if err != nil {
|
||||
return s.PushError("string_title: argument must be a string")
|
||||
}
|
||||
caser := cases.Title(language.English)
|
||||
caser := cases.Title(language.English, cases.NoLower)
|
||||
s.PushString(caser.String(str))
|
||||
return 1
|
||||
},
|
||||
@ -228,7 +267,16 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
if err != nil {
|
||||
return s.PushError("string_replace: third argument must be a string")
|
||||
}
|
||||
|
||||
if old == "" {
|
||||
return s.PushError("string_replace: cannot replace empty string")
|
||||
}
|
||||
|
||||
result := strings.ReplaceAll(str, old, new)
|
||||
if err := validateStringLength(result); err != nil {
|
||||
return s.PushError("string_replace: result %v", err)
|
||||
}
|
||||
|
||||
s.PushString(result)
|
||||
return 1
|
||||
},
|
||||
@ -250,9 +298,14 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
return s.PushError("string_replace_n: third argument must be a string")
|
||||
}
|
||||
n, err := s.SafeToNumber(4)
|
||||
if err != nil || n != float64(int(n)) {
|
||||
return s.PushError("string_replace_n: fourth argument must be an integer")
|
||||
if err != nil || n != float64(int(n)) || n < 0 {
|
||||
return s.PushError("string_replace_n: fourth argument must be a non-negative integer")
|
||||
}
|
||||
|
||||
if old == "" {
|
||||
return s.PushError("string_replace_n: cannot replace empty string")
|
||||
}
|
||||
|
||||
result := strings.Replace(str, old, new, int(n))
|
||||
s.PushString(result)
|
||||
return 1
|
||||
@ -270,8 +323,18 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
if err != nil {
|
||||
return s.PushError("string_index: second argument must be a string")
|
||||
}
|
||||
|
||||
if substr == "" {
|
||||
s.PushNumber(1) // Empty string found at position 1
|
||||
return 1
|
||||
}
|
||||
|
||||
index := strings.Index(str, substr)
|
||||
s.PushNumber(float64(index + 1)) // Lua is 1-indexed
|
||||
if index == -1 {
|
||||
s.PushNumber(0) // Not found
|
||||
} else {
|
||||
s.PushNumber(float64(index + 1)) // Convert to 1-indexed
|
||||
}
|
||||
return 1
|
||||
},
|
||||
|
||||
@ -287,11 +350,17 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
if err != nil {
|
||||
return s.PushError("string_last_index: second argument must be a string")
|
||||
}
|
||||
|
||||
if substr == "" {
|
||||
s.PushNumber(float64(utf8.RuneCountInString(str) + 1)) // Empty string at end
|
||||
return 1
|
||||
}
|
||||
|
||||
index := strings.LastIndex(str, substr)
|
||||
if index == -1 {
|
||||
s.PushNumber(0)
|
||||
s.PushNumber(0) // Not found
|
||||
} else {
|
||||
s.PushNumber(float64(index + 1)) // Lua is 1-indexed
|
||||
s.PushNumber(float64(index + 1)) // Convert to 1-indexed
|
||||
}
|
||||
return 1
|
||||
},
|
||||
@ -308,6 +377,13 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
if err != nil {
|
||||
return s.PushError("string_count: second argument must be a string")
|
||||
}
|
||||
|
||||
if substr == "" {
|
||||
// Empty string matches at every position including boundaries
|
||||
s.PushNumber(float64(utf8.RuneCountInString(str) + 1))
|
||||
return 1
|
||||
}
|
||||
|
||||
count := strings.Count(str, substr)
|
||||
s.PushNumber(float64(count))
|
||||
return 1
|
||||
@ -325,10 +401,19 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
if err != nil || count < 0 || count != float64(int(count)) {
|
||||
return s.PushError("string_repeat: second argument must be a non-negative integer")
|
||||
}
|
||||
if count > 1000000 {
|
||||
return s.PushError("string_repeat: count too large (max 1000000)")
|
||||
|
||||
n := int(count)
|
||||
if n == 0 {
|
||||
s.PushString("")
|
||||
return 1
|
||||
}
|
||||
result := strings.Repeat(str, int(count))
|
||||
|
||||
// Check for potential overflow
|
||||
if len(str) > 0 && n > maxRepeatCount/len(str) {
|
||||
return s.PushError("string_repeat: result would be too large")
|
||||
}
|
||||
|
||||
result := strings.Repeat(str, n)
|
||||
s.PushString(result)
|
||||
return 1
|
||||
},
|
||||
@ -341,6 +426,11 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
if err != nil {
|
||||
return s.PushError("string_reverse: argument must be a string")
|
||||
}
|
||||
|
||||
if !utf8.ValidString(str) {
|
||||
return s.PushError("string_reverse: invalid UTF-8 string")
|
||||
}
|
||||
|
||||
runes := []rune(str)
|
||||
for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 {
|
||||
runes[i], runes[j] = runes[j], runes[i]
|
||||
@ -381,7 +471,12 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
if err != nil {
|
||||
return s.PushError("string_lines: argument must be a string")
|
||||
}
|
||||
|
||||
// Handle different line endings
|
||||
str = strings.ReplaceAll(str, "\r\n", "\n")
|
||||
str = strings.ReplaceAll(str, "\r", "\n")
|
||||
lines := strings.Split(str, "\n")
|
||||
|
||||
if err := s.PushValue(lines); err != nil {
|
||||
return s.PushError("string_lines: failed to push result: %v", err)
|
||||
}
|
||||
@ -412,14 +507,15 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
return s.PushError("string_pad_left: first argument must be a string")
|
||||
}
|
||||
width, err := s.SafeToNumber(2)
|
||||
if err != nil || width != float64(int(width)) {
|
||||
return s.PushError("string_pad_left: second argument must be an integer")
|
||||
if err != nil || width != float64(int(width)) || width < 0 {
|
||||
return s.PushError("string_pad_left: second argument must be a non-negative integer")
|
||||
}
|
||||
|
||||
padChar := " "
|
||||
if s.GetTop() >= 3 {
|
||||
if p, err := s.SafeToString(3); err == nil && len(p) > 0 {
|
||||
padChar = string([]rune(p)[0])
|
||||
if s.GetTop() >= 3 && !s.IsNil(3) {
|
||||
if p, err := s.SafeToString(3); err == nil && utf8.RuneCountInString(p) > 0 {
|
||||
runes := []rune(p)
|
||||
padChar = string(runes[0])
|
||||
}
|
||||
}
|
||||
|
||||
@ -430,7 +526,12 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
return 1
|
||||
}
|
||||
|
||||
padding := strings.Repeat(padChar, targetLen-currentLen)
|
||||
padLen := targetLen - currentLen
|
||||
if padLen > maxRepeatCount {
|
||||
return s.PushError("string_pad_left: padding too large")
|
||||
}
|
||||
|
||||
padding := strings.Repeat(padChar, padLen)
|
||||
s.PushString(padding + str)
|
||||
return 1
|
||||
},
|
||||
@ -444,14 +545,15 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
return s.PushError("string_pad_right: first argument must be a string")
|
||||
}
|
||||
width, err := s.SafeToNumber(2)
|
||||
if err != nil || width != float64(int(width)) {
|
||||
return s.PushError("string_pad_right: second argument must be an integer")
|
||||
if err != nil || width != float64(int(width)) || width < 0 {
|
||||
return s.PushError("string_pad_right: second argument must be a non-negative integer")
|
||||
}
|
||||
|
||||
padChar := " "
|
||||
if s.GetTop() >= 3 {
|
||||
if p, err := s.SafeToString(3); err == nil && len(p) > 0 {
|
||||
padChar = string([]rune(p)[0])
|
||||
if s.GetTop() >= 3 && !s.IsNil(3) {
|
||||
if p, err := s.SafeToString(3); err == nil && utf8.RuneCountInString(p) > 0 {
|
||||
runes := []rune(p)
|
||||
padChar = string(runes[0])
|
||||
}
|
||||
}
|
||||
|
||||
@ -462,7 +564,12 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
return 1
|
||||
}
|
||||
|
||||
padding := strings.Repeat(padChar, targetLen-currentLen)
|
||||
padLen := targetLen - currentLen
|
||||
if padLen > maxRepeatCount {
|
||||
return s.PushError("string_pad_right: padding too large")
|
||||
}
|
||||
|
||||
padding := strings.Repeat(padChar, padLen)
|
||||
s.PushString(str + padding)
|
||||
return 1
|
||||
},
|
||||
@ -480,10 +587,15 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
return s.PushError("string_slice: second argument must be an integer")
|
||||
}
|
||||
|
||||
if !utf8.ValidString(str) {
|
||||
return s.PushError("string_slice: invalid UTF-8 string")
|
||||
}
|
||||
|
||||
runes := []rune(str)
|
||||
length := len(runes)
|
||||
startIdx := int(start) - 1 // Convert from 1-indexed to 0-indexed
|
||||
|
||||
// Handle negative start index
|
||||
if startIdx < 0 {
|
||||
startIdx = 0
|
||||
}
|
||||
@ -493,10 +605,14 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
}
|
||||
|
||||
endIdx := length
|
||||
if s.GetTop() >= 3 {
|
||||
if s.GetTop() >= 3 && !s.IsNil(3) {
|
||||
end, err := s.SafeToNumber(3)
|
||||
if err == nil && end == float64(int(end)) {
|
||||
endIdx = int(end)
|
||||
// Handle negative end index (from end of string)
|
||||
if endIdx < 0 {
|
||||
endIdx = length + endIdx + 1
|
||||
}
|
||||
if endIdx < 0 {
|
||||
endIdx = 0
|
||||
}
|
||||
@ -531,8 +647,7 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
re, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
s.PushBoolean(false)
|
||||
s.PushString(err.Error())
|
||||
return 2
|
||||
return 1
|
||||
}
|
||||
|
||||
s.PushBoolean(re.MatchString(str))
|
||||
@ -555,8 +670,7 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
re, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
s.PushNil()
|
||||
s.PushString(err.Error())
|
||||
return 2
|
||||
return 1
|
||||
}
|
||||
|
||||
match := re.FindString(str)
|
||||
@ -583,12 +697,18 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
|
||||
re, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
s.PushNil()
|
||||
s.PushString(err.Error())
|
||||
return 2
|
||||
// Return empty array for invalid patterns
|
||||
if err := s.PushValue([]string{}); err != nil {
|
||||
return s.PushError("regex_find_all: failed to push result: %v", err)
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
||||
matches := re.FindAllString(str, -1)
|
||||
if matches == nil {
|
||||
matches = []string{} // Return empty array instead of nil
|
||||
}
|
||||
|
||||
if err := s.PushValue(matches); err != nil {
|
||||
return s.PushError("regex_find_all: failed to push result: %v", err)
|
||||
}
|
||||
@ -614,9 +734,9 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
|
||||
re, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
s.PushNil()
|
||||
s.PushString(err.Error())
|
||||
return 2
|
||||
// Return original string for invalid patterns
|
||||
s.PushString(str)
|
||||
return 1
|
||||
}
|
||||
|
||||
result := re.ReplaceAllString(str, replacement)
|
||||
@ -633,14 +753,14 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
return s.PushError("string_to_number: argument must be a string")
|
||||
}
|
||||
|
||||
// Trim whitespace for more lenient parsing
|
||||
str = strings.TrimSpace(str)
|
||||
|
||||
// Try float first for more general parsing
|
||||
if num, err := strconv.ParseFloat(str, 64); err == nil {
|
||||
s.PushNumber(num)
|
||||
return 1
|
||||
}
|
||||
if num, err := strconv.ParseInt(str, 10, 64); err == nil {
|
||||
s.PushNumber(float64(num))
|
||||
return 1
|
||||
}
|
||||
|
||||
s.PushNil()
|
||||
return 1
|
||||
@ -655,9 +775,14 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
return s.PushError("string_is_numeric: argument must be a string")
|
||||
}
|
||||
|
||||
str = strings.TrimSpace(str)
|
||||
if str == "" {
|
||||
s.PushBoolean(false)
|
||||
return 1
|
||||
}
|
||||
|
||||
_, err1 := strconv.ParseFloat(str, 64)
|
||||
_, err2 := strconv.ParseInt(str, 10, 64)
|
||||
s.PushBoolean(err1 == nil || err2 == nil)
|
||||
s.PushBoolean(err1 == nil)
|
||||
return 1
|
||||
},
|
||||
|
||||
@ -719,7 +844,7 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
}
|
||||
|
||||
charset := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
|
||||
if s.GetTop() >= 2 {
|
||||
if s.GetTop() >= 2 && !s.IsNil(2) {
|
||||
if custom, err := s.SafeToString(2); err == nil && len(custom) > 0 {
|
||||
charset = custom
|
||||
}
|
||||
@ -730,18 +855,40 @@ func GetStringFunctions() map[string]luajit.GoFunction {
|
||||
s.PushString("")
|
||||
return 1
|
||||
}
|
||||
if n > 100000 {
|
||||
return s.PushError("random_string: length too large (max 100000)")
|
||||
if n > maxRandomLength {
|
||||
return s.PushError("random_string: length too large (max %d)", maxRandomLength)
|
||||
}
|
||||
|
||||
result := make([]byte, n)
|
||||
// Validate charset for UTF-8
|
||||
if !utf8.ValidString(charset) {
|
||||
return s.PushError("random_string: charset must be valid UTF-8")
|
||||
}
|
||||
|
||||
charsetRunes := []rune(charset)
|
||||
if len(charsetRunes) == 0 {
|
||||
return s.PushError("random_string: charset cannot be empty")
|
||||
}
|
||||
|
||||
result := make([]rune, n)
|
||||
rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||
for i := range result {
|
||||
result[i] = charset[rnd.Intn(len(charset))]
|
||||
result[i] = charsetRunes[rnd.Intn(len(charsetRunes))]
|
||||
}
|
||||
|
||||
s.PushString(string(result))
|
||||
return 1
|
||||
},
|
||||
|
||||
"string_is_valid_utf8": func(s *luajit.State) int {
|
||||
if err := s.CheckMinArgs(1); err != nil {
|
||||
return s.PushError("string_is_valid_utf8: %v", err)
|
||||
}
|
||||
str, err := s.SafeToString(1)
|
||||
if err != nil {
|
||||
return s.PushError("string_is_valid_utf8: argument must be a string")
|
||||
}
|
||||
s.PushBoolean(utf8.ValidString(str))
|
||||
return 1
|
||||
},
|
||||
}
|
||||
}
|
||||
|
@ -1,111 +1,217 @@
|
||||
-- modules/string.lua - Enhanced string manipulation utilities
|
||||
|
||||
local str = {}
|
||||
|
||||
-- Helper function to handle errors from Go functions
|
||||
-- Consume pcall's results: re-raise the error on failure, otherwise pass
-- every return value of the wrapped call through unchanged.
local function rethrow_or_return(ok, ...)
	if not ok then
		-- Level 2 is kept from the original safe_call. Because this helper
		-- is reached through a tail call, which frame that level names
		-- depends on how the runtime accounts for tail calls.
		-- NOTE(review): confirm the reported position under LuaJIT.
		error((...), 2)
	end
	return ...
end

-- Call func(...) under pcall and re-raise any error it throws.
-- Returns all values func returned, not just the first one, so wrapped
-- functions that return several values keep working.
local function safe_call(func, ...)
	return rethrow_or_return(pcall(func, ...))
end
|
||||
|
||||
-- Helper to validate arguments
|
||||
-- Raise an error unless s is a string.
--   func_name  public function name used as the message prefix
--   arg_num    argument position reported in the message (default 1)
-- The error is raised at level 3, i.e. attributed to the caller of the
-- function that invoked this validator.
local function validate_string(s, func_name, arg_num)
	local actual = type(s)
	if actual == "string" then
		return
	end
	local message = string.format("%s: argument %d must be a string, got %s",
		func_name, arg_num or 1, actual)
	error(message, 3)
end
|
||||
|
||||
-- Raise an error unless n is a number.
--   func_name  public function name used as the message prefix
--   arg_num    argument position reported in the message (default 1)
-- The error is raised at level 3, i.e. attributed to the caller of the
-- function that invoked this validator.
local function validate_number(n, func_name, arg_num)
	local actual = type(n)
	if actual == "number" then
		return
	end
	local message = string.format("%s: argument %d must be a number, got %s",
		func_name, arg_num or 1, actual)
	error(message, 3)
end
|
||||
|
||||
-- Raise an error unless t is a table.
--   func_name  public function name used as the message prefix
--   arg_num    argument position reported in the message (default 1)
-- The error is raised at level 3, i.e. attributed to the caller of the
-- function that invoked this validator.
local function validate_table(t, func_name, arg_num)
	local actual = type(t)
	if actual == "table" then
		return
	end
	local message = string.format("%s: argument %d must be a table, got %s",
		func_name, arg_num or 1, actual)
	error(message, 3)
end
|
||||
|
||||
-- ======================================================================
|
||||
-- BASIC STRING OPERATIONS
|
||||
-- ======================================================================
|
||||
|
||||
-- Split s on delimiter using moonshark.string_split.
-- Both arguments must be strings; anything else raises an error.
-- (The stale unvalidated `return` that preceded the checks is removed.)
function str.split(s, delimiter)
	validate_string(s, "str.split", 1)
	validate_string(delimiter, "str.split", 2)
	return safe_call(moonshark.string_split, s, delimiter)
end
|
||||
|
||||
-- Join the elements of arr with separator using moonshark.string_join.
-- arr must be a table and separator a string; anything else raises an error.
-- (The stale unvalidated `return` that preceded the checks is removed.)
function str.join(arr, separator)
	validate_table(arr, "str.join", 1)
	validate_string(separator, "str.join", 2)
	return safe_call(moonshark.string_join, arr, separator)
end
|
||||
|
||||
-- Trim s using moonshark.string_trim. s must be a string.
-- (The stale unvalidated `return` that preceded the check is removed.)
function str.trim(s)
	validate_string(s, "str.trim")
	return safe_call(moonshark.string_trim, s)
end
|
||||
|
||||
-- Trim the left side of s using moonshark.string_trim_left.
-- s must be a string; cutset is optional and must be a string when given.
-- (The stale unvalidated `return` that preceded the checks is removed.)
function str.trim_left(s, cutset)
	validate_string(s, "str.trim_left", 1)
	if cutset ~= nil then
		validate_string(cutset, "str.trim_left", 2)
	end
	return safe_call(moonshark.string_trim_left, s, cutset)
end
|
||||
|
||||
-- Trim the right side of s using moonshark.string_trim_right.
-- s must be a string; cutset is optional and must be a string when given.
-- (The stale unvalidated `return` that preceded the checks is removed.)
function str.trim_right(s, cutset)
	validate_string(s, "str.trim_right", 1)
	if cutset ~= nil then
		validate_string(cutset, "str.trim_right", 2)
	end
	return safe_call(moonshark.string_trim_right, s, cutset)
end
|
||||
|
||||
-- Upper-case s using moonshark.string_upper. s must be a string.
-- (The stale unvalidated `return` that preceded the check is removed.)
function str.upper(s)
	validate_string(s, "str.upper")
	return safe_call(moonshark.string_upper, s)
end
|
||||
|
||||
-- Lower-case s using moonshark.string_lower. s must be a string.
-- (The stale unvalidated `return` that preceded the check is removed.)
function str.lower(s)
	validate_string(s, "str.lower")
	return safe_call(moonshark.string_lower, s)
end
|
||||
|
||||
-- Title-case s using moonshark.string_title. s must be a string.
-- (The stale unvalidated `return` that preceded the check is removed.)
function str.title(s)
	validate_string(s, "str.title")
	return safe_call(moonshark.string_title, s)
end
|
||||
|
||||
-- Test whether s contains substr using moonshark.string_contains.
-- Both arguments must be strings.
-- (The stale unvalidated `return` that preceded the checks is removed.)
function str.contains(s, substr)
	validate_string(s, "str.contains", 1)
	validate_string(substr, "str.contains", 2)
	return safe_call(moonshark.string_contains, s, substr)
end
|
||||
|
||||
-- Test whether s starts with prefix using moonshark.string_starts_with.
-- Both arguments must be strings.
-- (The stale unvalidated `return` that preceded the checks is removed.)
function str.starts_with(s, prefix)
	validate_string(s, "str.starts_with", 1)
	validate_string(prefix, "str.starts_with", 2)
	return safe_call(moonshark.string_starts_with, s, prefix)
end
|
||||
|
||||
-- Test whether s ends with suffix using moonshark.string_ends_with.
-- Both arguments must be strings.
-- (The stale unvalidated `return` that preceded the checks is removed.)
function str.ends_with(s, suffix)
	validate_string(s, "str.ends_with", 1)
	validate_string(suffix, "str.ends_with", 2)
	return safe_call(moonshark.string_ends_with, s, suffix)
end
|
||||
|
||||
-- Replace occurrences of old with new in s using moonshark.string_replace.
-- All three arguments must be strings.
-- (The stale unvalidated `return` that preceded the checks is removed.)
function str.replace(s, old, new)
	validate_string(s, "str.replace", 1)
	validate_string(old, "str.replace", 2)
	validate_string(new, "str.replace", 3)
	return safe_call(moonshark.string_replace, s, old, new)
end
|
||||
|
||||
-- Replace up to n occurrences of old with new in s using
-- moonshark.string_replace_n.
-- s, old and new must be strings; n must be a non-negative integer.
-- (The stale unvalidated `return` that preceded the checks is removed.)
function str.replace_n(s, old, new, n)
	validate_string(s, "str.replace_n", 1)
	validate_string(old, "str.replace_n", 2)
	validate_string(new, "str.replace_n", 3)
	validate_number(n, "str.replace_n", 4)
	if n < 0 or n ~= math.floor(n) then
		error("str.replace_n: count must be a non-negative integer", 2)
	end
	return safe_call(moonshark.string_replace_n, s, old, new, n)
end
|
||||
|
||||
-- Find substr in s using moonshark.string_index.
-- Both arguments must be strings. Returns the index when it is greater than
-- zero and nil otherwise.
-- (The stale duplicate call that ran before validation is removed.)
function str.index(s, substr)
	validate_string(s, "str.index", 1)
	validate_string(substr, "str.index", 2)
	local idx = safe_call(moonshark.string_index, s, substr)
	return idx > 0 and idx or nil
end
|
||||
|
||||
-- Find the last occurrence of substr in s using moonshark.string_last_index.
-- Both arguments must be strings. Returns the index when it is greater than
-- zero and nil otherwise.
-- (The stale duplicate call that ran before validation is removed.)
function str.last_index(s, substr)
	validate_string(s, "str.last_index", 1)
	validate_string(substr, "str.last_index", 2)
	local idx = safe_call(moonshark.string_last_index, s, substr)
	return idx > 0 and idx or nil
end
|
||||
|
||||
-- Count occurrences of substr in s using moonshark.string_count.
-- Both arguments must be strings.
-- (The stale unvalidated `return` that preceded the checks is removed.)
function str.count(s, substr)
	validate_string(s, "str.count", 1)
	validate_string(substr, "str.count", 2)
	return safe_call(moonshark.string_count, s, substr)
end
|
||||
|
||||
-- Repeat s n times using moonshark.string_repeat.
-- s must be a string and n a non-negative integer.
-- (The stale unvalidated `return` that preceded the checks is removed.)
function str.repeat_(s, n)
	validate_string(s, "str.repeat_", 1)
	validate_number(n, "str.repeat_", 2)
	if n < 0 or n ~= math.floor(n) then
		error("str.repeat_: count must be a non-negative integer", 2)
	end
	return safe_call(moonshark.string_repeat, s, n)
end
|
||||
|
||||
-- Reverse s using moonshark.string_reverse. s must be a string.
-- (The stale unvalidated `return` that preceded the check is removed.)
function str.reverse(s)
	validate_string(s, "str.reverse")
	return safe_call(moonshark.string_reverse, s)
end
|
||||
|
||||
-- Length of s as reported by moonshark.string_length. s must be a string.
-- (The stale unvalidated `return` that preceded the check is removed.)
function str.length(s)
	validate_string(s, "str.length")
	return safe_call(moonshark.string_length, s)
end
|
||||
|
||||
-- Byte length of s as reported by moonshark.string_byte_length.
-- s must be a string.
-- (The stale unvalidated `return` that preceded the check is removed.)
function str.byte_length(s)
	validate_string(s, "str.byte_length")
	return safe_call(moonshark.string_byte_length, s)
end
|
||||
|
||||
-- Split s into lines using moonshark.string_lines. s must be a string.
-- (The stale unvalidated `return` that preceded the check is removed.)
function str.lines(s)
	validate_string(s, "str.lines")
	return safe_call(moonshark.string_lines, s)
end
|
||||
|
||||
-- Split s into words via moonshark.string_words.
-- s is checked with validate_string; the native result is returned as-is
-- (presumably a table of word strings - confirm in the native code).
function str.words(s)
	validate_string(s, "str.words")
	return safe_call(moonshark.string_words, s)
end
|
||||
|
||||
-- Pad s on the left up to width via moonshark.string_pad_left.
--   s        string to pad
--   width    non-negative whole number
--   pad_char optional; when given it must be a string (nil is forwarded to
--            the native function, which picks its own default)
-- Raises at the caller's level when width is negative or fractional.
function str.pad_left(s, width, pad_char)
	validate_string(s, "str.pad_left", 1)
	validate_number(width, "str.pad_left", 2)
	if width < 0 or width ~= math.floor(width) then
		error("str.pad_left: width must be a non-negative integer", 2)
	end
	if pad_char ~= nil then
		validate_string(pad_char, "str.pad_left", 3)
	end
	return safe_call(moonshark.string_pad_left, s, width, pad_char)
end
|
||||
|
||||
-- Pad s on the right up to width via moonshark.string_pad_right.
--   s        string to pad
--   width    non-negative whole number
--   pad_char optional; when given it must be a string (nil is forwarded to
--            the native function, which picks its own default)
-- Raises at the caller's level when width is negative or fractional.
function str.pad_right(s, width, pad_char)
	validate_string(s, "str.pad_right", 1)
	validate_number(width, "str.pad_right", 2)
	if width < 0 or width ~= math.floor(width) then
		error("str.pad_right: width must be a non-negative integer", 2)
	end
	if pad_char ~= nil then
		validate_string(pad_char, "str.pad_right", 3)
	end
	return safe_call(moonshark.string_pad_right, s, width, pad_char)
end
|
||||
|
||||
-- Extract part of s via moonshark.string_slice.
--   s       source string
--   start   whole number (required)
--   end_pos optional whole number; nil is forwarded to the native function
-- Raises at the caller's level when start or end_pos is fractional.
-- NOTE(review): index base and negative-index handling are decided by the
-- native function and are not visible here.
function str.slice(s, start, end_pos)
	validate_string(s, "str.slice", 1)
	validate_number(start, "str.slice", 2)
	if start ~= math.floor(start) then
		error("str.slice: start must be an integer", 2)
	end
	if end_pos ~= nil then
		validate_number(end_pos, "str.slice", 3)
		if end_pos ~= math.floor(end_pos) then
			error("str.slice: end position must be an integer", 2)
		end
	end
	return safe_call(moonshark.string_slice, s, start, end_pos)
end
|
||||
|
||||
-- ======================================================================
|
||||
@ -113,19 +219,28 @@ end
|
||||
-- ======================================================================
|
||||
|
||||
-- Test/match s against a regular expression via moonshark.regex_match.
-- Note the argument order: the pattern comes first, the subject second.
-- Both arguments are checked with validate_string; the native result is
-- returned unchanged.
function str.match(pattern, s)
	validate_string(pattern, "str.match", 1)
	validate_string(s, "str.match", 2)
	return safe_call(moonshark.regex_match, pattern, s)
end
|
||||
|
||||
-- Find a regular-expression match in s via moonshark.regex_find.
-- Note the argument order: the pattern comes first, the subject second.
-- Both arguments are checked with validate_string; the native result is
-- returned unchanged.
function str.find(pattern, s)
	validate_string(pattern, "str.find", 1)
	validate_string(s, "str.find", 2)
	return safe_call(moonshark.regex_find, pattern, s)
end
|
||||
|
||||
-- Find all regular-expression matches in s via moonshark.regex_find_all.
-- Note the argument order: the pattern comes first, the subject second.
-- Both arguments are checked with validate_string; the native result is
-- returned unchanged.
function str.find_all(pattern, s)
	validate_string(pattern, "str.find_all", 1)
	validate_string(s, "str.find_all", 2)
	return safe_call(moonshark.regex_find_all, pattern, s)
end
|
||||
|
||||
-- Regular-expression replace via moonshark.regex_replace.
-- Argument order is (pattern, subject, replacement); all three must be
-- strings (checked with validate_string). The native result is returned
-- unchanged.
function str.gsub(pattern, s, replacement)
	validate_string(pattern, "str.gsub", 1)
	validate_string(s, "str.gsub", 2)
	validate_string(replacement, "str.gsub", 3)
	return safe_call(moonshark.regex_replace, pattern, s, replacement)
end
|
||||
|
||||
-- ======================================================================
|
||||
@ -133,19 +248,23 @@ end
|
||||
-- ======================================================================
|
||||
|
||||
-- Convert s to a number via moonshark.string_to_number.
-- s is checked with validate_string; the native result is returned as-is
-- (what is returned for unparsable input is decided by the native code).
function str.to_number(s)
	validate_string(s, "str.to_number")
	return safe_call(moonshark.string_to_number, s)
end
|
||||
|
||||
-- Report whether s is numeric according to moonshark.string_is_numeric.
-- s is checked with validate_string before the native call.
function str.is_numeric(s)
	validate_string(s, "str.is_numeric")
	return safe_call(moonshark.string_is_numeric, s)
end
|
||||
|
||||
-- Report whether s is alphabetic according to moonshark.string_is_alpha.
-- s is checked with validate_string before the native call.
function str.is_alpha(s)
	validate_string(s, "str.is_alpha")
	return safe_call(moonshark.string_is_alpha, s)
end
|
||||
|
||||
-- Report whether s is alphanumeric according to
-- moonshark.string_is_alphanumeric.
-- s is checked with validate_string before the native call.
function str.is_alphanumeric(s)
	validate_string(s, "str.is_alphanumeric")
	return safe_call(moonshark.string_is_alphanumeric, s)
end
|
||||
|
||||
function str.is_empty(s)
|
||||
@ -156,60 +275,106 @@ function str.is_blank(s)
|
||||
return str.is_empty(s) or str.trim(s) == ""
|
||||
end
|
||||
|
||||
-- Check s for UTF-8 validity using moonshark.string_is_valid_utf8.
-- s must pass validate_string first.
function str.is_utf8(s)
	validate_string(s, "str.is_utf8")
	local native_check = moonshark.string_is_valid_utf8
	return safe_call(native_check, s)
end
|
||||
|
||||
-- Check s for UTF-8 validity using moonshark.string_is_valid_utf8.
-- s must pass validate_string first.
function str.is_valid_utf8(s)
	validate_string(s, "str.is_valid_utf8")
	local native_check = moonshark.string_is_valid_utf8
	return safe_call(native_check, s)
end
|
||||
|
||||
-- ======================================================================
|
||||
-- ADVANCED STRING OPERATIONS
|
||||
-- ======================================================================
|
||||
|
||||
-- Capitalize the first letter of each word (pure Lua).
-- A word is a letter followed by any run of alphanumerics, underscores or
-- apostrophes; its first letter is upper-cased and the remainder lower-cased
-- using the byte-oriented string.upper / string.lower.
-- Returns only the transformed string: gsub's match count is deliberately
-- dropped so the result is safe to use as a trailing call argument.
function str.capitalize(s)
	validate_string(s, "str.capitalize")
	local result = s:gsub("(%a)([%w_']*)", function(first, rest)
		return first:upper() .. rest:lower()
	end)
	return result
end
|
||||
|
||||
-- Convert a whitespace-separated string to camelCase (pure Lua).
-- Every word is lower-cased; each word after the first has its leading
-- lowercase letter upper-cased; the words are then joined with no separator.
-- When s contains no non-whitespace characters, s is returned unchanged.
function str.camel_case(s)
	validate_string(s, "str.camel_case")
	local words = {}
	for word in s:gmatch("%S+") do
		words[#words + 1] = word:lower()
	end
	if #words == 0 then return s end

	for i = 2, #words do
		-- Parentheses keep only gsub's first result (the string).
		words[i] = (words[i]:gsub("^%l", string.upper))
	end
	-- A single concat avoids rebuilding the result string for every word.
	return table.concat(words)
end
|
||||
|
||||
-- Convert a whitespace-separated string to PascalCase (pure Lua).
-- Every word is lower-cased, its leading lowercase letter is upper-cased,
-- and the words are joined with no separator. A string with no
-- non-whitespace characters yields "".
function str.pascal_case(s)
	validate_string(s, "str.pascal_case")
	local words = {}
	for word in s:gmatch("%S+") do
		-- Parentheses keep only gsub's first result (the string).
		words[#words + 1] = (word:lower():gsub("^%l", string.upper))
	end
	-- A single concat avoids rebuilding the result string for every word.
	return table.concat(words)
end
|
||||
|
||||
-- Convert a whitespace-separated string to snake_case (pure Lua).
-- Words are the maximal runs of non-whitespace characters; each is
-- lower-cased and the words are joined with "_".
function str.snake_case(s)
	validate_string(s, "str.snake_case")
	local words = {}
	for word in s:gmatch("%S+") do
		words[#words + 1] = word:lower()
	end
	return table.concat(words, "_")
end
|
||||
|
||||
-- Convert a whitespace-separated string to kebab-case (pure Lua).
-- Words are the maximal runs of non-whitespace characters; each is
-- lower-cased and the words are joined with "-".
function str.kebab_case(s)
	validate_string(s, "str.kebab_case")
	local words = {}
	for word in s:gmatch("%S+") do
		words[#words + 1] = word:lower()
	end
	return table.concat(words, "-")
end
|
||||
|
||||
-- Convert a whitespace-separated string to SCREAMING_SNAKE_CASE (pure Lua).
-- Words are the maximal runs of non-whitespace characters; each is
-- upper-cased and the words are joined with "_".
function str.screaming_snake_case(s)
	validate_string(s, "str.screaming_snake_case")
	local words = {}
	for word in s:gmatch("%S+") do
		words[#words + 1] = word:upper()
	end
	return table.concat(words, "_")
end
|
||||
|
||||
-- Center text within given width
|
||||
-- Center text within given width (Pure Lua - faster)
|
||||
function str.center(s, width, fill_char)
|
||||
validate_string(s, "str.center", 1)
|
||||
validate_number(width, "str.center", 2)
|
||||
if width < 0 or width ~= math.floor(width) then
|
||||
error("str.center: width must be a non-negative integer", 2)
|
||||
end
|
||||
|
||||
fill_char = fill_char or " "
|
||||
local len = str.length(s)
|
||||
if fill_char ~= nil then
|
||||
validate_string(fill_char, "str.center", 3)
|
||||
if #fill_char == 0 then
|
||||
error("str.center: fill character cannot be empty", 2)
|
||||
end
|
||||
fill_char = fill_char:sub(1,1) -- Use only first character
|
||||
end
|
||||
|
||||
local len = #s
|
||||
if len >= width then return s end
|
||||
|
||||
local pad_total = width - len
|
||||
@ -219,25 +384,66 @@ function str.center(s, width, fill_char)
|
||||
return string.rep(fill_char, pad_left) .. s .. string.rep(fill_char, pad_right)
|
||||
end
|
||||
|
||||
-- Truncate s to at most max_length, appending suffix when text is cut
-- (pure Lua).
--   s          string to shorten
--   max_length non-negative whole number
--   suffix     optional string, defaults to "..."
-- Lengths are measured with the # operator, i.e. in bytes, so a multi-byte
-- UTF-8 character counts once per byte and may be split at the cut point.
-- When max_length does not leave room for any text before the suffix, the
-- suffix itself is cut down to max_length. Trailing whitespace before the
-- suffix is removed.
function str.truncate(s, max_length, suffix)
	validate_string(s, "str.truncate", 1)
	validate_number(max_length, "str.truncate", 2)
	if max_length < 0 or max_length ~= math.floor(max_length) then
		error("str.truncate: max_length must be a non-negative integer", 2)
	end

	suffix = suffix or "..."
	validate_string(suffix, "str.truncate", 3)

	if #s <= max_length then
		return s
	end

	local suffix_len = #suffix
	if max_length <= suffix_len then
		return suffix:sub(1, max_length)
	end

	local main_part = s:sub(1, max_length - suffix_len)
	main_part = main_part:gsub("%s+$", "") -- trim right
	return main_part .. suffix
end
|
||||
|
||||
-- Wrap text to specified width
|
||||
-- Wrap text to specified width (Pure Lua - much faster)
|
||||
function str.wrap(s, width)
|
||||
local words = str.words(s)
|
||||
validate_string(s, "str.wrap", 1)
|
||||
validate_number(width, "str.wrap", 2)
|
||||
if width <= 0 or width ~= math.floor(width) then
|
||||
error("str.wrap: width must be a positive integer", 2)
|
||||
end
|
||||
|
||||
local words = {}
|
||||
for word in s:gmatch("%S+") do
|
||||
table.insert(words, word)
|
||||
end
|
||||
|
||||
local lines = {}
|
||||
local current_line = ""
|
||||
|
||||
for _, word in ipairs(words) do
|
||||
if str.length(current_line) + str.length(word) + 1 <= width then
|
||||
local word_len = #word
|
||||
local current_len = #current_line
|
||||
|
||||
-- Handle words longer than width
|
||||
if word_len > width then
|
||||
if current_line ~= "" then
|
||||
table.insert(lines, current_line)
|
||||
current_line = ""
|
||||
end
|
||||
-- Break long word
|
||||
while #word > width do
|
||||
table.insert(lines, word:sub(1, width))
|
||||
word = word:sub(width + 1)
|
||||
end
|
||||
if #word > 0 then
|
||||
current_line = word
|
||||
end
|
||||
elseif current_len + word_len + 1 <= width then
|
||||
if current_line == "" then
|
||||
current_line = word
|
||||
else
|
||||
@ -258,19 +464,26 @@ function str.wrap(s, width)
|
||||
return lines
|
||||
end
|
||||
|
||||
-- Remove common leading whitespace
|
||||
-- Remove common leading whitespace (Pure Lua - faster)
|
||||
function str.dedent(s)
|
||||
local lines = str.split(s, "\n")
|
||||
validate_string(s, "str.dedent")
|
||||
local lines = {}
|
||||
for line in (s.."\n"):gmatch("([^\n]*)\n") do
|
||||
table.insert(lines, line)
|
||||
end
|
||||
if #lines <= 1 then return s end
|
||||
|
||||
-- Find minimum indentation (excluding empty lines)
|
||||
local min_indent = math.huge
|
||||
for _, line in ipairs(lines) do
|
||||
if str.trim(line) ~= "" then
|
||||
local trimmed = line:gsub("%s", "")
|
||||
if trimmed ~= "" then
|
||||
local indent = line:match("^%s*")
|
||||
if indent then
|
||||
min_indent = math.min(min_indent, #indent)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if min_indent == math.huge or min_indent == 0 then
|
||||
return s
|
||||
@ -278,26 +491,30 @@ function str.dedent(s)
|
||||
|
||||
-- Remove common indentation
|
||||
for i, line in ipairs(lines) do
|
||||
if str.trim(line) ~= "" then
|
||||
local trimmed = line:gsub("%s", "")
|
||||
if trimmed ~= "" then
|
||||
lines[i] = line:sub(min_indent + 1)
|
||||
end
|
||||
end
|
||||
|
||||
return str.join(lines, "\n")
|
||||
return table.concat(lines, "\n")
|
||||
end
|
||||
|
||||
-- Escape regular-expression metacharacters in s.
-- Each of . + * ? [ ] ^ $ ( ) { } | \ is prefixed with a backslash.
-- Returns only the escaped string: gsub's match count is dropped so the
-- result can be passed straight on as a trailing argument.
function str.escape_regex(s)
	validate_string(s, "str.escape_regex")
	local escaped = s:gsub("([%.%+%*%?%[%]%^%$%(%)%{%}%|%\\])", "\\%1")
	return escaped
end
|
||||
|
||||
-- Wrap s in single quotes for use as one shell word.
-- Every embedded single quote is rewritten as '"'"' (close the quoted
-- section, emit a double-quoted quote, reopen) so it survives quoting.
function str.shell_quote(s)
	validate_string(s, "str.shell_quote")
	local escaped = s:gsub("'", "'\"'\"'")
	return "'" .. escaped .. "'"
end
|
||||
|
||||
-- URL encode string
|
||||
function str.url_encode(s)
|
||||
validate_string(s, "str.url_encode")
|
||||
return s:gsub("([^%w%-%.%_%~])", function(c)
|
||||
return string.format("%%%02X", string.byte(c))
|
||||
end)
|
||||
@ -305,9 +522,22 @@ end
|
||||
|
||||
-- URL-decode s (form-style: '+' means space, %XX is a hex-encoded byte).
-- '+' is translated before percent-decoding, so an escaped plus sign
-- ("%2B") survives as a literal '+' instead of being turned into a space.
-- Percent signs not followed by two hex digits are left untouched.
-- Raises at the caller's level when the decoded bytes are not valid UTF-8
-- according to str.is_valid_utf8.
function str.url_decode(s)
	validate_string(s, "str.url_decode")
	local result = s:gsub("%+", " "):gsub("%%(%x%x)", function(hex)
		-- The pattern guarantees two hex digits, so tonumber cannot fail.
		return string.char(tonumber(hex, 16))
	end)

	-- Validate result is UTF-8
	if not str.is_valid_utf8(result) then
		error("str.url_decode: result is not valid UTF-8", 2)
	end

	return result
end
|
||||
|
||||
-- ======================================================================
|
||||
@ -316,12 +546,27 @@ end
|
||||
|
||||
-- Compare a and b ignoring case.
-- Both arguments must pass validate_string; each is folded with str.lower
-- and the folded values are compared for equality.
function str.iequals(a, b)
	validate_string(a, "str.iequals", 1)
	validate_string(b, "str.iequals", 2)
	local folded_a = str.lower(a)
	local folded_b = str.lower(b)
	return folded_a == folded_b
end
|
||||
|
||||
-- Levenshtein distance
|
||||
-- Levenshtein distance (Pure Lua - much faster)
|
||||
function str.distance(a, b)
|
||||
local len_a, len_b = str.length(a), str.length(b)
|
||||
validate_string(a, "str.distance", 1)
|
||||
validate_string(b, "str.distance", 2)
|
||||
|
||||
local len_a, len_b = #a, #b
|
||||
|
||||
-- Handle empty strings
|
||||
if len_a == 0 then return len_b end
|
||||
if len_b == 0 then return len_a end
|
||||
|
||||
-- Limit computation for very long strings
|
||||
if len_a > 1000 or len_b > 1000 then
|
||||
error("str.distance: strings too long for distance calculation", 2)
|
||||
end
|
||||
|
||||
local matrix = {}
|
||||
|
||||
-- Initialize matrix
|
||||
@ -335,7 +580,7 @@ function str.distance(a, b)
|
||||
-- Fill matrix
|
||||
for i = 1, len_a do
|
||||
for j = 1, len_b do
|
||||
local cost = (str.slice(a, i, i) == str.slice(b, j, j)) and 0 or 1
|
||||
local cost = (a:sub(i,i) == b:sub(j,j)) and 0 or 1
|
||||
matrix[i][j] = math.min(
|
||||
matrix[i-1][j] + 1, -- deletion
|
||||
matrix[i][j-1] + 1, -- insertion
|
||||
@ -347,28 +592,49 @@ function str.distance(a, b)
|
||||
return matrix[len_a][len_b]
|
||||
end
|
||||
|
||||
-- Similarity of a and b computed as 1 - distance / max_len (pure Lua).
-- distance comes from str.distance and max_len is the larger byte length
-- (# operator) of the two strings. Two empty strings give 1.0.
-- Any error raised by str.distance propagates to the caller.
function str.similarity(a, b)
	validate_string(a, "str.similarity", 1)
	validate_string(b, "str.similarity", 2)

	local max_len = math.max(#a, #b)
	if max_len == 0 then return 1.0 end

	local dist = str.distance(a, b)
	return 1.0 - (dist / max_len)
end
|
||||
|
||||
-- ======================================================================
|
||||
-- TEMPLATE FUNCTIONS
|
||||
-- ======================================================================
|
||||
|
||||
-- Simple ${name} template substitution (pure Lua).
--   template string containing ${name} placeholders, where name consists of
--            alphanumerics and underscores
--   vars     optional table of values; nil behaves like an empty table
-- A placeholder whose value is nil becomes ""; every other value, including
-- false, is rendered with tostring.
-- Returns only the rendered string: gsub's match count is dropped.
function str.template(template, vars)
	validate_string(template, "str.template", 1)
	if vars ~= nil then
		validate_table(vars, "str.template", 2)
	else
		vars = {}
	end

	local rendered = template:gsub("%${([%w_]+)}", function(var)
		local value = vars[var]
		if value == nil then
			return ""
		end
		return tostring(value)
	end)
	return rendered
end
|
||||
|
||||
-- Advanced template with functions
|
||||
-- Advanced template with functions (Pure Lua - faster)
|
||||
function str.template_advanced(template, context)
|
||||
context = context or {}
|
||||
validate_string(template, "str.template_advanced", 1)
|
||||
if context ~= nil then
|
||||
validate_table(context, "str.template_advanced", 2)
|
||||
else
|
||||
context = {}
|
||||
end
|
||||
|
||||
return template:gsub("%${([^}]+)}", function(expr)
|
||||
-- Simple variable substitution
|
||||
@ -377,10 +643,14 @@ function str.template_advanced(template, context)
|
||||
end
|
||||
|
||||
-- Handle simple expressions like ${var.prop}
|
||||
local parts = str.split(expr, ".")
|
||||
local parts = {}
|
||||
for part in expr:gmatch("[^%.]+") do
|
||||
table.insert(parts, part)
|
||||
end
|
||||
|
||||
local value = context
|
||||
for _, part in ipairs(parts) do
|
||||
if type(value) == "table" and value[part] then
|
||||
if type(value) == "table" and value[part] ~= nil then
|
||||
value = value[part]
|
||||
else
|
||||
return ""
|
||||
@ -397,21 +667,25 @@ end
|
||||
|
||||
-- True when s consists solely of whitespace characters (%s).
-- The empty string also qualifies, because the pattern allows zero
-- repetitions.
function str.is_whitespace(s)
	validate_string(s, "str.is_whitespace")
	local first = s:find("^%s*$")
	return first ~= nil
end
|
||||
|
||||
-- Remove every whitespace character (%s) from s.
-- Returns only the stripped string: gsub's match count is dropped so the
-- result can be passed straight on as a trailing argument.
function str.strip_whitespace(s)
	validate_string(s, "str.strip_whitespace")
	local stripped = s:gsub("%s", "")
	return stripped
end
|
||||
|
||||
-- Normalize whitespace: collapse every run of whitespace to one space, then
-- trim the ends with str.trim.
-- The collapsed text is stored in a local first so that gsub's match count
-- is not forwarded to str.trim as an unintended second argument.
function str.normalize_whitespace(s)
	validate_string(s, "str.normalize_whitespace")
	local collapsed = s:gsub("%s+", " ")
	return str.trim(collapsed)
end
|
||||
|
||||
-- Extract numbers from string
|
||||
function str.extract_numbers(s)
|
||||
validate_string(s, "str.extract_numbers")
|
||||
local numbers = {}
|
||||
for num in s:gmatch("%-?%d+%.?%d*") do
|
||||
local n = tonumber(num)
|
||||
@ -422,13 +696,21 @@ end
|
||||
|
||||
-- Remove diacritics/accents
|
||||
function str.remove_accents(s)
|
||||
validate_string(s, "str.remove_accents")
|
||||
local accents = {
|
||||
["à"] = "a", ["á"] = "a", ["â"] = "a", ["ã"] = "a", ["ä"] = "a",
|
||||
["à"] = "a", ["á"] = "a", ["â"] = "a", ["ã"] = "a", ["ä"] = "a", ["å"] = "a",
|
||||
["è"] = "e", ["é"] = "e", ["ê"] = "e", ["ë"] = "e",
|
||||
["ì"] = "i", ["í"] = "i", ["î"] = "i", ["ï"] = "i",
|
||||
["ò"] = "o", ["ó"] = "o", ["ô"] = "o", ["õ"] = "o", ["ö"] = "o",
|
||||
["ù"] = "u", ["ú"] = "u", ["û"] = "u", ["ü"] = "u",
|
||||
["ñ"] = "n", ["ç"] = "c"
|
||||
["ñ"] = "n", ["ç"] = "c", ["ý"] = "y", ["ÿ"] = "y",
|
||||
-- Uppercase versions
|
||||
["À"] = "A", ["Á"] = "A", ["Â"] = "A", ["Ã"] = "A", ["Ä"] = "A", ["Å"] = "A",
|
||||
["È"] = "E", ["É"] = "E", ["Ê"] = "E", ["Ë"] = "E",
|
||||
["Ì"] = "I", ["Í"] = "I", ["Î"] = "I", ["Ï"] = "I",
|
||||
["Ò"] = "O", ["Ó"] = "O", ["Ô"] = "O", ["Õ"] = "O", ["Ö"] = "O",
|
||||
["Ù"] = "U", ["Ú"] = "U", ["Û"] = "U", ["Ü"] = "U",
|
||||
["Ñ"] = "N", ["Ç"] = "C", ["Ý"] = "Y", ["Ÿ"] = "Y"
|
||||
}
|
||||
|
||||
local result = s
|
||||
@ -440,25 +722,48 @@ end
|
||||
|
||||
-- Generate a random string via moonshark.random_string.
--   length  non-negative whole number
--   charset optional; when given it must be a string (nil is forwarded and
--           the native function picks its own default)
-- Raises at the caller's level when length is negative or fractional.
function str.random(length, charset)
	validate_number(length, "str.random", 1)
	if length < 0 or length ~= math.floor(length) then
		error("str.random: length must be a non-negative integer", 2)
	end
	if charset ~= nil then
		validate_string(charset, "str.random", 2)
	end
	return safe_call(moonshark.random_string, length, charset)
end
|
||||
|
||||
-- Check whether s is valid UTF-8 (pure Lua).
-- Non-string input yields false rather than an error.
-- When the utf8 library is available, utf8.len does the validation (it
-- returns nil for malformed input). Otherwise a structural byte scan is
-- used: every lead byte must be followed by the right number of
-- continuation bytes (0x80-0xBF). The scan does not reject overlong forms
-- or surrogate code points.
function str.is_utf8(s)
	if type(s) ~= "string" then
		return false
	end

	if utf8 and utf8.len then
		return utf8.len(s) ~= nil
	end

	local i, n = 1, #s
	while i <= n do
		local lead = s:byte(i)
		local extra
		if lead < 0x80 then
			extra = 0
		elseif lead >= 0xC2 and lead <= 0xDF then
			extra = 1
		elseif lead >= 0xE0 and lead <= 0xEF then
			extra = 2
		elseif lead >= 0xF0 and lead <= 0xF4 then
			extra = 3
		else
			return false
		end
		for k = 1, extra do
			local cont = s:byte(i + k)
			if not cont or cont < 0x80 or cont > 0xBF then
				return false
			end
		end
		i = i + extra + 1
	end
	return true
end
|
||||
|
||||
-- Generate a URL slug from s (pure Lua).
-- Steps: replace the accented Latin letters listed below with their plain
-- counterpart, lower-case, drop everything that is neither alphanumeric nor
-- whitespace, turn whitespace runs into single hyphens, and strip leading
-- and trailing hyphens.
function str.slug(s)
	validate_string(s, "str.slug")

	-- Accented letter -> plain letter; case is normalized by lower() below.
	local accents = {
		["à"] = "a", ["á"] = "a", ["â"] = "a", ["ã"] = "a", ["ä"] = "a", ["å"] = "a",
		["è"] = "e", ["é"] = "e", ["ê"] = "e", ["ë"] = "e",
		["ì"] = "i", ["í"] = "i", ["î"] = "i", ["ï"] = "i",
		["ò"] = "o", ["ó"] = "o", ["ô"] = "o", ["õ"] = "o", ["ö"] = "o",
		["ù"] = "u", ["ú"] = "u", ["û"] = "u", ["ü"] = "u",
		["ñ"] = "n", ["ç"] = "c", ["ý"] = "y", ["ÿ"] = "y",
		-- Uppercase versions
		["À"] = "A", ["Á"] = "A", ["Â"] = "A", ["Ã"] = "A", ["Ä"] = "A", ["Å"] = "A",
		["È"] = "E", ["É"] = "E", ["Ê"] = "E", ["Ë"] = "E",
		["Ì"] = "I", ["Í"] = "I", ["Î"] = "I", ["Ï"] = "I",
		["Ò"] = "O", ["Ó"] = "O", ["Ô"] = "O", ["Õ"] = "O", ["Ö"] = "O",
		["Ù"] = "U", ["Ú"] = "U", ["Û"] = "U", ["Ü"] = "U",
		["Ñ"] = "N", ["Ç"] = "C", ["Ý"] = "Y", ["Ÿ"] = "Y"
	}

	-- One pass instead of one gsub per table entry: in UTF-8 every key above
	-- is a two-byte sequence with lead byte 0xC3 (or 0xC5 for "Ÿ"). gsub
	-- keeps the original text for sequences that are not in the table.
	-- NOTE(review): assumes this source file is saved as UTF-8 - confirm.
	local result = s:gsub("[\195\197][\128-\191]", accents)
	result = result:lower()

	-- Keep only alphanumeric characters and spaces, then convert spaces to hyphens
	result = result:gsub("[^%w%s]", "")
	result = result:gsub("%s+", "-")
	result = result:gsub("^%-+", ""):gsub("%-+$", "")

	return result
end
|
||||
|
||||
return str
|
Loading…
x
Reference in New Issue
Block a user