diff --git a/benchmarks/string.lua b/benchmarks/string.lua new file mode 100644 index 0000000..34b8f30 --- /dev/null +++ b/benchmarks/string.lua @@ -0,0 +1,250 @@ +require("../tests/tests") +local str = require("string") + +-- Pure Lua implementations for comparison +local pure_lua = {} + +function pure_lua.capitalize(s) + return s:gsub("(%a)([%w_']*)", function(first, rest) + return first:upper() .. rest:lower() + end) +end + +function pure_lua.camel_case(s) + local words = {} + for word in s:gmatch("%S+") do + table.insert(words, word:lower()) + end + if #words == 0 then return s end + + local result = words[1] + for i = 2, #words do + result = result .. words[i]:gsub("^%l", string.upper) + end + return result +end + +function pure_lua.template(template, vars) + vars = vars or {} + return template:gsub("%${([%w_]+)}", function(var) + return tostring(vars[var] or "") + end) +end + +function pure_lua.distance(a, b) + local len_a, len_b = #a, #b + if len_a == 0 then return len_b end + if len_b == 0 then return len_a end + + local matrix = {} + for i = 0, len_a do + matrix[i] = {[0] = i} + end + for j = 0, len_b do + matrix[0][j] = j + end + + for i = 1, len_a do + for j = 1, len_b do + local cost = (a:sub(i,i) == b:sub(j,j)) and 0 or 1 + matrix[i][j] = math.min( + matrix[i-1][j] + 1, + matrix[i][j-1] + 1, + matrix[i-1][j-1] + cost + ) + end + end + + return matrix[len_a][len_b] +end + +function pure_lua.wrap(s, width) + local words = {} + for word in s:gmatch("%S+") do + table.insert(words, word) + end + + local lines = {} + local current_line = "" + + for _, word in ipairs(words) do + if #current_line + #word + 1 <= width then + if current_line == "" then + current_line = word + else + current_line = current_line .. " " .. word + end + else + if current_line ~= "" then + table.insert(lines, current_line) + end + current_line = word + end + end + + if current_line ~= "" then + table.insert(lines, current_line) + end + + return lines +end + +function pure_lua.slug(s) + -- Remove accents (simplified) + local result = s:lower() + result = result:gsub("[àáâãäå]", "a") + result = result:gsub("[èéêë]", "e") + result = result:gsub("[ìíîï]", "i") + result = result:gsub("[òóôõö]", "o") + result = result:gsub("[ùúûü]", "u") + result = result:gsub("[ñ]", "n") + result = result:gsub("[ç]", "c") + result = result:gsub("[^%w%s]", "") + result = result:gsub("%s+", "-") + result = result:gsub("^%-+", ""):gsub("%-+$", "") + return result +end + +-- Test data +local test_texts = { + short = "Hello World", + medium = "The quick brown fox jumps over the lazy dog. This is a test sentence for benchmarking.", + long = string.rep("Lorem ipsum dolor sit amet, consectetur adipiscing elit. ", 100), + template_str = "Hello ${name}, you are ${age} years old and work as a ${job}.", + template_vars = {name = "John", age = 30, job = "developer"} +} + +-- Benchmark function +local function benchmark_comparison(name, go_func, lua_func, test_data, iterations) + iterations = iterations or 10000 + + print(string.format("\n=== %s Benchmark (%d iterations) ===", name, iterations)) + + -- Warmup + for i = 1, 100 do + go_func(test_data) + lua_func(test_data) + end + + -- Benchmark Go version + local go_start = os.clock() + for i = 1, iterations do + go_func(test_data) + end + local go_time = os.clock() - go_start + + -- Benchmark Lua version + local lua_start = os.clock() + for i = 1, iterations do + lua_func(test_data) + end + local lua_time = os.clock() - lua_start + + -- Results + local go_rate = iterations / go_time + local lua_rate = iterations / lua_time + local speedup = lua_time / go_time + + print(string.format("Go (enhanced): %.3fs (%.0f ops/sec)", go_time, go_rate)) + print(string.format("Lua (pure): %.3fs (%.0f ops/sec)", lua_time, lua_rate)) + print(string.format("Speedup: %.2fx %s", math.abs(speedup), speedup > 1 and "(Go faster)" or "(Lua faster)")) + + return { + go_time = go_time, + lua_time = lua_time, + speedup = speedup, + go_rate = go_rate, + lua_rate = lua_rate + } +end + +-- Benchmark wrapper functions +local function run_benchmarks() + print("String Operations Performance Comparison") + print("=" .. string.rep("=", 50)) + + local results = {} + + -- Capitalize benchmark + results.capitalize = benchmark_comparison( + "Capitalize", + function(text) return str.capitalize(text) end, + function(text) return pure_lua.capitalize(text) end, + test_texts.medium + ) + + -- CamelCase benchmark + results.camel_case = benchmark_comparison( + "CamelCase", + function(text) return str.camel_case(text) end, + function(text) return pure_lua.camel_case(text) end, + test_texts.medium + ) + + -- Template benchmark + results.template = benchmark_comparison( + "Template", + function(data) return str.template(test_texts.template_str, data) end, + function(data) return pure_lua.template(test_texts.template_str, data) end, + test_texts.template_vars + ) + + -- Distance benchmark (shorter strings due to O(n²) complexity) + results.distance = benchmark_comparison( + "Levenshtein Distance", + function(texts) return str.distance(texts[1], texts[2]) end, + function(texts) return pure_lua.distance(texts[1], texts[2]) end, + {"kitten", "sitting"}, + 1000 -- Fewer iterations for expensive operation + ) + + -- Wrap benchmark + results.wrap = benchmark_comparison( + "Text Wrap", + function(text) return str.wrap(text, 40) end, + function(text) return pure_lua.wrap(text, 40) end, + test_texts.long + ) + + -- Slug benchmark + results.slug = benchmark_comparison( + "Slug Generation", + function(text) return str.slug(text) end, + function(text) return pure_lua.slug(text) end, + "Café & Restaurant!! Special Characters" + ) + + -- Summary + print("\n" .. string.rep("=", 50)) + print("PERFORMANCE SUMMARY") + print(string.rep("=", 50)) + + local go_wins = 0 + local lua_wins = 0 + + for name, result in pairs(results) do + local winner = result.speedup > 1 and "Go" or "Lua" + local margin = string.format("%.2fx", math.abs(result.speedup)) + print(string.format("%-20s: %s wins by %s", name, winner, margin)) + + if result.speedup > 1 then + go_wins = go_wins + 1 + else + lua_wins = lua_wins + 1 + end + end + + print(string.rep("-", 50)) + print(string.format("Go wins: %d, Lua wins: %d", go_wins, lua_wins)) + + -- Analysis + print("\nANALYSIS:") + print("• Go functions benefit from optimized implementations") + print("• Lua functions avoid CGO overhead for simple operations") + print("• Choice depends on operation complexity vs call frequency") + + return results +end + +-- Run the benchmarks +run_benchmarks() \ No newline at end of file diff --git a/functions/string.go b/functions/string.go index 2f5752e..9fc220d 100644 --- a/functions/string.go +++ b/functions/string.go @@ -15,6 +15,19 @@ import ( "golang.org/x/text/language" ) +const ( + maxStringLength = 10_000_000 // 10MB limit for safety + maxRepeatCount = 1_000_000 // Prevent excessive memory usage + maxRandomLength = 100_000 // Reasonable limit for random strings +) + +func validateStringLength(s string) error { + if len(s) > maxStringLength { + return fmt.Errorf("string too large (max %d bytes)", maxStringLength) + } + return nil +} + func GetStringFunctions() map[string]luajit.GoFunction { return map[string]luajit.GoFunction{ "string_split": func(s *luajit.State) int { @@ -29,6 +42,24 @@ func GetStringFunctions() map[string]luajit.GoFunction { if err != nil { return s.PushError("string_split: second argument must be a string") } + + if err := validateStringLength(str); err != nil { + return s.PushError("string_split: %v", err) + } + + // Handle empty separator - split into characters + if sep == "" { + runes := []rune(str) + parts := make([]string, len(runes)) + for i, r := range runes { + parts[i] = string(r) + } + if err := s.PushValue(parts); err != nil { + return s.PushError("string_split: failed to push result: %v", err) + } + return 1 + } + parts := strings.Split(str, sep) if err := s.PushValue(parts); err != nil { return s.PushError("string_split: failed to push result: %v", err) @@ -50,25 +81,33 @@ func GetStringFunctions() map[string]luajit.GoFunction { } var parts []string - if slice, ok := arr.([]string); ok { - parts = slice - } else if anySlice, ok := arr.([]interface{}); ok { - parts = make([]string, len(anySlice)) - for i, v := range anySlice { - parts[i] = fmt.Sprintf("%v", v) + switch v := arr.(type) { + case []string: + parts = v + case []interface{}: + parts = make([]string, len(v)) + for i, val := range v { + if val == nil { + parts[i] = "" + } else { + parts[i] = fmt.Sprintf("%v", val) + } } - } else if anyMap, ok := arr.(map[string]interface{}); ok { - // Handle empty table case - check if it's meant to be an array - if len(anyMap) == 0 { - parts = []string{} // Empty array + case map[string]interface{}: + if len(v) == 0 { + parts = []string{} } else { - return s.PushError("string_join: first argument must be an array") + return s.PushError("string_join: first argument must be an array, not a map") } - } else { + default: return s.PushError("string_join: first argument must be an array") } result := strings.Join(parts, sep) + if err := validateStringLength(result); err != nil { + return s.PushError("string_join: result %v", err) + } + s.PushString(result) return 1 }, @@ -94,7 +133,7 @@ func GetStringFunctions() map[string]luajit.GoFunction { return s.PushError("string_trim_left: first argument must be a string") } - if s.GetTop() >= 2 && s.IsString(2) { + if s.GetTop() >= 2 && !s.IsNil(2) { cutset, err := s.SafeToString(2) if err != nil { return s.PushError("string_trim_left: second argument must be a string") @@ -115,7 +154,7 @@ func GetStringFunctions() map[string]luajit.GoFunction { return s.PushError("string_trim_right: first argument must be a string") } - if s.GetTop() >= 2 && s.IsString(2) { + if s.GetTop() >= 2 && !s.IsNil(2) { cutset, err := s.SafeToString(2) if err != nil { return s.PushError("string_trim_right: second argument must be a string") @@ -159,7 +198,7 @@ func GetStringFunctions() map[string]luajit.GoFunction { if err != nil { return s.PushError("string_title: argument must be a string") } - caser := cases.Title(language.English) + caser := cases.Title(language.English, cases.NoLower) s.PushString(caser.String(str)) return 1 }, @@ -228,7 +267,16 @@ func GetStringFunctions() map[string]luajit.GoFunction { if err != nil { return s.PushError("string_replace: third argument must be a string") } + + if old == "" { + return s.PushError("string_replace: cannot replace empty string") + } + result := strings.ReplaceAll(str, old, new) + if err := validateStringLength(result); err != nil { + return s.PushError("string_replace: result %v", err) + } + s.PushString(result) return 1 }, @@ -250,9 +298,14 @@ func GetStringFunctions() map[string]luajit.GoFunction { return s.PushError("string_replace_n: third argument must be a string") } n, err := s.SafeToNumber(4) - if err != nil || n != float64(int(n)) { - return s.PushError("string_replace_n: fourth argument must be an integer") + if err != nil || n != float64(int(n)) || n < 0 { + return s.PushError("string_replace_n: fourth argument must be a non-negative integer") } + + if old == "" { + return s.PushError("string_replace_n: cannot replace empty string") + } + result := strings.Replace(str, old, new, int(n)) s.PushString(result) return 1 @@ -270,8 +323,18 @@ func GetStringFunctions() map[string]luajit.GoFunction { if err != nil { return s.PushError("string_index: second argument must be a string") } + + if substr == "" { + s.PushNumber(1) // Empty string found at position 1 + return 1 + } + index := strings.Index(str, substr) - s.PushNumber(float64(index + 1)) // Lua is 1-indexed + if index == -1 { + s.PushNumber(0) // Not found + } else { + s.PushNumber(float64(index + 1)) // Convert to 1-indexed + } return 1 }, @@ -287,11 +350,17 @@ func GetStringFunctions() map[string]luajit.GoFunction { if err != nil { return s.PushError("string_last_index: second argument must be a string") } + + if substr == "" { + s.PushNumber(float64(utf8.RuneCountInString(str) + 1)) // Empty string at end + return 1 + } + index := strings.LastIndex(str, substr) if index == -1 { - s.PushNumber(0) + s.PushNumber(0) // Not found } else { - s.PushNumber(float64(index + 1)) // Lua is 1-indexed + s.PushNumber(float64(index + 1)) // Convert to 1-indexed } return 1 }, @@ -308,6 +377,13 @@ func GetStringFunctions() map[string]luajit.GoFunction { if err != nil { return s.PushError("string_count: second argument must be a string") } + + if substr == "" { + // Empty string matches at every position including boundaries + s.PushNumber(float64(utf8.RuneCountInString(str) + 1)) + return 1 + } + count := strings.Count(str, substr) s.PushNumber(float64(count)) return 1 @@ -325,10 +401,19 @@ func GetStringFunctions() map[string]luajit.GoFunction { if err != nil || count < 0 || count != float64(int(count)) { return s.PushError("string_repeat: second argument must be a non-negative integer") } - if count > 1000000 { - return s.PushError("string_repeat: count too large (max 1000000)") + + n := int(count) + if n == 0 { + s.PushString("") + return 1 } - result := strings.Repeat(str, int(count)) + + // Check for potential overflow + if len(str) > 0 && n > maxRepeatCount/len(str) { + return s.PushError("string_repeat: result would be too large") + } + + result := strings.Repeat(str, n) s.PushString(result) return 1 }, @@ -341,6 +426,11 @@ func GetStringFunctions() map[string]luajit.GoFunction { if err != nil { return s.PushError("string_reverse: argument must be a string") } + + if !utf8.ValidString(str) { + return s.PushError("string_reverse: invalid UTF-8 string") + } + runes := []rune(str) for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 { runes[i], runes[j] = runes[j], runes[i] @@ -381,7 +471,12 @@ func GetStringFunctions() map[string]luajit.GoFunction { if err != nil { return s.PushError("string_lines: argument must be a string") } + + // Handle different line endings + str = strings.ReplaceAll(str, "\r\n", "\n") + str = strings.ReplaceAll(str, "\r", "\n") lines := strings.Split(str, "\n") + if err := s.PushValue(lines); err != nil { return s.PushError("string_lines: failed to push result: %v", err) } @@ -412,14 +507,15 @@ func GetStringFunctions() map[string]luajit.GoFunction { return s.PushError("string_pad_left: first argument must be a string") } width, err := s.SafeToNumber(2) - if err != nil || width != float64(int(width)) { - return s.PushError("string_pad_left: second argument must be an integer") + if err != nil || width != float64(int(width)) || width < 0 { + return s.PushError("string_pad_left: second argument must be a non-negative integer") } padChar := " " - if s.GetTop() >= 3 { - if p, err := s.SafeToString(3); err == nil && len(p) > 0 { - padChar = string([]rune(p)[0]) + if s.GetTop() >= 3 && !s.IsNil(3) { + if p, err := s.SafeToString(3); err == nil && utf8.RuneCountInString(p) > 0 { + runes := []rune(p) + padChar = string(runes[0]) } } @@ -430,7 +526,12 @@ func GetStringFunctions() map[string]luajit.GoFunction { return 1 } - padding := strings.Repeat(padChar, targetLen-currentLen) + padLen := targetLen - currentLen + if padLen > maxRepeatCount { + return s.PushError("string_pad_left: padding too large") + } + + padding := strings.Repeat(padChar, padLen) s.PushString(padding + str) return 1 }, @@ -444,14 +545,15 @@ func GetStringFunctions() map[string]luajit.GoFunction { return s.PushError("string_pad_right: first argument must be a string") } width, err := s.SafeToNumber(2) - if err != nil || width != float64(int(width)) { - return s.PushError("string_pad_right: second argument must be an integer") + if err != nil || width != float64(int(width)) || width < 0 { + return s.PushError("string_pad_right: second argument must be a non-negative integer") } padChar := " " - if s.GetTop() >= 3 { - if p, err := s.SafeToString(3); err == nil && len(p) > 0 { - padChar = string([]rune(p)[0]) + if s.GetTop() >= 3 && !s.IsNil(3) { + if p, err := s.SafeToString(3); err == nil && utf8.RuneCountInString(p) > 0 { + runes := []rune(p) + padChar = string(runes[0]) } } @@ -462,7 +564,12 @@ func GetStringFunctions() map[string]luajit.GoFunction { return 1 } - padding := strings.Repeat(padChar, targetLen-currentLen) + padLen := targetLen - currentLen + if padLen > maxRepeatCount { + return s.PushError("string_pad_right: padding too large") + } + + padding := strings.Repeat(padChar, padLen) s.PushString(str + padding) return 1 }, @@ -480,10 +587,15 @@ func GetStringFunctions() map[string]luajit.GoFunction { return s.PushError("string_slice: second argument must be an integer") } + if !utf8.ValidString(str) { + return s.PushError("string_slice: invalid UTF-8 string") + } + runes := []rune(str) length := len(runes) startIdx := int(start) - 1 // Convert from 1-indexed to 0-indexed + // Handle negative start index if startIdx < 0 { startIdx = 0 } @@ -493,10 +605,14 @@ func GetStringFunctions() map[string]luajit.GoFunction { } endIdx := length - if s.GetTop() >= 3 { + if s.GetTop() >= 3 && !s.IsNil(3) { end, err := s.SafeToNumber(3) if err == nil && end == float64(int(end)) { endIdx = int(end) + // Handle negative end index (from end of string) + if endIdx < 0 { + endIdx = length + endIdx + 1 + } if endIdx < 0 { endIdx = 0 } @@ -531,8 +647,7 @@ func GetStringFunctions() map[string]luajit.GoFunction { re, err := regexp.Compile(pattern) if err != nil { s.PushBoolean(false) - s.PushString(err.Error()) - return 2 + return 1 } s.PushBoolean(re.MatchString(str)) @@ -555,8 +670,7 @@ func GetStringFunctions() map[string]luajit.GoFunction { re, err := regexp.Compile(pattern) if err != nil { s.PushNil() - s.PushString(err.Error()) - return 2 + return 1 } match := re.FindString(str) @@ -583,12 +697,18 @@ func GetStringFunctions() map[string]luajit.GoFunction { re, err := regexp.Compile(pattern) if err != nil { - s.PushNil() - s.PushString(err.Error()) - return 2 + // Return empty array for invalid patterns + if err := s.PushValue([]string{}); err != nil { + return s.PushError("regex_find_all: failed to push result: %v", err) + } + return 1 } matches := re.FindAllString(str, -1) + if matches == nil { + matches = []string{} // Return empty array instead of nil + } + if err := s.PushValue(matches); err != nil { return s.PushError("regex_find_all: failed to push result: %v", err) } @@ -614,9 +734,9 @@ func GetStringFunctions() map[string]luajit.GoFunction { re, err := regexp.Compile(pattern) if err != nil { - s.PushNil() - s.PushString(err.Error()) - return 2 + // Return original string for invalid patterns + s.PushString(str) + return 1 } result := re.ReplaceAllString(str, replacement) @@ -633,14 +753,14 @@ func GetStringFunctions() map[string]luajit.GoFunction { return s.PushError("string_to_number: argument must be a string") } + // Trim whitespace for more lenient parsing + str = strings.TrimSpace(str) + + // Try float first for more general parsing if num, err := strconv.ParseFloat(str, 64); err == nil { s.PushNumber(num) return 1 } - if num, err := strconv.ParseInt(str, 10, 64); err == nil { - s.PushNumber(float64(num)) - return 1 - } s.PushNil() return 1 @@ -655,9 +775,14 @@ func GetStringFunctions() map[string]luajit.GoFunction { return s.PushError("string_is_numeric: argument must be a string") } + str = strings.TrimSpace(str) + if str == "" { + s.PushBoolean(false) + return 1 + } + _, err1 := strconv.ParseFloat(str, 64) - _, err2 := strconv.ParseInt(str, 10, 64) - s.PushBoolean(err1 == nil || err2 == nil) + s.PushBoolean(err1 == nil) return 1 }, @@ -719,7 +844,7 @@ func GetStringFunctions() map[string]luajit.GoFunction { } charset := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" - if s.GetTop() >= 2 { + if s.GetTop() >= 2 && !s.IsNil(2) { if custom, err := s.SafeToString(2); err == nil && len(custom) > 0 { charset = custom } @@ -730,18 +855,40 @@ func GetStringFunctions() map[string]luajit.GoFunction { s.PushString("") return 1 } - if n > 100000 { - return s.PushError("random_string: length too large (max 100000)") + if n > maxRandomLength { + return s.PushError("random_string: length too large (max %d)", maxRandomLength) } - result := make([]byte, n) + // Validate charset for UTF-8 + if !utf8.ValidString(charset) { + return s.PushError("random_string: charset must be valid UTF-8") + } + + charsetRunes := []rune(charset) + if len(charsetRunes) == 0 { + return s.PushError("random_string: charset cannot be empty") + } + + result := make([]rune, n) rnd := rand.New(rand.NewSource(time.Now().UnixNano())) for i := range result { - result[i] = charset[rnd.Intn(len(charset))] + result[i] = charsetRunes[rnd.Intn(len(charsetRunes))] } s.PushString(string(result)) return 1 }, + + "string_is_valid_utf8": func(s *luajit.State) int { + if err := s.CheckMinArgs(1); err != nil { + return s.PushError("string_is_valid_utf8: %v", err) + } + str, err := s.SafeToString(1) + if err != nil { + return s.PushError("string_is_valid_utf8: argument must be a string") + } + s.PushBoolean(utf8.ValidString(str)) + return 1 + }, } } diff --git a/modules/string.lua b/modules/string.lua index 01db1f7..507495e 100644 --- a/modules/string.lua +++ b/modules/string.lua @@ -1,111 +1,217 @@ --- modules/string.lua - Comprehensive string manipulation utilities +-- modules/string.lua - Enhanced string manipulation utilities local str = {} +-- Helper function to handle errors from Go functions +local function safe_call(func, ...) + local success, result = pcall(func, ...) + if not success then + error(result, 2) + end + return result +end + +-- Helper to validate arguments +local function validate_string(s, func_name, arg_num) + if type(s) ~= "string" then + error(string.format("%s: argument %d must be a string, got %s", + func_name, arg_num or 1, type(s)), 3) + end +end + +local function validate_number(n, func_name, arg_num) + if type(n) ~= "number" then + error(string.format("%s: argument %d must be a number, got %s", + func_name, arg_num or 1, type(n)), 3) + end +end + +local function validate_table(t, func_name, arg_num) + if type(t) ~= "table" then + error(string.format("%s: argument %d must be a table, got %s", + func_name, arg_num or 1, type(t)), 3) + end +end + -- ====================================================================== -- BASIC STRING OPERATIONS -- ====================================================================== function str.split(s, delimiter) - return moonshark.string_split(s, delimiter) + validate_string(s, "str.split", 1) + validate_string(delimiter, "str.split", 2) + return safe_call(moonshark.string_split, s, delimiter) end function str.join(arr, separator) - return moonshark.string_join(arr, separator) + validate_table(arr, "str.join", 1) + validate_string(separator, "str.join", 2) + return safe_call(moonshark.string_join, arr, separator) end function str.trim(s) - return moonshark.string_trim(s) + validate_string(s, "str.trim") + return safe_call(moonshark.string_trim, s) end function str.trim_left(s, cutset) - return moonshark.string_trim_left(s, cutset) + validate_string(s, "str.trim_left", 1) + if cutset ~= nil then + validate_string(cutset, "str.trim_left", 2) + end + return safe_call(moonshark.string_trim_left, s, cutset) end function str.trim_right(s, cutset) - return moonshark.string_trim_right(s, cutset) + validate_string(s, "str.trim_right", 1) + if cutset ~= nil then + validate_string(cutset, "str.trim_right", 2) + end + return safe_call(moonshark.string_trim_right, s, cutset) end function str.upper(s) - return moonshark.string_upper(s) + validate_string(s, "str.upper") + return safe_call(moonshark.string_upper, s) end function str.lower(s) - return moonshark.string_lower(s) + validate_string(s, "str.lower") + return safe_call(moonshark.string_lower, s) end function str.title(s) - return moonshark.string_title(s) + validate_string(s, "str.title") + return safe_call(moonshark.string_title, s) end function str.contains(s, substr) - return moonshark.string_contains(s, substr) + validate_string(s, "str.contains", 1) + validate_string(substr, "str.contains", 2) + return safe_call(moonshark.string_contains, s, substr) end function str.starts_with(s, prefix) - return moonshark.string_starts_with(s, prefix) + validate_string(s, "str.starts_with", 1) + validate_string(prefix, "str.starts_with", 2) + return safe_call(moonshark.string_starts_with, s, prefix) end function str.ends_with(s, suffix) - return moonshark.string_ends_with(s, suffix) + validate_string(s, "str.ends_with", 1) + validate_string(suffix, "str.ends_with", 2) + return safe_call(moonshark.string_ends_with, s, suffix) end function str.replace(s, old, new) - return moonshark.string_replace(s, old, new) + validate_string(s, "str.replace", 1) + validate_string(old, "str.replace", 2) + validate_string(new, "str.replace", 3) + return safe_call(moonshark.string_replace, s, old, new) end function str.replace_n(s, old, new, n) - return moonshark.string_replace_n(s, old, new, n) + validate_string(s, "str.replace_n", 1) + validate_string(old, "str.replace_n", 2) + validate_string(new, "str.replace_n", 3) + validate_number(n, "str.replace_n", 4) + if n < 0 or n ~= math.floor(n) then + error("str.replace_n: count must be a non-negative integer", 2) + end + return safe_call(moonshark.string_replace_n, s, old, new, n) end function str.index(s, substr) - local idx = moonshark.string_index(s, substr) + validate_string(s, "str.index", 1) + validate_string(substr, "str.index", 2) + local idx = safe_call(moonshark.string_index, s, substr) return idx > 0 and idx or nil end function str.last_index(s, substr) - local idx = moonshark.string_last_index(s, substr) + validate_string(s, "str.last_index", 1) + validate_string(substr, "str.last_index", 2) + local idx = safe_call(moonshark.string_last_index, s, substr) return idx > 0 and idx or nil end function str.count(s, substr) - return moonshark.string_count(s, substr) + validate_string(s, "str.count", 1) + validate_string(substr, "str.count", 2) + return safe_call(moonshark.string_count, s, substr) end function str.repeat_(s, n) - return moonshark.string_repeat(s, n) + validate_string(s, "str.repeat_", 1) + validate_number(n, "str.repeat_", 2) + if n < 0 or n ~= math.floor(n) then + error("str.repeat_: count must be a non-negative integer", 2) + end + return safe_call(moonshark.string_repeat, s, n) end function str.reverse(s) - return moonshark.string_reverse(s) + validate_string(s, "str.reverse") + return safe_call(moonshark.string_reverse, s) end function str.length(s) - return moonshark.string_length(s) + validate_string(s, "str.length") + return safe_call(moonshark.string_length, s) end function str.byte_length(s) - return moonshark.string_byte_length(s) + validate_string(s, "str.byte_length") + return safe_call(moonshark.string_byte_length, s) end function str.lines(s) - return moonshark.string_lines(s) + validate_string(s, "str.lines") + return safe_call(moonshark.string_lines, s) end function str.words(s) - return moonshark.string_words(s) + validate_string(s, "str.words") + return safe_call(moonshark.string_words, s) end function str.pad_left(s, width, pad_char) - return moonshark.string_pad_left(s, width, pad_char) + validate_string(s, "str.pad_left", 1) + validate_number(width, "str.pad_left", 2) + if width < 0 or width ~= math.floor(width) then + error("str.pad_left: width must be a non-negative integer", 2) + end + if pad_char ~= nil then + validate_string(pad_char, "str.pad_left", 3) + end + return safe_call(moonshark.string_pad_left, s, width, pad_char) end function str.pad_right(s, width, pad_char) - return moonshark.string_pad_right(s, width, pad_char) + validate_string(s, "str.pad_right", 1) + validate_number(width, "str.pad_right", 2) + if width < 0 or width ~= math.floor(width) then + error("str.pad_right: width must be a non-negative integer", 2) + end + if pad_char ~= nil then + validate_string(pad_char, "str.pad_right", 3) + end + return safe_call(moonshark.string_pad_right, s, width, pad_char) end function str.slice(s, start, end_pos) - return moonshark.string_slice(s, start, end_pos) + validate_string(s, "str.slice", 1) + validate_number(start, "str.slice", 2) + if start ~= math.floor(start) then + error("str.slice: start must be an integer", 2) + end + if end_pos ~= nil then + validate_number(end_pos, "str.slice", 3) + if end_pos ~= math.floor(end_pos) then + error("str.slice: end position must be an integer", 2) + end + end + return safe_call(moonshark.string_slice, s, start, end_pos) end -- ====================================================================== @@ -113,19 +219,28 @@ end -- ====================================================================== function str.match(pattern, s) - return moonshark.regex_match(pattern, s) + validate_string(pattern, "str.match", 1) + validate_string(s, "str.match", 2) + return safe_call(moonshark.regex_match, pattern, s) end function str.find(pattern, s) - return moonshark.regex_find(pattern, s) + validate_string(pattern, "str.find", 1) + validate_string(s, "str.find", 2) + return safe_call(moonshark.regex_find, pattern, s) end function str.find_all(pattern, s) - return moonshark.regex_find_all(pattern, s) + validate_string(pattern, "str.find_all", 1) + validate_string(s, "str.find_all", 2) + return safe_call(moonshark.regex_find_all, pattern, s) end function str.gsub(pattern, s, replacement) - return moonshark.regex_replace(pattern, s, replacement) + validate_string(pattern, "str.gsub", 1) + validate_string(s, "str.gsub", 2) + validate_string(replacement, "str.gsub", 3) + return safe_call(moonshark.regex_replace, pattern, s, replacement) end -- ====================================================================== @@ -133,19 +248,23 @@ end -- ====================================================================== function str.to_number(s) - return moonshark.string_to_number(s) + validate_string(s, "str.to_number") + return safe_call(moonshark.string_to_number, s) end function str.is_numeric(s) - return moonshark.string_is_numeric(s) + validate_string(s, "str.is_numeric") + return safe_call(moonshark.string_is_numeric, s) end function str.is_alpha(s) - return moonshark.string_is_alpha(s) + validate_string(s, "str.is_alpha") + return safe_call(moonshark.string_is_alpha, s) end function str.is_alphanumeric(s) - return moonshark.string_is_alphanumeric(s) + validate_string(s, "str.is_alphanumeric") + return safe_call(moonshark.string_is_alphanumeric, s) end function str.is_empty(s) @@ -156,60 +275,106 @@ function str.is_blank(s) return str.is_empty(s) or str.trim(s) == "" end +function str.is_utf8(s) + validate_string(s, "str.is_utf8") + return safe_call(moonshark.string_is_valid_utf8, s) +end + +function str.is_valid_utf8(s) + validate_string(s, "str.is_valid_utf8") + return safe_call(moonshark.string_is_valid_utf8, s) +end + -- ====================================================================== -- ADVANCED STRING OPERATIONS -- ====================================================================== --- Capitalize first letter of each word +-- Capitalize first letter of each word (Pure Lua - faster) function str.capitalize(s) + validate_string(s, "str.capitalize") return s:gsub("(%a)([%w_']*)", function(first, rest) - return str.upper(first) .. str.lower(rest) + return first:upper() .. rest:lower() end) end --- Convert string to camelCase +-- Convert string to camelCase (Pure Lua - faster) function str.camel_case(s) - local words = str.words(str.lower(s)) + validate_string(s, "str.camel_case") + local words = {} + for word in s:gmatch("%S+") do + table.insert(words, word:lower()) + end if #words == 0 then return s end local result = words[1] for i = 2, #words do - result = result .. str.capitalize(words[i]) + result = result .. words[i]:gsub("^%l", string.upper) end return result end --- Convert string to PascalCase +-- Convert string to PascalCase (Pure Lua - faster) function str.pascal_case(s) - local words = str.words(str.lower(s)) + validate_string(s, "str.pascal_case") + local words = {} + for word in s:gmatch("%S+") do + table.insert(words, word:lower()) + end local result = "" for _, word in ipairs(words) do - result = result .. str.capitalize(word) + result = result .. word:gsub("^%l", string.upper) end return result end --- Convert string to snake_case +-- Convert string to snake_case (Pure Lua - faster) function str.snake_case(s) - local words = str.words(str.lower(s)) - return str.join(words, "_") + validate_string(s, "str.snake_case") + local words = {} + for word in s:gmatch("%S+") do + table.insert(words, word:lower()) + end + return table.concat(words, "_") end --- Convert string to kebab-case +-- Convert string to kebab-case (Pure Lua - faster) function str.kebab_case(s) - local words = str.words(str.lower(s)) - return str.join(words, "-") + validate_string(s, "str.kebab_case") + local words = {} + for word in s:gmatch("%S+") do + table.insert(words, word:lower()) + end + return table.concat(words, "-") end --- Convert string to SCREAMING_SNAKE_CASE +-- Convert string to SCREAMING_SNAKE_CASE (Pure Lua - faster) function str.screaming_snake_case(s) - return str.upper(str.snake_case(s)) + validate_string(s, "str.screaming_snake_case") + local words = {} + for word in s:gmatch("%S+") do + table.insert(words, word:upper()) + end + return table.concat(words, "_") end --- Center text within given width +-- Center text within given width (Pure Lua - faster) function str.center(s, width, fill_char) + validate_string(s, "str.center", 1) + validate_number(width, "str.center", 2) + if width < 0 or width ~= math.floor(width) then + error("str.center: width must be a non-negative integer", 2) + end + fill_char = fill_char or " " - local len = str.length(s) + if fill_char ~= nil then + validate_string(fill_char, "str.center", 3) + if #fill_char == 0 then + error("str.center: fill character cannot be empty", 2) + end + fill_char = fill_char:sub(1,1) -- Use only first character + end + + local len = #s if len >= width then return s end local pad_total = width - len @@ -219,25 +384,66 @@ function str.center(s, width, fill_char) return string.rep(fill_char, pad_left) .. s .. string.rep(fill_char, pad_right) end --- Truncate string to maximum length +-- Truncate string to maximum length (Pure Lua - faster) function str.truncate(s, max_length, suffix) + validate_string(s, "str.truncate", 1) + validate_number(max_length, "str.truncate", 2) + if max_length < 0 or max_length ~= math.floor(max_length) then + error("str.truncate: max_length must be a non-negative integer", 2) + end + suffix = suffix or "..." - if str.length(s) <= max_length then + validate_string(suffix, "str.truncate", 3) + + if #s <= max_length then return s end - local main_part = str.slice(s, 1, max_length - str.length(suffix)) - main_part = str.trim_right(main_part) + + local suffix_len = #suffix + if max_length <= suffix_len then + return suffix:sub(1, max_length) + end + + local main_part = s:sub(1, max_length - suffix_len) + main_part = main_part:gsub("%s+$", "") -- trim right return main_part .. suffix end --- Wrap text to specified width +-- Wrap text to specified width (Pure Lua - much faster) function str.wrap(s, width) - local words = str.words(s) + validate_string(s, "str.wrap", 1) + validate_number(width, "str.wrap", 2) + if width <= 0 or width ~= math.floor(width) then + error("str.wrap: width must be a positive integer", 2) + end + + local words = {} + for word in s:gmatch("%S+") do + table.insert(words, word) + end + local lines = {} local current_line = "" for _, word in ipairs(words) do - if str.length(current_line) + str.length(word) + 1 <= width then + local word_len = #word + local current_len = #current_line + + -- Handle words longer than width + if word_len > width then + if current_line ~= "" then + table.insert(lines, current_line) + current_line = "" + end + -- Break long word + while #word > width do + table.insert(lines, word:sub(1, width)) + word = word:sub(width + 1) + end + if #word > 0 then + current_line = word + end + elseif current_len + word_len + 1 <= width then if current_line == "" then current_line = word else @@ -258,17 +464,24 @@ function str.wrap(s, width) return lines end --- Remove common leading whitespace +-- Remove common leading whitespace (Pure Lua - faster) function str.dedent(s) - local lines = str.split(s, "\n") + validate_string(s, "str.dedent") + local lines = {} + for line in (s.."\n"):gmatch("([^\n]*)\n") do + table.insert(lines, line) + end if #lines <= 1 then return s end -- Find minimum indentation (excluding empty lines) local min_indent = math.huge for _, line in ipairs(lines) do - if str.trim(line) ~= "" then + local trimmed = line:gsub("%s", "") + if trimmed ~= "" then local indent = line:match("^%s*") - min_indent = math.min(min_indent, #indent) + if indent then + min_indent = math.min(min_indent, #indent) + end end end @@ -278,26 +491,30 @@ function str.dedent(s) -- Remove common indentation for i, line in ipairs(lines) do - if str.trim(line) ~= "" then + local trimmed = line:gsub("%s", "") + if trimmed ~= "" then lines[i] = line:sub(min_indent + 1) end end - return str.join(lines, "\n") + return table.concat(lines, "\n") end -- Escape special characters for regex function str.escape_regex(s) + validate_string(s, "str.escape_regex") return s:gsub("([%.%+%*%?%[%]%^%$%(%)%{%}%|%\\])", "\\%1") end -- Quote string for shell usage function str.shell_quote(s) + validate_string(s, "str.shell_quote") return "'" .. s:gsub("'", "'\"'\"'") .. "'" end -- URL encode string function str.url_encode(s) + validate_string(s, "str.url_encode") return s:gsub("([^%w%-%.%_%~])", function(c) return string.format("%%%02X", string.byte(c)) end) @@ -305,9 +522,22 @@ end -- URL decode string function str.url_decode(s) - return s:gsub("%%(%x%x)", function(hex) - return string.char(tonumber(hex, 16)) + validate_string(s, "str.url_decode") + local result = s:gsub("%%(%x%x)", function(hex) + local byte = tonumber(hex, 16) + if byte then + return string.char(byte) + else + return "%" .. hex -- Invalid hex, keep original + end end):gsub("+", " ") + + -- Validate result is UTF-8 + if not str.is_valid_utf8(result) then + error("str.url_decode: result is not valid UTF-8", 2) + end + + return result end -- ====================================================================== @@ -316,12 +546,27 @@ end -- Case-insensitive comparison function str.iequals(a, b) + validate_string(a, "str.iequals", 1) + validate_string(b, "str.iequals", 2) return str.lower(a) == str.lower(b) end --- Levenshtein distance +-- Levenshtein distance (Pure Lua - much faster) function str.distance(a, b) - local len_a, len_b = str.length(a), str.length(b) + validate_string(a, "str.distance", 1) + validate_string(b, "str.distance", 2) + + local len_a, len_b = #a, #b + + -- Handle empty strings + if len_a == 0 then return len_b end + if len_b == 0 then return len_a end + + -- Limit computation for very long strings + if len_a > 1000 or len_b > 1000 then + error("str.distance: strings too long for distance calculation", 2) + end + local matrix = {} -- Initialize matrix @@ -335,7 +580,7 @@ function str.distance(a, b) -- Fill matrix for i = 1, len_a do for j = 1, len_b do - local cost = (str.slice(a, i, i) == str.slice(b, j, j)) and 0 or 1 + local cost = (a:sub(i,i) == b:sub(j,j)) and 0 or 1 matrix[i][j] = math.min( matrix[i-1][j] + 1, -- deletion matrix[i][j-1] + 1, -- insertion @@ -347,28 +592,49 @@ function str.distance(a, b) return matrix[len_a][len_b] end --- String similarity (0-1) +-- String similarity (0-1) (Pure Lua - faster) function str.similarity(a, b) - local max_len = math.max(str.length(a), str.length(b)) - if max_len == 0 then return 1 end - return 1 - (str.distance(a, b) / max_len) + validate_string(a, "str.similarity", 1) + validate_string(b, "str.similarity", 2) + + local max_len = math.max(#a, #b) + if max_len == 0 then return 1.0 end + + local dist = str.distance(a, b) + return 1.0 - (dist / max_len) end -- ====================================================================== -- TEMPLATE FUNCTIONS -- ====================================================================== --- Simple template substitution +-- Simple template substitution (Pure Lua - faster) function str.template(template, vars) - vars = vars or {} + validate_string(template, "str.template", 1) + if vars ~= nil then + validate_table(vars, "str.template", 2) + else + vars = {} + end + return template:gsub("%${([%w_]+)}", function(var) - return tostring(vars[var] or "") + local value = vars[var] + if value == nil then + return "" + else + return tostring(value) + end end) end --- Advanced template with functions +-- Advanced template with functions (Pure Lua - faster) function str.template_advanced(template, context) - context = context or {} + validate_string(template, "str.template_advanced", 1) + if context ~= nil then + validate_table(context, "str.template_advanced", 2) + else + context = {} + end return template:gsub("%${([^}]+)}", function(expr) -- Simple variable substitution @@ -377,10 +643,14 @@ function str.template_advanced(template, context) end -- Handle simple expressions like ${var.prop} - local parts = str.split(expr, ".") + local parts = {} + for part in expr:gmatch("[^%.]+") do + table.insert(parts, part) + end + local value = context for _, part in ipairs(parts) do - if type(value) == "table" and value[part] then + if type(value) == "table" and value[part] ~= nil then value = value[part] else return "" @@ -397,21 +667,25 @@ end -- Check if string contains only whitespace function str.is_whitespace(s) + validate_string(s, "str.is_whitespace") return s:match("^%s*$") ~= nil end -- Remove all whitespace function str.strip_whitespace(s) + validate_string(s, "str.strip_whitespace") return s:gsub("%s", "") end -- Normalize whitespace (replace multiple spaces with single space) function str.normalize_whitespace(s) + validate_string(s, "str.normalize_whitespace") return str.trim(s:gsub("%s+", " ")) end -- Extract numbers from string function str.extract_numbers(s) + validate_string(s, "str.extract_numbers") local numbers = {} for num in s:gmatch("%-?%d+%.?%d*") do local n = tonumber(num) @@ -422,13 +696,21 @@ end -- Remove diacritics/accents function str.remove_accents(s) + validate_string(s, "str.remove_accents") local accents = { - ["à"] = "a", ["á"] = "a", ["â"] = "a", ["ã"] = "a", ["ä"] = "a", + ["à"] = "a", ["á"] = "a", ["â"] = "a", ["ã"] = "a", ["ä"] = "a", ["å"] = "a", ["è"] = "e", ["é"] = "e", ["ê"] = "e", ["ë"] = "e", ["ì"] = "i", ["í"] = "i", ["î"] = "i", ["ï"] = "i", ["ò"] = "o", ["ó"] = "o", ["ô"] = "o", ["õ"] = "o", ["ö"] = "o", ["ù"] = "u", ["ú"] = "u", ["û"] = "u", ["ü"] = "u", - ["ñ"] = "n", ["ç"] = "c" + ["ñ"] = "n", ["ç"] = "c", ["ý"] = "y", ["ÿ"] = "y", + -- Uppercase versions + ["À"] = "A", ["Á"] = "A", ["Â"] = "A", ["Ã"] = "A", ["Ä"] = "A", ["Å"] = "A", + ["È"] = "E", ["É"] = "E", ["Ê"] = "E", ["Ë"] = "E", + ["Ì"] = "I", ["Í"] = "I", ["Î"] = "I", ["Ï"] = "I", + ["Ò"] = "O", ["Ó"] = "O", ["Ô"] = "O", ["Õ"] = "O", ["Ö"] = "O", + ["Ù"] = "U", ["Ú"] = "U", ["Û"] = "U", ["Ü"] = "U", + ["Ñ"] = "N", ["Ç"] = "C", ["Ý"] = "Y", ["Ÿ"] = "Y" } local result = s @@ -440,25 +722,48 @@ end -- Generate random string function str.random(length, charset) - return moonshark.random_string(length, charset) + validate_number(length, "str.random", 1) + if length < 0 or length ~= math.floor(length) then + error("str.random: length must be a non-negative integer", 2) + end + if charset ~= nil then + validate_string(charset, "str.random", 2) + end + return safe_call(moonshark.random_string, length, charset) end --- Check if string is valid UTF-8 -function str.is_utf8(s) - -- Simple check - if we can iterate through the string as UTF-8, it's valid - local success = pcall(function() - for p, c in utf8 and utf8.codes or string.gmatch(s, ".") do - -- Just iterate through - end - end) - return success -end - --- Generate slug from string +-- Generate slug from string (Pure Lua - faster) function str.slug(s) - local kebab = str.kebab_case(str.remove_accents(s)) - local cleaned = (kebab:gsub("[^%w%-]", "")) - return (cleaned:gsub("%-+", "-")) + validate_string(s, "str.slug") + + -- Remove accents (simplified but faster) + local accents = { + ["à"] = "a", ["á"] = "a", ["â"] = "a", ["ã"] = "a", ["ä"] = "a", ["å"] = "a", + ["è"] = "e", ["é"] = "e", ["ê"] = "e", ["ë"] = "e", + ["ì"] = "i", ["í"] = "i", ["î"] = "i", ["ï"] = "i", + ["ò"] = "o", ["ó"] = "o", ["ô"] = "o", ["õ"] = "o", ["ö"] = "o", + ["ù"] = "u", ["ú"] = "u", ["û"] = "u", ["ü"] = "u", + ["ñ"] = "n", ["ç"] = "c", ["ý"] = "y", ["ÿ"] = "y", + -- Uppercase versions + ["À"] = "A", ["Á"] = "A", ["Â"] = "A", ["Ã"] = "A", ["Ä"] = "A", ["Å"] = "A", + ["È"] = "E", ["É"] = "E", ["Ê"] = "E", ["Ë"] = "E", + ["Ì"] = "I", ["Í"] = "I", ["Î"] = "I", ["Ï"] = "I", + ["Ò"] = "O", ["Ó"] = "O", ["Ô"] = "O", ["Õ"] = "O", ["Ö"] = "O", + ["Ù"] = "U", ["Ú"] = "U", ["Û"] = "U", ["Ü"] = "U", + ["Ñ"] = "N", ["Ç"] = "C", ["Ý"] = "Y", ["Ÿ"] = "Y" + } + + local result = s:lower() + for accented, plain in pairs(accents) do + result = result:gsub(accented:lower(), plain:lower()) + end + + -- Keep only alphanumeric characters and spaces, then convert spaces to hyphens + result = result:gsub("[^%w%s]", "") + result = result:gsub("%s+", "-") + result = result:gsub("^%-+", ""):gsub("%-+$", "") + + return result end return str \ No newline at end of file