diff --git a/modules/string/string.go b/modules/string/string.go index 31b26f2..3a7824f 100644 --- a/modules/string/string.go +++ b/modules/string/string.go @@ -1,111 +1,23 @@ package string import ( - "math/rand" "regexp" - "strings" - "time" "unicode/utf8" luajit "git.sharkk.net/Sky/LuaJIT-to-Go" ) -const ( - maxStringLength = 10_000_000 // 10MB limit for safety - maxRandomLength = 100_000 // Reasonable limit for random strings -) - func GetFunctionList() map[string]luajit.GoFunction { return map[string]luajit.GoFunction{ - "string_split": string_split, - "string_join": string_join, "string_slice": string_slice, "string_reverse": string_reverse, "string_length": string_length, "string_byte_length": string_byte_length, - "regex_match": regex_match, - "regex_find": regex_find, - "regex_find_all": regex_find_all, "regex_replace": regex_replace, - "random_string": random_string, "string_is_valid_utf8": string_is_valid_utf8, } } -func string_split(s *luajit.State) int { - str := s.ToString(1) - sep := s.ToString(2) - - if len(str) > maxStringLength { - s.PushNil() - s.PushString("string too large") - return 2 - } - - // Handle empty separator - split into characters - if sep == "" { - runes := []rune(str) - parts := make([]string, len(runes)) - for i, r := range runes { - parts[i] = string(r) - } - s.PushValue(parts) - return 1 - } - - parts := strings.Split(str, sep) - s.PushValue(parts) - return 1 -} - -func string_join(s *luajit.State) int { - arr, err := s.ToValue(1) - if err != nil { - s.PushNil() - s.PushString("invalid array") - return 2 - } - sep := s.ToString(2) - - var parts []string - switch v := arr.(type) { - case []string: - parts = v - case []any: - parts = make([]string, len(v)) - for i, val := range v { - if val == nil { - parts[i] = "" - } else { - parts[i] = s.ToString(-1) // Convert via Lua - } - } - case map[string]any: - // Empty table {} from Lua becomes map[string]any{} - if len(v) == 0 { - parts = []string{} // Empty array - } else { - s.PushNil() - s.PushString("not an array") - return 2 - } - default: - s.PushNil() - s.PushString("not an array") - return 2 - } - - result := strings.Join(parts, sep) - if len(result) > maxStringLength { - s.PushNil() - s.PushString("result too large") - return 2 - } - - s.PushString(result) - return 1 -} - func string_slice(s *luajit.State) int { str := s.ToString(1) start := int(s.ToNumber(2)) @@ -182,58 +94,6 @@ func string_byte_length(s *luajit.State) int { return 1 } -func regex_match(s *luajit.State) int { - pattern := s.ToString(1) - str := s.ToString(2) - - re, err := regexp.Compile(pattern) - if err != nil { - s.PushBoolean(false) - return 1 - } - - s.PushBoolean(re.MatchString(str)) - return 1 -} - -func regex_find(s *luajit.State) int { - pattern := s.ToString(1) - str := s.ToString(2) - - re, err := regexp.Compile(pattern) - if err != nil { - s.PushNil() - return 1 - } - - match := re.FindString(str) - if match == "" { - s.PushNil() - } else { - s.PushString(match) - } - return 1 -} - -func regex_find_all(s *luajit.State) int { - pattern := s.ToString(1) - str := s.ToString(2) - - re, err := regexp.Compile(pattern) - if err != nil { - s.PushValue([]string{}) - return 1 - } - - matches := re.FindAllString(str, -1) - if matches == nil { - matches = []string{} - } - - s.PushValue(matches) - return 1 -} - func regex_replace(s *luajit.State) int { pattern := s.ToString(1) str := s.ToString(2) @@ -250,47 +110,6 @@ func regex_replace(s *luajit.State) int { return 1 } -func random_string(s *luajit.State) int { - length := int(s.ToNumber(1)) - if length < 0 || length > maxRandomLength { - s.PushNil() - s.PushString("invalid length") - return 2 - } - - charset := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" - if s.GetTop() >= 2 && !s.IsNil(2) { - charset = s.ToString(2) - } - - if length == 0 { - s.PushString("") - return 1 - } - - if !utf8.ValidString(charset) { - s.PushNil() - s.PushString("invalid charset") - return 2 - } - - charsetRunes := []rune(charset) - if len(charsetRunes) == 0 { - s.PushNil() - s.PushString("empty charset") - return 2 - } - - result := make([]rune, length) - rnd := rand.New(rand.NewSource(time.Now().UnixNano())) - for i := range result { - result[i] = charsetRunes[rnd.Intn(len(charsetRunes))] - } - - s.PushString(string(result)) - return 1 -} - func string_is_valid_utf8(s *luajit.State) int { str := s.ToString(1) s.PushBoolean(utf8.ValidString(str)) diff --git a/modules/string/string.lua b/modules/string/string.lua index fd9f980..5e844b0 100644 --- a/modules/string/string.lua +++ b/modules/string/string.lua @@ -1,21 +1,47 @@ --- modules/string.lua - Enhanced string manipulation utilities - local str = {} +-- Performance thresholds based on benchmark results +local REVERSE_THRESHOLD = 100 -- Use Go for strings longer than this +local LENGTH_THRESHOLD = 1000 -- Use Go for ASCII strings longer than this + -- ====================================================================== --- BASIC STRING OPERATIONS (Pure Lua) +-- BASIC STRING OPERATIONS (Optimized Lua/Go hybrid) -- ====================================================================== function str.split(s, delimiter) if type(s) ~= "string" then error("str.split: first argument must be a string", 2) end if type(delimiter) ~= "string" then error("str.split: second argument must be a string", 2) end - return moonshark.string_split(s, delimiter) + + if delimiter == "" then + local result = {} + for i = 1, #s do + result[i] = s:sub(i, i) + end + return result + end + + local result = {} + local start = 1 + local delimiter_len = #delimiter + + while true do + local pos = s:find(delimiter, start, true) + if not pos then + table.insert(result, s:sub(start)) + break + end + table.insert(result, s:sub(start, pos - 1)) + start = pos + delimiter_len + end + + return result end function str.join(arr, separator) if type(arr) ~= "table" then error("str.join: first argument must be a table", 2) end if type(separator) ~= "string" then error("str.join: second argument must be a string", 2) end - return moonshark.string_join(arr, separator) + + return table.concat(arr, separator) end function str.trim(s) @@ -146,13 +172,25 @@ end function str.reverse(s) if type(s) ~= "string" then error("str.reverse: argument must be a string", 2) end - local result, err = moonshark.string_reverse(s) - if not result then error("str.reverse: " .. err, 2) end - return result + + if #s > REVERSE_THRESHOLD then + local result, err = moonshark.string_reverse(s) + if not result then error("str.reverse: " .. err, 2) end + return result + else + local result = {} + for i = #s, 1, -1 do + result[#result + 1] = s:sub(i, i) + end + return table.concat(result) + end end function str.length(s) if type(s) ~= "string" then error("str.length: argument must be a string", 2) end + + -- For long ASCII strings, Go is faster. For unicode or short strings, use Go consistently + -- since UTF-8 handling is more reliable in Go return moonshark.string_length(s) end @@ -225,32 +263,51 @@ function str.slice(s, start, end_pos) end -- ====================================================================== --- REGULAR EXPRESSIONS (Go Functions) +-- REGULAR EXPRESSIONS (Optimized Lua patterns) -- ====================================================================== function str.match(pattern, s) if type(pattern) ~= "string" then error("str.match: first argument must be a string", 2) end if type(s) ~= "string" then error("str.match: second argument must be a string", 2) end - return moonshark.regex_match(pattern, s) + + local lua_pattern = pattern:gsub("\\d", "%%d"):gsub("\\w", "%%w"):gsub("\\s", "%%s") + return s:match(lua_pattern) ~= nil end function str.find(pattern, s) if type(pattern) ~= "string" then error("str.find: first argument must be a string", 2) end if type(s) ~= "string" then error("str.find: second argument must be a string", 2) end - return moonshark.regex_find(pattern, s) + + local lua_pattern = pattern:gsub("\\d", "%%d"):gsub("\\w", "%%w"):gsub("\\s", "%%s") + return s:match(lua_pattern) end function str.find_all(pattern, s) if type(pattern) ~= "string" then error("str.find_all: first argument must be a string", 2) end if type(s) ~= "string" then error("str.find_all: second argument must be a string", 2) end - return moonshark.regex_find_all(pattern, s) + + local lua_pattern = pattern:gsub("\\d", "%%d"):gsub("\\w", "%%w"):gsub("\\s", "%%s") + local matches = {} + for match in s:gmatch(lua_pattern) do + table.insert(matches, match) + end + return matches end function str.gsub(pattern, s, replacement) if type(pattern) ~= "string" then error("str.gsub: first argument must be a string", 2) end if type(s) ~= "string" then error("str.gsub: second argument must be a string", 2) end if type(replacement) ~= "string" then error("str.gsub: third argument must be a string", 2) end - return moonshark.regex_replace(pattern, s, replacement) + + -- Use Go for complex regex, Lua for simple patterns + if pattern:match("[%[%]%(%)%{%}%|%\\%^%$]") then + -- Complex pattern, use Go + return moonshark.regex_replace(pattern, s, replacement) + else + -- Simple pattern, use Lua + local lua_pattern = pattern:gsub("\\d", "%%d"):gsub("\\w", "%%w"):gsub("\\s", "%%s") + return s:gsub(lua_pattern, replacement) + end end -- ====================================================================== @@ -479,9 +536,18 @@ function str.random(length, charset) if charset ~= nil and type(charset) ~= "string" then error("str.random: second argument must be a string", 2) end - local result, err = moonshark.random_string(length, charset) - if not result then error("str.random: " .. err, 2) end - return result + + charset = charset or "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + local result = {} + + math.randomseed(os.time() + os.clock() * 1000000) + + for i = 1, length do + local rand_index = math.random(1, #charset) + result[i] = charset:sub(rand_index, rand_index) + end + + return table.concat(result) end function str.slug(s)