optimize string module

This commit is contained in:
Sky Johnson 2025-07-17 22:12:57 -05:00
parent 25a44660a4
commit 4ceca8d289
2 changed files with 82 additions and 197 deletions

View File

@ -1,111 +1,23 @@
package string
import (
	"fmt"
	"math/rand"
	"regexp"
	"strings"
	"time"
	"unicode/utf8"

	luajit "git.sharkk.net/Sky/LuaJIT-to-Go"
)
// Size guards applied by the functions in this file before they hand
// results back to Lua.
const (
// maxStringLength is compared against len() of the input in string_split and
// of the joined result in string_join, i.e. it is a byte count.
maxStringLength = 10_000_000 // 10MB limit for safety
// maxRandomLength is the largest length random_string accepts.
maxRandomLength = 100_000 // Reasonable limit for random strings
)
// GetFunctionList returns a map from exported function name to the Go
// implementation defined in this package.
func GetFunctionList() map[string]luajit.GoFunction {
	fns := make(map[string]luajit.GoFunction, 12)

	// Splitting, joining and slicing.
	fns["string_split"] = string_split
	fns["string_join"] = string_join
	fns["string_slice"] = string_slice
	fns["string_reverse"] = string_reverse

	// Length queries.
	fns["string_length"] = string_length
	fns["string_byte_length"] = string_byte_length

	// Regular expressions.
	fns["regex_match"] = regex_match
	fns["regex_find"] = regex_find
	fns["regex_find_all"] = regex_find_all
	fns["regex_replace"] = regex_replace

	// Miscellaneous.
	fns["random_string"] = random_string
	fns["string_is_valid_utf8"] = string_is_valid_utf8

	return fns
}
// string_split splits the string at stack index 1 around the separator at
// index 2 and pushes the resulting slice.
//
// An empty separator yields one element per rune. Inputs longer than
// maxStringLength bytes are rejected with (nil, "string too large").
func string_split(s *luajit.State) int {
	input, delim := s.ToString(1), s.ToString(2)

	if len(input) > maxStringLength {
		s.PushNil()
		s.PushString("string too large")
		return 2
	}

	var pieces []string
	if delim != "" {
		pieces = strings.Split(input, delim)
	} else {
		// Empty separator: emit every rune as its own element.
		chars := []rune(input)
		pieces = make([]string, 0, len(chars))
		for _, ch := range chars {
			pieces = append(pieces, string(ch))
		}
	}

	s.PushValue(pieces)
	return 1
}
// string_join concatenates the elements of the array at stack index 1 using
// the separator at index 2 and pushes the joined string.
//
// On failure it pushes (nil, message):
//   - "invalid array"    – the first argument could not be converted
//   - "not an array"     – the first argument is a non-empty keyed table or
//     some other non-array value
//   - "result too large" – the joined string exceeds maxStringLength bytes
//
// nil elements contribute an empty string; every other element is rendered
// from its own value.
func string_join(s *luajit.State) int {
	arr, err := s.ToValue(1)
	if err != nil {
		s.PushNil()
		s.PushString("invalid array")
		return 2
	}
	sep := s.ToString(2)

	var parts []string
	switch v := arr.(type) {
	case []string:
		parts = v
	case []any:
		parts = make([]string, len(v))
		for i, val := range v {
			// Convert the element itself. The previous code called
			// s.ToString(-1), which reads whatever is on top of the Lua
			// stack and therefore produced the same text for every element.
			switch e := val.(type) {
			case nil:
				parts[i] = ""
			case string:
				parts[i] = e
			case float64:
				// %.14g mirrors Lua's default number formatting, so 3.0
				// renders as "3" and 1234567 does not turn into 1.234567e+06.
				parts[i] = fmt.Sprintf("%.14g", e)
			default:
				parts[i] = fmt.Sprint(e)
			}
		}
	case map[string]any:
		// Empty table {} from Lua becomes map[string]any{}
		if len(v) == 0 {
			parts = []string{} // Empty array
		} else {
			s.PushNil()
			s.PushString("not an array")
			return 2
		}
	default:
		// NOTE(review): if ToValue can also return typed numeric slices
		// (e.g. []float64), they end up here and are rejected — confirm
		// against the binding's conversion rules.
		s.PushNil()
		s.PushString("not an array")
		return 2
	}

	result := strings.Join(parts, sep)
	if len(result) > maxStringLength {
		s.PushNil()
		s.PushString("result too large")
		return 2
	}

	s.PushString(result)
	return 1
}
func string_slice(s *luajit.State) int {
str := s.ToString(1)
start := int(s.ToNumber(2))
@ -182,58 +94,6 @@ func string_byte_length(s *luajit.State) int {
return 1
}
// regex_match pushes true when the pattern at stack index 1 matches anywhere
// in the string at index 2. A pattern that fails to compile is reported as
// "no match" (false) rather than as an error.
func regex_match(s *luajit.State) int {
	expr, subject := s.ToString(1), s.ToString(2)

	matched := false
	if re, err := regexp.Compile(expr); err == nil {
		matched = re.MatchString(subject)
	}

	s.PushBoolean(matched)
	return 1
}
// regex_find pushes the leftmost match of the pattern at stack index 1 in the
// string at index 2, or nil when there is none.
//
// nil is also pushed when the pattern does not compile and when the match is
// the empty string, so callers cannot tell those cases apart.
func regex_find(s *luajit.State) int {
	expr, subject := s.ToString(1), s.ToString(2)

	var found string
	if re, err := regexp.Compile(expr); err == nil {
		found = re.FindString(subject)
	}

	if found == "" {
		s.PushNil()
		return 1
	}
	s.PushString(found)
	return 1
}
// regex_find_all pushes a slice with every non-overlapping match of the
// pattern at stack index 1 in the string at index 2.
//
// The pushed slice is never nil: an uncompilable pattern or a string with no
// matches both produce an empty slice.
func regex_find_all(s *luajit.State) int {
	expr, subject := s.ToString(1), s.ToString(2)

	found := []string{}
	if re, err := regexp.Compile(expr); err == nil {
		if all := re.FindAllString(subject, -1); all != nil {
			found = all
		}
	}

	s.PushValue(found)
	return 1
}
func regex_replace(s *luajit.State) int {
pattern := s.ToString(1)
str := s.ToString(2)
@ -250,47 +110,6 @@ func regex_replace(s *luajit.State) int {
return 1
}
// random_string pushes a string of `length` runes (stack index 1) drawn from
// the charset at index 2, or from [a-zA-Z0-9] when that argument is absent
// or nil.
//
// Failures push (nil, message): "invalid length" when length is negative or
// above maxRandomLength, "invalid charset" when the charset is not valid
// UTF-8, "empty charset" when it contains no runes. A length of zero returns
// "" before the charset is validated.
func random_string(s *luajit.State) int {
	fail := func(msg string) int {
		s.PushNil()
		s.PushString(msg)
		return 2
	}

	length := int(s.ToNumber(1))
	if length < 0 || length > maxRandomLength {
		return fail("invalid length")
	}

	charset := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
	if s.GetTop() >= 2 && !s.IsNil(2) {
		charset = s.ToString(2)
	}

	if length == 0 {
		s.PushString("")
		return 1
	}

	if !utf8.ValidString(charset) {
		return fail("invalid charset")
	}

	alphabet := []rune(charset)
	if len(alphabet) == 0 {
		return fail("empty charset")
	}

	// NOTE(review): a fresh generator is seeded from the clock on every call;
	// calls landing on the same clock reading would produce identical output.
	rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
	out := make([]rune, length)
	for i := 0; i < length; i++ {
		out[i] = alphabet[rnd.Intn(len(alphabet))]
	}

	s.PushString(string(out))
	return 1
}
func string_is_valid_utf8(s *luajit.State) int {
str := s.ToString(1)
s.PushBoolean(utf8.ValidString(str))

View File

@ -1,21 +1,47 @@
-- modules/string.lua - Enhanced string manipulation utilities
-- Module table; the string helpers below are attached to it as fields.
local str = {}
-- Performance thresholds based on benchmark results
-- str.reverse hands strings longer than this many bytes to the Go implementation.
local REVERSE_THRESHOLD = 100 -- Use Go for strings longer than this
-- NOTE(review): not referenced by the code visible here (str.length always
-- calls Go) — confirm it is still needed.
local LENGTH_THRESHOLD = 1000 -- Use Go for ASCII strings longer than this
-- ======================================================================
-- BASIC STRING OPERATIONS (Optimized Lua/Go hybrid)
-- ======================================================================
--- Split a string around a literal delimiter.
-- The delimiter is matched as plain text, so Lua pattern characters in it
-- carry no special meaning. An empty delimiter splits the string into
-- individual UTF-8 characters.
-- @tparam string s          string to split
-- @tparam string delimiter  literal separator ("" means one piece per character)
-- @treturn table            array of pieces; with a non-empty delimiter it
--                           always holds at least one entry
function str.split(s, delimiter)
	if type(s) ~= "string" then error("str.split: first argument must be a string", 2) end
	if type(delimiter) ~= "string" then error("str.split: second argument must be a string", 2) end

	local result = {}
	local count = 0

	if delimiter == "" then
		-- One byte plus any UTF-8 continuation bytes (0x80-0xBF) that follow
		-- it: multi-byte characters stay intact and no byte is ever dropped,
		-- even when the input is not valid UTF-8.
		for ch in s:gmatch(".[\128-\191]*") do
			count = count + 1
			result[count] = ch
		end
		return result
	end

	local start = 1
	local delimiter_len = #delimiter
	while true do
		-- Fourth argument `true` turns pattern matching off (plain find).
		local pos = s:find(delimiter, start, true)
		if not pos then break end
		count = count + 1
		result[count] = s:sub(start, pos - 1)
		start = pos + delimiter_len
	end
	-- Whatever follows the last delimiter (possibly "") is the final piece.
	result[count + 1] = s:sub(start)
	return result
end
--- Concatenate the elements of an array with a separator.
-- Elements must be strings or numbers (the types table.concat accepts).
-- @tparam table arr         array of strings/numbers
-- @tparam string separator  text placed between consecutive elements
-- @treturn string           the joined string ("" for an empty array)
-- @raise if an argument has the wrong type or an element cannot be concatenated
function str.join(arr, separator)
	if type(arr) ~= "table" then error("str.join: first argument must be a table", 2) end
	if type(separator) ~= "string" then error("str.join: second argument must be a string", 2) end

	-- Run table.concat protected so a bad element is reported like every
	-- other str.* failure: prefixed with the function name and attributed to
	-- the caller rather than to this module.
	local ok, result = pcall(table.concat, arr, separator)
	if not ok then error("str.join: " .. tostring(result), 2) end
	return result
end
function str.trim(s)
@ -146,13 +172,25 @@ end
--- Reverse the characters of a string.
-- Strings longer than REVERSE_THRESHOLD bytes are handed to the Go
-- implementation; shorter ones are reversed in Lua.
-- @tparam string s  string to reverse
-- @treturn string   the reversed string
-- @raise if s is not a string or the Go implementation reports an error
function str.reverse(s)
	if type(s) ~= "string" then error("str.reverse: argument must be a string", 2) end

	if #s > REVERSE_THRESHOLD then
		local result, err = moonshark.string_reverse(s)
		if not result then error("str.reverse: " .. err, 2) end
		return result
	end

	-- Pure ASCII: the built-in byte reversal is exact.
	if not s:find("[\128-\255]") then
		return s:reverse()
	end

	-- Multi-byte text: reverse whole UTF-8 characters (a byte plus its
	-- continuation bytes) so the result is not torn into invalid sequences.
	-- NOTE(review): presumably moonshark.string_reverse is character-aware as
	-- well, keeping both paths consistent — confirm against the Go side.
	local chars, n = {}, 0
	for ch in s:gmatch(".[\128-\191]*") do
		n = n + 1
		chars[n] = ch
	end
	local reversed = {}
	for i = n, 1, -1 do
		reversed[n - i + 1] = chars[i]
	end
	return table.concat(reversed)
end
--- Return the length of a string as computed by the Go implementation.
-- @tparam string s  string to measure
-- @return           whatever moonshark.string_length returns for s
-- @raise if s is not a string
function str.length(s)
	if type(s) == "string" then
		-- Always delegate to Go, for short and long strings alike, so UTF-8
		-- handling stays consistent.
		return moonshark.string_length(s)
	end
	error("str.length: argument must be a string", 2)
end
@ -225,32 +263,51 @@ function str.slice(s, start, end_pos)
end
-- ======================================================================
-- REGULAR EXPRESSIONS (Optimized Lua patterns)
-- ======================================================================
-- Regex escapes that denote a control character.
local REGEX_CONTROL_ESCAPES = { n = "\n", r = "\r", t = "\t", f = "\f", v = "\v" }

-- Translate the part of regex syntax that has a direct Lua-pattern equivalent.
--   * \d \D \s \S        -> %d %D %s %S
--   * \w / \W            -> [%w_] / [^%w_] (regex word characters include "_")
--   * \n \r \t \f \v     -> the control character itself
--   * \<punctuation>     -> %<punctuation> (a literal character)
--   * a literal "%"      -> %%  ("%" is the Lua escape character)
--   * "-" outside [...]  -> %-  ("-" is a lazy quantifier in Lua patterns)
-- Constructs Lua patterns cannot express (alternation, {n,m} counts,
-- quantified groups, \b, ...) are passed through unchanged on a best-effort
-- basis and will not behave like their regex counterparts.
local function regex_to_lua_pattern(pattern)
	local out, n = {}, 0
	local in_class = false -- true while scanning inside [...]
	local i, len = 1, #pattern

	while i <= len do
		local c = pattern:sub(i, i)
		local piece = c

		if c == "\\" and i < len then
			local e = pattern:sub(i + 1, i + 1)
			i = i + 1
			if e == "w" then
				piece = in_class and "%w_" or "[%w_]"
			elseif e == "W" then
				-- Inside a class the underscore cannot be excluded; %W is the
				-- closest available approximation.
				piece = in_class and "%W" or "[^%w_]"
			elseif e == "d" or e == "D" or e == "s" or e == "S" then
				piece = "%" .. e
			elseif REGEX_CONTROL_ESCAPES[e] then
				piece = REGEX_CONTROL_ESCAPES[e]
			elseif e:find("^%p$") then
				piece = "%" .. e
			else
				piece = "\\" .. e -- unsupported escape: left untouched
			end
		elseif c == "%" then
			piece = "%%"
		elseif in_class then
			if c == "]" then in_class = false end
		elseif c == "-" then
			piece = "%-"
		elseif c == "[" then
			in_class = true
			-- A leading "^" and a "]" right after the opener belong to the
			-- class itself and must not be read as its end.
			if pattern:sub(i + 1, i + 1) == "^" then
				piece = "[^"
				i = i + 1
			end
			if pattern:sub(i + 1, i + 1) == "]" then
				piece = piece .. "]"
				i = i + 1
			end
		end

		n = n + 1
		out[n] = piece
		i = i + 1
	end

	return table.concat(out)
end

--- Test whether a pattern matches anywhere in a string.
-- @tparam string pattern  regex (only the subset translatable to Lua patterns)
-- @tparam string s        string to search
-- @treturn boolean        true when a match exists
function str.match(pattern, s)
	if type(pattern) ~= "string" then error("str.match: first argument must be a string", 2) end
	if type(s) ~= "string" then error("str.match: second argument must be a string", 2) end
	return s:find(regex_to_lua_pattern(pattern)) ~= nil
end

--- Return the first match of a pattern in a string.
-- The whole matched text is returned even when the pattern contains groups.
-- @tparam string pattern  regex (only the subset translatable to Lua patterns)
-- @tparam string s        string to search
-- @treturn ?string        the matched text, or nil when nothing matches
function str.find(pattern, s)
	if type(pattern) ~= "string" then error("str.find: first argument must be a string", 2) end
	if type(s) ~= "string" then error("str.find: second argument must be a string", 2) end
	-- string.match would return the captures instead of the full match as
	-- soon as the pattern contains "(...)"; find + sub always yields the match.
	local first, last = s:find(regex_to_lua_pattern(pattern))
	if not first then return nil end
	return s:sub(first, last)
end

--- Collect every non-overlapping match of a pattern in a string.
-- Each entry is the whole matched text, regardless of groups in the pattern.
-- @tparam string pattern  regex (only the subset translatable to Lua patterns)
-- @tparam string s        string to search
-- @treturn table          array of matches (empty when nothing matches)
function str.find_all(pattern, s)
	if type(pattern) ~= "string" then error("str.find_all: first argument must be a string", 2) end
	if type(s) ~= "string" then error("str.find_all: second argument must be a string", 2) end

	local lua_pattern = regex_to_lua_pattern(pattern)
	-- string.find anchors "^" at `init`, so an anchored pattern has to be
	-- tried exactly once or it would match again at every restart position.
	local anchored = lua_pattern:sub(1, 1) == "^"

	local matches, count = {}, 0
	local init, limit = 1, #s + 1
	while init <= limit do
		local first, last = s:find(lua_pattern, init)
		if not first then break end
		count = count + 1
		matches[count] = s:sub(first, last)
		if anchored then break end
		-- An empty match (last < first) must still move forward.
		if last >= first then
			init = last + 1
		else
			init = first + 1
		end
	end
	return matches
end

--- Replace every match of a pattern in a string.
-- Patterns with groups, classes, anchors, alternation or escapes, and
-- replacements containing "$", are handled by the Go regex engine; anything
-- else is replaced in Lua with the replacement text taken literally.
-- @tparam string pattern      regex
-- @tparam string s            string to operate on
-- @tparam string replacement  replacement text
-- @return the resulting string (a single value on the Lua path; the Go path
--         returns whatever moonshark.regex_replace returns)
function str.gsub(pattern, s, replacement)
	if type(pattern) ~= "string" then error("str.gsub: first argument must be a string", 2) end
	if type(s) ~= "string" then error("str.gsub: second argument must be a string", 2) end
	if type(replacement) ~= "string" then error("str.gsub: third argument must be a string", 2) end

	-- Use Go for complex regex, and whenever the replacement contains "$",
	-- so group-reference expansion does not depend on which engine runs.
	if pattern:find("[%[%]%(%)%{%}%|%\\%^%$]") or replacement:find("$", 1, true) then
		return moonshark.regex_replace(pattern, s, replacement)
	end

	-- "%" is special in Lua replacement strings; escape it so the text is
	-- inserted verbatim. Assigned to a local first so the substitution count
	-- returned by gsub is not passed on as an extra argument.
	local literal_replacement = replacement:gsub("%%", "%%%%")
	-- Keep only the string; string.gsub also returns the match count.
	local result = s:gsub(regex_to_lua_pattern(pattern), literal_replacement)
	return result
end
-- ======================================================================
@ -479,9 +536,18 @@ function str.random(length, charset)
if charset ~= nil and type(charset) ~= "string" then
error("str.random: second argument must be a string", 2)
end
local result, err = moonshark.random_string(length, charset)
if not result then error("str.random: " .. err, 2) end
return result
charset = charset or "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
local result = {}
math.randomseed(os.time() + os.clock() * 1000000)
for i = 1, length do
local rand_index = math.random(1, #charset)
result[i] = charset:sub(rand_index, rand_index)
end
return table.concat(result)
end
function str.slug(s)