optimize string module
This commit is contained in:
parent
25a44660a4
commit
4ceca8d289
@ -1,111 +1,23 @@
|
||||
package string
|
||||
|
||||
import (
	"fmt"
	"math/rand"
	"regexp"
	"strings"
	"time"
	"unicode/utf8"

	luajit "git.sharkk.net/Sky/LuaJIT-to-Go"
)
|
||||
|
||||
// maxStringLength is the largest string, in bytes, that the helpers in this
// package will accept or produce (10MB limit for safety).
const maxStringLength = 10_000_000

// maxRandomLength is the largest length random_string will generate.
const maxRandomLength = 100_000
|
||||
|
||||
func GetFunctionList() map[string]luajit.GoFunction {
|
||||
return map[string]luajit.GoFunction{
|
||||
"string_split": string_split,
|
||||
"string_join": string_join,
|
||||
"string_slice": string_slice,
|
||||
"string_reverse": string_reverse,
|
||||
"string_length": string_length,
|
||||
"string_byte_length": string_byte_length,
|
||||
"regex_match": regex_match,
|
||||
"regex_find": regex_find,
|
||||
"regex_find_all": regex_find_all,
|
||||
"regex_replace": regex_replace,
|
||||
"random_string": random_string,
|
||||
"string_is_valid_utf8": string_is_valid_utf8,
|
||||
}
|
||||
}
|
||||
|
||||
func string_split(s *luajit.State) int {
|
||||
str := s.ToString(1)
|
||||
sep := s.ToString(2)
|
||||
|
||||
if len(str) > maxStringLength {
|
||||
s.PushNil()
|
||||
s.PushString("string too large")
|
||||
return 2
|
||||
}
|
||||
|
||||
// Handle empty separator - split into characters
|
||||
if sep == "" {
|
||||
runes := []rune(str)
|
||||
parts := make([]string, len(runes))
|
||||
for i, r := range runes {
|
||||
parts[i] = string(r)
|
||||
}
|
||||
s.PushValue(parts)
|
||||
return 1
|
||||
}
|
||||
|
||||
parts := strings.Split(str, sep)
|
||||
s.PushValue(parts)
|
||||
return 1
|
||||
}
|
||||
|
||||
func string_join(s *luajit.State) int {
|
||||
arr, err := s.ToValue(1)
|
||||
if err != nil {
|
||||
s.PushNil()
|
||||
s.PushString("invalid array")
|
||||
return 2
|
||||
}
|
||||
sep := s.ToString(2)
|
||||
|
||||
var parts []string
|
||||
switch v := arr.(type) {
|
||||
case []string:
|
||||
parts = v
|
||||
case []any:
|
||||
parts = make([]string, len(v))
|
||||
for i, val := range v {
|
||||
if val == nil {
|
||||
parts[i] = ""
|
||||
} else {
|
||||
parts[i] = s.ToString(-1) // Convert via Lua
|
||||
}
|
||||
}
|
||||
case map[string]any:
|
||||
// Empty table {} from Lua becomes map[string]any{}
|
||||
if len(v) == 0 {
|
||||
parts = []string{} // Empty array
|
||||
} else {
|
||||
s.PushNil()
|
||||
s.PushString("not an array")
|
||||
return 2
|
||||
}
|
||||
default:
|
||||
s.PushNil()
|
||||
s.PushString("not an array")
|
||||
return 2
|
||||
}
|
||||
|
||||
result := strings.Join(parts, sep)
|
||||
if len(result) > maxStringLength {
|
||||
s.PushNil()
|
||||
s.PushString("result too large")
|
||||
return 2
|
||||
}
|
||||
|
||||
s.PushString(result)
|
||||
return 1
|
||||
}
|
||||
|
||||
func string_slice(s *luajit.State) int {
|
||||
str := s.ToString(1)
|
||||
start := int(s.ToNumber(2))
|
||||
@ -182,58 +94,6 @@ func string_byte_length(s *luajit.State) int {
|
||||
return 1
|
||||
}
|
||||
|
||||
func regex_match(s *luajit.State) int {
|
||||
pattern := s.ToString(1)
|
||||
str := s.ToString(2)
|
||||
|
||||
re, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
s.PushBoolean(false)
|
||||
return 1
|
||||
}
|
||||
|
||||
s.PushBoolean(re.MatchString(str))
|
||||
return 1
|
||||
}
|
||||
|
||||
func regex_find(s *luajit.State) int {
|
||||
pattern := s.ToString(1)
|
||||
str := s.ToString(2)
|
||||
|
||||
re, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
s.PushNil()
|
||||
return 1
|
||||
}
|
||||
|
||||
match := re.FindString(str)
|
||||
if match == "" {
|
||||
s.PushNil()
|
||||
} else {
|
||||
s.PushString(match)
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
||||
func regex_find_all(s *luajit.State) int {
|
||||
pattern := s.ToString(1)
|
||||
str := s.ToString(2)
|
||||
|
||||
re, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
s.PushValue([]string{})
|
||||
return 1
|
||||
}
|
||||
|
||||
matches := re.FindAllString(str, -1)
|
||||
if matches == nil {
|
||||
matches = []string{}
|
||||
}
|
||||
|
||||
s.PushValue(matches)
|
||||
return 1
|
||||
}
|
||||
|
||||
func regex_replace(s *luajit.State) int {
|
||||
pattern := s.ToString(1)
|
||||
str := s.ToString(2)
|
||||
@ -250,47 +110,6 @@ func regex_replace(s *luajit.State) int {
|
||||
return 1
|
||||
}
|
||||
|
||||
func random_string(s *luajit.State) int {
|
||||
length := int(s.ToNumber(1))
|
||||
if length < 0 || length > maxRandomLength {
|
||||
s.PushNil()
|
||||
s.PushString("invalid length")
|
||||
return 2
|
||||
}
|
||||
|
||||
charset := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
|
||||
if s.GetTop() >= 2 && !s.IsNil(2) {
|
||||
charset = s.ToString(2)
|
||||
}
|
||||
|
||||
if length == 0 {
|
||||
s.PushString("")
|
||||
return 1
|
||||
}
|
||||
|
||||
if !utf8.ValidString(charset) {
|
||||
s.PushNil()
|
||||
s.PushString("invalid charset")
|
||||
return 2
|
||||
}
|
||||
|
||||
charsetRunes := []rune(charset)
|
||||
if len(charsetRunes) == 0 {
|
||||
s.PushNil()
|
||||
s.PushString("empty charset")
|
||||
return 2
|
||||
}
|
||||
|
||||
result := make([]rune, length)
|
||||
rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||
for i := range result {
|
||||
result[i] = charsetRunes[rnd.Intn(len(charsetRunes))]
|
||||
}
|
||||
|
||||
s.PushString(string(result))
|
||||
return 1
|
||||
}
|
||||
|
||||
func string_is_valid_utf8(s *luajit.State) int {
|
||||
str := s.ToString(1)
|
||||
s.PushBoolean(utf8.ValidString(str))
|
||||
|
@ -1,21 +1,47 @@
|
||||
-- modules/string.lua - Enhanced string manipulation utilities
|
||||
|
||||
local str = {}

-- Performance thresholds based on benchmark results.

-- Strings longer than this many bytes are reversed by the Go helper.
local REVERSE_THRESHOLD = 100

-- Cut-off above which Go is used for ASCII string length.
-- NOTE(review): not referenced by the functions visible in this section.
local LENGTH_THRESHOLD = 1000
|
||||
|
||||
-- ======================================================================
|
||||
-- BASIC STRING OPERATIONS (Pure Lua)
|
||||
-- BASIC STRING OPERATIONS (Optimized Lua/Go hybrid)
|
||||
-- ======================================================================
|
||||
|
||||
--- Split a string around every occurrence of a delimiter.
-- The delimiter is matched as plain text, never as a Lua pattern. An empty
-- delimiter splits the string into single bytes (not UTF-8 characters).
-- @tparam string s string to split
-- @tparam string delimiter literal separator
-- @treturn table array of pieces; always holds at least one element unless
--   `delimiter` is empty and `s` is empty
function str.split(s, delimiter)
	if type(s) ~= "string" then error("str.split: first argument must be a string", 2) end
	if type(delimiter) ~= "string" then error("str.split: second argument must be a string", 2) end

	-- The earlier unconditional `return moonshark.string_split(...)` is gone:
	-- it made the Lua implementation below unreachable (and a `return` that
	-- is not the last statement of its block does not even parse).
	local result = {}

	if delimiter == "" then
		for i = 1, #s do
			result[i] = s:sub(i, i)
		end
		return result
	end

	local count = 0
	local start = 1
	local delimiter_len = #delimiter

	while true do
		local pos = s:find(delimiter, start, true) -- plain search
		count = count + 1
		if not pos then
			-- No further delimiter: the remainder is the last piece.
			result[count] = s:sub(start)
			break
		end
		result[count] = s:sub(start, pos - 1)
		start = pos + delimiter_len
	end

	return result
end
|
||||
|
||||
--- Concatenate the elements of an array with a separator between them.
-- Implemented with table.concat, so every element must be a string or a
-- number; any other element makes table.concat raise an error.
-- @tparam table arr array of strings/numbers
-- @tparam string separator text inserted between elements
-- @treturn string the joined string ("" for an empty array)
function str.join(arr, separator)
	if type(arr) ~= "table" then error("str.join: first argument must be a table", 2) end
	if type(separator) ~= "string" then error("str.join: second argument must be a string", 2) end

	-- The earlier `return moonshark.string_join(...)` placed before this line
	-- is gone; it left the table.concat call unreachable.
	return table.concat(arr, separator)
end
|
||||
|
||||
function str.trim(s)
|
||||
@ -146,13 +172,25 @@ end
|
||||
|
||||
--- Reverse a string.
-- Short, pure-ASCII strings are reversed in Lua; everything else is passed to
-- moonshark.string_reverse. Raises an error when `s` is not a string or when
-- the Go helper reports a failure.
-- @tparam string s string to reverse
-- @treturn string the reversed string
function str.reverse(s)
	if type(s) ~= "string" then error("str.reverse: argument must be a string", 2) end

	-- Only take the Lua path when every byte is ASCII. Reversing bytes of a
	-- multi-byte sequence would scramble it, and routing such strings to the
	-- Go helper regardless of length keeps the result independent of
	-- REVERSE_THRESHOLD.
	-- NOTE(review): assumes moonshark.string_reverse is UTF-8 aware; confirm.
	if #s <= REVERSE_THRESHOLD and not s:find("[\128-\255]") then
		return string.reverse(s)
	end

	local result, err = moonshark.string_reverse(s)
	if not result then error("str.reverse: " .. tostring(err), 2) end
	return result
end
|
||||
|
||||
--- Return the length of a string as computed by moonshark.string_length.
-- Every input is delegated to the Go helper, whatever its size, so that the
-- counting is done in one place (the original note gives more reliable
-- UTF-8 handling in Go as the reason).
-- @tparam string s string to measure
-- @return whatever moonshark.string_length returns for `s`
function str.length(s)
	if type(s) == "string" then
		return moonshark.string_length(s)
	end
	error("str.length: argument must be a string", 2)
end
|
||||
|
||||
@ -225,32 +263,51 @@ function str.slice(s, start, end_pos)
|
||||
end
|
||||
|
||||
-- ======================================================================
|
||||
-- REGULAR EXPRESSIONS (Go Functions)
|
||||
-- REGULAR EXPRESSIONS (Optimized Lua patterns)
|
||||
-- ======================================================================
|
||||
|
||||
--- Test whether a pattern matches anywhere in a string.
-- The escapes \d, \w and \s are rewritten to %d, %w and %s; the rest of
-- `pattern` is handed to string.match unchanged and is therefore interpreted
-- as a Lua pattern, not a full regular expression.
-- @tparam string pattern pattern to look for
-- @tparam string s string to search
-- @treturn boolean true when the pattern matches
function str.match(pattern, s)
	if type(pattern) ~= "string" then error("str.match: first argument must be a string", 2) end
	if type(s) ~= "string" then error("str.match: second argument must be a string", 2) end

	-- The earlier `return moonshark.regex_match(...)` placed before this code
	-- is gone; it left the Lua implementation unreachable.
	local lua_pattern = pattern:gsub("\\d", "%%d"):gsub("\\w", "%%w"):gsub("\\s", "%%s")
	return s:match(lua_pattern) ~= nil
end
|
||||
|
||||
--- Return the first match of a pattern in a string.
-- The escapes \d, \w and \s are rewritten to %d, %w and %s; the rest of
-- `pattern` is handed to string.match unchanged and is therefore interpreted
-- as a Lua pattern, not a full regular expression.
-- @tparam string pattern pattern to look for
-- @tparam string s string to search
-- @return the result of string.match: the matched text (or the pattern's
--   captures, if it has any), or nil when nothing matches
function str.find(pattern, s)
	if type(pattern) ~= "string" then error("str.find: first argument must be a string", 2) end
	if type(s) ~= "string" then error("str.find: second argument must be a string", 2) end

	-- The earlier `return moonshark.regex_find(...)` placed before this code
	-- is gone; it left the Lua implementation unreachable.
	local lua_pattern = pattern:gsub("\\d", "%%d"):gsub("\\w", "%%w"):gsub("\\s", "%%s")
	return s:match(lua_pattern)
end
|
||||
|
||||
--- Collect every match of a pattern in a string.
-- The escapes \d, \w and \s are rewritten to %d, %w and %s; the rest of
-- `pattern` is handed to string.gmatch unchanged and is therefore interpreted
-- as a Lua pattern, not a full regular expression. When the pattern has
-- captures, only the first capture of each match is collected.
-- @tparam string pattern pattern to look for
-- @tparam string s string to search
-- @treturn table array of matches (empty when nothing matches)
function str.find_all(pattern, s)
	if type(pattern) ~= "string" then error("str.find_all: first argument must be a string", 2) end
	if type(s) ~= "string" then error("str.find_all: second argument must be a string", 2) end

	-- The earlier `return moonshark.regex_find_all(...)` placed before this
	-- code is gone; it left the Lua implementation unreachable.
	local lua_pattern = pattern:gsub("\\d", "%%d"):gsub("\\w", "%%w"):gsub("\\s", "%%s")

	local matches = {}
	for match in s:gmatch(lua_pattern) do
		matches[#matches + 1] = match
	end
	return matches
end
|
||||
|
||||
--- Replace every match of a pattern in a string.
-- Patterns that are plain literal text are replaced directly in Lua; anything
-- that uses regular-expression syntax is delegated to
-- moonshark.regex_replace.
-- @tparam string pattern regular expression (or literal text) to replace
-- @tparam string s string to operate on
-- @tparam string replacement replacement text
-- @treturn string the string with all matches replaced
function str.gsub(pattern, s, replacement)
	if type(pattern) ~= "string" then error("str.gsub: first argument must be a string", 2) end
	if type(s) ~= "string" then error("str.gsub: second argument must be a string", 2) end
	if type(replacement) ~= "string" then error("str.gsub: third argument must be a string", 2) end

	-- Go handles everything that is not plain literal text:
	--   * any regex metacharacter in the pattern (including . * + ?, whose
	--     Lua meaning is only superficially the same),
	--   * the empty pattern,
	--   * a replacement containing "$", which the regex engine may expand.
	local needs_regex = pattern == ""
		or pattern:find("[%.%*%+%?%[%]%(%)%{%}%|%\\%^%$]") ~= nil
		or replacement:find("$", 1, true) ~= nil
	if needs_regex then
		return moonshark.regex_replace(pattern, s, replacement)
	end

	-- Literal fast path. "%" and "-" are ordinary characters in a regex but
	-- magic in Lua patterns, and "%" is magic in Lua replacement strings, so
	-- escape them to keep the literal meaning.
	local lua_pattern = pattern:gsub("[%%%-]", "%%%0")
	local lua_replacement = replacement:gsub("%%", "%%%%")

	-- string.gsub also returns a match count; return only the string so both
	-- paths hand back the same shape.
	-- NOTE(review): assumes moonshark.regex_replace returns a single value.
	local result = s:gsub(lua_pattern, lua_replacement)
	return result
end
|
||||
|
||||
-- ======================================================================
|
||||
@ -479,9 +536,18 @@ function str.random(length, charset)
|
||||
if charset ~= nil and type(charset) ~= "string" then
|
||||
error("str.random: second argument must be a string", 2)
|
||||
end
|
||||
local result, err = moonshark.random_string(length, charset)
|
||||
if not result then error("str.random: " .. err, 2) end
|
||||
return result
|
||||
|
||||
charset = charset or "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
|
||||
local result = {}
|
||||
|
||||
math.randomseed(os.time() + os.clock() * 1000000)
|
||||
|
||||
for i = 1, length do
|
||||
local rand_index = math.random(1, #charset)
|
||||
result[i] = charset:sub(rand_index, rand_index)
|
||||
end
|
||||
|
||||
return table.concat(result)
|
||||
end
|
||||
|
||||
function str.slug(s)
|
||||
|
Loading…
x
Reference in New Issue
Block a user