optimize string module
This commit is contained in:
parent
25a44660a4
commit
4ceca8d289
@ -1,111 +1,23 @@
|
|||||||
package string
|
package string
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"math/rand"
|
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
|
|
||||||
luajit "git.sharkk.net/Sky/LuaJIT-to-Go"
|
luajit "git.sharkk.net/Sky/LuaJIT-to-Go"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
|
||||||
maxStringLength = 10_000_000 // 10MB limit for safety
|
|
||||||
maxRandomLength = 100_000 // Reasonable limit for random strings
|
|
||||||
)
|
|
||||||
|
|
||||||
func GetFunctionList() map[string]luajit.GoFunction {
|
func GetFunctionList() map[string]luajit.GoFunction {
|
||||||
return map[string]luajit.GoFunction{
|
return map[string]luajit.GoFunction{
|
||||||
"string_split": string_split,
|
|
||||||
"string_join": string_join,
|
|
||||||
"string_slice": string_slice,
|
"string_slice": string_slice,
|
||||||
"string_reverse": string_reverse,
|
"string_reverse": string_reverse,
|
||||||
"string_length": string_length,
|
"string_length": string_length,
|
||||||
"string_byte_length": string_byte_length,
|
"string_byte_length": string_byte_length,
|
||||||
"regex_match": regex_match,
|
|
||||||
"regex_find": regex_find,
|
|
||||||
"regex_find_all": regex_find_all,
|
|
||||||
"regex_replace": regex_replace,
|
"regex_replace": regex_replace,
|
||||||
"random_string": random_string,
|
|
||||||
"string_is_valid_utf8": string_is_valid_utf8,
|
"string_is_valid_utf8": string_is_valid_utf8,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func string_split(s *luajit.State) int {
|
|
||||||
str := s.ToString(1)
|
|
||||||
sep := s.ToString(2)
|
|
||||||
|
|
||||||
if len(str) > maxStringLength {
|
|
||||||
s.PushNil()
|
|
||||||
s.PushString("string too large")
|
|
||||||
return 2
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle empty separator - split into characters
|
|
||||||
if sep == "" {
|
|
||||||
runes := []rune(str)
|
|
||||||
parts := make([]string, len(runes))
|
|
||||||
for i, r := range runes {
|
|
||||||
parts[i] = string(r)
|
|
||||||
}
|
|
||||||
s.PushValue(parts)
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
|
|
||||||
parts := strings.Split(str, sep)
|
|
||||||
s.PushValue(parts)
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
|
|
||||||
func string_join(s *luajit.State) int {
|
|
||||||
arr, err := s.ToValue(1)
|
|
||||||
if err != nil {
|
|
||||||
s.PushNil()
|
|
||||||
s.PushString("invalid array")
|
|
||||||
return 2
|
|
||||||
}
|
|
||||||
sep := s.ToString(2)
|
|
||||||
|
|
||||||
var parts []string
|
|
||||||
switch v := arr.(type) {
|
|
||||||
case []string:
|
|
||||||
parts = v
|
|
||||||
case []any:
|
|
||||||
parts = make([]string, len(v))
|
|
||||||
for i, val := range v {
|
|
||||||
if val == nil {
|
|
||||||
parts[i] = ""
|
|
||||||
} else {
|
|
||||||
parts[i] = s.ToString(-1) // Convert via Lua
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case map[string]any:
|
|
||||||
// Empty table {} from Lua becomes map[string]any{}
|
|
||||||
if len(v) == 0 {
|
|
||||||
parts = []string{} // Empty array
|
|
||||||
} else {
|
|
||||||
s.PushNil()
|
|
||||||
s.PushString("not an array")
|
|
||||||
return 2
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
s.PushNil()
|
|
||||||
s.PushString("not an array")
|
|
||||||
return 2
|
|
||||||
}
|
|
||||||
|
|
||||||
result := strings.Join(parts, sep)
|
|
||||||
if len(result) > maxStringLength {
|
|
||||||
s.PushNil()
|
|
||||||
s.PushString("result too large")
|
|
||||||
return 2
|
|
||||||
}
|
|
||||||
|
|
||||||
s.PushString(result)
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
|
|
||||||
func string_slice(s *luajit.State) int {
|
func string_slice(s *luajit.State) int {
|
||||||
str := s.ToString(1)
|
str := s.ToString(1)
|
||||||
start := int(s.ToNumber(2))
|
start := int(s.ToNumber(2))
|
||||||
@ -182,58 +94,6 @@ func string_byte_length(s *luajit.State) int {
|
|||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
func regex_match(s *luajit.State) int {
|
|
||||||
pattern := s.ToString(1)
|
|
||||||
str := s.ToString(2)
|
|
||||||
|
|
||||||
re, err := regexp.Compile(pattern)
|
|
||||||
if err != nil {
|
|
||||||
s.PushBoolean(false)
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
|
|
||||||
s.PushBoolean(re.MatchString(str))
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
|
|
||||||
func regex_find(s *luajit.State) int {
|
|
||||||
pattern := s.ToString(1)
|
|
||||||
str := s.ToString(2)
|
|
||||||
|
|
||||||
re, err := regexp.Compile(pattern)
|
|
||||||
if err != nil {
|
|
||||||
s.PushNil()
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
|
|
||||||
match := re.FindString(str)
|
|
||||||
if match == "" {
|
|
||||||
s.PushNil()
|
|
||||||
} else {
|
|
||||||
s.PushString(match)
|
|
||||||
}
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
|
|
||||||
func regex_find_all(s *luajit.State) int {
|
|
||||||
pattern := s.ToString(1)
|
|
||||||
str := s.ToString(2)
|
|
||||||
|
|
||||||
re, err := regexp.Compile(pattern)
|
|
||||||
if err != nil {
|
|
||||||
s.PushValue([]string{})
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
|
|
||||||
matches := re.FindAllString(str, -1)
|
|
||||||
if matches == nil {
|
|
||||||
matches = []string{}
|
|
||||||
}
|
|
||||||
|
|
||||||
s.PushValue(matches)
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
|
|
||||||
func regex_replace(s *luajit.State) int {
|
func regex_replace(s *luajit.State) int {
|
||||||
pattern := s.ToString(1)
|
pattern := s.ToString(1)
|
||||||
str := s.ToString(2)
|
str := s.ToString(2)
|
||||||
@ -250,47 +110,6 @@ func regex_replace(s *luajit.State) int {
|
|||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
func random_string(s *luajit.State) int {
|
|
||||||
length := int(s.ToNumber(1))
|
|
||||||
if length < 0 || length > maxRandomLength {
|
|
||||||
s.PushNil()
|
|
||||||
s.PushString("invalid length")
|
|
||||||
return 2
|
|
||||||
}
|
|
||||||
|
|
||||||
charset := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
|
|
||||||
if s.GetTop() >= 2 && !s.IsNil(2) {
|
|
||||||
charset = s.ToString(2)
|
|
||||||
}
|
|
||||||
|
|
||||||
if length == 0 {
|
|
||||||
s.PushString("")
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
|
|
||||||
if !utf8.ValidString(charset) {
|
|
||||||
s.PushNil()
|
|
||||||
s.PushString("invalid charset")
|
|
||||||
return 2
|
|
||||||
}
|
|
||||||
|
|
||||||
charsetRunes := []rune(charset)
|
|
||||||
if len(charsetRunes) == 0 {
|
|
||||||
s.PushNil()
|
|
||||||
s.PushString("empty charset")
|
|
||||||
return 2
|
|
||||||
}
|
|
||||||
|
|
||||||
result := make([]rune, length)
|
|
||||||
rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
|
|
||||||
for i := range result {
|
|
||||||
result[i] = charsetRunes[rnd.Intn(len(charsetRunes))]
|
|
||||||
}
|
|
||||||
|
|
||||||
s.PushString(string(result))
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
|
|
||||||
func string_is_valid_utf8(s *luajit.State) int {
|
func string_is_valid_utf8(s *luajit.State) int {
|
||||||
str := s.ToString(1)
|
str := s.ToString(1)
|
||||||
s.PushBoolean(utf8.ValidString(str))
|
s.PushBoolean(utf8.ValidString(str))
|
||||||
|
@ -1,21 +1,47 @@
|
|||||||
-- modules/string.lua - Enhanced string manipulation utilities
|
|
||||||
|
|
||||||
local str = {}
|
local str = {}
|
||||||
|
|
||||||
|
-- Performance thresholds based on benchmark results
|
||||||
|
local REVERSE_THRESHOLD = 100 -- Use Go for strings longer than this
|
||||||
|
local LENGTH_THRESHOLD = 1000 -- Use Go for ASCII strings longer than this
|
||||||
|
|
||||||
-- ======================================================================
|
-- ======================================================================
|
||||||
-- BASIC STRING OPERATIONS (Pure Lua)
|
-- BASIC STRING OPERATIONS (Optimized Lua/Go hybrid)
|
||||||
-- ======================================================================
|
-- ======================================================================
|
||||||
|
|
||||||
-- Split a string around every occurrence of a literal delimiter.
-- The delimiter is searched with plain matching, so Lua pattern magic
-- characters in it have no special meaning.
-- An empty delimiter splits the string into individual characters; a
-- multi-byte UTF-8 sequence (a byte followed by continuation bytes
-- 0x80-0xBF) is kept together as one piece instead of being cut into bytes.
--   s          string to split
--   delimiter  literal separator, or "" to split per character
-- Returns a sequence table of the pieces ({} when s and delimiter are both "").
-- Raises an error (level 2) when either argument is not a string.
function str.split(s, delimiter)
	if type(s) ~= "string" then error("str.split: first argument must be a string", 2) end
	if type(delimiter) ~= "string" then error("str.split: second argument must be a string", 2) end

	local result = {}

	if delimiter == "" then
		-- "any byte plus the continuation bytes that follow it": one character
		-- per piece, and no byte of the input is ever dropped.
		for ch in s:gmatch(".[\128-\191]*") do
			result[#result + 1] = ch
		end
		return result
	end

	local start = 1
	local delimiter_len = #delimiter

	while true do
		local pos = s:find(delimiter, start, true)
		if not pos then
			-- No further delimiter: the remainder (possibly "") is the last piece.
			result[#result + 1] = s:sub(start)
			break
		end
		result[#result + 1] = s:sub(start, pos - 1)
		start = pos + delimiter_len
	end

	return result
end
|
||||||
|
|
||||||
-- Concatenate the elements of a sequence table, placing `separator`
-- between consecutive elements.
--   arr        sequence table to join
--   separator  string inserted between elements
-- Returns the joined string. Raises an error (level 2) when an argument
-- has the wrong type; table.concat itself raises if an element is neither
-- a string nor a number.
function str.join(arr, separator)
	if type(arr) ~= "table" then
		error("str.join: first argument must be a table", 2)
	end
	if type(separator) ~= "string" then
		error("str.join: second argument must be a string", 2)
	end

	local joined = table.concat(arr, separator)
	return joined
end
|
||||||
|
|
||||||
function str.trim(s)
|
function str.trim(s)
|
||||||
@ -146,13 +172,25 @@ end
|
|||||||
|
|
||||||
-- Reverse a string.
-- Short, pure-ASCII strings are reversed in Lua. Anything longer than
-- REVERSE_THRESHOLD bytes, or containing a byte >= 0x80, is handed to
-- moonshark.string_reverse: reversing such a string byte by byte in Lua
-- would scramble its multi-byte UTF-8 sequences, and would make the result
-- depend on which side of the threshold the input happens to fall.
-- NOTE(review): assumes moonshark.string_reverse reverses by character, not
-- by byte - its body is not visible here; confirm.
--   s  string to reverse
-- Returns the reversed string. Raises an error (level 2) when s is not a
-- string or when the Go helper reports a failure.
function str.reverse(s)
	if type(s) ~= "string" then error("str.reverse: argument must be a string", 2) end

	if #s > REVERSE_THRESHOLD or s:find("[\128-\255]") then
		local result, err = moonshark.string_reverse(s)
		if not result then error("str.reverse: " .. err, 2) end
		return result
	end

	-- ASCII only: one byte per character, so the built-in byte reversal is exact.
	return s:reverse()
end
|
||||||
|
|
||||||
-- Return the length of a string as reported by moonshark.string_length.
-- NOTE(review): presumably a character (not byte) count, given the separate
-- string_byte_length helper - confirm against the Go implementation.
--   s  string to measure
-- Raises an error (level 2) when s is not a string.
function str.length(s)
	if type(s) ~= "string" then
		error("str.length: argument must be a string", 2)
	end

	-- There is deliberately no pure-Lua fast path: every input, short or
	-- long, ASCII or not, goes through Go so UTF-8 is handled the same way
	-- every time.
	return moonshark.string_length(s)
end
|
||||||
|
|
||||||
@ -225,32 +263,51 @@ function str.slice(s, start, end_pos)
|
|||||||
end
|
end
|
||||||
|
|
||||||
-- ======================================================================
|
-- ======================================================================
|
||||||
-- REGULAR EXPRESSIONS (Go Functions)
|
-- REGULAR EXPRESSIONS (Optimized Lua patterns)
|
||||||
-- ======================================================================
|
-- ======================================================================
|
||||||
|
|
||||||
-- Translate the regex subset accepted by the helpers below into a Lua pattern.
--   * \d \w \s (and \D \W \S) become %d %w %s (%D %W %S)
--   * an escaped punctuation character such as \. or \- becomes %. / %-
--   * a literal % is doubled, because % is the escape character in Lua patterns
--   * a bare - outside a [...] class is escaped; unescaped, Lua would read it
--     as a lazy repetition operator rather than a literal hyphen
-- Everything else is copied through unchanged, so regex features with no Lua
-- equivalent (alternation with |, {n,m} counts, quantified groups, \b) are
-- NOT supported. Note also that %w, unlike regex \w, does not match "_".
local function to_lua_pattern(pattern)
	local out = {}
	local in_class = false
	local i, n = 1, #pattern

	while i <= n do
		local c = pattern:sub(i, i)
		local nxt = pattern:sub(i + 1, i + 1)

		if c == "\\" and (nxt:find("^[dwsDWS]") or nxt:find("^%p")) then
			out[#out + 1] = "%" .. nxt
			i = i + 1
		elseif c == "%" then
			out[#out + 1] = "%%"
		elseif c == "[" and not in_class then
			-- Copy "[", an optional "^" and a leading "]" in one go: a "]" in
			-- that position is a literal member of the class, not its end.
			local head = pattern:match("^%[%^?%]?", i)
			out[#out + 1] = head
			i = i + #head - 1
			in_class = true
		elseif c == "]" and in_class then
			in_class = false
			out[#out + 1] = c
		elseif c == "-" and not in_class then
			out[#out + 1] = "%-"
		else
			out[#out + 1] = c
		end

		i = i + 1
	end

	return table.concat(out)
end

-- Report whether `pattern` matches anywhere in `s`.
--   pattern  regex-style pattern (see to_lua_pattern for the supported subset)
--   s        string to search
-- Returns true or false. A malformed pattern yields false rather than an error.
-- Raises an error (level 2) when an argument is not a string.
function str.match(pattern, s)
	if type(pattern) ~= "string" then error("str.match: first argument must be a string", 2) end
	if type(s) ~= "string" then error("str.match: second argument must be a string", 2) end

	local ok, first = pcall(string.find, s, to_lua_pattern(pattern))
	return ok and first ~= nil
end

-- Return the first substring of `s` matched by `pattern`.
-- The whole match is returned even when the pattern contains capture groups.
--   pattern  regex-style pattern (see to_lua_pattern for the supported subset)
--   s        string to search
-- Returns the matched text, or nil when nothing matches or the pattern is
-- malformed. Raises an error (level 2) when an argument is not a string.
function str.find(pattern, s)
	if type(pattern) ~= "string" then error("str.find: first argument must be a string", 2) end
	if type(s) ~= "string" then error("str.find: second argument must be a string", 2) end

	local ok, first, last = pcall(string.find, s, to_lua_pattern(pattern))
	if not ok or not first then return nil end
	return s:sub(first, last)
end

-- Collect every match of `pattern` in `s`, left to right.
-- When the pattern contains capture groups, each entry is the first capture
-- of that match (string.gmatch semantics) rather than the whole match.
--   pattern  regex-style pattern (see to_lua_pattern for the supported subset)
--   s        string to search
-- Returns a sequence table of matches; {} when nothing matches or the pattern
-- is malformed. Raises an error (level 2) when an argument is not a string.
function str.find_all(pattern, s)
	if type(pattern) ~= "string" then error("str.find_all: first argument must be a string", 2) end
	if type(s) ~= "string" then error("str.find_all: second argument must be a string", 2) end

	local lua_pattern = to_lua_pattern(pattern)
	local matches = {}

	-- gmatch only rejects a malformed pattern once iteration starts, so the
	-- whole loop runs under pcall.
	local ok = pcall(function()
		for match in s:gmatch(lua_pattern) do
			matches[#matches + 1] = match
		end
	end)

	if not ok then return {} end
	return matches
end

-- Replace every match of `pattern` in `s` with `replacement`.
-- Patterns containing any of [ ] ( ) { } | \ ^ $ are handed to the Go regex
-- engine (moonshark.regex_replace). Anything simpler is handled by
-- string.gsub, with the replacement inserted literally.
--   pattern      regex-style pattern
--   s            string to operate on
--   replacement  replacement text
-- Returns the resulting string (the Lua path returns only the string, not
-- the substitution count). Raises an error (level 2) when an argument is not
-- a string.
function str.gsub(pattern, s, replacement)
	if type(pattern) ~= "string" then error("str.gsub: first argument must be a string", 2) end
	if type(s) ~= "string" then error("str.gsub: second argument must be a string", 2) end
	if type(replacement) ~= "string" then error("str.gsub: third argument must be a string", 2) end

	-- Use Go for complex regex, Lua for simple patterns
	if pattern:match("[%[%]%(%)%{%}%|%\\%^%$]") then
		return moonshark.regex_replace(pattern, s, replacement)
	end

	-- % is magic in a string.gsub replacement; double it so the caller's
	-- text is inserted verbatim.
	local lua_replacement = replacement:gsub("%%", "%%%%")
	local replaced = s:gsub(to_lua_pattern(pattern), lua_replacement)
	return replaced
end
|
||||||
|
|
||||||
-- ======================================================================
|
-- ======================================================================
|
||||||
@ -479,9 +536,18 @@ function str.random(length, charset)
|
|||||||
if charset ~= nil and type(charset) ~= "string" then
|
if charset ~= nil and type(charset) ~= "string" then
|
||||||
error("str.random: second argument must be a string", 2)
|
error("str.random: second argument must be a string", 2)
|
||||||
end
|
end
|
||||||
local result, err = moonshark.random_string(length, charset)
|
|
||||||
if not result then error("str.random: " .. err, 2) end
|
charset = charset or "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
|
||||||
return result
|
local result = {}
|
||||||
|
|
||||||
|
math.randomseed(os.time() + os.clock() * 1000000)
|
||||||
|
|
||||||
|
for i = 1, length do
|
||||||
|
local rand_index = math.random(1, #charset)
|
||||||
|
result[i] = charset:sub(rand_index, rand_index)
|
||||||
|
end
|
||||||
|
|
||||||
|
return table.concat(result)
|
||||||
end
|
end
|
||||||
|
|
||||||
function str.slug(s)
|
function str.slug(s)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user