Moonshark/functions/string.go
2025-07-16 20:54:15 -05:00

895 lines
23 KiB
Go

package functions
import (
"fmt"
"math/rand"
"regexp"
"strconv"
"strings"
"time"
"unicode"
"unicode/utf8"
luajit "git.sharkk.net/Sky/LuaJIT-to-Go"
"golang.org/x/text/cases"
"golang.org/x/text/language"
)
// Safety limits shared by the string helpers in this file.
const (
// maxStringLength is the largest byte length validateStringLength accepts.
maxStringLength = 10_000_000 // 10MB limit for safety
// maxRepeatCount bounds the output of string_repeat (compared against the
// repeat count times the byte length of the input) and the number of pad
// characters the pad helpers will generate.
maxRepeatCount = 1_000_000 // Prevent excessive memory usage
// maxRandomLength is the largest length random_string will generate.
maxRandomLength = 100_000 // Reasonable limit for random strings
)
// validateStringLength reports an error when s is longer than
// maxStringLength bytes and returns nil otherwise.
func validateStringLength(s string) error {
	if len(s) <= maxStringLength {
		return nil
	}
	return fmt.Errorf("string too large (max %d bytes)", maxStringLength)
}
// GetStringFunctions returns the string, regex and random-string helpers as a
// map from function name to its luajit.GoFunction implementation.
// NOTE(review): presumably the caller registers these names into a Lua state;
// the registration itself is not visible in this file.
func GetStringFunctions() map[string]luajit.GoFunction {
return map[string]luajit.GoFunction{
"string_split": func(s *luajit.State) int {
if err := s.CheckExactArgs(2); err != nil {
return s.PushError("string_split: %v", err)
}
str, err := s.SafeToString(1)
if err != nil {
return s.PushError("string_split: first argument must be a string")
}
sep, err := s.SafeToString(2)
if err != nil {
return s.PushError("string_split: second argument must be a string")
}
if err := validateStringLength(str); err != nil {
return s.PushError("string_split: %v", err)
}
// Handle empty separator - split into characters
if sep == "" {
runes := []rune(str)
parts := make([]string, len(runes))
for i, r := range runes {
parts[i] = string(r)
}
if err := s.PushValue(parts); err != nil {
return s.PushError("string_split: failed to push result: %v", err)
}
return 1
}
parts := strings.Split(str, sep)
if err := s.PushValue(parts); err != nil {
return s.PushError("string_split: failed to push result: %v", err)
}
return 1
},
// string_join(array, sep) pushes the elements of array concatenated with sep
// between them.
//
// Elements are stringified as follows: strings are used verbatim, nil
// becomes "", float64 values are formatted like Lua's tostring ("%.14g", so
// 1000000 stays "1000000" rather than Go's "1e+06"), and anything else goes
// through fmt's %v. A non-empty map is rejected; an empty map is treated as
// an empty array. The joined result must not exceed maxStringLength.
"string_join": func(s *luajit.State) int {
	if err := s.CheckExactArgs(2); err != nil {
		return s.PushError("string_join: %v", err)
	}
	arr, err := s.SafeToTable(1)
	if err != nil {
		return s.PushError("string_join: first argument must be a table")
	}
	sep, err := s.SafeToString(2)
	if err != nil {
		return s.PushError("string_join: second argument must be a string")
	}

	// Match Lua's number-to-string conversion (14 significant digits,
	// trailing zeros dropped) instead of Go's shortest-repr %v.
	formatNumber := func(f float64) string {
		return strconv.FormatFloat(f, 'g', 14, 64)
	}

	var parts []string
	switch v := arr.(type) {
	case []string:
		parts = v
	// NOTE(review): assumes SafeToTable may surface homogeneous numeric or
	// boolean arrays as typed slices, as it visibly does for []string;
	// confirm against the binding. These cases previously fell through to
	// the "must be an array" error.
	case []float64:
		parts = make([]string, len(v))
		for i, f := range v {
			parts[i] = formatNumber(f)
		}
	case []int:
		parts = make([]string, len(v))
		for i, n := range v {
			parts[i] = strconv.Itoa(n)
		}
	case []bool:
		parts = make([]string, len(v))
		for i, b := range v {
			parts[i] = strconv.FormatBool(b)
		}
	case []any:
		parts = make([]string, len(v))
		for i, val := range v {
			switch elem := val.(type) {
			case nil:
				parts[i] = ""
			case string:
				parts[i] = elem
			case float64:
				parts[i] = formatNumber(elem)
			default:
				parts[i] = fmt.Sprintf("%v", elem)
			}
		}
	case map[string]any:
		// An empty table cannot be told apart from an empty array.
		if len(v) != 0 {
			return s.PushError("string_join: first argument must be an array, not a map")
		}
		parts = []string{}
	default:
		return s.PushError("string_join: first argument must be an array")
	}

	result := strings.Join(parts, sep)
	if err := validateStringLength(result); err != nil {
		return s.PushError("string_join: result %v", err)
	}
	s.PushString(result)
	return 1
},
// string_trim(str) pushes str with leading and trailing white space removed
// (strings.TrimSpace semantics).
"string_trim": func(s *luajit.State) int {
	if argErr := s.CheckMinArgs(1); argErr != nil {
		return s.PushError("string_trim: %v", argErr)
	}
	input, convErr := s.SafeToString(1)
	if convErr != nil {
		return s.PushError("string_trim: argument must be a string")
	}
	trimmed := strings.TrimSpace(input)
	s.PushString(trimmed)
	return 1
},
// string_trim_left(str [, cutset]) pushes str with leading characters
// removed: every leading rune contained in cutset when it is given and
// non-nil, otherwise leading Unicode white space.
"string_trim_left": func(s *luajit.State) int {
	if argErr := s.CheckMinArgs(1); argErr != nil {
		return s.PushError("string_trim_left: %v", argErr)
	}
	input, inputErr := s.SafeToString(1)
	if inputErr != nil {
		return s.PushError("string_trim_left: first argument must be a string")
	}

	// No cutset supplied: strip white space only.
	if s.GetTop() < 2 || s.IsNil(2) {
		s.PushString(strings.TrimLeftFunc(input, unicode.IsSpace))
		return 1
	}

	chars, charsErr := s.SafeToString(2)
	if charsErr != nil {
		return s.PushError("string_trim_left: second argument must be a string")
	}
	s.PushString(strings.TrimLeft(input, chars))
	return 1
},
// string_trim_right(str [, cutset]) pushes str with trailing characters
// removed: every trailing rune contained in cutset when it is given and
// non-nil, otherwise trailing Unicode white space.
"string_trim_right": func(s *luajit.State) int {
	if argErr := s.CheckMinArgs(1); argErr != nil {
		return s.PushError("string_trim_right: %v", argErr)
	}
	input, inputErr := s.SafeToString(1)
	if inputErr != nil {
		return s.PushError("string_trim_right: first argument must be a string")
	}

	// No cutset supplied: strip white space only.
	if s.GetTop() < 2 || s.IsNil(2) {
		s.PushString(strings.TrimRightFunc(input, unicode.IsSpace))
		return 1
	}

	chars, charsErr := s.SafeToString(2)
	if charsErr != nil {
		return s.PushError("string_trim_right: second argument must be a string")
	}
	s.PushString(strings.TrimRight(input, chars))
	return 1
},
// string_upper(str) pushes str mapped to upper case via strings.ToUpper.
"string_upper": func(s *luajit.State) int {
	if argErr := s.CheckMinArgs(1); argErr != nil {
		return s.PushError("string_upper: %v", argErr)
	}
	input, convErr := s.SafeToString(1)
	if convErr != nil {
		return s.PushError("string_upper: argument must be a string")
	}
	upper := strings.ToUpper(input)
	s.PushString(upper)
	return 1
},
// string_lower(str) pushes str mapped to lower case via strings.ToLower.
"string_lower": func(s *luajit.State) int {
	if argErr := s.CheckMinArgs(1); argErr != nil {
		return s.PushError("string_lower: %v", argErr)
	}
	input, convErr := s.SafeToString(1)
	if convErr != nil {
		return s.PushError("string_lower: argument must be a string")
	}
	lower := strings.ToLower(input)
	s.PushString(lower)
	return 1
},
// string_title(str) pushes str title-cased with the English caser from
// golang.org/x/text/cases, configured with cases.NoLower.
"string_title": func(s *luajit.State) int {
	if argErr := s.CheckMinArgs(1); argErr != nil {
		return s.PushError("string_title: %v", argErr)
	}
	input, convErr := s.SafeToString(1)
	if convErr != nil {
		return s.PushError("string_title: argument must be a string")
	}
	titled := cases.Title(language.English, cases.NoLower).String(input)
	s.PushString(titled)
	return 1
},
// string_contains(str, substr) pushes true when substr occurs within str.
"string_contains": func(s *luajit.State) int {
	if argErr := s.CheckExactArgs(2); argErr != nil {
		return s.PushError("string_contains: %v", argErr)
	}
	haystack, haystackErr := s.SafeToString(1)
	if haystackErr != nil {
		return s.PushError("string_contains: first argument must be a string")
	}
	needle, needleErr := s.SafeToString(2)
	if needleErr != nil {
		return s.PushError("string_contains: second argument must be a string")
	}
	found := strings.Contains(haystack, needle)
	s.PushBoolean(found)
	return 1
},
// string_starts_with(str, prefix) pushes true when str begins with prefix.
"string_starts_with": func(s *luajit.State) int {
	if argErr := s.CheckExactArgs(2); argErr != nil {
		return s.PushError("string_starts_with: %v", argErr)
	}
	subject, subjectErr := s.SafeToString(1)
	if subjectErr != nil {
		return s.PushError("string_starts_with: first argument must be a string")
	}
	head, headErr := s.SafeToString(2)
	if headErr != nil {
		return s.PushError("string_starts_with: second argument must be a string")
	}
	matched := strings.HasPrefix(subject, head)
	s.PushBoolean(matched)
	return 1
},
// string_ends_with(str, suffix) pushes true when str ends with suffix.
"string_ends_with": func(s *luajit.State) int {
	if argErr := s.CheckExactArgs(2); argErr != nil {
		return s.PushError("string_ends_with: %v", argErr)
	}
	subject, subjectErr := s.SafeToString(1)
	if subjectErr != nil {
		return s.PushError("string_ends_with: first argument must be a string")
	}
	tail, tailErr := s.SafeToString(2)
	if tailErr != nil {
		return s.PushError("string_ends_with: second argument must be a string")
	}
	matched := strings.HasSuffix(subject, tail)
	s.PushBoolean(matched)
	return 1
},
// string_replace(str, old, new) pushes str with every occurrence of old
// replaced by new. An empty old is rejected, as is a result longer than
// maxStringLength.
"string_replace": func(s *luajit.State) int {
	if argErr := s.CheckExactArgs(3); argErr != nil {
		return s.PushError("string_replace: %v", argErr)
	}
	subject, subjectErr := s.SafeToString(1)
	if subjectErr != nil {
		return s.PushError("string_replace: first argument must be a string")
	}
	target, targetErr := s.SafeToString(2)
	if targetErr != nil {
		return s.PushError("string_replace: second argument must be a string")
	}
	repl, replErr := s.SafeToString(3)
	if replErr != nil {
		return s.PushError("string_replace: third argument must be a string")
	}
	if len(target) == 0 {
		return s.PushError("string_replace: cannot replace empty string")
	}

	replaced := strings.ReplaceAll(subject, target, repl)
	if sizeErr := validateStringLength(replaced); sizeErr != nil {
		return s.PushError("string_replace: result %v", sizeErr)
	}
	s.PushString(replaced)
	return 1
},
// string_replace_n(str, old, new, n) pushes str with the first n
// occurrences of old replaced by new. n must be a non-negative integer
// (n == 0 leaves str unchanged) and old must not be empty. As in
// string_replace, a result longer than maxStringLength is rejected.
"string_replace_n": func(s *luajit.State) int {
	if err := s.CheckExactArgs(4); err != nil {
		return s.PushError("string_replace_n: %v", err)
	}
	str, err := s.SafeToString(1)
	if err != nil {
		return s.PushError("string_replace_n: first argument must be a string")
	}
	old, err := s.SafeToString(2)
	if err != nil {
		return s.PushError("string_replace_n: second argument must be a string")
	}
	replacement, err := s.SafeToString(3)
	if err != nil {
		return s.PushError("string_replace_n: third argument must be a string")
	}
	n, err := s.SafeToNumber(4)
	if err != nil || n != float64(int(n)) || n < 0 {
		return s.PushError("string_replace_n: fourth argument must be a non-negative integer")
	}
	if old == "" {
		return s.PushError("string_replace_n: cannot replace empty string")
	}

	result := strings.Replace(str, old, replacement, int(n))
	// Replacements can grow the string; apply the same size guard that
	// string_replace uses.
	if err := validateStringLength(result); err != nil {
		return s.PushError("string_replace_n: result %v", err)
	}
	s.PushString(result)
	return 1
},
// string_index(str, substr) pushes the 1-based position of the first
// occurrence of substr in str, or 0 when it does not occur. An empty substr
// is reported at position 1.
//
// The position is counted in runes, not bytes, so it agrees with
// string_length and can be passed directly to string_slice even when str
// contains multi-byte characters.
"string_index": func(s *luajit.State) int {
	if err := s.CheckExactArgs(2); err != nil {
		return s.PushError("string_index: %v", err)
	}
	str, err := s.SafeToString(1)
	if err != nil {
		return s.PushError("string_index: first argument must be a string")
	}
	substr, err := s.SafeToString(2)
	if err != nil {
		return s.PushError("string_index: second argument must be a string")
	}
	if substr == "" {
		s.PushNumber(1) // Empty string found at position 1
		return 1
	}

	byteIdx := strings.Index(str, substr)
	if byteIdx < 0 {
		s.PushNumber(0) // Not found
		return 1
	}
	// Convert the byte offset to a rune offset, then to 1-indexed.
	runeIdx := utf8.RuneCountInString(str[:byteIdx])
	s.PushNumber(float64(runeIdx + 1))
	return 1
},
// string_last_index(str, substr) pushes the 1-based position of the last
// occurrence of substr in str, or 0 when it does not occur. An empty substr
// is reported one past the final rune.
//
// The position is counted in runes in every branch, so the empty-substr
// result and the regular result use the same unit and agree with
// string_length and string_slice.
"string_last_index": func(s *luajit.State) int {
	if err := s.CheckExactArgs(2); err != nil {
		return s.PushError("string_last_index: %v", err)
	}
	str, err := s.SafeToString(1)
	if err != nil {
		return s.PushError("string_last_index: first argument must be a string")
	}
	substr, err := s.SafeToString(2)
	if err != nil {
		return s.PushError("string_last_index: second argument must be a string")
	}
	if substr == "" {
		s.PushNumber(float64(utf8.RuneCountInString(str) + 1)) // Empty string at end
		return 1
	}

	byteIdx := strings.LastIndex(str, substr)
	if byteIdx < 0 {
		s.PushNumber(0) // Not found
		return 1
	}
	// Convert the byte offset to a rune offset, then to 1-indexed.
	runeIdx := utf8.RuneCountInString(str[:byteIdx])
	s.PushNumber(float64(runeIdx + 1))
	return 1
},
"string_count": func(s *luajit.State) int {
if err := s.CheckExactArgs(2); err != nil {
return s.PushError("string_count: %v", err)
}
str, err := s.SafeToString(1)
if err != nil {
return s.PushError("string_count: first argument must be a string")
}
substr, err := s.SafeToString(2)
if err != nil {
return s.PushError("string_count: second argument must be a string")
}
if substr == "" {
// Empty string matches at every position including boundaries
s.PushNumber(float64(utf8.RuneCountInString(str) + 1))
return 1
}
count := strings.Count(str, substr)
s.PushNumber(float64(count))
return 1
},
// string_repeat(str, count) pushes str concatenated count times. count must
// be a non-negative integer, and the call fails when count exceeds
// maxRepeatCount divided by the byte length of str.
"string_repeat": func(s *luajit.State) int {
	if argErr := s.CheckExactArgs(2); argErr != nil {
		return s.PushError("string_repeat: %v", argErr)
	}
	input, inputErr := s.SafeToString(1)
	if inputErr != nil {
		return s.PushError("string_repeat: first argument must be a string")
	}
	times, timesErr := s.SafeToNumber(2)
	if timesErr != nil || times != float64(int(times)) || times < 0 {
		return s.PushError("string_repeat: second argument must be a non-negative integer")
	}

	reps := int(times)
	switch {
	case reps == 0:
		s.PushString("")
	case len(input) > 0 && reps > maxRepeatCount/len(input):
		// Dividing instead of multiplying keeps the size check overflow-free.
		return s.PushError("string_repeat: result would be too large")
	default:
		s.PushString(strings.Repeat(input, reps))
	}
	return 1
},
// string_reverse(str) pushes str with its runes in reverse order. Input
// that is not valid UTF-8 is rejected.
"string_reverse": func(s *luajit.State) int {
	if argErr := s.CheckMinArgs(1); argErr != nil {
		return s.PushError("string_reverse: %v", argErr)
	}
	input, convErr := s.SafeToString(1)
	if convErr != nil {
		return s.PushError("string_reverse: argument must be a string")
	}
	if !utf8.ValidString(input) {
		return s.PushError("string_reverse: invalid UTF-8 string")
	}

	// Copy each rune into its mirrored slot of a fresh slice.
	forward := []rune(input)
	last := len(forward) - 1
	backward := make([]rune, len(forward))
	for i, r := range forward {
		backward[last-i] = r
	}
	s.PushString(string(backward))
	return 1
},
// string_length(str) pushes the number of runes (not bytes) in str.
"string_length": func(s *luajit.State) int {
	if argErr := s.CheckMinArgs(1); argErr != nil {
		return s.PushError("string_length: %v", argErr)
	}
	input, convErr := s.SafeToString(1)
	if convErr != nil {
		return s.PushError("string_length: argument must be a string")
	}
	runeCount := utf8.RuneCountInString(input)
	s.PushNumber(float64(runeCount))
	return 1
},
// string_byte_length(str) pushes the length of str in bytes.
"string_byte_length": func(s *luajit.State) int {
	if argErr := s.CheckMinArgs(1); argErr != nil {
		return s.PushError("string_byte_length: %v", argErr)
	}
	input, convErr := s.SafeToString(1)
	if convErr != nil {
		return s.PushError("string_byte_length: argument must be a string")
	}
	byteCount := len(input)
	s.PushNumber(float64(byteCount))
	return 1
},
// string_lines(str) pushes an array of the lines of str. "\r\n" and lone
// "\r" are both treated as line breaks. A trailing line break produces a
// final empty element, and an empty input produces a single empty element.
"string_lines": func(s *luajit.State) int {
	if argErr := s.CheckMinArgs(1); argErr != nil {
		return s.PushError("string_lines: %v", argErr)
	}
	input, convErr := s.SafeToString(1)
	if convErr != nil {
		return s.PushError("string_lines: argument must be a string")
	}

	// Normalize CRLF first so it is not turned into two line breaks by the
	// lone-CR pass.
	normalized := strings.ReplaceAll(strings.ReplaceAll(input, "\r\n", "\n"), "\r", "\n")
	rows := strings.Split(normalized, "\n")

	if pushErr := s.PushValue(rows); pushErr != nil {
		return s.PushError("string_lines: failed to push result: %v", pushErr)
	}
	return 1
},
// string_words(str) pushes an array of the white-space-separated fields of
// str (strings.Fields semantics).
"string_words": func(s *luajit.State) int {
	if argErr := s.CheckMinArgs(1); argErr != nil {
		return s.PushError("string_words: %v", argErr)
	}
	input, convErr := s.SafeToString(1)
	if convErr != nil {
		return s.PushError("string_words: argument must be a string")
	}
	if pushErr := s.PushValue(strings.Fields(input)); pushErr != nil {
		return s.PushError("string_words: failed to push result: %v", pushErr)
	}
	return 1
},
// string_pad_left(str, width [, pad]) pushes str left-padded to width runes.
// The pad character is the first rune of pad; it defaults to a space when
// pad is absent, nil, empty, or not convertible to a string. Strings already
// at least width runes long are returned unchanged, and more than
// maxRepeatCount pad characters is an error.
"string_pad_left": func(s *luajit.State) int {
	if argErr := s.CheckMinArgs(2); argErr != nil {
		return s.PushError("string_pad_left: %v", argErr)
	}
	input, inputErr := s.SafeToString(1)
	if inputErr != nil {
		return s.PushError("string_pad_left: first argument must be a string")
	}
	widthNum, widthErr := s.SafeToNumber(2)
	if widthErr != nil || widthNum < 0 || widthNum != float64(int(widthNum)) {
		return s.PushError("string_pad_left: second argument must be a non-negative integer")
	}

	fill := " "
	if s.GetTop() >= 3 && !s.IsNil(3) {
		// An unusable third argument silently keeps the default fill.
		if custom, customErr := s.SafeToString(3); customErr == nil && custom != "" {
			first, _ := utf8.DecodeRuneInString(custom)
			fill = string(first)
		}
	}

	missing := int(widthNum) - utf8.RuneCountInString(input)
	if missing <= 0 {
		s.PushString(input)
		return 1
	}
	if missing > maxRepeatCount {
		return s.PushError("string_pad_left: padding too large")
	}
	s.PushString(strings.Repeat(fill, missing) + input)
	return 1
},
// string_pad_right(str, width [, pad]) pushes str right-padded to width
// runes. The pad character is the first rune of pad; it defaults to a space
// when pad is absent, nil, empty, or not convertible to a string. Strings
// already at least width runes long are returned unchanged, and more than
// maxRepeatCount pad characters is an error.
"string_pad_right": func(s *luajit.State) int {
	if argErr := s.CheckMinArgs(2); argErr != nil {
		return s.PushError("string_pad_right: %v", argErr)
	}
	input, inputErr := s.SafeToString(1)
	if inputErr != nil {
		return s.PushError("string_pad_right: first argument must be a string")
	}
	widthNum, widthErr := s.SafeToNumber(2)
	if widthErr != nil || widthNum < 0 || widthNum != float64(int(widthNum)) {
		return s.PushError("string_pad_right: second argument must be a non-negative integer")
	}

	fill := " "
	if s.GetTop() >= 3 && !s.IsNil(3) {
		// An unusable third argument silently keeps the default fill.
		if custom, customErr := s.SafeToString(3); customErr == nil && custom != "" {
			first, _ := utf8.DecodeRuneInString(custom)
			fill = string(first)
		}
	}

	missing := int(widthNum) - utf8.RuneCountInString(input)
	if missing <= 0 {
		s.PushString(input)
		return 1
	}
	if missing > maxRepeatCount {
		return s.PushError("string_pad_right: padding too large")
	}
	s.PushString(input + strings.Repeat(fill, missing))
	return 1
},
"string_slice": func(s *luajit.State) int {
if err := s.CheckMinArgs(2); err != nil {
return s.PushError("string_slice: %v", err)
}
str, err := s.SafeToString(1)
if err != nil {
return s.PushError("string_slice: first argument must be a string")
}
start, err := s.SafeToNumber(2)
if err != nil || start != float64(int(start)) {
return s.PushError("string_slice: second argument must be an integer")
}
if !utf8.ValidString(str) {
return s.PushError("string_slice: invalid UTF-8 string")
}
runes := []rune(str)
length := len(runes)
startIdx := int(start) - 1 // Convert from 1-indexed to 0-indexed
// Handle negative start index
if startIdx < 0 {
startIdx = 0
}
if startIdx >= length {
s.PushString("")
return 1
}
endIdx := length
if s.GetTop() >= 3 && !s.IsNil(3) {
end, err := s.SafeToNumber(3)
if err == nil && end == float64(int(end)) {
endIdx = int(end)
// Handle negative end index (from end of string)
if endIdx < 0 {
endIdx = length + endIdx + 1
}
if endIdx < 0 {
endIdx = 0
}
if endIdx > length {
endIdx = length
}
}
}
if startIdx >= endIdx {
s.PushString("")
return 1
}
s.PushString(string(runes[startIdx:endIdx]))
return 1
},
// regex_match(pattern, str) pushes true when pattern matches anywhere in
// str. A pattern that fails to compile is reported as false, not an error.
"regex_match": func(s *luajit.State) int {
	if argErr := s.CheckExactArgs(2); argErr != nil {
		return s.PushError("regex_match: %v", argErr)
	}
	expr, exprErr := s.SafeToString(1)
	if exprErr != nil {
		return s.PushError("regex_match: first argument must be a string")
	}
	subject, subjectErr := s.SafeToString(2)
	if subjectErr != nil {
		return s.PushError("regex_match: second argument must be a string")
	}

	re, compileErr := regexp.Compile(expr)
	// Short-circuit keeps the nil regexp from being used on a bad pattern.
	matched := compileErr == nil && re.MatchString(subject)
	s.PushBoolean(matched)
	return 1
},
// regex_find(pattern, str) pushes the text of the leftmost match of pattern
// in str, or nil when there is no match or the pattern does not compile.
//
// A pattern that matches the empty string (for example "a*" against "bbb")
// is a successful match and yields "", not nil; this keeps the result in
// agreement with regex_match for the same arguments.
"regex_find": func(s *luajit.State) int {
	if err := s.CheckExactArgs(2); err != nil {
		return s.PushError("regex_find: %v", err)
	}
	pattern, err := s.SafeToString(1)
	if err != nil {
		return s.PushError("regex_find: first argument must be a string")
	}
	str, err := s.SafeToString(2)
	if err != nil {
		return s.PushError("regex_find: second argument must be a string")
	}
	re, err := regexp.Compile(pattern)
	if err != nil {
		s.PushNil()
		return 1
	}

	// FindString cannot distinguish "no match" from "matched the empty
	// string"; the index form returns nil only when nothing matched.
	loc := re.FindStringIndex(str)
	if loc == nil {
		s.PushNil()
		return 1
	}
	s.PushString(str[loc[0]:loc[1]])
	return 1
},
// regex_find_all(pattern, str) pushes an array of all successive matches of
// pattern in str. The array is empty when nothing matches or when the
// pattern does not compile.
"regex_find_all": func(s *luajit.State) int {
	if argErr := s.CheckExactArgs(2); argErr != nil {
		return s.PushError("regex_find_all: %v", argErr)
	}
	expr, exprErr := s.SafeToString(1)
	if exprErr != nil {
		return s.PushError("regex_find_all: first argument must be a string")
	}
	subject, subjectErr := s.SafeToString(2)
	if subjectErr != nil {
		return s.PushError("regex_find_all: second argument must be a string")
	}

	// Start from an empty (non-nil) slice so every failure path still
	// pushes an array rather than nil.
	hits := []string{}
	if re, compileErr := regexp.Compile(expr); compileErr == nil {
		if found := re.FindAllString(subject, -1); found != nil {
			hits = found
		}
	}

	if pushErr := s.PushValue(hits); pushErr != nil {
		return s.PushError("regex_find_all: failed to push result: %v", pushErr)
	}
	return 1
},
// regex_replace(pattern, str, replacement) pushes str with every match of
// pattern replaced by replacement (regexp.ReplaceAllString semantics, so
// "$1"-style references are expanded). When the pattern does not compile,
// str is pushed unchanged.
"regex_replace": func(s *luajit.State) int {
	if argErr := s.CheckExactArgs(3); argErr != nil {
		return s.PushError("regex_replace: %v", argErr)
	}
	expr, exprErr := s.SafeToString(1)
	if exprErr != nil {
		return s.PushError("regex_replace: first argument must be a string")
	}
	subject, subjectErr := s.SafeToString(2)
	if subjectErr != nil {
		return s.PushError("regex_replace: second argument must be a string")
	}
	repl, replErr := s.SafeToString(3)
	if replErr != nil {
		return s.PushError("regex_replace: third argument must be a string")
	}

	// Default to the untouched input; only overwrite on a valid pattern.
	output := subject
	if re, compileErr := regexp.Compile(expr); compileErr == nil {
		output = re.ReplaceAllString(subject, repl)
	}
	s.PushString(output)
	return 1
},
// string_to_number(str) pushes str parsed as a 64-bit float after trimming
// surrounding white space, or nil when strconv.ParseFloat rejects it.
"string_to_number": func(s *luajit.State) int {
	if argErr := s.CheckMinArgs(1); argErr != nil {
		return s.PushError("string_to_number: %v", argErr)
	}
	raw, convErr := s.SafeToString(1)
	if convErr != nil {
		return s.PushError("string_to_number: argument must be a string")
	}

	parsed, parseErr := strconv.ParseFloat(strings.TrimSpace(raw), 64)
	if parseErr != nil {
		s.PushNil()
		return 1
	}
	s.PushNumber(parsed)
	return 1
},
// string_is_numeric(str) pushes true when str, with surrounding white space
// trimmed, is non-empty and accepted by strconv.ParseFloat.
"string_is_numeric": func(s *luajit.State) int {
	if argErr := s.CheckMinArgs(1); argErr != nil {
		return s.PushError("string_is_numeric: %v", argErr)
	}
	raw, convErr := s.SafeToString(1)
	if convErr != nil {
		return s.PushError("string_is_numeric: argument must be a string")
	}

	numeric := false
	if trimmed := strings.TrimSpace(raw); trimmed != "" {
		_, parseErr := strconv.ParseFloat(trimmed, 64)
		numeric = parseErr == nil
	}
	s.PushBoolean(numeric)
	return 1
},
// string_is_alpha(str) pushes true when str is non-empty and every rune is
// a Unicode letter.
"string_is_alpha": func(s *luajit.State) int {
	if argErr := s.CheckMinArgs(1); argErr != nil {
		return s.PushError("string_is_alpha: %v", argErr)
	}
	input, convErr := s.SafeToString(1)
	if convErr != nil {
		return s.PushError("string_is_alpha: argument must be a string")
	}

	// Look for the first rune that breaks the rule; none found means pass.
	notLetter := func(r rune) bool { return !unicode.IsLetter(r) }
	onlyLetters := input != "" && strings.IndexFunc(input, notLetter) < 0
	s.PushBoolean(onlyLetters)
	return 1
},
// string_is_alphanumeric(str) pushes true when str is non-empty and every
// rune is a Unicode letter or a Unicode decimal digit.
"string_is_alphanumeric": func(s *luajit.State) int {
	if argErr := s.CheckMinArgs(1); argErr != nil {
		return s.PushError("string_is_alphanumeric: %v", argErr)
	}
	input, convErr := s.SafeToString(1)
	if convErr != nil {
		return s.PushError("string_is_alphanumeric: argument must be a string")
	}

	// Look for the first rune that breaks the rule; none found means pass.
	disallowed := func(r rune) bool { return !(unicode.IsLetter(r) || unicode.IsDigit(r)) }
	allAllowed := input != "" && strings.IndexFunc(input, disallowed) < 0
	s.PushBoolean(allAllowed)
	return 1
},
"random_string": func(s *luajit.State) int {
if err := s.CheckMinArgs(1); err != nil {
return s.PushError("random_string: %v", err)
}
length, err := s.SafeToNumber(1)
if err != nil || length != float64(int(length)) || length < 0 {
return s.PushError("random_string: first argument must be a non-negative integer")
}
charset := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
if s.GetTop() >= 2 && !s.IsNil(2) {
if custom, err := s.SafeToString(2); err == nil && len(custom) > 0 {
charset = custom
}
}
n := int(length)
if n == 0 {
s.PushString("")
return 1
}
if n > maxRandomLength {
return s.PushError("random_string: length too large (max %d)", maxRandomLength)
}
// Validate charset for UTF-8
if !utf8.ValidString(charset) {
return s.PushError("random_string: charset must be valid UTF-8")
}
charsetRunes := []rune(charset)
if len(charsetRunes) == 0 {
return s.PushError("random_string: charset cannot be empty")
}
result := make([]rune, n)
rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
for i := range result {
result[i] = charsetRunes[rnd.Intn(len(charsetRunes))]
}
s.PushString(string(result))
return 1
},
// string_is_valid_utf8(str) pushes true when str consists entirely of valid
// UTF-8 encoded runes.
"string_is_valid_utf8": func(s *luajit.State) int {
	if argErr := s.CheckMinArgs(1); argErr != nil {
		return s.PushError("string_is_valid_utf8: %v", argErr)
	}
	input, convErr := s.SafeToString(1)
	if convErr != nil {
		return s.PushError("string_is_valid_utf8: argument must be a string")
	}
	wellFormed := utf8.ValidString(input)
	s.PushBoolean(wellFormed)
	return 1
},
}
}