-- Moonshark/tests/string.lua

require("tests")
local str = require("string")

-- Test data
-- Shared fixtures referenced by the tests below.
local test_string = "Hello, World!" -- subject of the contains / starts_with / ends_with checks
local multi_line = "Line 1\nLine 2\nLine 3" -- three newline-separated lines, fed to str.lines
local padded_string = "  Hello World  " -- two spaces on each side, fed to the trim functions

-- ======================================================================
-- BASIC STRING OPERATIONS
-- ======================================================================

-- Splits on a delimiter, checks the pieces, then joins them back with a
-- different separator.
test("String Split and Join", function()
	local pieces = str.split("a,b,c,d", ",")
	assert_equal("table", type(pieces))
	assert_equal(4, #pieces)
	assert_equal("a", pieces[1])
	assert_equal("d", pieces[4])

	assert_equal("a-b-c-d", str.join(pieces, "-"))

	-- Splitting an empty string is expected to give one empty element
	local from_empty = str.split("", ",")
	assert_equal(1, #from_empty)
	assert_equal("", from_empty[1])
end)

-- Trimming with the default cutset and with an explicit one.
test("String Trim Operations", function()
	-- Default cutset: both ends, left only, right only
	local input = padded_string
	assert_equal("Hello World", str.trim(input))
	assert_equal("Hello World  ", str.trim_left(input))
	assert_equal("  Hello World", str.trim_right(input))

	-- Explicit cutset: only the given character is stripped
	local wrapped = "xxxHelloxxx"
	assert_equal("Helloxxx", str.trim_left(wrapped, "x"))
	assert_equal("xxxHello", str.trim_right(wrapped, "x"))
end)

-- Upper/lower/title conversions, driven from a table of cases.
test("Case Operations", function()
	-- Each row: expected result, name of the str function, input
	local cases = {
		{"HELLO", "upper", "hello"},
		{"hello", "lower", "HELLO"},
		{"Hello World", "title", "hello world"},
		-- Mixed content: digits and punctuation are expected to pass through
		{"HELLO123!", "upper", "Hello123!"},
		{"hello123!", "lower", "HELLO123!"},
	}

	for _, case in ipairs(cases) do
		local expected, fn_name, input = case[1], case[2], case[3]
		assert_equal(expected, str[fn_name](input))
	end
end)

-- Substring, prefix and suffix predicates against the shared test_string.
test("String Contains and Position", function()
	-- Each row: expected boolean, name of the str predicate, needle.
	-- The lowercase needles verify that matching is case-sensitive.
	local checks = {
		{true, "contains", "World"},
		{false, "contains", "world"},
		{true, "starts_with", "Hello"},
		{false, "starts_with", "hello"},
		{true, "ends_with", "!"},
		{false, "ends_with", "?"},
	}

	for _, check in ipairs(checks) do
		local expected, predicate, needle = check[1], check[2], check[3]
		assert_equal(expected, str[predicate](test_string, needle))
	end
end)

-- Replace-all, replace-first-n, and the no-match case.
test("String Replace", function()
	local source = "hello world hello"
	assert_equal("hi world hi", str.replace(source, "hello", "hi"))
	assert_equal("hi world hello", str.replace_n(source, "hello", "hi", 1))

	-- Nothing to replace: the input is expected back unchanged
	assert_equal("hello", str.replace("hello", "xyz", "abc"))
end)

-- Position lookups and occurrence counting.
test("String Index Operations", function()
	-- First occurrence; the expected 7 implies 1-based positions
	local haystack = "hello world"
	assert_equal(7, str.index(haystack, "world"))
	assert_equal(nil, str.index(haystack, "xyz"))

	-- Last occurrence and total count of a repeated word
	local word = "hello"
	assert_equal(7, str.last_index("hello hello", word))
	assert_equal(3, str.count("hello hello hello", word))
end)

-- Repetition and reversal, including the empty-result cases.
test("String Repeat and Reverse", function()
	local empty = ""

	assert_equal("abcabcabc", str.repeat_("abc", 3))
	assert_equal(empty, str.repeat_("x", 0))

	assert_equal("olleh", str.reverse("hello"))
	assert_equal(empty, str.reverse(empty))
end)

-- Character length versus byte length.
test("String Length Operations", function()
	local ascii = "hello"
	assert_equal(5, str.length(ascii))
	assert_equal(5, str.byte_length(ascii))
	assert_equal(0, str.length(""))

	-- Non-ASCII input: five characters, but é occupies 2 bytes in UTF-8
	local accented = "héllo"
	assert_equal(5, str.length(accented))
	assert_equal(6, str.byte_length(accented))
end)

-- Splitting text into lines and into whitespace-separated words.
test("String Lines and Words", function()
	local rows = str.lines(multi_line)
	assert_equal(3, #rows)
	assert_equal("Line 1", rows[1])
	assert_equal("Line 3", rows[3])

	local tokens = str.words("Hello world test")
	assert_equal(3, #tokens)
	assert_equal("Hello", tokens[1])
	assert_equal("test", tokens[3])

	-- Leading, trailing and repeated spaces must not yield extra words
	local padded_tokens = str.words("  Hello   world  ")
	assert_equal(2, #padded_tokens)
end)

-- Left/right padding with the default and with a custom fill character.
test("String Padding", function()
	local short, width = "hi", 5

	assert_equal("   hi", str.pad_left(short, width))
	assert_equal("hi   ", str.pad_right(short, width))
	assert_equal("000hi", str.pad_left(short, width, "0"))
	assert_equal("hi***", str.pad_right(short, width, "*"))

	-- Input already wider than the target: expected back unchanged
	assert_equal("hello", str.pad_left("hello", 3))
end)

-- Slicing with both bounds, with an open end, and past the end.
test("String Slice", function()
	local subject = "hello"

	assert_equal("ell", str.slice(subject, 2, 4))
	assert_equal("ello", str.slice(subject, 2))
	assert_equal("", str.slice(subject, 10))
	assert_equal("h", str.slice(subject, 1, 1))
end)

-- ======================================================================
-- REGULAR EXPRESSIONS
-- ======================================================================

-- Boolean regex matching; note the (pattern, subject) argument order.
test("Regex Match", function()
	local cases = {
		{pattern = "\\d+", subject = "hello123", matches = true},
		{pattern = "\\d+", subject = "hello", matches = false},
		{pattern = "^[a-z]+$", subject = "hello", matches = true},
		{pattern = "^[a-z]+$", subject = "Hello", matches = false},
	}

	for _, case in ipairs(cases) do
		assert_equal(case.matches, str.match(case.pattern, case.subject))
	end
end)

-- First-match and all-matches extraction for a digit-run pattern.
test("Regex Find", function()
	local digits = "\\d+"

	assert_equal("123", str.find(digits, "hello123world"))
	assert_equal(nil, str.find(digits, "hello"))

	local hits = str.find_all(digits, "123 and 456 and 789")
	assert_equal(3, #hits)
	assert_equal("123", hits[1])
	assert_equal("789", hits[3])
end)

-- Regex substitution; arguments are (pattern, subject, replacement).
test("Regex Replace", function()
	local digit_run, space_run = "\\d+", "\\s+"

	assert_equal("helloXXXworldXXX", str.gsub(digit_run, "hello123world456", "XXX"))
	assert_equal("hello world", str.gsub(space_run, "hello   world", " "))
end)

-- ======================================================================
-- TYPE CONVERSION & VALIDATION
-- ======================================================================

-- Numeric parsing of integer, decimal, negative and invalid input.
test("String to Number", function()
	local parse = str.to_number

	assert_equal(123, parse("123"))
	assert_equal(123.45, parse("123.45"))
	assert_equal(-42, parse("-42"))
	assert_equal(nil, parse("not_a_number"))
end)

-- Classification predicates, driven from an ordered table of cases.
test("String Validation", function()
	-- A row without an `input` field passes nil to the predicate.
	local cases = {
		{fn = "is_numeric", input = "123", want = true},
		{fn = "is_numeric", input = "123.45", want = true},
		{fn = "is_numeric", input = "abc", want = false},

		{fn = "is_alpha", input = "hello", want = true},
		{fn = "is_alpha", input = "hello123", want = false},
		{fn = "is_alpha", input = "", want = false},

		{fn = "is_alphanumeric", input = "hello123", want = true},
		{fn = "is_alphanumeric", input = "hello!", want = false},
		{fn = "is_alphanumeric", input = "", want = false},

		{fn = "is_empty", input = "", want = true},
		{fn = "is_empty", want = true},
		{fn = "is_empty", input = "hello", want = false},

		{fn = "is_blank", input = "", want = true},
		{fn = "is_blank", input = "   ", want = true},
		{fn = "is_blank", input = "hello", want = false},
	}

	for _, case in ipairs(cases) do
		assert_equal(case.want, str[case.fn](case.input))
	end
end)

-- ======================================================================
-- ADVANCED STRING OPERATIONS
-- ======================================================================

-- Identifier-style case conversions.
test("Case Conversion Functions", function()
	-- Each row: expected result, name of the str function, input
	local conversions = {
		{"Hello World", "capitalize", "hello world"},
		{"helloWorld", "camel_case", "hello world"},
		{"HelloWorld", "pascal_case", "hello world"},
		{"hello_world", "snake_case", "Hello World"},
		{"hello-world", "kebab_case", "Hello World"},
		{"HELLO_WORLD", "screaming_snake_case", "hello world"},
	}

	for _, row in ipairs(conversions) do
		local expected, fn_name, input = row[1], row[2], row[3]
		assert_equal(expected, str[fn_name](input))
	end
end)

-- Centering within a width and truncating with a suffix.
test("String Center and Truncate", function()
	-- Centering: with an odd amount of padding the extra fill is expected on the right
	local short = "hi"
	assert_equal("  hi  ", str.center(short, 6))
	assert_equal("**hi***", str.center(short, 7, "*"))
	assert_equal("hello", str.center("hello", 3)) -- input wider than target

	-- Truncation: default suffix, custom suffix, and input under the limit
	local sentence = "hello world"
	assert_equal("hello...", str.truncate(sentence, 8))
	assert_equal("hello>>", str.truncate(sentence, 8, ">>"))
	assert_equal("hi", str.truncate(short, 10))
end)

-- Word wrapping: result is a table of lines, none wider than the limit.
test("String Wrap", function()
	local width = 10
	local rows = str.wrap("The quick brown fox jumps over the lazy dog", width)

	assert_equal("table", type(rows))
	assert(#rows > 1, "should wrap into multiple lines")

	-- Check every produced line against the width limit
	for _, row in ipairs(rows) do
		local fits = str.length(row) <= width
		assert(fits, "line should be within width limit")
	end
end)

-- Removing the common leading indentation from a multi-line string.
test("String Dedent", function()
	local dedented = str.dedent("    line1\n    line2\n    line3")
	local rows = str.lines(dedented)

	for i, expected in ipairs({"line1", "line2", "line3"}) do
		assert_equal(expected, rows[i])
	end
end)

-- Escaping regex metacharacters and quoting for a shell.
test("Escape and Quote Functions", function()
	-- Regex metacharacters are expected to gain a backslash prefix
	local escape = str.escape_regex
	assert_equal("hello\\.world", escape("hello.world"))
	assert_equal("a\\+b\\*c\\?", escape("a+b*c?"))

	-- Single-quote wrapping; an embedded quote becomes '"'"'
	local quote = str.shell_quote
	assert_equal("'hello world'", quote("hello world"))
	assert_equal("'it'\"'\"'s great'", quote("it's great"))
end)

-- Percent-encoding, decoding, and the encode/decode round trip.
test("URL Encoding", function()
	local plain = "hello world"

	assert_equal("hello%20world", str.url_encode(plain))
	assert_equal("caf%C3%A9", str.url_encode("café"))

	-- Round trip: decoding the encoded form restores the input
	local round_trip = str.url_decode(str.url_encode(plain))
	assert_equal(plain, round_trip)

	-- A '+' is expected to decode to a space
	assert_equal(plain, str.url_decode("hello+world"))
end)

-- ======================================================================
-- STRING COMPARISON
-- ======================================================================

-- Case-insensitive equality, edit distance and similarity score.
test("String Comparison", function()
	assert_equal(true, str.iequals("Hello", "HELLO"))
	assert_equal(false, str.iequals("Hello", "world"))

	-- Edit distance: the kitten/sitting pair is expected to give 3
	assert_equal(3, str.distance("kitten", "sitting"))
	assert_equal(0, str.distance("hello", "hello"))

	-- Similarity: strictly between 0.5 and 1 for a one-letter difference
	local score = str.similarity("hello", "hallo")
	local is_partial = score > 0.5 and score < 1
	assert(is_partial, "should be partial similarity")
	assert_equal(1, str.similarity("hello", "hello"))
end)

-- ======================================================================
-- TEMPLATE FUNCTIONS
-- ======================================================================

-- ${name} substitution with flat variables and with dotted paths.
test("Template Functions", function()
	-- Flat variables
	local rendered = str.template(
		"Hello ${name}, you are ${age} years old",
		{name = "John", age = 25}
	)
	assert_equal("Hello John, you are 25 years old", rendered)

	-- A placeholder with no matching variable is expected to render as empty
	local partial = str.template("Hello ${name} and ${unknown}", {name = "John"})
	assert_equal("Hello John and ", partial)

	-- Dotted paths reach into nested tables
	local nested = {
		user = {name = "Jane", role = "admin"},
		count = 5
	}
	local summary_line = str.template_advanced(
		"User ${user.name} (${user.role}) has ${count} items",
		nested
	)
	assert_equal("User Jane (admin) has 5 items", summary_line)
end)

-- ======================================================================
-- UTILITY FUNCTIONS
-- ======================================================================

-- Whitespace detection, removal and normalisation.
test("Whitespace Functions", function()
	-- Each row: expected boolean, input (the empty string counts as whitespace here)
	local detection = {
		{true, "   "},
		{true, ""},
		{false, "hello"},
	}
	for _, row in ipairs(detection) do
		assert_equal(row[1], str.is_whitespace(row[2]))
	end

	assert_equal("hello", str.strip_whitespace("h e l l o"))
	assert_equal("hello world test", str.normalize_whitespace("hello    world   test"))
end)

-- Pulling numeric values out of free text.
test("Number Extraction", function()
	-- Two decimals embedded between currency and percent symbols
	local found = str.extract_numbers("The price is $123.45 and tax is 8.5%")
	assert_equal(2, #found)
	assert_close(123.45, found[1])
	assert_close(8.5, found[2])

	-- A leading minus sign is expected to be kept
	local below_zero = str.extract_numbers("Temperature: -15.5 degrees")
	assert_equal(1, #below_zero)
	assert_close(-15.5, below_zero[1])
end)

-- Stripping diacritics; plain ASCII is expected back unchanged.
test("Accent Removal", function()
	-- Each row: expected result, accented input
	local samples = {
		{"cafe", "café"},
		{"resume", "résumé"},
		{"naive", "naïve"},
		{"hello", "hello"},
	}

	for _, sample in ipairs(samples) do
		assert_equal(sample[1], str.remove_accents(sample[2]))
	end
end)

-- Random strings: requested length, variation between calls, custom charset.
test("Random String Generation", function()
	local wanted = 10
	local first = str.random(wanted)
	local second = str.random(wanted)

	assert_equal(wanted, str.length(first))
	assert_equal(wanted, str.length(second))
	assert(first ~= second, "random strings should be different")

	-- Restricting the alphabet to "abc"
	local restricted = str.random(5, "abc")
	assert_equal(5, str.length(restricted))
	assert(str.match("^[abc]+$", restricted), "should only contain specified characters")
end)

-- Valid inputs only: ASCII, a two-byte character, and the empty string.
test("UTF-8 Validation", function()
	for _, sample in ipairs({"hello", "café", ""}) do
		assert_equal(true, str.is_utf8(sample))
	end

	-- Invalid byte sequences are deliberately not asserted here: the outcome
	-- depends on the underlying UTF-8 validation implementation, and some
	-- invalid sequences might still pass depending on the system.
end)

-- URL slug generation from titles with spaces, accents and symbols.
test("Slug Generation", function()
	-- Each row: expected slug, input title
	local samples = {
		{"hello-world", "Hello World"},
		{"cafe-restaurant", "Café & Restaurant"},
		{"specialcharacters", "Special!@#$%Characters"},
	}

	for _, sample in ipairs(samples) do
		assert_equal(sample[1], str.slug(sample[2]))
	end
end)

-- ======================================================================
-- EDGE CASES AND ERROR HANDLING
-- ======================================================================

-- Behaviour of the core functions when given empty input.
test("Empty String Handling", function()
	local empty = ""

	assert_table_equal({""}, str.split(empty, ","))
	assert_equal(empty, str.join({}, ","))
	assert_equal(empty, str.trim(empty))
	assert_equal(empty, str.reverse(empty))
	assert_equal(empty, str.repeat_(empty, 5))
	assert_table_equal({""}, str.lines(empty))
	assert_table_equal({}, str.words(empty))
end)

-- Sanity checks on a 5000-character input ("test " repeated 1000 times).
test("Large String Handling", function()
	local repeats = 1000
	local big = string.rep("test ", repeats)

	assert_equal(5000, str.length(big))
	assert_equal(repeats, str.count(big, "test"))

	local tokens = str.words(big)
	assert_equal(repeats, #tokens)

	-- Trimming drops the final space, so the text ends on the last word
	local without_tail = str.trim(big)
	assert_equal(true, str.ends_with(without_tail, "test"))
end)

-- Basic operations on text containing multi-byte (emoji) characters.
test("Unicode Handling", function()
	local unicode_string = "Hello 🌍 World 🚀"

	-- Basic operations should work with Unicode
	assert_equal(true, str.contains(unicode_string, "🌍"))

	-- Compare against the concrete expected value: comparing the result with
	-- itself could never fail. The letters are upper-cased and the emoji,
	-- which have no case, must come through unchanged.
	assert_equal("HELLO 🌍 WORLD 🚀", str.upper(unicode_string))

	-- Splitting on a space must keep each emoji whole
	local parts = str.split(unicode_string, " ")
	assert_equal(4, #parts)
	assert_equal("🌍", parts[2])
end)

-- Feeds an invalid pattern (a lone backslash) to the regex functions.
-- Either outcome is tolerated: pcall captures an error if one is raised,
-- and if none is raised the function is checked with a known-good pattern.
test("Regex Error Handling", function()
	-- Only the status flag is kept; the error value was never inspected
	local match_ok = pcall(str.match, "\\", "test")
	if match_ok then
		-- If it doesn't fail, just verify it works with valid patterns
		assert_equal(true, str.match("test", "test"))
	else
		assert_equal(false, match_ok)
	end

	local find_ok = pcall(str.find, "\\", "test")
	if find_ok then
		-- If it doesn't fail, just verify it works with valid patterns
		assert(str.find("test", "test") ~= nil)
	else
		assert_equal(false, find_ok)
	end
end)

-- ======================================================================
-- PERFORMANCE TESTS
-- ======================================================================

-- Times four operations on a large text using os.clock and prints the
-- timings; only the results are asserted, not the durations.
test("Performance Test", function()
	local large_text = string.rep("The quick brown fox jumps over the lazy dog. ", 1000)

	-- Runs fn(...) once and returns its first result plus the os.clock delta
	local function timed(fn, ...)
		local started = os.clock()
		local result = fn(...)
		return result, os.clock() - started
	end

	local words, words_time = timed(str.words, large_text)
	local lines, lines_time = timed(str.lines, large_text)
	local replaced, replace_time = timed(str.replace, large_text, "fox", "cat")
	local parts, split_time = timed(str.split, large_text, " ")

	print(string.format("  Extract %d words: %.3fs", #words, words_time))
	print(string.format("  Extract %d lines: %.3fs", #lines, lines_time))
	print(string.format("  Replace in %d chars: %.3fs", str.length(large_text), replace_time))
	print(string.format("  Split into %d parts: %.3fs", #parts, split_time))

	assert(#words > 8000, "should extract many words")
	assert(str.contains(replaced, "cat"), "replacement should work")
end)

-- ======================================================================
-- INTEGRATION TESTS
-- ======================================================================

-- Chains several functions: trim, normalise whitespace, lowercase, split
-- into words, strip punctuation, drop short words, join.
test("String Processing Pipeline", function()
	local raw = "  HELLO,    world!  How ARE you?  "

	-- Clean, normalise and tokenise in one chain
	local tokens = str.words(str.lower(str.normalize_whitespace(str.trim(raw))))

	local kept = {}
	for _, token in ipairs(tokens) do
		-- Strip punctuation first so it does not count toward the length check
		local bare = str.gsub("[[:punct:]]", token, "")
		if str.length(bare) > 2 then
			kept[#kept + 1] = bare
		end
	end

	assert_equal("hello-world-how-are-you", str.join(kept, "-"))
end)

-- Counts words, sentences and occurrences of "the" in a short text, then
-- collects the words that start with a vowel.
test("Text Analysis", function()
	local text = "The quick brown fox jumps over the lazy dog. The dog was sleeping."

	local word_count = #str.words(text)
	local sentence_count = str.count(text, ".")
	local the_count = str.count(str.lower(text), "the")

	assert_equal(13, word_count)
	assert_equal(2, sentence_count)
	assert_equal(3, the_count)

	-- Extract all words starting with vowels
	local words = str.words(str.lower(text))
	local vowel_words = {}
	for _, word in ipairs(words) do
		-- Remove punctuation with the regex-based gsub and a POSIX class, as
		-- the pipeline test in this file does. "%p" is Lua-pattern syntax;
		-- str.replace is only used with literal text elsewhere in this file,
		-- so passing "%p" to it did not remove punctuation.
		local clean_word = str.gsub("[[:punct:]]", word, "")
		if str.match("^[aeiou]", clean_word) then
			vowel_words[#vowel_words + 1] = clean_word
		end
	end

	assert(#vowel_words >= 1, "should find words starting with vowels")
end)

-- Finish the run. Both functions are presumably provided by the "tests"
-- module required at the top of this file — TODO confirm. Judging by the
-- names, summary() reports the results and test_exit() ends the process.
summary()
test_exit()