diff --git a/.gitignore b/.gitignore
index 2cd3656..2ec4485 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,5 @@
 go.work
 
 .idea
+
+bench/profile_results
diff --git a/bench/bench_profile.go b/bench/bench_profile.go
new file mode 100644
index 0000000..4f483f0
--- /dev/null
+++ b/bench/bench_profile.go
@@ -0,0 +1,171 @@
+package luajit_bench
+
+import (
+	"flag"
+	"fmt"
+	"os"
+	"runtime"
+	"runtime/pprof"
+	"testing"
+)
+
+// Profiling flags on the default flag set; an empty file name leaves the corresponding profile disabled.
+var (
+	cpuProfile   = flag.String("cpuprofile", "", "write cpu profile to `file`")
+	memProfile   = flag.String("memprofile", "", "write memory profile to `file`")
+	memProfileGC = flag.Bool("memprofilegc", false, "force GC before writing memory profile")
+	blockProfile = flag.String("blockprofile", "", "write block profile to `file`")
+	mutexProfile = flag.String("mutexprofile", "", "write mutex profile to `file`")
+)
+
+// setupTestMain parses the command-line flags if that has not happened yet and
+// turns on each profiler whose flag is set; CPU profile errors exit with status 1.
+func setupTestMain() {
+	if !flag.Parsed() {
+		flag.Parse()
+	}
+
+	if cpuPath := *cpuProfile; cpuPath != "" {
+		out, createErr := os.Create(cpuPath)
+		if createErr != nil {
+			fmt.Fprintf(os.Stderr, "Failed to create CPU profile: %v\n", createErr)
+			os.Exit(1)
+		}
+		// The profiler keeps writing to out until pprof.StopCPUProfile is called.
+		if startErr := pprof.StartCPUProfile(out); startErr != nil {
+			fmt.Fprintf(os.Stderr, "Failed to start CPU profile: %v\n", startErr)
+			os.Exit(1)
+		}
+		fmt.Println("CPU profiling enabled")
+	}
+
+	if len(*blockProfile) > 0 {
+		// A rate of 1 makes the runtime record every blocking event.
+		runtime.SetBlockProfileRate(1)
+		fmt.Println("Block profiling enabled")
+	}
+
+	if len(*mutexProfile) > 0 {
+		// A fraction of 1 makes the runtime report every contention event.
+		runtime.SetMutexProfileFraction(1)
+		fmt.Println("Mutex profiling enabled")
+	}
+}
+
+// teardownTestMain stops the CPU profiler and then writes the memory, block
+// and mutex profiles whose output flags are non-empty. A failure to create,
+// write or close a profile file is reported on stderr and ends the process
+// with exit status 1.
+func teardownTestMain() {
+	// StopCPUProfile returns only after the profile has been fully written.
+	if *cpuProfile != "" {
+		pprof.StopCPUProfile()
+		fmt.Println("CPU profile written to", *cpuProfile)
+	}
+
+	// Heap profile, optionally preceded by a forced collection.
+	writeProfile("memory", "Memory", *memProfile, func(f *os.File) error {
+		if *memProfileGC {
+			runtime.GC()
+		}
+		return pprof.WriteHeapProfile(f)
+	})
+
+	// Goroutine blocking profile.
+	writeProfile("block", "Block", *blockProfile, func(f *os.File) error {
+		return pprof.Lookup("block").WriteTo(f, 0)
+	})
+
+	// Lock contention profile.
+	writeProfile("mutex", "Mutex", *mutexProfile, func(f *os.File) error {
+		return pprof.Lookup("mutex").WriteTo(f, 0)
+	})
+}
+
+// writeProfile creates the file at path, lets write fill it and closes it.
+// An empty path means the profile was not requested and nothing happens.
+//
+// kind is the lower-case profile name used in the error messages and title
+// its capitalized form used in the success message.
+//
+// The file is closed explicitly instead of with defer: os.Exit does not run
+// deferred calls, and a failing Close can mean the profile is incomplete, so
+// it is reported like a failed write.
+func writeProfile(kind, title, path string, write func(f *os.File) error) {
+	if path == "" {
+		return
+	}
+
+	f, err := os.Create(path)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Failed to create %s profile: %v\n", kind, err)
+		os.Exit(1)
+	}
+
+	err = write(f)
+	// Close even when write failed, but keep the first error.
+	if closeErr := f.Close(); err == nil {
+		err = closeErr
+	}
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Failed to write %s profile: %v\n", kind, err)
+		os.Exit(1)
+	}
+	fmt.Println(title+" profile written to", path)
+}
+
+// TestMain brackets m.Run with profiling setup/teardown. NOTE(review): go test only honors TestMain in *_test.go files — confirm file name.
+func TestMain(m *testing.M) {
+	setupTestMain()
+	status := m.Run()
+	teardownTestMain()
+	os.Exit(status)
+}
+
+// MemStats is a point-in-time snapshot of a few runtime.MemStats counters.
+// Each field carries the value of the runtime.MemStats field of the same
+// name, as copied by CaptureMemStats.
+type MemStats struct {
+	Alloc, TotalAlloc uint64 // live heap bytes; cumulative heap bytes allocated
+	Sys               uint64 // bytes obtained from the OS
+	Mallocs, Frees    uint64 // cumulative heap object allocations and frees
+	HeapAlloc         uint64 // live heap bytes (documented by the runtime as equal to Alloc)
+}
+
+// CaptureMemStats reads the runtime's memory statistics and returns the
+// subset of counters that MemStats stores.
+func CaptureMemStats() MemStats {
+	var raw runtime.MemStats
+	runtime.ReadMemStats(&raw)
+	var snap MemStats
+	snap.Alloc = raw.Alloc
+	snap.TotalAlloc = raw.TotalAlloc
+	snap.Sys = raw.Sys
+	snap.Mallocs, snap.Frees = raw.Mallocs, raw.Frees
+	snap.HeapAlloc = raw.HeapAlloc
+	return snap
+}
+
+// TrackMemoryUsage runs fn once and reports what the Go runtime allocated
+// while it ran: the malloc count as name+"-mallocs" and the allocated bytes
+// divided by b.N as name+"-bytes/op". The benchmark timer is paused around the
+// forced GCs and stat reads so only fn is timed, and is running on return.
+// NOTE(review): memory LuaJIT allocates outside the Go heap is presumably not counted.
+func TrackMemoryUsage(b *testing.B, name string, fn func()) {
+	b.Helper()
+
+	// Settle the heap and take the baseline outside the timed region.
+	b.StopTimer()
+	runtime.GC()
+	before := CaptureMemStats()
+	b.StartTimer()
+
+	fn()
+
+	b.StopTimer()
+	runtime.GC()
+	after := CaptureMemStats()
+	b.ReportMetric(float64(after.Mallocs-before.Mallocs), name+"-mallocs")
+	b.ReportMetric(float64(after.TotalAlloc-before.TotalAlloc)/float64(b.N), name+"-bytes/op")
+	b.StartTimer()
+}
diff --git a/bench/bench_test.go b/bench/bench_test.go
index b4d93b6..b3f7483 100644
--- a/bench/bench_test.go
+++ b/bench/bench_test.go
@@ -16,11 +16,14 @@ func BenchmarkSimpleDoString(b *testing.B) {
 
 	code := "local x = 1 + 1"
 	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		if err := state.DoString(code); err != nil {
-			b.Fatalf("DoString failed: %v", err)
+
+	TrackMemoryUsage(b, "dostring", func() {
+		for i := 0; i < b.N; i++ {
+			if err := state.DoString(code); err != nil {
+				b.Fatalf("DoString failed: %v", err)
+			}
 		}
-	}
+	})
 }
 
 // BenchmarkSimpleCompileAndRun benchmarks compile and run of a simple expression
@@ -33,11 +36,14 @@ func BenchmarkSimpleCompileAndRun(b *testing.B) {
 
 	code := "local x = 1 + 1"
 	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		if err := state.CompileAndRun(code, "simple"); err != nil {
-			b.Fatalf("CompileAndRun failed: %v", err)
+
+	TrackMemoryUsage(b, "compile-run", func() {
+		for i := 0; i < b.N; i++ {
+			if err := state.CompileAndRun(code, "simple"); err != nil {
+				b.Fatalf("CompileAndRun failed: %v", err)
+			}
 		}
-	}
+	})
 }
 
 // BenchmarkSimpleCompileLoadRun benchmarks compile, load, and run of a simple expression
@@ -50,15 +56,18 @@ func BenchmarkSimpleCompileLoadRun(b *testing.B) {
 
 	code := "local x = 1 + 1"
 	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		bytecode, err := state.CompileBytecode(code, "simple")
-		if err != nil {
-			b.Fatalf("CompileBytecode failed: %v", err)
+
+	TrackMemoryUsage(b, "compile-load-run", func() {
+		for i := 0; i < b.N; i++ {
+			bytecode, err := state.CompileBytecode(code, "simple")
+			if err != nil {
+				b.Fatalf("CompileBytecode failed: %v", err)
+			}
+			if err := state.LoadAndRunBytecode(bytecode, "simple"); err != nil {
+				b.Fatalf("LoadAndRunBytecode failed: %v", err)
+			}
 		}
-		if err := state.LoadAndRunBytecode(bytecode, "simple"); err != nil {
-			b.Fatalf("LoadAndRunBytecode failed: %v", err)
-		}
-	}
+	})
 }
 
 // BenchmarkSimplePrecompiledBytecode benchmarks running precompiled bytecode
@@ -75,11 +84,14 @@ func BenchmarkSimplePrecompiledBytecode(b *testing.B) {
 		b.Fatalf("CompileBytecode failed: %v", err)
 	}
 	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		if err := state.LoadAndRunBytecode(bytecode, "simple"); err != nil {
-			b.Fatalf("LoadAndRunBytecode failed: %v", err)
+
+	TrackMemoryUsage(b, "precompiled", func() {
+		for i := 0; i < b.N; i++ {
+			if err := state.LoadAndRunBytecode(bytecode, "simple"); err != nil {
+				b.Fatalf("LoadAndRunBytecode failed: %v", err)
+			}
 		}
-	}
+	})
 }
 
 // BenchmarkFunctionCallDoString benchmarks direct execution of a function call
@@ -102,11 +114,14 @@ func BenchmarkFunctionCallDoString(b *testing.B) {
 
 	code := "local result = add(10, 20)"
 	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		if err := state.DoString(code); err != nil {
-			b.Fatalf("DoString failed: %v", err)
+
+	TrackMemoryUsage(b, "func-dostring", func() {
+		for i := 0; i < b.N; i++ {
+			if err := state.DoString(code); err != nil {
+				b.Fatalf("DoString failed: %v", err)
+			}
 		}
-	}
+	})
 }
 
 // BenchmarkFunctionCallPrecompiled benchmarks precompiled function call
@@ -133,11 +148,14 @@ func BenchmarkFunctionCallPrecompiled(b *testing.B) {
 		b.Fatalf("CompileBytecode failed: %v", err)
 	}
 	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		if err := state.LoadAndRunBytecode(bytecode, "call"); err != nil {
-			b.Fatalf("LoadAndRunBytecode failed: %v", err)
+
+	TrackMemoryUsage(b, "func-precompiled", func() {
+		for i := 0; i < b.N; i++ {
+			if err := state.LoadAndRunBytecode(bytecode, "call"); err != nil {
+				b.Fatalf("LoadAndRunBytecode failed: %v", err)
+			}
 		}
-	}
+	})
 }
 
 // BenchmarkLoopDoString benchmarks direct execution of a loop
@@ -155,11 +173,14 @@ func BenchmarkLoopDoString(b *testing.B) {
 		end
 	`
 	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		if err := state.DoString(code); err != nil {
-			b.Fatalf("DoString failed: %v", err)
+
+	TrackMemoryUsage(b, "loop-dostring", func() {
+		for i := 0; i < b.N; i++ {
+			if err := state.DoString(code); err != nil {
+				b.Fatalf("DoString failed: %v", err)
+			}
 		}
-	}
+	})
 }
 
 // BenchmarkLoopPrecompiled benchmarks precompiled loop execution
@@ -181,11 +202,14 @@ func BenchmarkLoopPrecompiled(b *testing.B) {
 		b.Fatalf("CompileBytecode failed: %v", err)
 	}
 	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		if err := state.LoadAndRunBytecode(bytecode, "loop"); err != nil {
-			b.Fatalf("LoadAndRunBytecode failed: %v", err)
+
+	TrackMemoryUsage(b, "loop-precompiled", func() {
+		for i := 0; i < b.N; i++ {
+			if err := state.LoadAndRunBytecode(bytecode, "loop"); err != nil {
+				b.Fatalf("LoadAndRunBytecode failed: %v", err)
+			}
 		}
-	}
+	})
 }
 
 // BenchmarkTableOperationsDoString benchmarks direct execution of table operations
@@ -207,11 +231,14 @@ func BenchmarkTableOperationsDoString(b *testing.B) {
 		end
 	`
 	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		if err := state.DoString(code); err != nil {
-			b.Fatalf("DoString failed: %v", err)
+
+	TrackMemoryUsage(b, "table-dostring", func() {
+		for i := 0; i < b.N; i++ {
+			if err := state.DoString(code); err != nil {
+				b.Fatalf("DoString failed: %v", err)
+			}
 		}
-	}
+	})
 }
 
 // BenchmarkTableOperationsPrecompiled benchmarks precompiled table operations
@@ -237,11 +264,14 @@ func BenchmarkTableOperationsPrecompiled(b *testing.B) {
 		b.Fatalf("CompileBytecode failed: %v", err)
 	}
 	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		if err := state.LoadAndRunBytecode(bytecode, "table"); err != nil {
-			b.Fatalf("LoadAndRunBytecode failed: %v", err)
+
+	TrackMemoryUsage(b, "table-precompiled", func() {
+		for i := 0; i < b.N; i++ {
+			if err := state.LoadAndRunBytecode(bytecode, "table"); err != nil {
+				b.Fatalf("LoadAndRunBytecode failed: %v", err)
+			}
 		}
-	}
+	})
 }
 
 // BenchmarkGoFunctionCall benchmarks calling a Go function from Lua
@@ -269,11 +299,14 @@ func BenchmarkGoFunctionCall(b *testing.B) {
 		b.Fatalf("CompileBytecode failed: %v", err)
 	}
 	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		if err := state.LoadAndRunBytecode(bytecode, "gofunc"); err != nil {
-			b.Fatalf("LoadAndRunBytecode failed: %v", err)
+
+	TrackMemoryUsage(b, "go-func-call", func() {
+		for i := 0; i < b.N; i++ {
+			if err := state.LoadAndRunBytecode(bytecode, "gofunc"); err != nil {
+				b.Fatalf("LoadAndRunBytecode failed: %v", err)
+			}
 		}
-	}
+	})
 }
 
 // BenchmarkComplexScript benchmarks a more complex script
@@ -322,11 +355,14 @@ func BenchmarkComplexScript(b *testing.B) {
 		return result
 	`
 	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		if _, err := state.ExecuteWithResult(code); err != nil {
-			b.Fatalf("ExecuteWithResult failed: %v", err)
+
+	TrackMemoryUsage(b, "complex-script", func() {
+		for i := 0; i < b.N; i++ {
+			if _, err := state.ExecuteWithResult(code); err != nil {
+				b.Fatalf("ExecuteWithResult failed: %v", err)
+			}
 		}
-	}
+	})
 }
 
 // BenchmarkComplexScriptPrecompiled benchmarks a precompiled complex script
@@ -379,15 +415,18 @@ func BenchmarkComplexScriptPrecompiled(b *testing.B) {
 		b.Fatalf("CompileBytecode failed: %v", err)
 	}
 	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		if err := state.LoadBytecode(bytecode, "complex"); err != nil {
-			b.Fatalf("LoadBytecode failed: %v", err)
+
+	TrackMemoryUsage(b, "complex-precompiled", func() {
+		for i := 0; i < b.N; i++ {
+			if err := state.LoadBytecode(bytecode, "complex"); err != nil {
+				b.Fatalf("LoadBytecode failed: %v", err)
+			}
+			if err := state.RunBytecodeWithResults(1); err != nil {
+				b.Fatalf("RunBytecodeWithResults failed: %v", err)
+			}
+			state.Pop(1) // Pop the result
 		}
-		if err := state.RunBytecodeWithResults(1); err != nil { // Assuming this method exists to get the return value
-			b.Fatalf("RunBytecodeWithResults failed: %v", err)
-		}
-		state.Pop(1) // Pop the result
-	}
+	})
 }
 
 // BenchmarkMultipleExecutions benchmarks executing the same bytecode multiple times
@@ -418,13 +457,16 @@ func BenchmarkMultipleExecutions(b *testing.B) {
 	}
 
 	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		if err := state.LoadBytecode(bytecode, "increment"); err != nil {
-			b.Fatalf("LoadBytecode failed: %v", err)
+
+	TrackMemoryUsage(b, "multiple-executions", func() {
+		for i := 0; i < b.N; i++ {
+			if err := state.LoadBytecode(bytecode, "increment"); err != nil {
+				b.Fatalf("LoadBytecode failed: %v", err)
+			}
+			if err := state.RunBytecodeWithResults(1); err != nil {
+				b.Fatalf("RunBytecodeWithResults failed: %v", err)
+			}
+			state.Pop(1) // Pop the result
 		}
-		if err := state.RunBytecodeWithResults(1); err != nil { // Assuming this method exists
-			b.Fatalf("RunBytecodeWithResults failed: %v", err)
-		}
-		state.Pop(1) // Pop the result
-	}
+	})
 }
diff --git a/bench/ezbench_test.go b/bench/ezbench_test.go
index b35242a..5e570d2 100644
--- a/bench/ezbench_test.go
+++ b/bench/ezbench_test.go
@@ -72,14 +72,17 @@ func BenchmarkLuaDirectExecution(b *testing.B) {
 			}
 
 			b.ResetTimer()
-			for i := 0; i < b.N; i++ {
-				// Execute string and get results
-				nresults, err := L.Execute(bc.code)
-				if err != nil {
-					b.Fatalf("Failed to execute code: %v", err)
+
+			TrackMemoryUsage(b, "direct-"+bc.name, func() {
+				for i := 0; i < b.N; i++ {
+					// Execute string and get results
+					nresults, err := L.Execute(bc.code)
+					if err != nil {
+						b.Fatalf("Failed to execute code: %v", err)
+					}
+					L.Pop(nresults) // Clean up any results
 				}
-				L.Pop(nresults) // Clean up any results
-			}
+			})
 		})
 	}
 }
@@ -123,11 +126,13 @@ func BenchmarkLuaBytecodeExecution(b *testing.B) {
 			b.ResetTimer()
 			b.SetBytes(int64(len(bytecode))) // Track bytecode size in benchmarks
 
-			for i := 0; i < b.N; i++ {
-				if err := L.LoadAndRunBytecode(bytecode, bc.name); err != nil {
-					b.Fatalf("Error executing bytecode: %v", err)
+			TrackMemoryUsage(b, "bytecode-"+bc.name, func() {
+				for i := 0; i < b.N; i++ {
+					if err := L.LoadAndRunBytecode(bytecode, bc.name); err != nil {
+						b.Fatalf("Error executing bytecode: %v", err)
+					}
 				}
-			}
+			})
 		})
 	}
 }
diff --git a/bench/profile.sh b/bench/profile.sh
new file mode 100755
index 0000000..06f1dfb
--- /dev/null
+++ b/bench/profile.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+# Runs the benchmarks in the current directory with profiling enabled and
+# renders the CPU and memory profiles as SVG call graphs.
+# Usage: ./profile.sh [benchmark_pattern]   (pattern defaults to ".")
+
+set -e
+
+# Default values
+BENCHMARK=${1:-"."}
+OUTPUT_DIR="./profile_results"
+CPU_PROFILE="$OUTPUT_DIR/cpu.prof"
+MEM_PROFILE="$OUTPUT_DIR/mem.prof"
+BLOCK_PROFILE="$OUTPUT_DIR/block.prof"
+MUTEX_PROFILE="$OUTPUT_DIR/mutex.prof"
+TRACE_FILE="$OUTPUT_DIR/trace.out"
+HTML_OUTPUT="$OUTPUT_DIR/profile_report.html"
+
+mkdir -p "$OUTPUT_DIR"
+
+echo "Running benchmarks with profiling enabled..."
+go test -bench="$BENCHMARK" -benchmem -cpuprofile="$CPU_PROFILE" -memprofile="$MEM_PROFILE" -blockprofile="$BLOCK_PROFILE" -mutexprofile="$MUTEX_PROFILE" -count=5 -timeout=30m
+
+# Render static SVGs (needs Graphviz "dot"). pprof's -http mode would start a
+# blocking web server instead of writing the -output file.
+echo "Generating CPU profile analysis..."
+go tool pprof -svg -output="$OUTPUT_DIR/cpu_graph.svg" "$CPU_PROFILE"
+
+echo "Generating memory profile analysis..."
+go tool pprof -svg -output="$OUTPUT_DIR/mem_graph.svg" "$MEM_PROFILE"
+
+# Write a static HTML report; the unquoted EOF delimiter lets the shell expand $(date) and the profile path variables below.
+cat > "$HTML_OUTPUT" << EOF
+<!DOCTYPE html>
+<html>
+<head>
+    <title>LuaJIT Benchmark Profiling Results</title>
+    <style>
+        body { font-family: Arial, sans-serif; margin: 20px; }
+        h1, h2 { color: #333; }
+        .profile { margin-bottom: 30px; }
+        img { max-width: 100%; border: 1px solid #ddd; }
+    </style>
+</head>
+<body>
+    <h1>LuaJIT Benchmark Profiling Results</h1>
+    <p>Generated on: $(date)</p>
+    
+    <div class="profile">
+        <h2>CPU Profile</h2>
+        <img src="cpu_graph.svg" alt="CPU Profile Graph">
+        <p>Command to explore: <code>go tool pprof $CPU_PROFILE</code></p>
+    </div>
+    
+    <div class="profile">
+        <h2>Memory Profile</h2>
+        <img src="mem_graph.svg" alt="Memory Profile Graph">
+        <p>Command to explore: <code>go tool pprof $MEM_PROFILE</code></p>
+    </div>
+    
+    <div class="profile">
+        <h2>Tips for Profile Analysis</h2>
+        <ul>
+            <li>Use <code>go tool pprof -http=:8080 $CPU_PROFILE</code> for interactive web UI</li>
+            <li>Use <code>top10</code> in pprof to see the top 10 functions by CPU/memory usage</li>
+            <li>Use <code>list FunctionName</code> to see line-by-line stats for a specific function</li>
+        </ul>
+    </div>
+</body>
+</html>
+EOF
+
+printf '%s\n' "Profiling complete! Results available in $OUTPUT_DIR" \
+    "View the HTML report at $HTML_OUTPUT" \
+    "" \
+    "For detailed interactive analysis, run:" \
+    "  go tool pprof -http=:1880 $CPU_PROFILE  # For CPU profile" \
+    "  go tool pprof -http=:1880 $MEM_PROFILE  # For memory profile"
\ No newline at end of file