diff --git a/.gitignore b/.gitignore index 2cd3656..2ec4485 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,5 @@ go.work .idea + +bench/profile_results
diff --git a/bench/bench_profile.go b/bench/bench_profile.go
new file mode 100644
index 0000000..4f483f0
--- /dev/null
+++ b/bench/bench_profile.go
@@ -0,0 +1,201 @@
+package luajit_bench
+
+import (
+	"flag"
+	"fmt"
+	"os"
+	"runtime"
+	"runtime/pprof"
+	"testing"
+)
+
+// Profiling flags. Each file-valued flag names an output path; an empty value
+// leaves that profile disabled.
+//
+// NOTE(review): `go test` rewrites -cpuprofile, -memprofile, -blockprofile and
+// -mutexprofile into their -test.* forms, which the testing package handles
+// itself, so these flags presumably only receive values when passed after
+// -args. Confirm against how the benchmarks are invoked.
+var (
+	cpuProfile   = flag.String("cpuprofile", "", "write cpu profile to `file`")
+	memProfile   = flag.String("memprofile", "", "write memory profile to `file`")
+	memProfileGC = flag.Bool("memprofilegc", false, "force GC before writing memory profile")
+	blockProfile = flag.String("blockprofile", "", "write block profile to `file`")
+	mutexProfile = flag.String("mutexprofile", "", "write mutex profile to `file`")
+)
+
+// cpuProfileFile is the open destination of the running CPU profile, or nil
+// when CPU profiling is off. teardownTestMain closes it.
+var cpuProfileFile *os.File
+
+// setupTestMain parses the command-line flags (if nothing has parsed them yet)
+// and starts every profiler that was requested. A failure to create or start
+// the CPU profile is reported on stderr and exits the process with status 1.
+func setupTestMain() {
+	// Make sure the flags are parsed
+	if !flag.Parsed() {
+		flag.Parse()
+	}
+
+	// CPU profiling
+	if *cpuProfile != "" {
+		f, err := os.Create(*cpuProfile)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "Failed to create CPU profile: %v\n", err)
+			os.Exit(1)
+		}
+		if err := pprof.StartCPUProfile(f); err != nil {
+			f.Close()
+			fmt.Fprintf(os.Stderr, "Failed to start CPU profile: %v\n", err)
+			os.Exit(1)
+		}
+		// Keep the handle so teardownTestMain can close it; StopCPUProfile
+		// only stops writing to the file.
+		cpuProfileFile = f
+		fmt.Println("CPU profiling enabled")
+	}
+
+	// Block profiling (goroutine blocking)
+	if *blockProfile != "" {
+		runtime.SetBlockProfileRate(1)
+		fmt.Println("Block profiling enabled")
+	}
+
+	// Mutex profiling (lock contention)
+	if *mutexProfile != "" {
+		runtime.SetMutexProfileFraction(1)
+		fmt.Println("Mutex profiling enabled")
+	}
+}
+
+// writeProfile creates path, hands the file to write, and closes it. Any
+// failure, including a failed Close, is reported on stderr using label and
+// exits the process with status 1.
+func writeProfile(label, path string, write func(f *os.File) error) {
+	f, err := os.Create(path)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Failed to create %s profile: %v\n", label, err)
+		os.Exit(1)
+	}
+	err = write(f)
+	// Close explicitly rather than with defer: os.Exit skips deferred calls,
+	// and a failed Close means the profile on disk cannot be trusted.
+	if cerr := f.Close(); err == nil {
+		err = cerr
+	}
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Failed to write %s profile: %v\n", label, err)
+		os.Exit(1)
+	}
+}
+
+// teardownTestMain stops the CPU profile and writes the memory, block and
+// mutex profiles that were requested on the command line.
+func teardownTestMain() {
+	// Stop CPU profile
+	if *cpuProfile != "" {
+		pprof.StopCPUProfile()
+		if cpuProfileFile != nil {
+			if err := cpuProfileFile.Close(); err != nil {
+				fmt.Fprintf(os.Stderr, "Failed to close CPU profile: %v\n", err)
+				os.Exit(1)
+			}
+			cpuProfileFile = nil
+		}
+		fmt.Println("CPU profile written to", *cpuProfile)
+	}
+
+	// Write memory profile
+	if *memProfile != "" {
+		writeProfile("memory", *memProfile, func(f *os.File) error {
+			// Force garbage collection before writing memory profile if requested
+			if *memProfileGC {
+				runtime.GC()
+			}
+			return pprof.WriteHeapProfile(f)
+		})
+		fmt.Println("Memory profile written to", *memProfile)
+	}
+
+	// Write block profile
+	if *blockProfile != "" {
+		writeProfile("block", *blockProfile, func(f *os.File) error {
+			return pprof.Lookup("block").WriteTo(f, 0)
+		})
+		fmt.Println("Block profile written to", *blockProfile)
+	}
+
+	// Write mutex profile
+	if *mutexProfile != "" {
+		writeProfile("mutex", *mutexProfile, func(f *os.File) error {
+			return pprof.Lookup("mutex").WriteTo(f, 0)
+		})
+		fmt.Println("Mutex profile written to", *mutexProfile)
+	}
+}
+
+// TestMain is the entry point for all tests in this package. It enables the
+// requested profilers, runs the tests, writes the profiles, and exits with the
+// status returned by m.Run.
+func TestMain(m *testing.M) {
+	setupTestMain()
+	code := m.Run()
+	teardownTestMain()
+	os.Exit(code)
+}
+
+// MemStats captures a snapshot of memory statistics. The fields are copied
+// from the runtime.MemStats fields of the same names.
+type MemStats struct {
+	Alloc      uint64
+	TotalAlloc uint64
+	Sys        uint64
+	Mallocs    uint64
+	Frees      uint64
+	HeapAlloc  uint64
+}
+
+// CaptureMemStats returns current memory statistics as reported by
+// runtime.ReadMemStats.
+func CaptureMemStats() MemStats {
+	var m runtime.MemStats
+	runtime.ReadMemStats(&m)
+	return MemStats{
+		Alloc:      m.Alloc,
+		TotalAlloc: m.TotalAlloc,
+		Sys:        m.Sys,
+		Mallocs:    m.Mallocs,
+		Frees:      m.Frees,
+		HeapAlloc:  m.HeapAlloc,
+	}
+}
+
+// TrackMemoryUsage runs fn and reports two custom benchmark metrics for the
+// time fn was running: "<name>-mallocs", the total number of mallocs, and
+// "<name>-bytes/op", the bytes allocated divided by b.N.
+//
+// The forced GCs and stat snapshots run with the benchmark timer stopped so
+// the bookkeeping does not inflate ns/op; only fn itself is timed. The timer
+// is running again when TrackMemoryUsage returns.
+func TrackMemoryUsage(b *testing.B, name string, fn func()) {
+	b.Helper()
+
+	// Force GC and capture stats before the run, outside the timed region.
+	b.StopTimer()
+	runtime.GC()
+	before := CaptureMemStats()
+	b.StartTimer()
+
+	// Run the function
+	fn()
+
+	// Force GC and capture stats after the run, again untimed.
+	b.StopTimer()
+	runtime.GC()
+	after := CaptureMemStats()
+
+	// Report stats
+	b.ReportMetric(float64(after.Mallocs-before.Mallocs), name+"-mallocs")
+	b.ReportMetric(float64(after.TotalAlloc-before.TotalAlloc)/float64(b.N), name+"-bytes/op")
+	b.StartTimer()
+}
diff --git a/bench/bench_test.go b/bench/bench_test.go index b4d93b6..b3f7483 100644 --- a/bench/bench_test.go +++ b/bench/bench_test.go @@ -16,11 +16,14 @@ func BenchmarkSimpleDoString(b *testing.B) { code := "local x = 1 + 1" b.ResetTimer() - for i := 0; i < b.N; i++ { - if err := state.DoString(code); err != nil { - b.Fatalf("DoString failed: %v", err) + + TrackMemoryUsage(b, "dostring", func() { + for i := 0; i < b.N; i++ { + if err := state.DoString(code); err != nil { + b.Fatalf("DoString failed: %v", err) + } } - } + }) } // BenchmarkSimpleCompileAndRun benchmarks compile and run of a simple expression @@ -33,11 +36,14 @@ func BenchmarkSimpleCompileAndRun(b *testing.B) { code := "local x = 1 + 1" b.ResetTimer() - for i := 0; i < b.N; i++ { - if err := state.CompileAndRun(code, "simple"); err != nil { - b.Fatalf("CompileAndRun failed: %v", err) + + TrackMemoryUsage(b, "compile-run", func() { + for i := 0; i < b.N; i++ { + if err := state.CompileAndRun(code, "simple"); err != nil { + b.Fatalf("CompileAndRun failed: %v", err) + } } - } + }) } // BenchmarkSimpleCompileLoadRun benchmarks compile, load, and run of a simple expression @@ -50,15 +56,18 @@ func BenchmarkSimpleCompileLoadRun(b *testing.B) { code := "local x = 1 + 1" b.ResetTimer() - for i := 0; i < b.N; i++ { - bytecode, err := state.CompileBytecode(code, "simple") - if err != nil { - b.Fatalf("CompileBytecode failed: %v", err) + 
+ TrackMemoryUsage(b, "compile-load-run", func() { + for i := 0; i < b.N; i++ { + bytecode, err := state.CompileBytecode(code, "simple") + if err != nil { + b.Fatalf("CompileBytecode failed: %v", err) + } + if err := state.LoadAndRunBytecode(bytecode, "simple"); err != nil { + b.Fatalf("LoadAndRunBytecode failed: %v", err) + } } - if err := state.LoadAndRunBytecode(bytecode, "simple"); err != nil { - b.Fatalf("LoadAndRunBytecode failed: %v", err) - } - } + }) } // BenchmarkSimplePrecompiledBytecode benchmarks running precompiled bytecode @@ -75,11 +84,14 @@ func BenchmarkSimplePrecompiledBytecode(b *testing.B) { b.Fatalf("CompileBytecode failed: %v", err) } b.ResetTimer() - for i := 0; i < b.N; i++ { - if err := state.LoadAndRunBytecode(bytecode, "simple"); err != nil { - b.Fatalf("LoadAndRunBytecode failed: %v", err) + + TrackMemoryUsage(b, "precompiled", func() { + for i := 0; i < b.N; i++ { + if err := state.LoadAndRunBytecode(bytecode, "simple"); err != nil { + b.Fatalf("LoadAndRunBytecode failed: %v", err) + } } - } + }) } // BenchmarkFunctionCallDoString benchmarks direct execution of a function call @@ -102,11 +114,14 @@ func BenchmarkFunctionCallDoString(b *testing.B) { code := "local result = add(10, 20)" b.ResetTimer() - for i := 0; i < b.N; i++ { - if err := state.DoString(code); err != nil { - b.Fatalf("DoString failed: %v", err) + + TrackMemoryUsage(b, "func-dostring", func() { + for i := 0; i < b.N; i++ { + if err := state.DoString(code); err != nil { + b.Fatalf("DoString failed: %v", err) + } } - } + }) } // BenchmarkFunctionCallPrecompiled benchmarks precompiled function call @@ -133,11 +148,14 @@ func BenchmarkFunctionCallPrecompiled(b *testing.B) { b.Fatalf("CompileBytecode failed: %v", err) } b.ResetTimer() - for i := 0; i < b.N; i++ { - if err := state.LoadAndRunBytecode(bytecode, "call"); err != nil { - b.Fatalf("LoadAndRunBytecode failed: %v", err) + + TrackMemoryUsage(b, "func-precompiled", func() { + for i := 0; i < b.N; i++ { + if err := 
state.LoadAndRunBytecode(bytecode, "call"); err != nil { + b.Fatalf("LoadAndRunBytecode failed: %v", err) + } } - } + }) } // BenchmarkLoopDoString benchmarks direct execution of a loop @@ -155,11 +173,14 @@ func BenchmarkLoopDoString(b *testing.B) { end ` b.ResetTimer() - for i := 0; i < b.N; i++ { - if err := state.DoString(code); err != nil { - b.Fatalf("DoString failed: %v", err) + + TrackMemoryUsage(b, "loop-dostring", func() { + for i := 0; i < b.N; i++ { + if err := state.DoString(code); err != nil { + b.Fatalf("DoString failed: %v", err) + } } - } + }) } // BenchmarkLoopPrecompiled benchmarks precompiled loop execution @@ -181,11 +202,14 @@ func BenchmarkLoopPrecompiled(b *testing.B) { b.Fatalf("CompileBytecode failed: %v", err) } b.ResetTimer() - for i := 0; i < b.N; i++ { - if err := state.LoadAndRunBytecode(bytecode, "loop"); err != nil { - b.Fatalf("LoadAndRunBytecode failed: %v", err) + + TrackMemoryUsage(b, "loop-precompiled", func() { + for i := 0; i < b.N; i++ { + if err := state.LoadAndRunBytecode(bytecode, "loop"); err != nil { + b.Fatalf("LoadAndRunBytecode failed: %v", err) + } } - } + }) } // BenchmarkTableOperationsDoString benchmarks direct execution of table operations @@ -207,11 +231,14 @@ func BenchmarkTableOperationsDoString(b *testing.B) { end ` b.ResetTimer() - for i := 0; i < b.N; i++ { - if err := state.DoString(code); err != nil { - b.Fatalf("DoString failed: %v", err) + + TrackMemoryUsage(b, "table-dostring", func() { + for i := 0; i < b.N; i++ { + if err := state.DoString(code); err != nil { + b.Fatalf("DoString failed: %v", err) + } } - } + }) } // BenchmarkTableOperationsPrecompiled benchmarks precompiled table operations @@ -237,11 +264,14 @@ func BenchmarkTableOperationsPrecompiled(b *testing.B) { b.Fatalf("CompileBytecode failed: %v", err) } b.ResetTimer() - for i := 0; i < b.N; i++ { - if err := state.LoadAndRunBytecode(bytecode, "table"); err != nil { - b.Fatalf("LoadAndRunBytecode failed: %v", err) + + TrackMemoryUsage(b, 
"table-precompiled", func() { + for i := 0; i < b.N; i++ { + if err := state.LoadAndRunBytecode(bytecode, "table"); err != nil { + b.Fatalf("LoadAndRunBytecode failed: %v", err) + } } - } + }) } // BenchmarkGoFunctionCall benchmarks calling a Go function from Lua @@ -269,11 +299,14 @@ func BenchmarkGoFunctionCall(b *testing.B) { b.Fatalf("CompileBytecode failed: %v", err) } b.ResetTimer() - for i := 0; i < b.N; i++ { - if err := state.LoadAndRunBytecode(bytecode, "gofunc"); err != nil { - b.Fatalf("LoadAndRunBytecode failed: %v", err) + + TrackMemoryUsage(b, "go-func-call", func() { + for i := 0; i < b.N; i++ { + if err := state.LoadAndRunBytecode(bytecode, "gofunc"); err != nil { + b.Fatalf("LoadAndRunBytecode failed: %v", err) + } } - } + }) } // BenchmarkComplexScript benchmarks a more complex script @@ -322,11 +355,14 @@ func BenchmarkComplexScript(b *testing.B) { return result ` b.ResetTimer() - for i := 0; i < b.N; i++ { - if _, err := state.ExecuteWithResult(code); err != nil { - b.Fatalf("ExecuteWithResult failed: %v", err) + + TrackMemoryUsage(b, "complex-script", func() { + for i := 0; i < b.N; i++ { + if _, err := state.ExecuteWithResult(code); err != nil { + b.Fatalf("ExecuteWithResult failed: %v", err) + } } - } + }) } // BenchmarkComplexScriptPrecompiled benchmarks a precompiled complex script @@ -379,15 +415,18 @@ func BenchmarkComplexScriptPrecompiled(b *testing.B) { b.Fatalf("CompileBytecode failed: %v", err) } b.ResetTimer() - for i := 0; i < b.N; i++ { - if err := state.LoadBytecode(bytecode, "complex"); err != nil { - b.Fatalf("LoadBytecode failed: %v", err) + + TrackMemoryUsage(b, "complex-precompiled", func() { + for i := 0; i < b.N; i++ { + if err := state.LoadBytecode(bytecode, "complex"); err != nil { + b.Fatalf("LoadBytecode failed: %v", err) + } + if err := state.RunBytecodeWithResults(1); err != nil { + b.Fatalf("RunBytecodeWithResults failed: %v", err) + } + state.Pop(1) // Pop the result } - if err := state.RunBytecodeWithResults(1); 
err != nil { // Assuming this method exists to get the return value - b.Fatalf("RunBytecodeWithResults failed: %v", err) - } - state.Pop(1) // Pop the result - } + }) } // BenchmarkMultipleExecutions benchmarks executing the same bytecode multiple times @@ -418,13 +457,16 @@ func BenchmarkMultipleExecutions(b *testing.B) { } b.ResetTimer() - for i := 0; i < b.N; i++ { - if err := state.LoadBytecode(bytecode, "increment"); err != nil { - b.Fatalf("LoadBytecode failed: %v", err) + + TrackMemoryUsage(b, "multiple-executions", func() { + for i := 0; i < b.N; i++ { + if err := state.LoadBytecode(bytecode, "increment"); err != nil { + b.Fatalf("LoadBytecode failed: %v", err) + } + if err := state.RunBytecodeWithResults(1); err != nil { + b.Fatalf("RunBytecodeWithResults failed: %v", err) + } + state.Pop(1) // Pop the result } - if err := state.RunBytecodeWithResults(1); err != nil { // Assuming this method exists - b.Fatalf("RunBytecodeWithResults failed: %v", err) - } - state.Pop(1) // Pop the result - } + }) } diff --git a/bench/ezbench_test.go b/bench/ezbench_test.go index b35242a..5e570d2 100644 --- a/bench/ezbench_test.go +++ b/bench/ezbench_test.go @@ -72,14 +72,17 @@ func BenchmarkLuaDirectExecution(b *testing.B) { } b.ResetTimer() - for i := 0; i < b.N; i++ { - // Execute string and get results - nresults, err := L.Execute(bc.code) - if err != nil { - b.Fatalf("Failed to execute code: %v", err) + + TrackMemoryUsage(b, "direct-"+bc.name, func() { + for i := 0; i < b.N; i++ { + // Execute string and get results + nresults, err := L.Execute(bc.code) + if err != nil { + b.Fatalf("Failed to execute code: %v", err) + } + L.Pop(nresults) // Clean up any results } - L.Pop(nresults) // Clean up any results - } + }) }) } } @@ -123,11 +126,13 @@ func BenchmarkLuaBytecodeExecution(b *testing.B) { b.ResetTimer() b.SetBytes(int64(len(bytecode))) // Track bytecode size in benchmarks - for i := 0; i < b.N; i++ { - if err := L.LoadAndRunBytecode(bytecode, bc.name); err != nil { - 
b.Fatalf("Error executing bytecode: %v", err) + TrackMemoryUsage(b, "bytecode-"+bc.name, func() { + for i := 0; i < b.N; i++ { + if err := L.LoadAndRunBytecode(bytecode, bc.name); err != nil { + b.Fatalf("Error executing bytecode: %v", err) + } } - } + }) }) } } diff --git a/bench/profile.sh b/bench/profile.sh new file mode 100755 index 0000000..06f1dfb --- /dev/null +++ b/bench/profile.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +# Easy script to run benchmarks with profiling enabled +# Usage: ./profile_benchmarks.sh [benchmark_pattern] + +set -e + +# Default values +BENCHMARK=${1:-"."} +OUTPUT_DIR="./profile_results" +CPU_PROFILE="$OUTPUT_DIR/cpu.prof" +MEM_PROFILE="$OUTPUT_DIR/mem.prof" +BLOCK_PROFILE="$OUTPUT_DIR/block.prof" +MUTEX_PROFILE="$OUTPUT_DIR/mutex.prof" +TRACE_FILE="$OUTPUT_DIR/trace.out" +HTML_OUTPUT="$OUTPUT_DIR/profile_report.html" + +# Create output directory +mkdir -p "$OUTPUT_DIR" + +echo "Running benchmarks with profiling enabled..." + +# Run benchmarks with profiling flags +go test -bench="$BENCHMARK" -benchmem -cpuprofile="$CPU_PROFILE" -memprofile="$MEM_PROFILE" -blockprofile="$BLOCK_PROFILE" -mutexprofile="$MUTEX_PROFILE" -count=5 -timeout=30m + +echo "Generating CPU profile analysis..." +go tool pprof -http=":1880" -output="$OUTPUT_DIR/cpu_graph.svg" "$CPU_PROFILE" + +echo "Generating memory profile analysis..." +go tool pprof -http=":1880" -output="$OUTPUT_DIR/mem_graph.svg" "$MEM_PROFILE" + +# Generate a simple HTML report +cat > "$HTML_OUTPUT" << EOF + + +
+Generated on: $(date)
+ +Command to explore: go tool pprof $CPU_PROFILE
Command to explore: go tool pprof $MEM_PROFILE
go tool pprof -http=:8080 $CPU_PROFILE
for interactive web UItop10
in pprof to see the top 10 functions by CPU/memory usagelist FunctionName
to see line-by-line stats for a specific function