Supervision Testing Templates and Future Patterns
Document Version: 1.0
Date: 2025-07-02
Purpose: Templates, patterns, and guidelines for implementing new supervision tests and extending the testing framework
Table of Contents
- Test Templates
- Service Integration Patterns
- Advanced Testing Patterns
- Extension Guidelines
- Performance Testing Templates
- CI/CD Integration Patterns
- Future Enhancement Roadmap
Test Templates
1. Basic Service Crash Recovery Test Template
defmodule MyApp.ServiceNameSupervisionTest do
@moduledoc """
Supervision crash recovery tests for MyApp.ServiceName.
Tests verify that the service properly restarts after crashes,
maintains functionality, and integrates correctly with the supervision tree.
"""
use Foundation.UnifiedTestFoundation, :supervision_testing
import Foundation.SupervisionTestHelpers
@moduletag :supervision_testing
@moduletag timeout: 30_000
describe "MyApp.ServiceName crash recovery" do
test "service restarts after crash and maintains functionality",
%{supervision_tree: sup_tree} do
# 1. Get service from isolated supervision tree
{:ok, service_pid} = get_service(sup_tree, :my_service_name)
assert is_pid(service_pid)
# 2. Test functionality before crash
result = call_service(sup_tree, :my_service_name, :get_status)
assert result == :ok # or whatever expected result
# 3. Kill the service
Process.exit(service_pid, :kill)
# 4. Wait for restart
{:ok, new_pid} = wait_for_service_restart(sup_tree, :my_service_name, service_pid)
# 5. Verify restart
assert new_pid != service_pid
assert Process.alive?(new_pid)
# 6. Test functionality after restart
new_result = call_service(sup_tree, :my_service_name, :get_status)
assert new_result == :ok
end
test "service handles multiple rapid crashes gracefully",
%{supervision_tree: sup_tree} do
# Test rapid crash/restart cycles
for _i <- 1..3 do
{:ok, service_pid} = get_service(sup_tree, :my_service_name)
Process.exit(service_pid, :kill)
{:ok, new_pid} = wait_for_service_restart(sup_tree, :my_service_name, service_pid)
assert new_pid != service_pid
# Verify service is functional after each restart
result = call_service(sup_tree, :my_service_name, :get_status)
assert result == :ok
end
end
test "service state recovery after restart", %{supervision_tree: sup_tree} do
# 1. Set up initial state
:ok = call_service(sup_tree, :my_service_name, {:configure, %{setting: "test_value"}})
# 2. Verify initial state
{:ok, config} = call_service(sup_tree, :my_service_name, :get_config)
assert config.setting == "test_value"
# 3. Crash the service
{:ok, service_pid} = get_service(sup_tree, :my_service_name)
Process.exit(service_pid, :kill)
# 4. Wait for restart
{:ok, _new_pid} = wait_for_service_restart(sup_tree, :my_service_name, service_pid)
# 5. Verify state recovery (if persistent) or reset (if stateless)
{:ok, new_config} = call_service(sup_tree, :my_service_name, :get_config)
# Adjust assertion based on whether service maintains state
assert new_config.setting == "default_value" # For stateless services
# OR: assert new_config.setting == "test_value" # For persistent services
end
end
describe "Resource management" do
test "no resource leaks after service restarts", %{supervision_tree: sup_tree} do
initial_count = :erlang.system_info(:process_count)
# Multiple crash/restart cycles
for _i <- 1..5 do
{:ok, service_pid} = get_service(sup_tree, :my_service_name)
Process.exit(service_pid, :kill)
{:ok, _new_pid} = wait_for_service_restart(sup_tree, :my_service_name, service_pid)
end
# Allow system to stabilize
Process.sleep(1000)
final_count = :erlang.system_info(:process_count)
assert final_count - initial_count < 20,
"Process leak detected: #{initial_count} -> #{final_count}"
end
end
end
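These templates lean on wait_for_service_restart/4 from Foundation.SupervisionTestHelpers. If you are porting the pattern to a codebase without that helper, a minimal polling sketch might look like the following (an illustration only; Foundation's actual implementation may differ):
# Sketch of a restart-waiter for your own helpers module.
# Assumes get_service/2 as used in the templates above.
def wait_for_service_restart(sup_tree, service_name, old_pid, timeout \\ 5_000) do
  deadline = System.monotonic_time(:millisecond) + timeout
  poll_for_restart(sup_tree, service_name, old_pid, deadline)
end

defp poll_for_restart(sup_tree, service_name, old_pid, deadline) do
  case get_service(sup_tree, service_name) do
    {:ok, new_pid} when is_pid(new_pid) and new_pid != old_pid ->
      {:ok, new_pid}

    _ ->
      if System.monotonic_time(:millisecond) < deadline do
        # Poll at a short interval until the supervisor re-registers the service
        Process.sleep(50)
        poll_for_restart(sup_tree, service_name, old_pid, deadline)
      else
        {:error, :timeout}
      end
  end
end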
2. Complex Service Integration Test Template
defmodule MyApp.ServiceIntegrationSupervisionTest do
@moduledoc """
Integration supervision tests for multiple interdependent services.
Tests verify that services properly handle dependencies during
restart scenarios and maintain system consistency.
"""
use Foundation.UnifiedTestFoundation, :supervision_testing
import Foundation.SupervisionTestHelpers
@moduletag :supervision_testing
@moduletag timeout: 30_000
describe "Service dependency management" do
test "dependent services restart correctly with rest_for_one",
%{supervision_tree: sup_tree} do
# Monitor all relevant services
monitors = monitor_all_services(sup_tree)
# Get the service to crash (adjust based on your supervision order)
{crash_service_pid, _} = monitors[:service_to_crash]
# Kill the service
Process.exit(crash_service_pid, :kill)
# Verify rest_for_one cascade behavior
verify_rest_for_one_cascade(monitors, :service_to_crash)
# Verify dependent services restarted correctly
wait_for_services_restart(sup_tree, %{
dependent_service_1: monitors[:dependent_service_1] |> elem(0),
dependent_service_2: monitors[:dependent_service_2] |> elem(0)
})
# Test integration functionality after restart
result = call_service(sup_tree, :dependent_service_1, :test_dependency)
assert result == :ok
end
test "system maintains consistency during cascading failures",
%{supervision_tree: sup_tree} do
# Set up cross-service state
:ok = call_service(sup_tree, :service_a, {:register, :key1, "value1"})
:ok = call_service(sup_tree, :service_b, {:register, :key2, "value2"})
# Verify initial state
{:ok, value1} = call_service(sup_tree, :service_a, {:get, :key1})
{:ok, value2} = call_service(sup_tree, :service_b, {:get, :key2})
assert value1 == "value1"
assert value2 == "value2"
# Cause cascading failure (capture both pids before the crash,
# since service_b's old pid cannot be looked up after it restarts)
{:ok, service_a_pid} = get_service(sup_tree, :service_a)
{:ok, service_b_pid} = get_service(sup_tree, :service_b)
Process.exit(service_a_pid, :kill)
# Wait for cascade to complete
{:ok, _new_a_pid} = wait_for_service_restart(sup_tree, :service_a, service_a_pid)
{:ok, _new_b_pid} = wait_for_service_restart(sup_tree, :service_b, service_b_pid)
# Verify system consistency (adjust based on your requirements)
{:ok, new_value1} = call_service(sup_tree, :service_a, {:get, :key1})
{:ok, new_value2} = call_service(sup_tree, :service_b, {:get, :key2})
# State may be reset or recovered depending on design
assert new_value1 == nil # For stateless reset
assert new_value2 == nil # For stateless reset
end
end
end
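The test above assumes monitor_all_services/1 returns a map of service_name => {pid, monitor_ref}, matching the tuple destructured earlier. If you need an equivalent outside Foundation, a hedged sketch (get_supported_services/0 and get_service/2 are the helpers used throughout these templates):
# Sketch only; Foundation's actual helper may differ.
def monitor_all_services(sup_tree) do
  for service <- get_supported_services(), into: %{} do
    {:ok, pid} = get_service(sup_tree, service)
    # Monitor each service so crashes can be observed via :DOWN messages
    {service, {pid, Process.monitor(pid)}}
  end
end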
3. Performance and Load Testing Template
defmodule MyApp.ServicePerformanceSupervisionTest do
@moduledoc """
Performance-focused supervision tests.
Tests verify that supervision behavior performs adequately under
various load conditions and stress scenarios.
"""
use Foundation.UnifiedTestFoundation, :supervision_testing
import Foundation.SupervisionTestHelpers
@moduletag :supervision_testing
@moduletag :performance
@moduletag timeout: 60_000
describe "Performance under load" do
test "restart time remains consistent under high load",
%{supervision_tree: sup_tree} do
# Measure restart times under various conditions
# Baseline measurement
baseline_times = measure_restart_times(sup_tree, :my_service, 5)
baseline_avg = Enum.sum(baseline_times) / length(baseline_times)
# Create background load
load_tasks = create_background_load(sup_tree, 10)
# Measure restart times under load
load_times = measure_restart_times(sup_tree, :my_service, 5)
load_avg = Enum.sum(load_times) / length(load_times)
# Clean up background load
Enum.each(load_tasks, &Task.shutdown/1)
# Performance should not degrade significantly
degradation = (load_avg - baseline_avg) / baseline_avg
assert degradation < 0.50, # Allow up to 50% degradation
"Restart time degraded too much: #{baseline_avg}ms -> #{load_avg}ms"
end
test "memory usage remains stable during restart cycles",
%{supervision_tree: sup_tree} do
initial_memory = :erlang.memory(:total)
# Perform many restart cycles
for i <- 1..20 do
{:ok, service_pid} = get_service(sup_tree, :my_service)
Process.exit(service_pid, :kill)
{:ok, _new_pid} = wait_for_service_restart(sup_tree, :my_service, service_pid)
# Trigger garbage collection periodically
if rem(i, 5) == 0, do: :erlang.garbage_collect()
end
# Allow memory stabilization
:erlang.garbage_collect()
Process.sleep(2000)
final_memory = :erlang.memory(:total)
memory_growth = final_memory - initial_memory
# Memory growth should be minimal (adjust threshold as needed)
assert memory_growth < 10_000_000, # 10MB
"Excessive memory growth: #{initial_memory} -> #{final_memory}"
end
end
# Helper functions for performance testing
defp measure_restart_times(sup_tree, service_name, count) do
for _i <- 1..count do
{:ok, service_pid} = get_service(sup_tree, service_name)
start_time = :erlang.monotonic_time(:millisecond)
Process.exit(service_pid, :kill)
{:ok, _new_pid} = wait_for_service_restart(sup_tree, service_name, service_pid)
end_time = :erlang.monotonic_time(:millisecond)
end_time - start_time
end
end
defp create_background_load(sup_tree, task_count) do
for _i <- 1..task_count do
Task.async(fn ->
# Create load appropriate for your services
for _j <- 1..100 do
call_service(sup_tree, :my_service, :get_status)
Process.sleep(10)
end
end)
end
end
end
Service Integration Patterns
1. Adding New Service to Supervision Testing
When adding a new service to the supervision testing framework:
Step 1: Update Service Mappings
# In test/support/supervision_test_helpers.ex
@service_modules %{
# Existing services...
task_pool_manager: JidoFoundation.TaskPoolManager,
system_command_manager: JidoFoundation.SystemCommandManager,
coordination_manager: JidoFoundation.CoordinationManager,
scheduler_manager: JidoFoundation.SchedulerManager,
# Add your new service
my_new_service: JidoFoundation.MyNewService
}
@supervision_order [
# Existing order...
:scheduler_manager,
:task_pool_manager,
:system_command_manager,
:coordination_manager,
# Add your service in the correct position
:my_new_service
]
Step 2: Update Type Specifications
# In test/support/supervision_test_helpers.ex
@type service_name ::
:task_pool_manager
| :system_command_manager
| :coordination_manager
| :scheduler_manager
| :my_new_service # Add your service
Step 3: Create Test Service Implementation
# In test/support/foundation_test_services.ex (if it doesn't exist, create it)
defmodule Foundation.TestServices.MyNewService do
@moduledoc """
Test double for JidoFoundation.MyNewService in isolated supervision testing.
"""
use GenServer
def start_link(opts \\ []) do
name = Keyword.get(opts, :name, __MODULE__)
registry = Keyword.get(opts, :registry, nil)
GenServer.start_link(__MODULE__, {opts, registry}, name: name)
end
def init({opts, registry}) do
# Register with test registry if provided
if registry do
Registry.register(registry, {:service, JidoFoundation.MyNewService}, %{test_instance: true})
end
# Initialize service state (minimal for testing)
{:ok, %{config: %{}, started_at: DateTime.utc_now()}}
end
# Implement minimal interface needed for testing
def handle_call(:get_status, _from, state) do
{:reply, :ok, state}
end
def handle_call(:get_config, _from, state) do
{:reply, {:ok, state.config}, state}
end
def handle_call({:configure, config}, _from, state) do
new_state = Map.put(state, :config, config)
{:reply, :ok, new_state}
end
# Handle other calls as needed...
end
Step 4: Update Supervision Setup
# In test/support/supervision_test_setup.ex
@core_services [
# Existing services...
Foundation.TestServices.SchedulerManager,
Foundation.TestServices.TaskPoolManager,
Foundation.TestServices.SystemCommandManager,
Foundation.TestServices.CoordinationManager,
# Add your test service
Foundation.TestServices.MyNewService
]
# Update the isolated supervisor children list
defp start_isolated_jido_supervisor(supervisor_name, registry_name) do
children = [
# ... existing children ...
# Add your service
{Foundation.TestServices.MyNewService,
name: :"#{supervisor_name}_my_new_service", registry: registry_name}
]
# ... rest of function
end
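With these four steps in place, the new service can be exercised using the basic template helpers, for example:
# The test double's handle_call(:get_status, ...) replies :ok (see Step 3)
test "my_new_service is wired into the isolated tree", %{supervision_tree: sup_tree} do
  {:ok, pid} = get_service(sup_tree, :my_new_service)
  assert is_pid(pid)
  assert call_service(sup_tree, :my_new_service, :get_status) == :ok
end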
2. Custom Service Discovery Pattern
For services that need special discovery mechanisms:
defmodule MyApp.CustomServiceDiscovery do
@moduledoc """
Custom service discovery for specialized services.
"""
import Foundation.SupervisionTestHelpers
def get_custom_service(sup_tree, service_identifier) do
# Custom lookup logic
case Registry.lookup(sup_tree.registry, {:custom_service, service_identifier}) do
[{pid, _}] when is_pid(pid) -> {:ok, pid}
[] -> {:error, :custom_service_not_found}
end
end
def wait_for_custom_service_restart(sup_tree, service_identifier, old_pid, timeout \\ 5000) do
wait_for(
fn ->
case get_custom_service(sup_tree, service_identifier) do
{:ok, new_pid} when new_pid != old_pid -> {:ok, new_pid}
_ -> nil
end
end,
timeout
)
end
end
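A hypothetical test using this module (the :indexer identifier and its {:custom_service, ...} registration are illustrative, not part of Foundation):
test "custom service restarts after crash", %{supervision_tree: sup_tree} do
  {:ok, pid} = MyApp.CustomServiceDiscovery.get_custom_service(sup_tree, :indexer)
  Process.exit(pid, :kill)

  {:ok, new_pid} =
    MyApp.CustomServiceDiscovery.wait_for_custom_service_restart(sup_tree, :indexer, pid)

  assert new_pid != pid
end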
Advanced Testing Patterns
1. Property-Based Supervision Testing
defmodule MyApp.PropertyBasedSupervisionTest do
use Foundation.UnifiedTestFoundation, :supervision_testing
use ExUnitProperties
import Foundation.SupervisionTestHelpers
@moduletag :supervision_testing
@moduletag :property_based
property "services always restart after any kind of crash", %{supervision_tree: sup_tree} do
check all service_name <- member_of(get_supported_services()),
# :normal is excluded: exits with reason :normal are ignored by
# processes that are not trapping exits, so no restart would occur
kill_signal <- member_of([:kill, :shutdown, {:shutdown, :reason}]),
max_runs: 25 do
# Get service before crash
{:ok, original_pid} = get_service(sup_tree, service_name)
# Crash it with various signals
Process.exit(original_pid, kill_signal)
# Should always restart
{:ok, new_pid} = wait_for_service_restart(sup_tree, service_name, original_pid, 10_000)
assert new_pid != original_pid
assert Process.alive?(new_pid)
# Should be functional after restart
case call_service(sup_tree, service_name, :get_status, [], 5000) do
{:error, _} -> :ok # Service may not implement get_status
result -> assert result != nil
end
end
end
property "supervision trees are always consistent", %{supervision_tree: sup_tree} do
check all services_to_crash <- uniq_list_of(member_of(get_supported_services()),
min_length: 1, max_length: 3),
max_runs: 15 do
# Get initial state
initial_pids = for service <- services_to_crash do
{:ok, pid} = get_service(sup_tree, service)
{service, pid}
end
# Crash all services simultaneously
for {_service, pid} <- initial_pids do
Process.exit(pid, :kill)
end
# Wait for all to restart
for {service, old_pid} <- initial_pids do
{:ok, new_pid} = wait_for_service_restart(sup_tree, service, old_pid, 10_000)
assert new_pid != old_pid
end
# Verify supervision tree consistency
stats = Foundation.SupervisionTestSetup.get_supervision_stats(sup_tree)
assert stats.supervisor_alive == true
assert stats.registered_services >= length(services_to_crash)
end
end
end
2. Chaos Engineering Pattern
defmodule MyApp.ChaosSupervisionTest do
@moduledoc """
Chaos engineering tests for supervision resilience.
"""
use Foundation.UnifiedTestFoundation, :supervision_testing
import Foundation.SupervisionTestHelpers
@moduletag :supervision_testing
@moduletag :chaos
@moduletag timeout: 120_000
describe "Chaos testing" do
test "system survives random service crashes", %{supervision_tree: sup_tree} do
# Run chaos for 30 seconds
end_time = System.monotonic_time(:millisecond) + 30_000
chaos_task = Task.async(fn ->
run_chaos_loop(sup_tree, end_time)
end)
# Monitor system health during chaos
health_task = Task.async(fn ->
monitor_system_health(sup_tree, end_time)
end)
# Wait for both tasks to complete
chaos_events = Task.await(chaos_task, 35_000)
health_results = Task.await(health_task, 35_000)
# Verify system survived
assert length(chaos_events) > 0, "No chaos events occurred"
assert Enum.all?(health_results, & &1.healthy), "System became unhealthy"
# Verify final state
final_stats = Foundation.SupervisionTestSetup.get_supervision_stats(sup_tree)
assert final_stats.supervisor_alive == true
end
end
defp run_chaos_loop(sup_tree, end_time) do
run_chaos_loop(sup_tree, end_time, [])
end
defp run_chaos_loop(sup_tree, end_time, events) do
current_time = System.monotonic_time(:millisecond)
if current_time >= end_time do
events
else
# Random chaos action
action = Enum.random([:kill_service, :overload_service, :pause])
event = case action do
:kill_service ->
service = Enum.random(get_supported_services())
case get_service(sup_tree, service) do
{:ok, pid} ->
Process.exit(pid, :kill)
%{action: :kill, service: service, time: current_time}
_ ->
%{action: :kill_failed, service: service, time: current_time}
end
:overload_service ->
service = Enum.random(get_supported_services())
# Send many concurrent requests
for _i <- 1..10 do
spawn(fn ->
call_service(sup_tree, service, :get_status, [], 100)
end)
end
%{action: :overload, service: service, time: current_time}
:pause ->
Process.sleep(Enum.random(100..500))
%{action: :pause, time: current_time}
end
Process.sleep(Enum.random(200..1000))
run_chaos_loop(sup_tree, end_time, [event | events])
end
end
defp monitor_system_health(sup_tree, end_time) do
monitor_system_health(sup_tree, end_time, [])
end
defp monitor_system_health(sup_tree, end_time, results) do
current_time = System.monotonic_time(:millisecond)
if current_time >= end_time do
results
else
health_check = %{
time: current_time,
healthy: system_healthy?(sup_tree)
}
Process.sleep(1000) # Check every second
monitor_system_health(sup_tree, end_time, [health_check | results])
end
end
defp system_healthy?(sup_tree) do
  try do
    stats = Foundation.SupervisionTestSetup.get_supervision_stats(sup_tree)
    stats.supervisor_alive && stats.registered_services > 0
  rescue
    _ -> false
  catch
    # GenServer call failures surface as exits, not exceptions
    :exit, _ -> false
  end
end
end
Extension Guidelines
1. Creating New Test Modes
To create a new testing mode for Foundation.UnifiedTestFoundation:
# In test/support/unified_test_foundation.ex
defmacro __using__(:my_custom_testing) do
quote do
use ExUnit.Case, async: false # or true if safe
import Foundation.AsyncTestHelpers
import MyApp.CustomTestHelpers # Your custom helpers
setup do
MyApp.CustomTestSetup.create_custom_context()
end
end
end
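A consumer of the new mode then reads like any other test module; the context key comes from whatever create_custom_context/0 returns (see the setup pattern in section 3 below):
# Hypothetical consumer of the :my_custom_testing mode
defmodule MyApp.SomeCustomTest do
  use Foundation.UnifiedTestFoundation, :my_custom_testing

  test "receives the custom context", %{custom_context: ctx} do
    assert is_map(ctx)
  end
end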
2. Custom Test Helper Modules
Structure for creating specialized test helpers:
defmodule MyApp.CustomTestHelpers do
@moduledoc """
Custom test helpers for specialized testing scenarios.
"""
import Foundation.AsyncTestHelpers
import ExUnit.Assertions
@type custom_context :: %{
custom_field: term(),
# ... other fields
}
@spec custom_helper_function(custom_context(), term()) :: term()
def custom_helper_function(context, params) do
# Implementation
end
# ... other helper functions
end
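As a concrete instance of this structure, a small helper that asserts a condition eventually becomes true might live in such a module (a sketch; the names and timeout are illustrative, not Foundation APIs):
@spec assert_eventually((-> boolean()), non_neg_integer()) :: :ok
def assert_eventually(condition_fun, timeout \\ 2_000) do
  deadline = System.monotonic_time(:millisecond) + timeout
  do_assert_eventually(condition_fun, deadline)
end

defp do_assert_eventually(condition_fun, deadline) do
  cond do
    condition_fun.() ->
      :ok

    System.monotonic_time(:millisecond) >= deadline ->
      # flunk/1 comes from the ExUnit.Assertions import above
      flunk("Condition did not become true within the timeout")

    true ->
      Process.sleep(25)
      do_assert_eventually(condition_fun, deadline)
  end
end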
3. Test Setup Modules
Pattern for creating custom test setup:
defmodule MyApp.CustomTestSetup do
@moduledoc """
Custom test setup for specialized testing scenarios.
"""
import Foundation.AsyncTestHelpers
import ExUnit.Callbacks
@spec create_custom_context() :: %{custom_context: map()}
def create_custom_context do
# Setup logic
custom_context = %{
# ... context fields
}
on_exit(fn ->
cleanup_custom_context(custom_context)
end)
%{custom_context: custom_context}
end
@spec cleanup_custom_context(map()) :: :ok
def cleanup_custom_context(context) do
# Cleanup logic
:ok
end
end
Performance Testing Templates
1. Benchmark Template
defmodule MyApp.SupervisionBenchmarkTest do
use Foundation.UnifiedTestFoundation, :supervision_testing
import Foundation.SupervisionTestHelpers
@moduletag :supervision_testing
@moduletag :benchmark
@moduletag timeout: 300_000 # 5 minutes
describe "Supervision performance benchmarks" do
test "restart time benchmarks", %{supervision_tree: sup_tree} do
services = get_supported_services()
results = for service <- services do
times = measure_restart_times(sup_tree, service, 10)
%{
service: service,
min_time: Enum.min(times),
max_time: Enum.max(times),
avg_time: Enum.sum(times) / length(times),
median_time: median(times),
p95_time: percentile(times, 0.95),
p99_time: percentile(times, 0.99)
}
end
# Log results for performance tracking
for result <- results do
IO.puts("#{result.service}: avg=#{result.avg_time}ms, p95=#{result.p95_time}ms, p99=#{result.p99_time}ms")
end
# Assert performance requirements
for result <- results do
assert result.avg_time < 1000, "#{result.service} restart too slow: #{result.avg_time}ms"
assert result.p99_time < 3000, "#{result.service} p99 restart too slow: #{result.p99_time}ms"
end
end
end
defp measure_restart_times(sup_tree, service, count) do
for _i <- 1..count do
{:ok, pid} = get_service(sup_tree, service)
start_time = :erlang.monotonic_time(:microsecond)
Process.exit(pid, :kill)
{:ok, _new_pid} = wait_for_service_restart(sup_tree, service, pid)
end_time = :erlang.monotonic_time(:microsecond)
(end_time - start_time) / 1000 # Convert to milliseconds
end
end
defp median(list) do
  sorted = Enum.sort(list)
  len = length(sorted)

  if rem(len, 2) == 0 do
    (Enum.at(sorted, div(len, 2) - 1) + Enum.at(sorted, div(len, 2))) / 2
  else
    Enum.at(sorted, div(len, 2))
  end
end
defp percentile(list, p) do
sorted = Enum.sort(list)
index = round(p * (length(sorted) - 1))
Enum.at(sorted, index)
end
end
2. Memory Profiling Template
defmodule MyApp.SupervisionMemoryTest do
use Foundation.UnifiedTestFoundation, :supervision_testing
import Foundation.SupervisionTestHelpers
@moduletag :supervision_testing
@moduletag :memory_profiling
@moduletag timeout: 180_000
describe "Memory usage profiling" do
test "memory usage during supervision lifecycle", %{supervision_tree: sup_tree} do
# Take initial memory snapshot
initial_memory = memory_snapshot()
# Perform typical supervision operations
for i <- 1..50 do
service = Enum.random(get_supported_services())
{:ok, pid} = get_service(sup_tree, service)
# Every 10 operations, crash and restart
if rem(i, 10) == 0 do
Process.exit(pid, :kill)
{:ok, _new_pid} = wait_for_service_restart(sup_tree, service, pid)
end
# Take periodic snapshots
if rem(i, 10) == 0 do
snapshot = memory_snapshot()
log_memory_usage(i, snapshot, initial_memory)
end
end
# Force garbage collection and take final snapshot
:erlang.garbage_collect()
Process.sleep(1000)
final_memory = memory_snapshot()
# Analyze memory usage
analyze_memory_usage(initial_memory, final_memory)
end
end
defp memory_snapshot do
%{
total: :erlang.memory(:total),
processes: :erlang.memory(:processes),
system: :erlang.memory(:system),
atom: :erlang.memory(:atom),
binary: :erlang.memory(:binary),
ets: :erlang.memory(:ets),
process_count: :erlang.system_info(:process_count),
ets_count: :erlang.system_info(:ets_count)
}
end
defp log_memory_usage(iteration, current, initial) do
growth = current.total - initial.total
IO.puts("Iteration #{iteration}: Total memory #{current.total}, Growth: #{growth}")
end
defp analyze_memory_usage(initial, final) do
growth_total = final.total - initial.total
growth_processes = final.processes - initial.processes
growth_ets = final.ets - initial.ets
# Assert reasonable memory usage
assert growth_total < 50_000_000, # 50MB
"Excessive total memory growth: #{growth_total}"
assert growth_processes < 20_000_000, # 20MB
"Excessive process memory growth: #{growth_processes}"
assert growth_ets < 10_000_000, # 10MB
"Excessive ETS memory growth: #{growth_ets}"
IO.puts("Memory analysis complete:")
IO.puts(" Total growth: #{growth_total} bytes")
IO.puts(" Process growth: #{growth_processes} bytes")
IO.puts(" ETS growth: #{growth_ets} bytes")
end
end
CI/CD Integration Patterns
1. GitHub Actions Integration
# .github/workflows/supervision_tests.yml
name: Supervision Tests
on:
push:
branches: [ main, develop ]
pull_request:
branches: [ main ]
jobs:
supervision_tests:
name: Supervision Crash Recovery Tests
runs-on: ubuntu-latest
strategy:
matrix:
elixir: ['1.15.7']
otp: ['26.1']
steps:
- uses: actions/checkout@v3
- name: Set up Elixir
uses: erlef/setup-beam@v1
with:
elixir-version: ${{ matrix.elixir }}
otp-version: ${{ matrix.otp }}
- name: Restore dependencies cache
uses: actions/cache@v3
with:
path: deps
key: ${{ runner.os }}-mix-${{ hashFiles('**/mix.lock') }}
restore-keys: ${{ runner.os }}-mix-
- name: Install dependencies
run: mix deps.get
- name: Compile code
run: mix compile --warnings-as-errors
- name: Run supervision tests
run: |
mix test test/jido_foundation/supervision_crash_recovery_test.exs --trace
mix test test/jido_foundation/supervision_crash_recovery_test.exs --seed 12345
mix test test/jido_foundation/supervision_crash_recovery_test.exs --seed 67890
- name: Run supervision tests with parallel execution
run: mix test test/jido_foundation/supervision_crash_recovery_test.exs --max-cases 4
- name: Run memory profiling tests
run: mix test --only memory_profiling
- name: Upload test artifacts
if: failure()
uses: actions/upload-artifact@v3
with:
name: test-logs
path: |
_build/test/logs/
test_output.log
2. Performance Regression Detection
# test/support/performance_tracker.ex
defmodule Foundation.PerformanceTracker do
@moduledoc """
Track and compare supervision test performance across builds.
"""
@results_file "test/performance_results.json"
def record_results(test_name, results) do
existing = load_existing_results()
new_entry = %{
test: test_name,
timestamp: DateTime.utc_now(),
results: results,
git_sha: get_git_sha(),
elixir_version: System.version(),
otp_version: System.otp_release()
}
updated = [new_entry | existing]
save_results(updated)
end
def check_regression(test_name, current_results) do
historical = load_existing_results()
|> Enum.filter(&(&1.test == test_name))
|> Enum.take(10) # Last 10 runs
if length(historical) >= 3 do
baseline = calculate_baseline(historical)
regression = detect_regression(baseline, current_results)
if regression do
IO.warn("Performance regression detected in #{test_name}: #{inspect(regression)}")
end
regression
else
nil # Not enough data
end
end
defp load_existing_results do
case File.read(@results_file) do
# Decode with atom keys so downstream access such as &1.test works
{:ok, content} -> Jason.decode!(content, keys: :atoms)
{:error, _} -> []
end
end
defp save_results(results) do
content = Jason.encode!(results, pretty: true)
File.write!(@results_file, content)
end
defp get_git_sha do
case System.cmd("git", ["rev-parse", "HEAD"]) do
{sha, 0} -> String.trim(sha)
_ -> "unknown"
end
end
defp calculate_baseline(historical) do
# Calculate averages from historical data
%{
avg_restart_time: historical
|> Enum.map(&get_in(&1, [:results, :avg_restart_time]))
|> Enum.filter(& &1)
|> average(),
avg_memory_usage: historical
|> Enum.map(&get_in(&1, [:results, :avg_memory_usage]))
|> Enum.filter(& &1)
|> average()
}
end
defp detect_regression(baseline, current) do
  # Rebind the accumulator explicitly: variables assigned inside
  # `if` blocks do not leak out of them in Elixir.
  regressions = []

  # Check restart time regression (20% threshold)
  regressions =
    if current.avg_restart_time > baseline.avg_restart_time * 1.2 do
      [{:restart_time_regression,
        %{baseline: baseline.avg_restart_time, current: current.avg_restart_time}} | regressions]
    else
      regressions
    end

  # Check memory regression (30% threshold)
  regressions =
    if current.avg_memory_usage > baseline.avg_memory_usage * 1.3 do
      [{:memory_regression,
        %{baseline: baseline.avg_memory_usage, current: current.avg_memory_usage}} | regressions]
    else
      regressions
    end

  case regressions do
    [] -> nil
    _ -> regressions
  end
end
defp average([]), do: 0
defp average(list), do: Enum.sum(list) / length(list)
end
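Wiring the tracker into a benchmark test might look like the following sketch (avg_time and memory_growth stand in for whatever metrics your test computed):
# Hypothetical usage inside a benchmark test
results = %{avg_restart_time: avg_time, avg_memory_usage: memory_growth}
Foundation.PerformanceTracker.record_results("restart_benchmark", results)

case Foundation.PerformanceTracker.check_regression("restart_benchmark", results) do
  nil -> :ok
  regressions -> flunk("Performance regressions: #{inspect(regressions)}")
end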
Future Enhancement Roadmap
Phase 1: Advanced Monitoring (Q1 2025)
Real-time Telemetry Integration
# Enhanced telemetry for supervision events
# (numeric measurements first, contextual metadata second)
:telemetry.execute(
  [:supervision, :service, :restart],
  %{restart_time: restart_time, restart_count: restart_count},
  %{service: service_name}
)
Distributed Supervision Testing
# Test supervision across multiple nodes
defmodule DistributedSupervisionTest do
  use Foundation.UnifiedTestFoundation, :distributed_supervision_testing

  test "service failover across nodes", %{cluster: _cluster} do
    # Test supervision behavior in distributed environment
  end
end
Phase 2: AI-Powered Testing (Q2 2025)
Intelligent Chaos Generation
- ML-based chaos pattern generation
- Adaptive chaos based on system behavior
- Predictive failure scenario testing
Automated Performance Optimization
- AI-driven supervision tuning recommendations
- Automatic detection of optimal restart strategies
- Dynamic timeout adjustment based on load patterns
Phase 3: Production Integration (Q3 2025)
Shadow Testing Framework
# Run supervision tests against production traffic shadows
defmodule ProductionShadowTest do
  use Foundation.UnifiedTestFoundation, :shadow_testing

  test "production workload supervision behavior" do
    # Test with real production patterns
  end
end
Continuous Supervision Validation
- Background supervision health monitoring
- Real-time regression detection
- Automated rollback triggers
Phase 4: Advanced Patterns (Q4 2025)
Multi-Language Supervision Testing
- Support for testing Rust NIFs supervision
- Erlang port supervision testing
- Cross-language supervision coordination
Advanced Resource Management
- GPU resource supervision testing
- Network resource supervision patterns
- Storage supervision testing
Conclusion
This template collection provides:
- Ready-to-use templates for common supervision testing scenarios
- Extension patterns for adding new services and test modes
- Advanced testing techniques including chaos engineering and property-based testing
- Performance monitoring and regression detection
- CI/CD integration patterns for automated testing
- Future roadmap for continued enhancement
Usage Guidelines
- Start with basic templates for new services
- Extend gradually with advanced patterns as needed
- Integrate performance monitoring from the beginning
- Use property-based testing for comprehensive coverage
- Monitor for regressions in CI/CD pipelines
Contributing New Patterns
When contributing new patterns to this collection:
- Follow the established naming conventions
- Include comprehensive documentation
- Provide working examples
- Add type specifications
- Include performance considerations
- Document any limitations or caveats
Document Version: 1.0
Last Updated: 2025-07-02
Next Review: Q1 2026
Maintainer: Foundation Supervision Testing Team