JidoSystem Testing Strategy - From Mocks to Production
Executive Summary
The current testing approach relies heavily on mocks, creating a dangerous gap between test and production behavior. This document outlines a comprehensive testing strategy that ensures production readiness while maintaining fast, reliable tests.
Current Testing Problems
1. Mock-Reality Divergence
Current State:
# Test passes with the mock
defmodule CacheTest do
  test "caches values" do
    Foundation.Cache.put(:key, :value)  # mock stores in the process dictionary
    assert Foundation.Cache.get(:key) == :value
  end
end

# Production fails
Foundation.Cache.put(:key, :value)  # no real implementation behind the facade!
Root Cause: Tests written against mock behavior, not contracts.
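The fix starts by making the contract explicit. A minimal sketch, assuming the cache API shown above (the behaviour name is illustrative):

defmodule Foundation.CacheBehaviour do
  @moduledoc "Contract that every cache implementation, real or mock, must satisfy."

  @callback put(key :: term(), value :: term(), opts :: keyword()) :: :ok | {:error, term()}
  @callback get(key :: term()) :: term() | nil
  @callback clear() :: :ok
end

With both the real cache and the mock declaring @behaviour Foundation.CacheBehaviour, the compiler flags any drift in the API surface, and the contract tests below catch drift in semantics.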
2. Missing Failure Scenarios
Tests assume only the happy path:
- No network failures
- No resource exhaustion
- No concurrent conflicts
- No distributed system issues
3. Incomplete Integration Testing
Current integration tests:
- Single node only
- Synchronous execution
- No cross-system boundaries
- Mocked external dependencies
Proposed Testing Architecture
Testing Pyramid
      ┌─────────────┐
      │     E2E     │  5%
      │    Tests    │
    ┌─┴─────────────┴─┐
    │   Integration   │ 20%
    │      Tests      │
  ┌─┴─────────────────┴─┐
  │   Contract Tests    │ 25%
  │  (Mock Validation)  │
┌─┴─────────────────────┴─┐
│       Unit Tests        │ 50%
│    (Pure Functions)     │
└─────────────────────────┘
1. Contract Tests (New Layer)
Purpose: Ensure mocks match production behavior.
defmodule Foundation.CacheContractTest do
  use Foundation.ContractTest,
    module: Foundation.Cache,
    implementations: [
      Foundation.Infrastructure.Cache,
      Foundation.MockCache
    ]

  # Define the contract all implementations must satisfy
  describe "cache contract" do
    property "get returns what was put" do
      check all key <- term(),
                value <- term() do
        # Test against ALL implementations
        for impl <- implementations() do
          impl.clear()
          assert impl.put(key, value) == :ok
          assert impl.get(key) == value
        end
      end
    end

    property "TTL expires values" do
      # Keep TTLs small so the property suite stays fast
      check all key <- term(),
                value <- term(),
                ttl <- integer(1..50) do
        for impl <- implementations() do
          impl.put(key, value, ttl: ttl)
          Process.sleep(ttl + 10)
          assert impl.get(key) == nil
        end
      end
    end

    property "concurrent operations are safe" do
      # Spawn concurrent readers/writers; assert no crashes or lost writes
    end
  end
end
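The use Foundation.ContractTest line assumes a small test helper that injects the implementation list. A minimal sketch of that macro (the names come from this document; the body is an assumption, and the :module option is left unused here):

defmodule Foundation.ContractTest do
  @moduledoc "Scaffolding for contract suites that run against every implementation."

  defmacro __using__(opts) do
    quote do
      use ExUnit.Case, async: true
      use ExUnitProperties

      # The implementation modules every contract property iterates over
      @implementations Keyword.fetch!(unquote(opts), :implementations)

      defp implementations, do: @implementations
    end
  end
end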
2. Behavior Verification Tests
defmodule CircuitBreakerBehaviorTest do
  use ExUnit.Case

  # Test the behavior, not the implementation
  describe "circuit breaker behavior" do
    test "opens after threshold failures" do
      # Create test harness
      {:ok, breaker} = start_breaker(threshold: 3)

      # Inject failures
      for _ <- 1..3 do
        execute_and_fail(breaker)
      end

      # Verify the circuit is open
      assert {:error, :circuit_open} = execute(breaker, fn -> :ok end)
    end

    test "recovers after timeout" do
      {:ok, breaker} = start_breaker(threshold: 1, recovery: 100)

      # Open the circuit
      execute_and_fail(breaker)
      assert circuit_state(breaker) == :open

      # Wait for the recovery window to elapse
      Process.sleep(150)

      # Should allow one probe call, then close on success
      assert {:ok, :success} = execute(breaker, fn -> :success end)
      assert circuit_state(breaker) == :closed
    end
  end

  # Helper to start any breaker implementation
  defp start_breaker(opts) do
    impl = Application.get_env(:test, :circuit_breaker_impl, RealCircuitBreaker)
    impl.start_link(opts)
  end
end
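Because start_breaker/1 resolves the module from application config, the same suite runs against any implementation; pointing it at the mock is one config line (MockCircuitBreaker is a placeholder name):

# config/test.exs -- run the behavior suite against the mock implementation
config :test, :circuit_breaker_impl, MockCircuitBreaker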
3. Property-Based Testing
defmodule AgentPropertyTest do
  use ExUnit.Case
  use ExUnitProperties

  property "agents handle any valid instruction sequence" do
    check all instructions <- list_of(valid_instruction()) do
      {:ok, agent} = start_test_agent()

      # Send all instructions
      results = Enum.map(instructions, &send_instruction(agent, &1))

      # Verify invariants hold
      assert agent_invariants_hold?(agent)
      assert all_results_valid?(results)
    end
  end

  property "agents recover from any error" do
    check all good_instructions <- list_of(valid_instruction()),
              bad_instruction <- error_instruction(),
              more_good <- list_of(valid_instruction()) do
      {:ok, agent} = start_test_agent()

      # Send good instructions
      Enum.each(good_instructions, &send_instruction(agent, &1))

      # Send a bad instruction (must not crash the agent)
      send_instruction(agent, bad_instruction)

      # The agent should still process good instructions
      results = Enum.map(more_good, &send_instruction(agent, &1))
      assert all_results_valid?(results)
    end
  end
end
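These properties lean on custom StreamData generators. A minimal sketch of valid_instruction/0, assuming the action modules used elsewhere in this document and a free-form params map:

# Generates instructions the agent is expected to handle successfully
defp valid_instruction do
  gen all action <- member_of([ProcessAction, ValidateAction, TransformAction]),
          params <- map_of(atom(:alphanumeric), term(), max_length: 5) do
    %Jido.Instruction{action: action, params: params}
  end
end

error_instruction/0 follows the same shape but generates actions or params known to fail validation.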
4. Chaos Testing
defmodule ChaosSuite do
  use ExUnit.Case

  @tag :chaos
  test "system survives random node failures" do
    # Start cluster
    nodes = start_test_cluster(5)

    # Start workload
    workload = start_continuous_workload()

    # Inject chaos: repeatedly kill and restart a random node
    chaos_loop(10, fn ->
      node = Enum.random(nodes)
      kill_node(node)
      Process.sleep(5000)
      restart_node(node)
    end)

    # Verify system health
    assert workload_completed?(workload)
    assert data_consistent?(nodes)
    assert no_messages_lost?()
  end

  @tag :chaos
  test "graceful degradation under resource pressure" do
    # Limit resources
    constrain_memory("500MB")
    constrain_cpu("50%")

    # Apply increasing load
    results =
      for load <- [100, 500, 1000, 5000] do
        apply_load(load)
      end

    # Should degrade gracefully, not crash
    assert Enum.all?(results, fn r ->
             match?({:ok, _}, r) or match?({:degraded, _}, r)
           end)
  end
end
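The chaos_loop/2 helper above is nothing more than repeated fault injection; a minimal sketch:

# Runs the supplied fault-injection function the given number of times
defp chaos_loop(iterations, fault_fn) do
  Enum.each(1..iterations, fn _ -> fault_fn.() end)
end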
5. Load and Performance Tests
defmodule LoadTest do
  use ExUnit.Case

  @tag :load
  test "handles target throughput" do
    target_rps = 10_000
    duration = 60 # seconds

    # Start system
    {:ok, _system} = start_production_like_system()

    # Generate load
    results =
      LoadGenerator.run(
        target_rps: target_rps,
        duration: duration,
        scenario: :mixed_workload
      )

    # Verify SLAs
    assert results.success_rate > 0.99
    assert results.p99_latency < 100 # ms
    assert results.error_rate < 0.01
  end

  @tag :load
  test "auto-scales under variable load" do
    # Start with minimal resources
    {:ok, system} = start_system(agents: 2)

    # Apply a variable load pattern: {rps, duration_in_seconds}
    load_pattern = [
      {100, 30},   # 100 RPS for 30s
      {1000, 30},  # spike to 1000 RPS
      {100, 30},   # back to 100 RPS
      {5000, 60}   # major spike
    ]

    results = LoadGenerator.run_pattern(load_pattern)

    # System should scale to meet demand
    assert Enum.all?(results, fn r -> r.success_rate > 0.95 end)

    # Verify scaling happened
    assert get_agent_count(system) > 2
  end
end
6. Integration Test Harness
defmodule IntegrationHarness do
  @moduledoc """
  Realistic test environment for integration testing.
  """

  def start_test_environment(opts \\ []) do
    # Start external dependencies
    {:ok, _postgres} = start_postgres()
    {:ok, _redis} = start_redis()
    {:ok, _kafka} = start_kafka()

    # Start Foundation services
    {:ok, _foundation} =
      Foundation.start_link(
        registry_impl: MABEAM.AgentRegistry,
        infrastructure_impl: Foundation.Infrastructure.Real
      )

    # Start JidoSystem
    {:ok, _system} =
      JidoSystem.start(
        environment: :test,
        clustering: Keyword.get(opts, :clustering, false)
      )

    # Wait until the system reports ready
    wait_for_ready()
    :ok
  end

  def with_test_cluster(node_count, fun) do
    # Start a distributed test cluster (LocalCluster returns the node list)
    nodes = LocalCluster.start_nodes(:jido_test, node_count)

    # Start the system on each node
    for node <- nodes do
      :rpc.call(node, IntegrationHarness, :start_test_environment, [[clustering: true]])
    end

    # Run the test
    try do
      fun.(nodes)
    after
      # Cleanup
      LocalCluster.stop()
    end
  end
end
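A test then wraps its body in the harness. A usage sketch, assuming register/2 and lookup/1 functions on MABEAM.AgentRegistry (those signatures are illustrative):

test "registry converges across nodes" do
  IntegrationHarness.with_test_cluster(3, fn nodes ->
    [first | rest] = nodes

    # Register on one node, then assert visibility from the others
    :ok = :rpc.call(first, MABEAM.AgentRegistry, :register, [:agent_a, self()])

    for node <- rest do
      assert {:ok, _pid} = :rpc.call(node, MABEAM.AgentRegistry, :lookup, [:agent_a])
    end
  end)
end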
7. Test Categorization and Execution
# mix.exs
def project do
  [
    # ...
    test_coverage: [tool: ExCoveralls],
    preferred_cli_env: [
      "test.unit": :test,
      "test.integration": :test,
      "test.contract": :test,
      "test.chaos": :test,
      "test.load": :test,
      "test.e2e": :test
    ]
  ]
end

defp aliases do
  [
    "test.unit": ["test --only unit"],
    "test.integration": ["test --only integration"],
    "test.contract": ["test --only contract"],
    "test.chaos": ["test --only chaos --max-cases 1"],
    "test.load": ["test --only load --timeout 300000"],
    "test.e2e": ["test --only e2e"],
    "test.all": ["test.unit", "test.contract", "test.integration"],
    "test.ci": ["test.all", "dialyzer", "credo --strict"]
  ]
end
Test Data Management
1. Fixtures and Factories
defmodule TestFactory do
  use ExMachina

  def agent_factory do
    %{
      name: sequence(:name, &"agent_#{&1}"),
      capabilities: build_list(3, :capability),
      resources: build(:resource_spec),
      metadata: %{}
    }
  end

  def instruction_factory do
    %Jido.Instruction{
      id: UUID.uuid4(),
      action: sequence(:action, [ProcessAction, ValidateAction, TransformAction]),
      params: build(:instruction_params),
      context: build(:context)
    }
  end

  # Scenario builders
  def order_processing_scenario do
    %{
      agents: [
        build(:agent, name: "validator", capabilities: [:validation]),
        build(:agent, name: "processor", capabilities: [:processing]),
        build(:agent, name: "notifier", capabilities: [:notification])
      ],
      workflow: build(:workflow, type: :order_processing),
      test_data: build_list(100, :order)
    }
  end
end
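Usage is standard ExMachina: build/2 returns an in-memory value and accepts per-test overrides.

# Override only the fields the test cares about
agent = TestFactory.build(:agent, capabilities: [:validation])
scenario = TestFactory.order_processing_scenario()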
2. Test Containers
defmodule TestContainers do
  @moduledoc """
  Manage external dependencies for testing.
  """

  def postgres_container do
    %{
      image: "postgres:14",
      environment: %{
        "POSTGRES_PASSWORD" => "test",
        "POSTGRES_DB" => "jido_test"
      },
      ports: [{5432, :random}],
      wait_strategy: :port
    }
  end

  def redis_container do
    %{
      image: "redis:7-alpine",
      ports: [{6379, :random}],
      wait_strategy: :log_message,
      wait_for: "Ready to accept connections"
    }
  end

  def start_containers do
    containers = %{
      postgres: postgres_container(),
      redis: redis_container()
    }

    # Start each spec via the container runtime
    # (start_container/1 wraps whichever container library is in use)
    Enum.into(containers, %{}, fn {name, spec} -> {name, start_container(spec)} end)
  end
end
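Integration suites can then start containers once per module. A sketch assuming start_containers/0 returns a map of connection details (the test module name is illustrative):

defmodule OrderIntegrationTest do
  use ExUnit.Case, async: false

  @moduletag :integration

  setup_all do
    # One container set for the whole suite; random ports avoid clashes in CI
    containers = TestContainers.start_containers()
    {:ok, containers: containers}
  end
end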
Continuous Integration Pipeline
GitHub Actions Workflow
name: JidoSystem CI

on: [push, pull_request]

jobs:
  unit-tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        elixir: [1.14, 1.15]
        otp: [25, 26]
    steps:
      - uses: actions/checkout@v3
      - uses: erlef/setup-beam@v1
        with:
          elixir-version: ${{ matrix.elixir }}
          otp-version: ${{ matrix.otp }}
      - run: mix deps.get
      - run: mix test.unit

  contract-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: erlef/setup-beam@v1
      - run: mix deps.get
      - run: mix test.contract

  integration-tests:
    runs-on: ubuntu-latest
    services:
      postgres:
        image: postgres:14
        env:
          POSTGRES_PASSWORD: test
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
      redis:
        image: redis:7
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
    steps:
      - uses: actions/checkout@v3
      - uses: erlef/setup-beam@v1
      - run: mix deps.get
      - run: mix test.integration

  dialyzer:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: erlef/setup-beam@v1
      - uses: actions/cache@v3
        with:
          path: priv/plts
          key: plts-${{ runner.os }}-${{ hashFiles('**/mix.lock') }}
      - run: mix deps.get
      - run: mix dialyzer --halt-exit-status

  chaos-tests:
    runs-on: ubuntu-latest
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    steps:
      - uses: actions/checkout@v3
      - uses: erlef/setup-beam@v1
      - run: mix deps.get
      - run: mix test.chaos

  load-tests:
    runs-on: ubuntu-latest
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    steps:
      - uses: actions/checkout@v3
      - uses: erlef/setup-beam@v1
      - run: mix deps.get
      - run: mix test.load
Test Environment Configuration
1. Development Environment
# config/test.exs
config :jido_system,
  environment: :test,
  use_mocks: true,
  async_testing: true

config :foundation,
  registry_impl: Foundation.MockRegistry,
  infrastructure_impl: Foundation.MockInfrastructure
2. Integration Environment
# config/integration_test.exs
config :jido_system,
  environment: :integration_test,
  use_mocks: false,
  async_testing: false

config :foundation,
  registry_impl: MABEAM.AgentRegistry,
  infrastructure_impl: Foundation.Infrastructure.Real

config :logger, level: :warning
3. Performance Environment
# config/perf_test.exs
config :jido_system,
  environment: :perf_test,
  use_mocks: false,
  metrics_enabled: true,
  profiling_enabled: true
Monitoring Test Health
Test Metrics Dashboard
defmodule TestMetrics do
  def collect do
    %{
      coverage: get_test_coverage(),
      flakiness: calculate_flakiness(),
      duration: get_test_duration_trends(),
      failures: get_failure_patterns(),
      performance: get_performance_regression()
    }
  end

  def report do
    metrics = collect()

    # Alert on concerning trends
    if metrics.flakiness > 0.05 do
      alert("Test flakiness above 5%: #{metrics.flakiness}")
    end

    if metrics.duration.trend == :increasing do
      alert("Test duration increasing: #{metrics.duration.change}%")
    end
  end
end
Migration Strategy
Phase 1: Add Contract Tests (Week 1)
- Create contract test framework
- Write contracts for all mocked modules
- Verify mocks match contracts
Phase 2: Enhance Integration Tests (Week 2)
- Add test containers
- Create integration test harness
- Test distributed scenarios
Phase 3: Add Chaos Tests (Week 3)
- Implement chaos injection
- Add failure scenario tests
- Test recovery mechanisms
Phase 4: Performance Testing (Week 4)
- Create load test suite
- Establish performance baselines
- Add regression detection
Success Criteria
- Coverage: >90% code coverage with meaningful tests
- Flakiness: <1% flaky tests
- Speed: Unit tests <1 min, Integration <5 min
- Reliability: No false positives in CI
- Realism: Tests reflect production scenarios
Conclusion
This testing strategy bridges the gap between mock-based testing and production reality. By introducing contract tests, property-based testing, chaos testing, and realistic integration environments, we ensure that passing tests actually mean the system will work in production.
The key is to test behaviors and contracts, not implementations, while progressively adding more realistic test scenarios that match production conditions.