061 INTEGRATION BOUNDARIES

Documentation for 061_INTEGRATION_BOUNDARIES from the Foundation repository.

System Integration Boundaries & Data Flow

Overview

This document defines the integration boundaries between all major system components, showing how Foundation services, MABEAM coordination, agent management, and external systems work together to form a cohesive multi-agent ML platform.

System Integration Map

┌─────────────────────────────────────────────────────────────────────────────┐
│                           External Boundaries                               │
│                                                                             │
│  ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐           │
│  │   Phoenix   │ │   Python    │ │ LLM APIs    │ │ BEAM Cluster│           │
│  │ HTTP/LiveVw │ │   Bridge    │ │  (GPT/etc)  │ │   Nodes     │           │
│  └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘           │
└─────────────────────────────────────────────────────────────────────────────┘
                                   │
┌─────────────────────────────────────────────────────────────────────────────┐
│                          MABEAM Application Layer                           │
│                                                                             │
│  ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐           │
│  │Agent Manager│ │Coordination │ │  Variable   │ │   ML Type   │           │
│  │  Registry   │ │  Protocols  │ │Orchestrator │ │   System    │           │
│  │   Lifecycle │ │  Consensus  │ │ Parameters  │ │ Validation  │           │
│  └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘           │
└─────────────────────────────────────────────────────────────────────────────┘
                                   │
┌─────────────────────────────────────────────────────────────────────────────┐
│                       Foundation Infrastructure Layer                       │
│                                                                             │
│  ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐           │
│  │  Service    │ │Coordination │ │ Event Store │ │  Telemetry  │           │
│  │ Discovery   │ │ Primitives  │ │    &        │ │     &       │           │
│  │   Config    │ │Distributed  │ │   Config    │ │ Monitoring  │           │
│  └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘           │
└─────────────────────────────────────────────────────────────────────────────┘
                                   │
┌─────────────────────────────────────────────────────────────────────────────┐
│                              BEAM/OTP Platform                              │
│                Process Management │ Fault Tolerance │ Distribution           │
└─────────────────────────────────────────────────────────────────────────────┘

Integration Boundaries Definition

1. Foundation ↔ MABEAM Boundary

Service Discovery Interface

# MABEAM uses Foundation services via ProcessRegistry
defmodule MABEAM.Foundation.Interface do
  @namespace Application.compile_env(:foundation, :namespace, :production)

  def get_config_service() do
    Foundation.ProcessRegistry.lookup({@namespace, :config_server})
  end

  def get_event_store() do
    Foundation.ProcessRegistry.lookup({@namespace, :event_store})
  end

  def get_telemetry_service() do
    Foundation.ProcessRegistry.lookup({@namespace, :telemetry_service})
  end

  # Graceful fallback when Foundation services unavailable
  def with_service_fallback(service_name, primary_fun, fallback_fun) do
    case Foundation.ProcessRegistry.lookup({@namespace, service_name}) do
      {:ok, pid} when is_pid(pid) ->
        try do
          primary_fun.(pid)
        catch
          :exit, {:noproc, _} -> fallback_fun.()
          :exit, {:timeout, _} -> fallback_fun.()
        end
      
      {:error, _} -> 
        fallback_fun.()
    end
  end
end

Configuration Integration

# MABEAM gets configuration through Foundation ConfigServer
defmodule MABEAM.Config do
  def get_agent_config(agent_type) do
    MABEAM.Foundation.Interface.with_service_fallback(
      :config_server,
      fn config_pid ->
        GenServer.call(config_pid, {:get_config, [:mabeam, :agents, agent_type]})
      end,
      fn ->
        # Fallback to application environment
        Application.get_env(:mabeam, :agents)[agent_type]
      end
    )
  end

  def update_coordination_config(new_config) do
    case MABEAM.Foundation.Interface.get_config_service() do
      {:ok, config_pid} ->
        GenServer.call(config_pid, {:update_config, [:mabeam, :coordination], new_config})
      {:error, _} ->
        # Store pending update for when service becomes available
        :ets.insert(:mabeam_pending_config, {:coordination, new_config})
        {:error, :config_service_unavailable}
    end
  end
end

Event Integration

# MABEAM events flow through Foundation EventStore
defmodule MABEAM.Events do
  def emit_agent_event(agent_id, event_type, event_data) do
    event = %{
      source: :mabeam,
      type: event_type,
      agent_id: agent_id,
      timestamp: DateTime.utc_now(),
      data: event_data
    }
    
    MABEAM.Foundation.Interface.with_service_fallback(
      :event_store,
      fn event_store_pid ->
        Foundation.EventStore.append(event_store_pid, event)
      end,
      fn ->
        # Fallback: Store locally and sync later
        :ets.insert(:mabeam_pending_events, {DateTime.utc_now(), event})
        :ok
      end
    )
  end

  def query_agent_history(agent_id, time_range) do
    case MABEAM.Foundation.Interface.get_event_store() do
      {:ok, event_store_pid} ->
        Foundation.EventStore.query(event_store_pid, %{
          source: :mabeam,
          agent_id: agent_id,
          time_range: time_range
        })
      {:error, _} ->
        {:error, :event_store_unavailable}
    end
  end
end

2. Agent ↔ Coordination Boundary

Agent Registration Interface

# Clean interface between individual agents and coordination system
defmodule MABEAM.Agent.CoordinationInterface do
  def register_for_coordination(agent_pid, capabilities) do
    coordination_registration = %{
      agent_pid: agent_pid,
      capabilities: capabilities,
      consensus_eligible: capabilities[:consensus_eligible] || false,
      leadership_eligible: capabilities[:leadership_eligible] || false,
      variable_interests: capabilities[:variable_interests] || [],
      coordination_preferences: capabilities[:coordination_preferences] || %{}
    }
    
    GenServer.call(MABEAM.Coordination, {:register_agent, coordination_registration})
  end

  def participate_in_consensus(agent_pid, proposal_id, vote, rationale) do
    GenServer.cast(MABEAM.Coordination, {
      :consensus_vote, 
      proposal_id, 
      vote, 
      %{agent_pid: agent_pid, rationale: rationale}
    })
  end

  def subscribe_to_variable_updates(agent_pid, variable_names) do
    GenServer.call(MABEAM.Coordination, {
      :subscribe_variables, 
      agent_pid, 
      variable_names
    })
  end
end

Variable Synchronization Interface

defmodule MABEAM.Agent.VariableInterface do
  def update_local_variable(agent_pid, variable_name, new_value, metadata \\ %{}) do
    # Agent proposes variable change to coordination system
    proposal = %{
      variable: variable_name,
      value: new_value,
      proposer: agent_pid,
      metadata: metadata,
      local_update: true
    }
    
    GenServer.call(MABEAM.Coordination, {:propose_variable_change, proposal})
  end

  def handle_variable_update(agent_state, variable_name, new_value, coordination_metadata) do
    # Agent receives variable update from coordination system
    case Map.get(agent_state.variable_handlers, variable_name) do
      handler_fun when is_function(handler_fun) ->
        # Custom handler for this variable
        handler_fun.(agent_state, new_value, coordination_metadata)
        
      nil ->
        # Default handler: update agent's variable map
        new_variables = Map.put(agent_state.variables, variable_name, new_value)
        %{agent_state | variables: new_variables}
    end
  end
end

3. MABEAM ↔ External Systems Boundary

Phoenix Integration Interface

# Phoenix LiveView integration for real-time agent monitoring
defmodule MABEAMWeb.AgentMonitorLive do
  use Phoenix.LiveView
  
  def mount(_params, _session, socket) do
    # Subscribe to MABEAM events
    :ok = MABEAM.Events.subscribe_to_agent_events()
    
    # Get initial agent state
    agents = MABEAM.AgentRegistry.list_all_agents()
    coordination_state = MABEAM.Coordination.get_system_state()
    
    socket = assign(socket, agents: agents, coordination: coordination_state)
    {:ok, socket}
  end

  def handle_info({:agent_event, event}, socket) do
    # Update LiveView with real-time agent events
    updated_agents = update_agent_display(socket.assigns.agents, event)
    {:noreply, assign(socket, agents: updated_agents)}
  end

  def handle_event("start_agent", %{"agent_type" => type}, socket) do
    # Handle user request to start new agent
    case MABEAM.AgentRegistry.start_agent_of_type(type) do
      {:ok, agent_id} ->
        {:noreply, put_flash(socket, :info, "Agent #{agent_id} started")}
      {:error, reason} ->
        {:noreply, put_flash(socket, :error, "Failed to start agent: #{reason}")}
    end
  end
end

Python Bridge Interface

# Python integration for ML library interop
defmodule MABEAM.PythonBridge do
  use GenServer
  
  def start_link(opts \\ []) do
    GenServer.start_link(__MODULE__, opts, name: __MODULE__)
  end

  def execute_python_ml_task(agent_id, python_module, function, args) do
    task_spec = %{
      agent_id: agent_id,
      module: python_module,
      function: function,
      args: args,
      timeout: 30_000
    }
    
    GenServer.call(__MODULE__, {:execute_task, task_spec})
  end

  def init(opts) do
    # Start Python process with ML environment
    python_path = Keyword.get(opts, :python_path, "python3")
    ml_script_path = Keyword.get(opts, :ml_script_path, "./python/ml_bridge.py")
    
    port = Port.open({:spawn, "#{python_path} #{ml_script_path}"}, [
      {:packet, 2},
      :binary,
      :exit_status
    ])
    
    {:ok, %{port: port, pending_tasks: %{}}}
  end

  def handle_call({:execute_task, task_spec}, from, state) do
    task_id = generate_task_id()
    
    # Send task to Python process
    python_request = %{
      id: task_id,
      module: task_spec.module,
      function: task_spec.function,
      args: task_spec.args
    }
    
    Port.command(state.port, :erlang.term_to_binary(python_request))
    
    # Track pending task
    pending_tasks = Map.put(state.pending_tasks, task_id, from)
    
    {:noreply, %{state | pending_tasks: pending_tasks}}
  end

  def handle_info({port, {:data, data}}, %{port: port} = state) do
    # Receive response from Python
    response = :erlang.binary_to_term(data)
    
    case Map.pop(state.pending_tasks, response.id) do
      {from, remaining_tasks} when from != nil ->
        GenServer.reply(from, {:ok, response.result})
        {:noreply, %{state | pending_tasks: remaining_tasks}}
        
      {nil, _} ->
        # Unexpected response
        {:noreply, state}
    end
  end
end

LLM Service Integration

# LLM API integration with provider abstraction
defmodule MABEAM.LLMAdapter do
  @behaviour MABEAM.LLMProvider
  
  def complete_text(provider, prompt, options \\ %{}) do
    adapter_module = get_provider_adapter(provider)
    adapter_module.complete_text(prompt, options)
  end

  def embed_text(provider, text, options \\ %{}) do
    adapter_module = get_provider_adapter(provider)
    adapter_module.embed_text(text, options)
  end

  defp get_provider_adapter(:openai), do: MABEAM.LLMAdapters.OpenAI
  defp get_provider_adapter(:anthropic), do: MABEAM.LLMAdapters.Anthropic
  defp get_provider_adapter(:gemini), do: MABEAM.LLMAdapters.Gemini
end

defmodule MABEAM.LLMAdapters.OpenAI do
  @behaviour MABEAM.LLMProvider

  def complete_text(prompt, options) do
    # OpenAI API integration
    request_body = %{
      model: options[:model] || "gpt-4",
      messages: [%{role: "user", content: prompt}],
      temperature: options[:temperature] || 0.7,
      max_tokens: options[:max_tokens] || 1000
    }
    
    case HTTPoison.post(
      "https://api.openai.com/v1/chat/completions",
      Jason.encode!(request_body),
      [{"authorization", "Bearer #{get_api_key()}"}, {"content-type", "application/json"}]
    ) do
      {:ok, %{status_code: 200, body: body}} ->
        response = Jason.decode!(body)
        {:ok, response["choices"][0]["message"]["content"]}
        
      {:ok, %{status_code: status_code, body: body}} ->
        {:error, {:http_error, status_code, body}}
        
      {:error, reason} ->
        {:error, {:request_failed, reason}}
    end
  end
end

Data Flow Patterns

Configuration Flow

1. Application Start → Foundation.ConfigServer loads base config
2. MABEAM.Application → Requests MABEAM-specific config via Foundation interface
3. Agent Spawn → Agent requests config through MABEAM.Config → Foundation.ConfigServer
4. Runtime Updates → Config changes flow through Foundation → MABEAM → Agents
5. Fallback → If Foundation unavailable, use local/ETS cache

Event Flow

1. Agent Event → MABEAM.Events.emit_agent_event()
2. Foundation Interface → Routes to Foundation.EventStore (if available)
3. Event Processing → Foundation processes and potentially triggers alerts
4. Telemetry → Events converted to metrics via Foundation.TelemetryService  
5. External Systems → Phoenix LiveView, monitoring dashboards receive updates
6. Fallback → Events cached locally if Foundation unavailable

Coordination Flow

1. Variable Change → Agent proposes through MABEAM.Agent.CoordinationInterface
2. Consensus → MABEAM.Coordination coordinates agreement across agents
3. Decision → Consensus result broadcast to all interested agents
4. Synchronization → Agents update local state with new variable values
5. Persistence → Variable changes logged to Foundation.EventStore
6. Telemetry → Coordination metrics emitted to Foundation.TelemetryService

Task Execution Flow

1. Task Request → External system requests ML task via Phoenix/API
2. Agent Selection → MABEAM.LoadBalancer selects appropriate agent
3. Task Assignment → Task routed to selected agent process  
4. Execution → Agent executes task (potentially using Python bridge)
5. Coordination → Agent coordinates with others if needed (consensus/barriers)
6. Result → Task result returned through Phoenix/API
7. Logging → Execution logged to Foundation.EventStore

Integration Testing Strategy

Boundary Testing

defmodule MABEAMIntegrationTest do
  use ExUnit.Case
  
  test "MABEAM operates with Foundation services available" do
    # Start both Foundation and MABEAM applications
    {:ok, _} = Foundation.Application.start(:normal, [])
    {:ok, _} = MABEAM.Application.start(:normal, [])
    
    # Test service discovery works
    {:ok, config_pid} = MABEAM.Foundation.Interface.get_config_service()
    assert is_pid(config_pid)
    
    # Test configuration flow
    config = MABEAM.Config.get_agent_config(:coder_agent)
    assert config != nil
    
    # Test event flow
    :ok = MABEAM.Events.emit_agent_event(:test_agent, :started, %{})
    
    # Verify event was stored
    events = Foundation.EventStore.query(%{source: :mabeam})
    assert length(events) > 0
  end
  
  test "MABEAM operates with Foundation services unavailable" do
    # Start only MABEAM (Foundation services not available)
    {:ok, _} = MABEAM.Application.start(:normal, [])
    
    # Test graceful fallback for configuration
    config = MABEAM.Config.get_agent_config(:coder_agent)
    assert config != nil  # Should fallback to application env
    
    # Test graceful fallback for events
    :ok = MABEAM.Events.emit_agent_event(:test_agent, :started, %{})
    
    # Verify event was cached locally
    cached_events = :ets.tab2list(:mabeam_pending_events)
    assert length(cached_events) > 0
  end
  
  test "end-to-end agent coordination" do
    setup_test_environment()
    
    # Create multiple agents
    {:ok, agent1} = MABEAM.AgentRegistry.start_agent(:coder_agent)
    {:ok, agent2} = MABEAM.AgentRegistry.start_agent(:reviewer_agent)
    
    # Test variable coordination
    :ok = MABEAM.Coordination.propose_variable_change(:learning_rate, 0.01)
    
    # Wait for consensus
    :timer.sleep(1000)
    
    # Verify all agents received update
    assert get_agent_variable(agent1, :learning_rate) == 0.01
    assert get_agent_variable(agent2, :learning_rate) == 0.01
    
    # Test coordination survives agent restart
    MABEAM.AgentRegistry.stop_agent(agent1)
    {:ok, agent1_restarted} = MABEAM.AgentRegistry.start_agent(:coder_agent)
    
    # New agent should receive current variable state
    assert get_agent_variable(agent1_restarted, :learning_rate) == 0.01
  end
end

Fault Tolerance Testing

defmodule MABEAMFaultToleranceTest do
  test "system handles Foundation service crashes" do
    setup_integrated_system()
    
    # Get Foundation ConfigServer PID
    {:ok, config_pid} = Foundation.ProcessRegistry.lookup({:test, :config_server})
    
    # Crash the service
    Process.exit(config_pid, :kill)
    
    # MABEAM should continue operating with fallback
    config = MABEAM.Config.get_agent_config(:coder_agent)
    assert config != nil
    
    # Service should restart automatically
    :timer.sleep(1000)
    {:ok, new_config_pid} = Foundation.ProcessRegistry.lookup({:test, :config_server})
    assert new_config_pid != config_pid
    assert Process.alive?(new_config_pid)
  end
  
  test "coordination handles agent crashes during consensus" do
    agents = setup_consensus_test_agents(5)
    
    # Start consensus
    proposal_id = start_test_consensus(:learning_rate, 0.01)
    
    # Crash two agents during voting
    [agent1, agent2 | _] = agents
    Process.exit(agent1, :kill)
    Process.exit(agent2, :kill)
    
    # Consensus should still complete with remaining agents
    assert_consensus_completes(proposal_id, :accept)
    
    # Verify variable updated on surviving agents
    for agent <- agents -- [agent1, agent2] do
      if Process.alive?(agent) do
        assert get_agent_variable(agent, :learning_rate) == 0.01
      end
    end
  end
end

Performance Characteristics

Latency Profiles

Service Discovery: <1ms (ETS lookup)
Configuration Retrieval: <5ms (GenServer call)
Event Emission: <2ms (async cast)
Variable Synchronization: <10ms (consensus completion)
Agent Coordination: <50ms (multi-agent consensus)

Throughput Characteristics

Agent Events: 10,000+ events/second
Variable Updates: 1,000+ updates/second
Consensus Operations: 100+ consensus/second
Service Calls: 50,000+ calls/second

Resource Usage

Memory per Agent: ~1MB baseline
CPU per Coordination: ~1% single core
Network Bandwidth: <100KB/s per node for coordination
Storage Growth: ~1GB/day events with 1000 agents

Summary

This integration architecture provides:

Clean Boundaries: Well-defined interfaces between all major components
Fault Tolerance: Graceful fallbacks when services unavailable
Scalability: Efficient communication patterns for large agent networks
Testability: Integration boundaries enable comprehensive testing
Extensibility: Clear extension points for new services and agents
Performance: Optimized data flows with minimal overhead
Observability: Comprehensive event/telemetry integration

The boundary design ensures that:

Components can be developed and tested independently
System continues operating with partial service availability
New capabilities can be added without disrupting existing functionality
Performance characteristics are predictable and measurable
Operational monitoring provides full system visibility

This architecture foundation enables building sophisticated multi-agent ML systems with enterprise-grade reliability and performance.