DUAL IMPLEMENTATION SUPPORT SEAMLESS 20250719

Documentation for DUAL_IMPLEMENTATION_SUPPORT_SEAMLESS_20250719 from the Dspex repository.

DSPex Dual Implementation Support Architecture

Executive Summary

This document outlines the architecture for seamlessly supporting both native Elixir and Python DSPy implementations within DSPex. The design allows gradual migration from Python-backed modules to native implementations while maintaining a stable, unified API for users.

Architecture Overview

Core Principle

DSPex provides a unified interface that automatically routes to the best available implementation:

Native Elixir: Direct BEAM execution for maximum performance
Python DSPy: Via Snakepit for modules not yet ported

Key Design Decisions

No Snakepit for Native Code: Native implementations run directly in BEAM
Transparent Routing: Users don’t need to know which implementation is used
Gradual Migration: Port modules one at a time without breaking changes
Performance First: Native implementations get priority when available

Implementation Architecture

1. Module Reference Structure

Each DSPex module returns a tagged reference indicating its implementation:

# Native implementation returns
{:ok, {:native, %NativeModuleState{}}}

# Python implementation returns  
{:ok, {:python, session_id, module_id}}

2. Unified Module Interface

All convenience wrappers follow this pattern:

defmodule DSPex.Modules.ChainOfThought do
  @moduledoc """
  Chain of Thought reasoning module.
  Automatically routes to native or Python implementation.
  """
  
  def create(signature, opts \\ []) do
    case select_implementation(opts) do
      :native ->
        DSPex.Native.ChainOfThought.create(signature, opts)
        
      :python ->
        session_id = ensure_session(opts)
        id = DSPex.Utils.ID.generate("cot")
        
        case Snakepit.Python.call("dspy.ChainOfThought", 
          %{signature: signature}, 
          [store_as: id, session_id: session_id] ++ opts) do
          {:ok, _} -> {:ok, {:python, session_id, id}}
          error -> error
        end
    end
  end
  
  def execute({:native, module_state}, inputs, opts) do
    DSPex.Native.ChainOfThought.execute(module_state, inputs, opts)
  end
  
  def execute({:python, session_id, module_id}, inputs, opts) do
    Snakepit.Python.call("stored.#{module_id}.__call__", inputs, 
      [session_id: session_id] ++ opts)
  end
  
  defp select_implementation(opts) do
    cond do
      opts[:implementation] == :native && native_available?() -> :native
      opts[:implementation] == :python -> :python
      native_available?() && prefer_native?() -> :native
      true -> :python
    end
  end
  
  defp native_available? do
    Code.ensure_loaded?(DSPex.Native.ChainOfThought)
  end
  
  defp prefer_native? do
    Application.get_env(:dspex, :prefer_native, true)
  end
end

3. Implementation Registry

Track which modules have native implementations:

defmodule DSPex.Implementation.Registry do
  @moduledoc """
  Registry of available implementations for each DSPex module.
  """
  
  @native_implementations %{
    # Already native
    signature: true,
    template: true,
    validator: true,
    metrics: true,
    
    # Python only (for now)
    predict: false,
    chain_of_thought: false,
    react: false,
    program_of_thought: false,
    multi_chain_comparison: false,
    retry: false,
    
    # Optimizers
    bootstrap_few_shot: false,
    mipro: false,
    mipro_v2: false,
    copro: false,
    
    # Retrievers
    colbert_v2: false,
    retrieve: false
  }
  
  def has_native?(module_type) do
    Map.get(@native_implementations, module_type, false)
  end
  
  def list_by_implementation do
    Enum.group_by(@native_implementations, fn {_, native} -> 
      if native, do: :native, else: :python
    end)
  end
end

4. Router Enhancement

The existing Router module handles implementation selection:

defmodule DSPex.Router do
  @moduledoc """
  Routes operations to appropriate implementations with telemetry.
  """
  
  def route(operation, module_type, args, opts) do
    implementation = select_implementation(module_type, opts)
    
    :telemetry.execute(
      [:dspex, :router, :route],
      %{timestamp: System.monotonic_time()},
      %{
        operation: operation,
        module_type: module_type,
        implementation: implementation,
        native_available: DSPex.Implementation.Registry.has_native?(module_type)
      }
    )
    
    case implementation do
      :native ->
        route_to_native(operation, module_type, args, opts)
        
      :python ->
        route_to_python(operation, module_type, args, opts)
    end
  end
  
  defp route_to_native(operation, module_type, args, opts) do
    module = Module.concat([DSPex, Native, Macro.camelize(to_string(module_type))])
    apply(module, operation, [args, opts])
  end
  
  defp route_to_python(operation, module_type, args, opts) do
    # Use existing Python bridge via Snakepit
    DSPex.Python.Bridge.execute(nil, operation, args, opts)
  end
end

5. Session Management Abstraction

Hide session complexity from users:

defmodule DSPex.Session do
  @moduledoc """
  Unified session management for both native and Python implementations.
  """
  
  def with_session(fun, opts \\ []) do
    case determine_session_need(opts) do
      :none ->
        # Native implementation, no session needed
        fun.(opts)
        
      :python ->
        # Python implementation needs Snakepit session
        session_id = Snakepit.Python.create_session()
        try do
          fun.(Keyword.put(opts, :session_id, session_id))
        after
          Snakepit.Python.destroy_session(session_id)
        end
    end
  end
  
  defp determine_session_need(opts) do
    if opts[:implementation] == :native || all_native?(opts[:modules]) do
      :none
    else
      :python
    end
  end
end

Migration Path

Phase 1: Python-First (Current State)

All DSPy modules implemented via Python wrappers
Snakepit handles all execution
Native only for signatures, templates, validators

Phase 2: High-Value Native Modules

Priority modules to implement natively:

Predict - Most basic, high-frequency operation
ChainOfThought - Popular reasoning module
Evaluation.Metrics - Performance-critical scoring

Phase 3: Optimizers

Native implementations of optimization algorithms:

BootstrapFewShot - Most commonly used
MIPRO/MIPROv2 - Complex but high-value

Phase 4: Advanced Modules

ReAct - Requires tool integration design
Retrievers - May keep some as Python for vector DB compatibility

Configuration

# config/config.exs
config :dspex,
  # Implementation preferences
  prefer_native: true,              # Use native when available
  allow_fallback: true,            # Fall back to Python if native fails
  
  # Module-specific overrides
  implementation_overrides: %{
    # Force specific implementations
    chain_of_thought: :python,     # Use Python even if native exists
    predict: :native               # Use native even in Python-first mode
  },
  
  # Performance settings
  implementation_cache: true,       # Cache implementation decisions
  telemetry_enabled: true          # Track implementation usage

Usage Examples

Basic Usage (Implementation Transparent)

# Users don't need to know which implementation is used
{:ok, cot} = DSPex.Modules.ChainOfThought.create("question -> answer")
{:ok, result} = DSPex.Modules.ChainOfThought.execute(cot, %{
  question: "What is machine learning?"
})

Forcing Implementation

# Force native implementation
{:ok, cot} = DSPex.Modules.ChainOfThought.create(
  "question -> answer",
  implementation: :native
)

# Force Python implementation
{:ok, cot} = DSPex.Modules.ChainOfThought.create(
  "question -> answer", 
  implementation: :python
)

Mixed Pipeline

# Pipeline automatically uses best implementation for each step
pipeline = DSPex.pipeline([
  # Native signature parsing
  {:native, DSPex.Native.Signature, spec: "query -> keywords: list[str]"},
  
  # Python ChainOfThought (not yet native)
  {:auto, DSPex.Modules.ChainOfThought, signature: "keywords -> analysis"},
  
  # Native template rendering
  {:native, DSPex.Native.Template, template: "Analysis: <%= @analysis %>"}
])

{:ok, result} = DSPex.run_pipeline(pipeline, %{query: "explain DSPy"})

Performance Characteristics

Native Implementations

Latency: < 0.1ms for most operations
Throughput: 500k+ ops/sec
Memory: Shared BEAM memory, no serialization
Scaling: Limited by BEAM scheduler

Python Implementations (via Snakepit)

Latency: 2-100ms depending on operation
Throughput: 1k-50k ops/sec
Memory: Separate Python processes, serialization overhead
Scaling: Limited by pool size and Python GIL

Testing Strategy

1. Implementation Parity Tests

defmodule DSPex.Test.ParityTest do
  use ExUnit.Case
  
  @modules_with_native [:predict, :chain_of_thought]
  
  for module <- @modules_with_native do
    test "#{module} native and Python produce equivalent results" do
      signature = "input -> output"
      inputs = %{input: "test"}
      
      # Test Python implementation
      {:ok, py_mod} = DSPex.Modules.unquote(module).create(
        signature, 
        implementation: :python
      )
      {:ok, py_result} = DSPex.Modules.unquote(module).execute(py_mod, inputs)
      
      # Test native implementation
      {:ok, native_mod} = DSPex.Modules.unquote(module).create(
        signature,
        implementation: :native
      )
      {:ok, native_result} = DSPex.Modules.unquote(module).execute(
        native_mod, 
        inputs
      )
      
      # Results should be equivalent
      assert equivalent_results?(py_result, native_result)
    end
  end
end

2. Performance Benchmarks

defmodule DSPex.Benchmark do
  def compare_implementations(module_type, inputs) do
    Benchee.run(%{
      "native" => fn ->
        {:ok, m} = DSPex.Modules.create(module_type, "input -> output", 
          implementation: :native)
        DSPex.Modules.execute(m, inputs)
      end,
      "python" => fn ->
        {:ok, m} = DSPex.Modules.create(module_type, "input -> output",
          implementation: :python)
        DSPex.Modules.execute(m, inputs)
      end
    })
  end
end

Benefits

Seamless Migration: Port modules without changing user code
Performance Optimization: Native modules run at full BEAM speed
Flexibility: Choose implementation based on needs
Gradual Adoption: No “big bang” migration required
Fallback Safety: Python implementation always available
Clean Architecture: Clear separation of concerns

Future Considerations

WebAssembly Integration

Potential third implementation type for compute-intensive operations
Would follow same pattern: {:wasm, module_ref}

Distributed Execution

Native implementations can leverage distributed BEAM
Python implementations could use distributed Snakepit pools

Hot Code Reloading

Native implementations support BEAM hot code reloading
Python implementations require session restart

Conclusion

This dual implementation architecture allows DSPex to evolve from a Python DSPy bridge to a native Elixir implementation while maintaining API stability. Users get the best of both worlds: immediate access to all DSPy functionality via Python, with gradual performance improvements as modules are ported to native Elixir.