103 CLAUDE TEST

Documentation for 103_CLAUDE_TEST from the DSPEx repository.

This document continues the exhaustive test structure for DSPEx, covering the advanced modules from Stage 6 of the implementation plan.

This completes the test suite skeleton, providing a clear roadmap for test-driven development of the entire framework.

`test/dspex/chain_of_thought_test.exs`

This suite tests the ChainOfThought program, which extends Predict by prompting for an intermediate reasoning step.

# test/dspex/chain_of_thought_test.exs
defmodule DSPEx.ChainOfThoughtTest do
  # Test skeleton for DSPEx.ChainOfThought.
  #
  # Every case below is a bodiless `test`, which ExUnit reports as
  # "Not implemented" (a failure) until a body is written.
  use ExUnit.Case, async: true
  import Mox

  alias DSPEx.ChainOfThought

  # Minimal "question -> answer" signature for the tests in this module.
  defmodule QASig do
    use DSPEx.Signature, "question -> answer"
  end

  describe "ChainOfThought Initialization" do
    test "new/2 creates a CoT struct with a base signature and client"
    test "automatically creates an extended signature with a 'rationale' field"
    test "the extended signature places 'rationale' before other output fields"
    test "can be initialized with a custom rationale field name (e.g., :reasoning)"
  end

  describe "forward/2 Execution" do
    # These tests require mocking the adapter and client.
    setup :verify_on_exit!

    # Plan for this test:
    #   * Mock the adapter to check that it's called with the *extended* signature.
    #   * Mock the client to return a response with both rationale and answer.
    #
    # Deliberately bodiless: a `do ... end` block containing only comments
    # would pass vacuously and hide the fact that nothing is verified yet.
    test "successfully executes the full pipeline for a CoT prompt"

    test "returns a Prediction struct containing both :rationale and :answer in its outputs"
    test "propagates errors from the client or adapter"
  end

  describe "Integration with Adapters" do
    test "the default Chat adapter formats a prompt that explicitly asks for a rationale"
    test "the default Chat adapter can parse both a rationale and an answer"
  end
end

`test/dspex/multi_chain_comparison_test.exs`

This suite tests the MultiChainComparison module, which is a “meta-program” for refining answers.

# test/dspex/multi_chain_comparison_test.exs
defmodule DSPEx.MultiChainComparisonTest do
  # Test skeleton for DSPEx.MultiChainComparison. All cases are bodiless
  # placeholders that ExUnit reports as "Not implemented".
  use ExUnit.Case, async: true
  import Mox

  alias DSPEx.{MultiChainComparison, Prediction}

  # Signature describing the comparison task itself: a question plus the
  # candidate answers go in, a single best answer comes out.
  defmodule CompareSig do
    use DSPEx.Signature, "question, candidate_answers -> best_answer"
  end

  # Puts three candidate predictions into the test context under :predictions.
  setup do
    candidates =
      for answer <- ["Paris", "The City of Light", "paris"] do
        %Prediction{outputs: %{answer: answer}}
      end

    [predictions: candidates]
  end

  describe "MultiChainComparison Initialization" do
    test "initializes with a signature for the comparison logic"
  end

  describe "forward/2 Execution Logic" do
    setup :verify_on_exit!

    test "formats a prompt containing the question and all candidate answers"
    test "calls the LLM client to get the best answer"
    test "parses the final 'best_answer' from the LLM's response"
    test "returns a final Prediction struct"
  end

  describe "Edge Case Handling" do
    test "handles an empty list of candidate predictions"
    test "handles a list with only one candidate prediction"
  end
end

`test/dspex/parallel_test.exs`

This suite tests the Parallel executor, focusing on concurrency and result aggregation.

# test/dspex/parallel_test.exs
defmodule DSPEx.ParallelTest do
  # Tests for the DSPEx.Parallel executor: concurrency and result aggregation.
  use ExUnit.Case

  alias DSPEx.Parallel

  # Program stub whose behaviour is driven entirely by its inputs:
  #   * %{id: id, sleep: ms}          -> sleeps `ms` milliseconds, then succeeds
  #   * %{id: id, should_fail: true}  -> sleeps 10 ms, then returns an error
  defmodule MockSlowProgram do
    @behaviour DSPEx.Program

    # The tests build this stub with `%MockSlowProgram{}`, which only compiles
    # if the module defines a struct. It carries no fields.
    defstruct []

    def forward(_program, %{id: id, sleep: sleep_ms}) do
      Process.sleep(sleep_ms)
      {:ok, %{result: "Program #{id} done"}}
    end

    def forward(_program, %{id: id, should_fail: true}) do
      Process.sleep(10)
      {:error, "Program #{id} failed"}
    end

    def configure(p, c), do: Map.merge(p, c)
  end

  describe "Parallel Execution" do
    test "executes a list of programs concurrently" do
      programs = [
        {%MockSlowProgram{}, %{id: 1, sleep: 100}},
        {%MockSlowProgram{}, %{id: 2, sleep: 100}},
        {%MockSlowProgram{}, %{id: 3, sleep: 100}}
      ]

      # :timer.tc/1 returns {elapsed_microseconds, result_of_fun}.
      {elapsed_us, result} = :timer.tc(fn -> Parallel.run(programs) end)

      # The total time should be slightly more than 100ms, not 300ms.
      assert elapsed_us < 200_000
      assert {:ok, _results} = result
    end

    test "returns results in the same order as the input programs"
  end

  describe "Result and Error Handling" do
    test "aggregates both :ok and :error tuples from the program executions"
    test "a single failing program does not stop the execution of others"
  end
end

`test/dspex/retriever_test.exs`

This suite tests the Retriever behaviour and its integration into a RAG program.

# test/dspex/retriever_test.exs
defmodule DSPEx.RetrieverTest do
  # Tests for the DSPEx.Retriever behaviour and its use inside a RAG program.
  use ExUnit.Case, async: true
  import Mox

  # --- Mocks and Test Modules ---

  # Retriever stub: returns two passages for "query1" and none for any other query.
  defmodule MockRetriever do
    @behaviour DSPEx.Retriever

    # Needed so the tests can build the stub with `%MockRetriever{}`.
    defstruct []

    def forward(_re, "query1"), do: {:ok, [%{text: "Passage A"}, %{text: "Passage B"}]}
    def forward(_re, _any_query), do: {:ok, []}
  end

  # Predictor stub: reports the inputs it was called with back to `test_pid`
  # and returns a fixed prediction, so the test can assert on what the RAG
  # program passed to it.
  defmodule StubPredictor do
    @behaviour DSPEx.Program
    defstruct [:test_pid]

    def forward(%__MODULE__{test_pid: test_pid}, inputs) do
      send(test_pid, {:predictor_called, inputs})
      {:ok, :final_prediction}
    end

    def configure(p, c), do: Map.merge(p, c)
  end

  defmodule RAG do
    @behaviour DSPEx.Program
    # A simplified RAG module for testing.
    defstruct [:retriever, :predictor]

    # Retrieves passages for `inputs.question`, joins their text with newlines
    # into a :context input, and forwards the enriched inputs to the predictor.
    # A non-{:ok, _} result from the retriever is returned unchanged, and the
    # predictor's result is returned as-is.
    def forward(%{retriever: r, predictor: p} = _program, inputs) do
      with {:ok, passages} <- DSPEx.Retriever.forward(r, inputs.question) do
        # Format context for the predictor
        context = Enum.map_join(passages, "\n", & &1.text)
        DSPEx.Program.forward(p, Map.put(inputs, :context, context))
      end
    end

    def configure(p, c), do: Map.merge(p, c)
  end

  # --- End of Test Modules ---

  describe "Retriever Behaviour" do
    test "forward/2 takes a query and returns a list of passages"
  end

  describe "RAG Program Integration" do
    setup :verify_on_exit!

    test "a RAG program successfully orchestrates retrieval and prediction" do
      # NOTE(review): assumes DSPEx.Program.forward/2 dispatches to the
      # program struct's own forward/2 (the same assumption the RAG struct
      # relies on here) — confirm against DSPEx.Program.
      predictor = %StubPredictor{test_pid: self()}
      rag_program = %RAG{retriever: %MockRetriever{}, predictor: predictor}

      assert {:ok, :final_prediction} = DSPEx.Program.forward(rag_program, %{question: "query1"})

      # The predictor must have received the retrieved passages as context.
      assert_received {:predictor_called, %{context: context, question: "query1"}}
      assert context == "Passage A\nPassage B"
    end
  end
end

`test/integration/full_pipeline_test.exs`

This suite performs a full end-to-end integration test of a common use-case, like an optimized RAG pipeline.

# test/integration/full_pipeline_test.exs
defmodule DSPEx.Integration.FullPipelineTest do
  @moduledoc """
  End-to-end integration test skeleton for complete DSPEx pipelines.

  This suite will be more complex than the unit suites: it requires mocks for
  the client, a functional in-memory retriever, and real program/teleprompter
  modules.
  """

  # Do not run tests concurrently as they involve multiple components
  # and could rely on shared mocks or state.
  use ExUnit.Case, async: false

  describe "Optimized RAG Pipeline" do
    test "BootstrapFewShot can successfully compile a RAG program"
    test "the compiled RAG program produces better results than the uncompiled one"
    test "the full flow from question -> retrieve -> predict -> evaluate works"
  end

  describe "Optimized ChainOfThought Pipeline" do
    test "BootstrapFewShot can successfully compile a ChainOfThought program"
    test "the compiled CoT program generates better rationales and answers"
  end
end

`test/property/dspex_property_test.exs`

This suite uses property-based testing to verify system invariants and find edge cases automatically.

# test/property/dspex_property_test.exs
defmodule DSPEx.PropertyTest do
  # Property-based test skeleton for DSPEx invariants, built on PropCheck.
  use ExUnit.Case
  use PropCheck

  # --- Generators for property testing ---
  # PropCheck generators are ordinary functions. The `let` macro binds a
  # generated value with `<-` and transforms it inside its `do` block.

  # Generates a lowercase (a..z) atom of 1 to 10 characters.
  # NOTE: atoms are never garbage collected; this is acceptable only because
  # the generator runs for a bounded number of test cases.
  def field_name do
    let chars <- non_empty(list(range(?a, ?z))) do
      chars |> Enum.take(10) |> List.to_atom()
    end
  end

  # Generates a list of distinct field names.
  def field_list do
    let names <- list(field_name()) do
      Enum.uniq(names)
    end
  end

  # ---

  # The cases below are pending. PropCheck's `property` macro requires a
  # `do` block, so each placeholder is a bodiless ExUnit `test` (reported as
  # "Not implemented"). Switch each one back to `property ... do ... end`
  # when it is implemented.

  describe "Signature Properties" do
    test "parsing is robust against unusual but valid spacing"
    test "input and output fields of a parsed signature are always disjoint"
  end

  describe "Example Properties" do
    test "for any example, inputs() and labels() are always disjoint"
    test "for any example, the keys of inputs() and labels() combined equal all data keys"
  end

  describe "Adapter Properties" do
    # This property checks for a form of round-tripping.
    test "parsing a formatted prompt recovers the original data structure"
  end
end