Of course. Here is the continuation of the exhaustive test structure for DSPEx
, covering the advanced modules from Stage 6 of your implementation plan.
This completes the test suite skeleton, providing a clear roadmap for test-driven development of the entire framework.
test/dspex/chain_of_thought_test.exs
This suite tests the ChainOfThought
program, which extends Predict
by prompting for an intermediate reasoning step.
# test/dspex/chain_of_thought_test.exs
defmodule DSPEx.ChainOfThoughtTest do
use ExUnit.Case, async: true
import Mox
alias DSPEx.ChainOfThought
defmodule QASig do
use DSPEx.Signature, "question -> answer"
end
describe "ChainOfThought Initialization" do
test "new/2 creates a CoT struct with a base signature and client"
test "automatically creates an extended signature with a 'rationale' field"
test "the extended signature places 'rationale' before other output fields"
test "can be initialized with a custom rationale field name (e.g., :reasoning)"
end
describe "forward/2 Execution" do
# These tests require mocking the adapter and client.
setup :verify_on_exit!
test "successfully executes the full pipeline for a CoT prompt" do
# Mock the adapter to check that it's called with the *extended* signature.
# Mock the client to return a response with both rationale and answer.
end
test "returns a Prediction struct containing both :rationale and :answer in its outputs"
test "propagates errors from the client or adapter"
end
describe "Integration with Adapters" do
test "the default Chat adapter formats a prompt that explicitly asks for a rationale"
test "the default Chat adapter can parse both a rationale and an answer"
end
end
test/dspex/multi_chain_comparison_test.exs
This suite tests the MultiChainComparison
module, which is a “meta-program” for refining answers.
# test/dspex/multi_chain_comparison_test.exs
defmodule DSPEx.MultiChainComparisonTest do
use ExUnit.Case, async: true
import Mox
alias DSPEx.MultiChainComparison
alias DSPEx.Prediction
defmodule CompareSig do
# The signature for the comparison task itself
use DSPEx.Signature, "question, candidate_answers -> best_answer"
end
setup do
%{
predictions: [
%Prediction{outputs: %{answer: "Paris"}},
%Prediction{outputs: %{answer: "The City of Light"}},
%Prediction{outputs: %{answer: "paris"}}
]
}
end
describe "MultiChainComparison Initialization" do
test "initializes with a signature for the comparison logic"
end
describe "forward/2 Execution Logic" do
setup :verify_on_exit!
test "formats a prompt containing the question and all candidate answers"
test "calls the LLM client to get the best answer"
test "parses the final 'best_answer' from the LLM's response"
test "returns a final Prediction struct"
end
describe "Edge Case Handling" do
test "handles an empty list of candidate predictions"
test "handles a list with only one candidate prediction"
end
end
test/dspex/parallel_test.exs
This suite tests the Parallel
executor, focusing on concurrency and result aggregation.
# test/dspex/parallel_test.exs
defmodule DSPEx.ParallelTest do
use ExUnit.Case
alias DSPEx.Parallel
defmodule MockSlowProgram do
@behaviour DSPEx.Program
def forward(_program, %{id: id, sleep: sleep_ms}) do
Process.sleep(sleep_ms)
{:ok, %{result: "Program #{id} done"}}
end
def forward(_program, %{id: id, should_fail: true}) do
Process.sleep(10)
{:error, "Program #{id} failed"}
end
def configure(p, c), do: Map.merge(p, c)
end
describe "Parallel Execution" do
test "executes a list of programs concurrently" do
programs = [
{%MockSlowProgram{}, %{id: 1, sleep: 100}},
{%MockSlowProgram{}, %{id: 2, sleep: 100}},
{%MockSlowProgram{}, %{id: 3, sleep: 100}}
]
# The total time should be slightly more than 100ms, not 300ms.
assert {:ok, _results} = :timer.tc(fn -> Parallel.run(programs) end) |> then(fn {time, res} ->
assert time < 200_000 # Time in microseconds
res
end)
end
test "returns results in the same order as the input programs"
end
describe "Result and Error Handling" do
test "aggregates both :ok and :error tuples from the program executions"
test "a single failing program does not stop the execution of others"
end
end
test/dspex/retriever_test.exs
This suite tests the Retriever
behaviour and its integration into a RAG
program.
# test/dspex/retriever_test.exs
defmodule DSPEx.RetrieverTest do
use ExUnit.Case, async: true
import Mox
# --- Mocks and Test Modules ---
defmodule MockRetriever do
@behaviour DSPEx.Retriever
def forward(_re, "query1"), do: {:ok, [%{text: "Passage A"}, %{text: "Passage B"}]}
def forward(_re, _any_query), do: {:ok, []}
end
defmodule RAG do
@behaviour DSPEx.Program
# A simplified RAG module for testing.
defstruct [:retriever, :predictor]
def forward(%{retriever: r, predictor: p} = _program, inputs) do
with {:ok, passages} <- DSPEx.Retriever.forward(r, inputs.question),
# Format context for the predictor
context = Enum.map_join(passages, "\n", & &1.text),
{:ok, prediction} <- DSPEx.Program.forward(p, Map.put(inputs, :context, context)) do
{:ok, prediction}
else
error -> error
end
end
def configure(p, c), do: Map.merge(p, c)
end
# --- End of Test Modules ---
describe "Retriever Behaviour" do
test "forward/2 takes a query and returns a list of passages"
end
describe "RAG Program Integration" do
setup :verify_on_exit!
test "a RAG program successfully orchestrates retrieval and prediction" do
# Mock the Predictor's forward call to assert it receives the context.
predictor_mock = mock_program()
expect(DSPEx.Program, :forward, fn ^predictor_mock, %{context: context, question: "query1"} ->
assert context == "Passage A\nPassage B"
{:ok, :final_prediction}
end)
rag_program = %RAG{retriever: %MockRetriever{}, predictor: predictor_mock}
assert {:ok, :final_prediction} = DSPEx.Program.forward(rag_program, %{question: "query1"})
end
end
end
test/integration/full_pipeline_test.exs
This suite performs a full end-to-end integration test of a common use-case, like an optimized RAG pipeline.
# test/integration/full_pipeline_test.exs
defmodule DSPEx.Integration.FullPipelineTest do
use ExUnit.Case
# Do not run tests concurrently as they involve multiple components
# and could rely on shared mocks or state.
# This test suite will be more complex, requiring mocks for the client,
# a functional in-memory retriever, and real program/teleprompter modules.
describe "Optimized RAG Pipeline" do
test "BootstrapFewShot can successfully compile a RAG program"
test "the compiled RAG program produces better results than the uncompiled one"
test "the full flow from question -> retrieve -> predict -> evaluate works"
end
describe "Optimized ChainOfThought Pipeline" do
test "BootstrapFewShot can successfully compile a ChainOfThought program"
test "the compiled CoT program generates better rationales and answers"
end
end
test/property/dspex_property_test.exs
This suite uses property-based testing to verify system invariants and find edge cases automatically.
# test/property/dspex_property_test.exs
defmodule DSPEx.PropertyTest do
use ExUnit.Case
use PropCheck
# --- Generators for property testing ---
# Example generator:
let :field_name, do: string(?a..?z, min: 1, max: 10) |> map(&String.to_atom/1)
let :field_list, do: list_of(:field_name) |> uniq()
# ---
describe "Signature Properties" do
property "parsing is robust against unusual but valid spacing"
property "input and output fields of a parsed signature are always disjoint"
end
describe "Example Properties" do
property "for any example, inputs() and labels() are always disjoint"
property "for any example, the keys of inputs() and labels() combined equal all data keys"
end
describe "Adapter Properties" do
# This property checks for a form of round-tripping.
property "parsing a formatted prompt recovers the original data structure"
end
end