Phase 1: Schema Engine Implementation Guide
Detailed Implementation Strategy for Sinter-Powered Schema System
Implementation Overview
This document provides the detailed implementation strategy for the Schema Engine component of Phase 1, including code examples, integration patterns, and step-by-step development approach.
Core Implementation Components
1. Schema Definition Module
defmodule ElixirML.Schema.Definition do
@moduledoc """
Core module for schema definition functionality.
Provides the foundation for schema creation and validation.
"""
defmacro __using__(_opts) do
quote do
# Initialize schema attributes
Module.register_attribute(__MODULE__, :fields, accumulate: true)
Module.register_attribute(__MODULE__, :validations, accumulate: true)
Module.register_attribute(__MODULE__, :transforms, accumulate: true)
# Import schema DSL
import ElixirML.Schema.DSL
# Ensure compilation hooks
@before_compile ElixirML.Schema.Definition
end
end
defmacro __before_compile__(env) do
fields = Module.get_attribute(env.module, :fields, []) |> Enum.reverse()
validations = Module.get_attribute(env.module, :validations, []) |> Enum.reverse()
transforms = Module.get_attribute(env.module, :transforms, []) |> Enum.reverse()
quote do
def __schema_fields__, do: unquote(Macro.escape(fields))
def __schema_validations__, do: unquote(Macro.escape(validations))
def __schema_transforms__, do: unquote(Macro.escape(transforms))
def __schema_metadata__ do
%{
fields: __schema_fields__(),
validations: __schema_validations__(),
transforms: __schema_transforms__(),
module: __MODULE__,
compiled_at: unquote(DateTime.utc_now())
}
end
end
end
end
2. Schema DSL Implementation
defmodule ElixirML.Schema.DSL do
@moduledoc """
Domain Specific Language for schema definition.
Provides macros for field definition, validation, and transformation.
"""
defmacro field(name, type, opts \\ []) do
quote do
@fields {unquote(name), unquote(type), unquote(opts)}
end
end
defmacro validation(name, do: block) do
quote do
@validations {unquote(name), fn data ->
unquote(block)
end}
end
end
defmacro transform(name, do: block) do
quote do
@transforms {unquote(name), fn data ->
unquote(block)
end}
end
end
# Helper functions for use within validations and transforms
defmacro field(field_name) do
quote do
Map.get(var!(data), unquote(field_name))
end
end
defmacro update_field(field_name, transform_fn) do
quote do
Map.update(var!(data), unquote(field_name), nil, unquote(transform_fn))
end
end
defmacro require_field(field_name) do
quote do
case Map.get(var!(data), unquote(field_name)) do
nil -> {:error, "Required field #{unquote(field_name)} is missing"}
value -> {:ok, value}
end
end
end
end
3. Validation Engine Implementation
defmodule ElixirML.Schema.Validator do
@moduledoc """
High-performance validation engine with comprehensive error reporting.
"""
alias ElixirML.Schema.Types
@doc """
Validate data against a schema module.
"""
def validate(data, schema_module, opts \\ []) do
metadata = schema_module.__schema_metadata__()
context = %{
schema_module: schema_module,
strict: Keyword.get(opts, :strict, true),
transform: Keyword.get(opts, :transform, true),
path: [],
errors: []
}
with {:ok, field_validated} <- validate_fields(data, metadata.fields, context),
{:ok, custom_validated} <- apply_custom_validations(field_validated, metadata.validations, context) do
final_data = if context.transform do
apply_transforms(custom_validated, metadata.transforms)
else
custom_validated
end
{:ok, final_data}
else
{:error, errors} -> {:error, format_validation_errors(errors)}
end
end
defp validate_fields(data, fields, context) do
{validated_data, errors} =
Enum.reduce(fields, {%{}, []}, fn {field_name, field_type, field_opts}, {acc_data, acc_errors} ->
case validate_single_field(data, field_name, field_type, field_opts, context) do
{:ok, validated_value} ->
{Map.put(acc_data, field_name, validated_value), acc_errors}
{:error, error} ->
{acc_data, [error | acc_errors]}
end
end)
case errors do
[] -> {:ok, validated_data}
_ -> {:error, Enum.reverse(errors)}
end
end
defp validate_single_field(data, field_name, field_type, field_opts, context) do
field_path = [field_name | context.path]
case Map.get(data, field_name) do
nil ->
handle_missing_field(field_name, field_opts, field_path)
value ->
validate_field_value(value, field_type, field_opts, field_path)
end
end
defp handle_missing_field(field_name, field_opts, field_path) do
cond do
Keyword.get(field_opts, :required, false) ->
{:error, %{
path: Enum.reverse(field_path),
field: field_name,
message: "Required field is missing",
code: :required_field_missing
}}
Keyword.has_key?(field_opts, :default) ->
{:ok, Keyword.get(field_opts, :default)}
true ->
{:ok, nil}
end
end
defp validate_field_value(value, field_type, field_opts, field_path) do
with {:ok, type_validated} <- Types.validate_type(value, field_type),
{:ok, constraint_validated} <- validate_field_constraints(type_validated, field_opts) do
{:ok, constraint_validated}
else
{:error, message} ->
{:error, %{
path: Enum.reverse(field_path),
message: message,
value: value,
expected_type: field_type,
code: :validation_failed
}}
end
end
defp validate_field_constraints(value, field_opts) do
constraints = Keyword.get(field_opts, :constraints, [])
Enum.reduce_while(constraints, {:ok, value}, fn constraint, {:ok, val} ->
case apply_constraint(val, constraint) do
{:ok, new_val} -> {:cont, {:ok, new_val}}
{:error, _} = error -> {:halt, error}
end
end)
end
defp apply_constraint(value, {:min, min_val}) when is_number(value) do
if value >= min_val do
{:ok, value}
else
{:error, "Value #{value} is less than minimum #{min_val}"}
end
end
defp apply_constraint(value, {:max, max_val}) when is_number(value) do
if value <= max_val do
{:ok, value}
else
{:error, "Value #{value} is greater than maximum #{max_val}"}
end
end
defp apply_constraint(value, {:in, allowed_values}) do
if value in allowed_values do
{:ok, value}
else
{:error, "Value #{inspect(value)} not in allowed values #{inspect(allowed_values)}"}
end
end
defp apply_constraint(value, {:length, length_constraint}) do
actual_length = case value do
v when is_binary(v) -> String.length(v)
v when is_list(v) -> length(v)
v when is_map(v) -> map_size(v)
_ -> nil
end
case {actual_length, length_constraint} do
{nil, _} -> {:error, "Cannot validate length for this type"}
{len, {:min, min_len}} when len >= min_len -> {:ok, value}
{len, {:max, max_len}} when len <= max_len -> {:ok, value}
{len, {:exact, exact_len}} when len == exact_len -> {:ok, value}
{len, {:range, {min_len, max_len}}} when len >= min_len and len <= max_len -> {:ok, value}
{len, constraint} -> {:error, "Length #{len} does not satisfy constraint #{inspect(constraint)}"}
end
end
defp apply_constraint(value, {:custom, validator_fn}) when is_function(validator_fn, 1) do
validator_fn.(value)
end
defp apply_constraint(value, _constraint), do: {:ok, value}
defp apply_custom_validations(data, validations, _context) do
errors =
validations
|> Enum.reduce([], fn {validation_name, validation_fn}, acc_errors ->
case validation_fn.(data) do
true -> acc_errors
false -> [%{validation: validation_name, message: "Custom validation failed"} | acc_errors]
{:error, message} -> [%{validation: validation_name, message: message} | acc_errors]
_ -> acc_errors
end
end)
case errors do
[] -> {:ok, data}
_ -> {:error, Enum.reverse(errors)}
end
end
defp apply_transforms(data, transforms) do
Enum.reduce(transforms, data, fn {_transform_name, transform_fn}, acc_data ->
case transform_fn.(acc_data) do
{:ok, transformed} -> transformed
transformed when is_map(transformed) -> transformed
_ -> acc_data
end
end)
end
defp format_validation_errors(errors) do
%{
errors: errors,
error_count: length(errors),
formatted_message: format_error_message(errors)
}
end
defp format_error_message(errors) do
errors
|> Enum.map(fn error ->
path_str = case Map.get(error, :path, []) do
[] -> "root"
path -> Enum.join(path, ".")
end
"#{path_str}: #{error.message}"
end)
|> Enum.join("; ")
end
end
4. Enhanced Type System
defmodule ElixirML.Schema.Types do
@moduledoc """
Comprehensive type system for ElixirML schemas.
Includes basic types, complex types, and ML-specific types.
"""
# Basic type validation
def validate_type(value, :string) when is_binary(value), do: {:ok, value}
def validate_type(value, :integer) when is_integer(value), do: {:ok, value}
def validate_type(value, :float) when is_float(value), do: {:ok, value}
def validate_type(value, :number) when is_number(value), do: {:ok, value}
def validate_type(value, :boolean) when is_boolean(value), do: {:ok, value}
def validate_type(value, :atom) when is_atom(value), do: {:ok, value}
def validate_type(value, :binary) when is_binary(value), do: {:ok, value}
def validate_type(value, :map) when is_map(value), do: {:ok, value}
def validate_type(value, :list) when is_list(value), do: {:ok, value}
# Complex type validation
def validate_type(value, {:array, inner_type}) when is_list(value) do
validated_items =
value
|> Enum.with_index()
|> Enum.map(fn {item, index} ->
case validate_type(item, inner_type) do
{:ok, validated_item} -> {:ok, validated_item}
{:error, error} -> {:error, "Item at index #{index}: #{error}"}
end
end)
case Enum.find(validated_items, &match?({:error, _}, &1)) do
nil -> {:ok, Enum.map(validated_items, fn {:ok, item} -> item end)}
{:error, error} -> {:error, error}
end
end
def validate_type(value, {:union, types}) do
# Try each type until one succeeds
case Enum.find_value(types, fn type ->
case validate_type(value, type) do
{:ok, validated} -> {:ok, validated}
{:error, _} -> nil
end
end) do
{:ok, validated} -> {:ok, validated}
nil -> {:error, "Value does not match any of the union types: #{inspect(types)}"}
end
end
def validate_type(value, {:enum, choices}) do
if value in choices do
{:ok, value}
else
{:error, "Value #{inspect(value)} not in enum choices: #{inspect(choices)}"}
end
end
def validate_type(value, {:struct, module}) do
if is_struct(value, module) do
{:ok, value}
else
{:error, "Value is not a struct of type #{module}"}
end
end
# ML-specific type validation
def validate_type(value, :embedding) do
case value do
list when is_list(list) and length(list) > 0 ->
if Enum.all?(list, &is_number/1) do
{:ok, value}
else
{:error, "Embedding must be a list of numbers"}
end
_ -> {:error, "Embedding must be a non-empty list"}
end
end
def validate_type(value, :tensor) do
# Basic tensor validation - in real implementation would use Nx
case value do
%{shape: shape, data: data} when is_tuple(shape) and is_list(data) ->
{:ok, value}
_ -> {:error, "Tensor must have shape and data fields"}
end
end
def validate_type(value, :token_list) do
case value do
list when is_list(list) ->
if Enum.all?(list, &(is_binary(&1) or is_integer(&1))) do
{:ok, value}
else
{:error, "Token list must contain strings or integers"}
end
_ -> {:error, "Token list must be a list"}
end
end
def validate_type(value, :probability) do
case value do
num when is_number(num) and num >= 0.0 and num <= 1.0 ->
{:ok, value}
_ -> {:error, "Probability must be a number between 0.0 and 1.0"}
end
end
def validate_type(value, :confidence_score) do
validate_type(value, :probability)
end
def validate_type(value, :model_response) do
case value do
%{content: content} when is_binary(content) ->
{:ok, value}
_ -> {:error, "Model response must have content field"}
end
end
def validate_type(value, :variable_config) do
case value do
%{name: name, type: type, value: _value} when is_atom(name) and is_atom(type) ->
{:ok, value}
_ -> {:error, "Variable config must have name (atom), type (atom), and value fields"}
end
end
# Fallback for unknown types
def validate_type(value, type) do
{:error, "Unknown type: #{inspect(type)} for value: #{inspect(value)}"}
end
end
5. Runtime Schema Creation
defmodule ElixirML.Schema.Runtime do
@moduledoc """
Runtime schema creation and validation for dynamic schemas.
"""
defstruct [
:fields,
:validations,
:transforms,
:metadata
]
def new(opts \\ []) do
%__MODULE__{
fields: Keyword.get(opts, :fields, []),
validations: Keyword.get(opts, :validations, []),
transforms: Keyword.get(opts, :transforms, []),
metadata: Keyword.get(opts, :metadata, %{})
}
end
def add_field(schema, name, type, opts \\ []) do
new_field = {name, type, opts}
%{schema | fields: [new_field | schema.fields]}
end
def add_validation(schema, name, validation_fn) do
new_validation = {name, validation_fn}
%{schema | validations: [new_validation | schema.validations]}
end
def add_transform(schema, name, transform_fn) do
new_transform = {name, transform_fn}
%{schema | transforms: [new_transform | schema.transforms]}
end
def validate(schema, data, opts \\ []) do
# Create a temporary module-like structure for validation
temp_metadata = %{
fields: Enum.reverse(schema.fields),
validations: Enum.reverse(schema.validations),
transforms: Enum.reverse(schema.transforms)
}
ElixirML.Schema.Validator.validate_with_metadata(data, temp_metadata, opts)
end
def compile(schema, module_name) do
# Generate a real module from runtime schema
fields_ast = Enum.map(schema.fields, fn {name, type, opts} ->
quote do
field unquote(name), unquote(type), unquote(opts)
end
end)
validations_ast = Enum.map(schema.validations, fn {name, validation_fn} ->
quote do
validation unquote(name) do
unquote(validation_fn).(data)
end
end
end)
transforms_ast = Enum.map(schema.transforms, fn {name, transform_fn} ->
quote do
transform unquote(name) do
unquote(transform_fn).(data)
end
end
end)
module_ast = quote do
defmodule unquote(module_name) do
use ElixirML.Schema
defschema do
unquote_splicing(fields_ast)
unquote_splicing(validations_ast)
unquote_splicing(transforms_ast)
end
end
end
Code.eval_quoted(module_ast)
module_name
end
end
Integration Implementation
1. Variable System Integration
defmodule ElixirML.Schema.VariableIntegration do
@moduledoc """
Integration layer between Schema Engine and Variable System.
"""
alias ElixirML.Schema.Runtime
alias ElixirML.Variable
def schema_from_variable_space(variable_space) do
fields = Enum.map(variable_space.variables, &variable_to_field/1)
Runtime.new(
fields: fields,
validations: generate_variable_validations(variable_space),
transforms: generate_variable_transforms(variable_space),
metadata: %{
variable_space_id: variable_space.id,
generated_at: DateTime.utc_now()
}
)
end
defp variable_to_field(%Variable{} = variable) do
{variable.name, variable_type_to_schema_type(variable), variable_to_field_opts(variable)}
end
defp variable_type_to_schema_type(%Variable{type: :float, constraints: %{range: {min, max}}}),
do: :float
defp variable_type_to_schema_type(%Variable{type: :integer, constraints: %{range: {min, max}}}),
do: :integer
defp variable_type_to_schema_type(%Variable{type: :choice, constraints: %{choices: choices}}),
do: {:enum, choices}
defp variable_type_to_schema_type(%Variable{type: :module, constraints: %{modules: modules}}),
do: {:union, Enum.map(modules, &{:struct, &1})}
defp variable_type_to_schema_type(%Variable{type: type}), do: type
defp variable_to_field_opts(%Variable{} = variable) do
opts = []
opts = if variable.default do
[{:default, variable.default} | opts]
else
opts
end
opts = case variable.constraints do
%{range: {min, max}} -> [{:constraints, [min: min, max: max]} | opts]
%{choices: choices} -> [{:constraints, [in: choices]} | opts]
%{length: length_constraint} -> [{:constraints, [length: length_constraint]} | opts]
_ -> opts
end
if variable.metadata[:required] do
[{:required, true} | opts]
else
opts
end
end
defp generate_variable_validations(variable_space) do
# Generate cross-variable validations
Enum.flat_map(variable_space.constraints, fn constraint ->
case constraint do
%{type: :dependency, from: from_var, to: to_var, condition: condition_fn} ->
[{:"dependency_#{from_var}_#{to_var}", fn data ->
from_value = Map.get(data, from_var)
to_value = Map.get(data, to_var)
condition_fn.(from_value, to_value)
end}]
_ -> []
end
end)
end
defp generate_variable_transforms(variable_space) do
# Generate variable-specific transforms
variable_space.variables
|> Enum.flat_map(fn variable ->
case variable.metadata[:transform] do
nil -> []
transform_fn -> [{variable.name, transform_fn}]
end
end)
end
def validate_variable_configuration(variable_space, configuration) do
schema = schema_from_variable_space(variable_space)
Runtime.validate(schema, configuration)
end
def generate_variable_configuration_schema(variable_space, opts \\ []) do
schema = schema_from_variable_space(variable_space)
if Keyword.get(opts, :compile, false) do
module_name = Keyword.get(opts, :module_name,
Module.concat([ElixirML.Schemas.Generated, "VariableSpace#{variable_space.id}"]))
Runtime.compile(schema, module_name)
else
schema
end
end
end
2. Resource Framework Integration
defmodule ElixirML.Schema.ResourceIntegration do
@moduledoc """
Integration with Ash Resource Framework for schema-validated resources.
"""
defmacro __using__(opts) do
quote do
use Ash.Resource, unquote(opts)
import ElixirML.Schema.ResourceIntegration
# Add schema validation capabilities
@schema_validations []
@before_compile ElixirML.Schema.ResourceIntegration
end
end
defmacro __before_compile__(env) do
schema_validations = Module.get_attribute(env.module, :schema_validations, [])
validation_functions = Enum.map(schema_validations, fn {attr_name, schema_module} ->
generate_validation_function(attr_name, schema_module)
end)
quote do
unquote_splicing(validation_functions)
end
end
defmacro schema_attribute(name, schema_module, opts \\ []) do
quote do
attribute unquote(name), :map, unquote(opts)
@schema_validations [{unquote(name), unquote(schema_module)} | @schema_validations]
validate unquote(:"validate_#{name}_schema") do
unquote(:"__validate_#{name}_schema__")
end
end
end
defp generate_validation_function(attr_name, schema_module) do
function_name = :"__validate_#{attr_name}_schema__"
quote do
def unquote(function_name) do
fn changeset ->
case Ash.Changeset.get_attribute(changeset, unquote(attr_name)) do
nil -> changeset
value ->
case unquote(schema_module).validate(value) do
{:ok, validated_value} ->
Ash.Changeset.change_attribute(changeset, unquote(attr_name), validated_value)
{:error, error_details} ->
error_message = format_schema_errors(error_details)
Ash.Changeset.add_error(changeset,
field: unquote(attr_name),
message: error_message)
end
end
end
end
end
end
def format_schema_errors(%{formatted_message: message}), do: message
def format_schema_errors(errors) when is_list(errors) do
errors
|> Enum.map(fn error -> "#{error.path}: #{error.message}" end)
|> Enum.join(", ")
end
def format_schema_errors(error), do: inspect(error)
# Helper for creating schema-validated actions
defmacro schema_action(name, schema_module, opts \\ []) do
quote do
action unquote(name), unquote(opts[:return_type] || :map) do
argument :data, :map, allow_nil?: false
run fn input, context ->
case unquote(schema_module).validate(input.data) do
{:ok, validated_data} ->
# Continue with action logic
{:ok, validated_data}
{:error, error_details} ->
{:error, format_schema_errors(error_details)}
end
end
end
end
end
end
Testing Implementation
1. Schema Testing Framework
defmodule ElixirML.Schema.TestHelpers do
@moduledoc """
Testing utilities for schema validation and development.
"""
import ExUnit.Assertions
import StreamData
def assert_valid_schema(schema_module, valid_data) do
case schema_module.validate(valid_data) do
{:ok, _validated} -> :ok
{:error, errors} ->
flunk("Expected valid data but got errors: #{inspect(errors)}")
end
end
def assert_invalid_schema(schema_module, invalid_data, expected_error_pattern \\ nil) do
case schema_module.validate(invalid_data) do
{:ok, _} ->
flunk("Expected invalid data but validation succeeded")
{:error, errors} ->
if expected_error_pattern do
assert errors.formatted_message =~ expected_error_pattern
end
:ok
end
end
def property_test_schema(schema_module, opts \\ []) do
iterations = Keyword.get(opts, :iterations, 100)
valid_data_generator = generate_valid_data_for_schema(schema_module)
property_test = property([valid_data <- valid_data_generator], do: (
assert_valid_schema(schema_module, valid_data)
))
check_all(property_test, iterations: iterations)
end
def generate_valid_data_for_schema(schema_module) do
metadata = schema_module.__schema_metadata__()
field_generators =
metadata.fields
|> Enum.map(fn {field_name, field_type, field_opts} ->
{field_name, generate_data_for_type(field_type, field_opts)}
end)
|> Map.new()
fixed_map(field_generators)
end
defp generate_data_for_type(:string, opts) do
min_length = get_constraint_value(opts, :length, :min, 0)
max_length = get_constraint_value(opts, :length, :max, 100)
string(:alphanumeric, min_length: min_length, max_length: max_length)
end
defp generate_data_for_type(:integer, opts) do
min_val = get_constraint_value(opts, :min, nil, 0)
max_val = get_constraint_value(opts, :max, nil, 1000)
integer(min_val..max_val)
end
defp generate_data_for_type(:float, opts) do
min_val = get_constraint_value(opts, :min, nil, 0.0)
max_val = get_constraint_value(opts, :max, nil, 1.0)
float(min: min_val, max: max_val)
end
defp generate_data_for_type(:boolean, _opts) do
boolean()
end
defp generate_data_for_type({:enum, choices}, _opts) do
member_of(choices)
end
defp generate_data_for_type({:array, inner_type}, opts) do
min_length = get_constraint_value(opts, :length, :min, 0)
max_length = get_constraint_value(opts, :length, :max, 10)
list_of(generate_data_for_type(inner_type, []),
min_length: min_length,
max_length: max_length)
end
defp generate_data_for_type(:probability, _opts) do
float(min: 0.0, max: 1.0)
end
defp generate_data_for_type(:embedding, _opts) do
list_of(float(), min_length: 1, max_length: 512)
end
defp generate_data_for_type(type, _opts) do
# Fallback for unknown types
constant("generated_#{type}")
end
defp get_constraint_value(opts, constraint_type, sub_key, default) do
constraints = Keyword.get(opts, :constraints, [])
case Keyword.get(constraints, constraint_type) do
nil -> default
value when sub_key == nil -> value
constraint_map when is_map(constraint_map) -> Map.get(constraint_map, sub_key, default)
{min, max} when sub_key == :min -> min
{min, max} when sub_key == :max -> max
_ -> default
end
end
# Performance testing utilities
def benchmark_schema_validation(schema_module, data, opts \\ []) do
iterations = Keyword.get(opts, :iterations, 1000)
{time_us, _result} = :timer.tc(fn ->
Enum.each(1..iterations, fn _ ->
schema_module.validate(data)
end)
end)
avg_time_us = time_us / iterations
%{
total_time_us: time_us,
average_time_us: avg_time_us,
iterations: iterations,
validations_per_second: 1_000_000 / avg_time_us
}
end
end
Implementation Roadmap
Week 1: Core Implementation
- Schema Definition module and DSL
- Basic Validation Engine
- Type System implementation
- Runtime Schema creation
Week 2: Advanced Features
- Complex type validation (arrays, unions, structs)
- Custom validation and transformation functions
- Error handling and reporting
- Performance optimization basics
Week 3: Integration Implementation
- Variable System integration
- Resource Framework integration
- Program schema binding
- Adapter schema validation
Week 4: Testing and Tools
- Testing framework implementation
- Property-based testing setup
- Performance benchmarking
- Documentation and examples
Performance Considerations
Optimization Strategies
- Compile-Time Optimization: Generate optimized validation functions
- Caching: Cache validation results for repeated data patterns
- Lazy Validation: Skip expensive validations when possible
- Batch Processing: Validate multiple items efficiently
Memory Management
- Minimal Copying: Avoid unnecessary data copying during validation
- Streaming Validation: Process large datasets in chunks
- Garbage Collection: Minimize temporary object creation
Monitoring and Metrics
- Validation Performance: Track validation times and throughput
- Cache Hit Rates: Monitor caching effectiveness
- Error Patterns: Analyze common validation failures
- Memory Usage: Track memory consumption patterns
This implementation guide provides the foundation for building a robust, high-performance schema engine that integrates seamlessly with the Variable System and Resource Framework while maintaining excellent developer experience and runtime performance.