Comprehensive Test Plan for JsonRemedy
Test Categories Overview
1. Layer 1: Content Cleaning Tests
2. Layer 2: Structural Repair Tests
3. Layer 3: Syntax Normalization Tests
4. Layer 4: Validation Tests
5. Layer 5: Tolerant Parsing Tests
6. Integration Tests
7. Performance Tests
8. Edge Case Tests
9. Real-World Scenario Tests
10. Error Handling Tests
1. Layer 1: Content Cleaning Tests
Code Fence Removal
describe "code fence removal" do
test "simple json fences" do
input = """
```json
{"name": "Alice"}
```
"""
assert JsonRemedy.repair(input) == {:ok, %{"name" => "Alice"}}
end
test "fences with language variants" do
inputs = [
"```JSON\n{\"a\": 1}\n```",
"```javascript\n{\"a\": 1}\n```",
"```js\n{\"a\": 1}\n```"
]
for input <- inputs do
assert {:ok, %{"a" => 1}} = JsonRemedy.repair(input)
end
end
test "nested code fences" do
input = """
Here's some JSON:
```json
{"description": "Use ```json for highlighting"}
```
"""
expected = %{"description" => "Use ```json for highlighting"}
assert JsonRemedy.repair(input) == {:ok, expected}
end
test "malformed fences" do
inputs = [
"```json\n{\"a\": 1}", # Missing closing fence
"{\"a\": 1}\n```", # Missing opening fence
"``json\n{\"a\": 1}```" # Malformed opening
]
for input <- inputs do
assert {:ok, %{"a" => 1}} = JsonRemedy.repair(input)
end
end
end
Comment Removal
describe "comment removal" do
test "line comments" do
inputs = [
"// This is JSON\n{\"name\": \"Alice\"}",
"{\"name\": \"Alice\"} // End comment",
"{\n \"name\": \"Alice\", // Inline comment\n \"age\": 30\n}"
]
expected = %{"name" => "Alice", "age" => 30}
for input <- inputs do
{:ok, result} = JsonRemedy.repair(input)
assert result["name"] == "Alice"
end
end
test "block comments" do
inputs = [
"/* Header comment */ {\"name\": \"Alice\"}",
"{\"name\": \"Alice\" /* middle */}",
"{\n /* Multi\n line\n comment */\n \"name\": \"Alice\"\n}"
]
for input <- inputs do
assert {:ok, %{"name" => "Alice"}} = JsonRemedy.repair(input)
end
end
test "nested block comments" do
input = "{\"name\": \"Alice\" /* outer /* inner */ still outer */}"
assert {:ok, %{"name" => "Alice"}} = JsonRemedy.repair(input)
end
test "comments inside strings should be preserved" do
input = "{\"message\": \"This // is not a comment\"}"
expected = %{"message" => "This // is not a comment"}
assert JsonRemedy.repair(input) == {:ok, expected}
end
end
Wrapper Text Extraction
describe "wrapper text extraction" do
test "prose before and after" do
input = """
Here's your JSON data as requested:
{"name": "Alice", "age": 30}
That should help with your project!
"""
assert JsonRemedy.repair(input) == {:ok, %{"name" => "Alice", "age" => 30}}
end
test "multiple json objects in text" do
input = """
First user: {"name": "Alice"}
Second user: {"name": "Bob"}
"""
# Should extract the first valid JSON
assert JsonRemedy.repair(input) == {:ok, %{"name" => "Alice"}}
end
test "html/xml wrapper" do
input = "<pre>{\"name\": \"Alice\"}</pre>"
assert JsonRemedy.repair(input) == {:ok, %{"name" => "Alice"}}
end
end
2. Layer 2: Structural Repair Tests
Missing Closing Delimiters
describe "missing closing delimiters" do
test "missing closing brace - simple" do
inputs = [
"{\"name\": \"Alice\"",
"{\"name\": \"Alice\", \"age\": 30",
"{\"users\": [{\"name\": \"Alice\"}]"
]
for input <- inputs do
{:ok, result} = JsonRemedy.repair(input)
assert is_map(result)
assert result["name"] == "Alice" or result["users"] |> hd() |> Map.get("name") == "Alice"
end
end
test "missing closing bracket - simple" do
inputs = [
"[1, 2, 3",
"[{\"name\": \"Alice\"}, {\"name\": \"Bob\"}",
"[[1, 2], [3, 4]"
]
for input <- inputs do
{:ok, result} = JsonRemedy.repair(input)
assert is_list(result)
end
end
test "nested missing delimiters" do
input = "{\"users\": [{\"name\": \"Alice\", \"profile\": {\"city\": \"Portland\""
{:ok, result} = JsonRemedy.repair(input)
assert result["users"] |> hd() |> get_in(["profile", "city"]) == "Portland"
end
test "mixed missing delimiters" do
input = "{\"data\": [1, 2, {\"nested\": [3, 4"
{:ok, result} = JsonRemedy.repair(input)
assert result["data"] |> Enum.at(2) |> Map.get("nested") == [3, 4]
end
end
Unmatched Nesting
describe "unmatched nesting" do
test "extra closing brace" do
input = "{\"name\": \"Alice\"}}"
assert JsonRemedy.repair(input) == {:ok, %{"name" => "Alice"}}
end
test "extra closing bracket" do
input = "[1, 2, 3]]"
assert JsonRemedy.repair(input) == {:ok, [1, 2, 3]}
end
test "mismatched delimiters" do
inputs = [
"{\"name\": \"Alice\"]", # Object opened, array closed
"[\"item1\", \"item2\"}", # Array opened, object closed
"{\"data\": [1, 2}]" # Missing bracket, extra brace
]
for input <- inputs do
{:ok, _result} = JsonRemedy.repair(input)
# Should not crash, exact result depends on strategy
end
end
end
3. Layer 3: Syntax Normalization Tests
Quote Normalization
describe "quote normalization" do
test "single quotes to double quotes" do
inputs = [
"{'name': 'Alice'}",
"{'name': 'Alice', 'age': 30}",
"{'users': [{'name': 'Alice'}, {'name': 'Bob'}]}"
]
for input <- inputs do
{:ok, result} = JsonRemedy.repair(input)
assert result["name"] == "Alice" or result["users"] |> hd() |> Map.get("name") == "Alice"
end
end
test "smart quotes to regular quotes" do
inputs = [
"{"name": "Alice"}", # Smart double quotes
"{'name': 'Alice'}", # Smart single quotes
"{"name": 'Alice'}" # Mixed smart quotes
]
for input <- inputs do
assert JsonRemedy.repair(input) == {:ok, %{"name" => "Alice"}}
end
end
test "mixed quote styles" do
input = "{\"name\": 'Alice', 'age': \"30\"}"
assert JsonRemedy.repair(input) == {:ok, %{"name" => "Alice", "age" => "30"}}
end
test "quotes in strings should be preserved" do
input = "{\"message\": \"She said 'hello' to me\"}"
expected = %{"message" => "She said 'hello' to me"}
assert JsonRemedy.repair(input) == {:ok, expected}
end
end
Unquoted Keys
describe "unquoted keys" do
test "simple unquoted keys" do
inputs = [
"{name: \"Alice\"}",
"{name: \"Alice\", age: 30}",
"{user_name: \"Alice\", user_age: 30}"
]
for input <- inputs do
{:ok, result} = JsonRemedy.repair(input)
assert Map.has_key?(result, "name") or Map.has_key?(result, "user_name")
end
end
test "mixed quoted and unquoted keys" do
input = "{\"name\": \"Alice\", age: 30, \"active\": true}"
expected = %{"name" => "Alice", "age" => 30, "active" => true}
assert JsonRemedy.repair(input) == {:ok, expected}
end
test "complex unquoted keys" do
inputs = [
"{user_name_1: \"Alice\"}",
"{user$name: \"Alice\"}",
"{userName: \"Alice\"}"
]
for input <- inputs do
{:ok, result} = JsonRemedy.repair(input)
assert map_size(result) == 1
end
end
end
Boolean and Null Normalization
describe "boolean and null normalization" do
test "python-style booleans" do
inputs = [
"{\"active\": True}",
"{\"active\": False}",
"{\"verified\": True, \"deleted\": False}"
]
for input <- inputs do
{:ok, result} = JsonRemedy.repair(input)
assert is_boolean(result["active"]) or is_boolean(result["verified"])
end
end
test "case variants" do
inputs = [
"{\"active\": TRUE}",
"{\"active\": FALSE}",
"{\"active\": true}", # Should remain unchanged
"{\"active\": false}" # Should remain unchanged
]
for input <- inputs do
{:ok, result} = JsonRemedy.repair(input)
assert is_boolean(result["active"])
end
end
test "null variants" do
inputs = [
"{\"value\": None}",
"{\"value\": NULL}",
"{\"value\": Null}",
"{\"value\": null}" # Should remain unchanged
]
for input <- inputs do
{:ok, result} = JsonRemedy.repair(input)
assert result["value"] == nil
end
end
test "booleans in strings should be preserved" do
input = "{\"message\": \"The value is True\"}"
expected = %{"message" => "The value is True"}
assert JsonRemedy.repair(input) == {:ok, expected}
end
end
Comma and Colon Fixes
describe "comma and colon fixes" do
test "trailing commas in objects" do
inputs = [
"{\"name\": \"Alice\",}",
"{\"name\": \"Alice\", \"age\": 30,}",
"{\"users\": [{\"name\": \"Alice\",}, {\"name\": \"Bob\",}],}"
]
for input <- inputs do
{:ok, result} = JsonRemedy.repair(input)
assert result["name"] == "Alice" or is_list(result["users"])
end
end
test "trailing commas in arrays" do
inputs = [
"[1, 2, 3,]",
"[\"a\", \"b\", \"c\",]",
"[[1, 2,], [3, 4,],]"
]
for input <- inputs do
{:ok, result} = JsonRemedy.repair(input)
assert is_list(result)
end
end
test "missing commas in objects" do
inputs = [
"{\"name\": \"Alice\" \"age\": 30}",
"{\"a\": 1 \"b\": 2 \"c\": 3}",
"{\"users\": [{\"name\": \"Alice\"} {\"name\": \"Bob\"}]}"
]
for input <- inputs do
{:ok, result} = JsonRemedy.repair(input)
assert map_size(result) >= 2 or is_list(result["users"])
end
end
test "missing commas in arrays" do
inputs = [
"[1 2 3]",
"[\"a\" \"b\" \"c\"]",
"[{\"name\": \"Alice\"} {\"name\": \"Bob\"}]"
]
for input <- inputs do
{:ok, result} = JsonRemedy.repair(input)
assert length(result) >= 2
end
end
test "missing colons" do
inputs = [
"{\"name\" \"Alice\"}",
"{\"name\" \"Alice\", \"age\" 30}",
"{\"user\" {\"name\" \"Alice\"}}"
]
for input <- inputs do
{:ok, result} = JsonRemedy.repair(input)
assert result["name"] == "Alice" or Map.has_key?(result, "user")
end
end
end
4. Layer 4: Validation Tests
Fast Path Testing
describe "fast path validation" do
test "valid json uses jason directly" do
valid_inputs = [
"{\"name\": \"Alice\"}",
"[1, 2, 3]",
"true",
"null",
"\"hello\"",
"42"
]
for input <- valid_inputs do
assert {:ok, _result} = JsonRemedy.repair(input)
# Should use Jason.decode fast path
end
end
test "complex valid json" do
input = """
{
"users": [
{"name": "Alice", "age": 30, "active": true},
{"name": "Bob", "age": 25, "active": false}
],
"metadata": {
"total": 2,
"generated": "2024-01-15T10:00:00Z"
}
}
"""
{:ok, result} = JsonRemedy.repair(input)
assert length(result["users"]) == 2
assert result["metadata"]["total"] == 2
end
end
5. Layer 5: Tolerant Parsing Tests
Edge Case Parsing
describe "tolerant parsing fallback" do
test "severely malformed structures" do
inputs = [
"{name Alice age 30}", # No quotes, no colons, no commas
"[1 2 {name Alice} 3]", # Mixed array with unquoted object
"name: Alice, age: 30", # No outer braces
"{{{nested: deep}}}", # Over-nested
"{\"incomplete" # Severely truncated
]
for input <- inputs do
result = JsonRemedy.repair(input)
# Should either succeed or fail gracefully
assert match?({:ok, _}, result) or match?({:error, _}, result)
end
end
test "mixed data types" do
input = "[1, \"text\", true, null, {\"nested\": [2, 3]}]"
{:ok, result} = JsonRemedy.repair(input)
assert length(result) == 5
assert is_integer(Enum.at(result, 0))
assert is_binary(Enum.at(result, 1))
assert is_boolean(Enum.at(result, 2))
assert is_nil(Enum.at(result, 3))
assert is_map(Enum.at(result, 4))
end
end
6. Integration Tests
End-to-End Repair Scenarios
describe "end-to-end integration" do
test "llm output with multiple issues" do
input = """
Here's the user data:
```json
{
users: [
{
name: "Alice Johnson",
email: '[email protected]',
age: 30,
active: True,
scores: [95, 87, 92,], // Recent test scores
profile: {
city: "New York",
interests: ["coding", "music", "travel",]
},
},
{
name: "Bob Smith",
email: "[email protected]",
age: 25,
active: False,
scores: [88, 91, 89]
// Missing comma above
}
],
metadata: {
total: 2,
updated: "2024-01-15"
// Missing closing brace
```
Hope this helps!
"""
{:ok, result, repairs} = JsonRemedy.repair(input, logging: true)
# Verify structure
assert length(result["users"]) == 2
assert result["users"] |> hd() |> Map.get("name") == "Alice Johnson"
assert result["metadata"]["total"] == 2
# Verify repairs were logged
assert is_list(repairs)
assert length(repairs) > 5 # Should have multiple repairs
end
test "legacy python system output" do
input = "{'users': [{'name': 'Alice', 'active': True, 'metadata': None}], 'success': True}"
{:ok, result} = JsonRemedy.repair(input)
assert result["users"] |> hd() |> Map.get("name") == "Alice"
assert result["users"] |> hd() |> Map.get("active") == true
assert result["users"] |> hd() |> Map.get("metadata") == nil
assert result["success"] == true
end
test "incomplete streaming data" do
inputs = [
"{\"status\": \"processing\", \"data\": [1, 2, 3", # Missing bracket
"{\"status\": \"processing\", \"data\": [1, 2, 3]", # Missing brace
"{\"status\": \"processing\", \"data\": [1, 2, 3], \"id\"", # Incomplete key-value
]
for input <- inputs do
{:ok, result} = JsonRemedy.repair(input)
assert result["status"] == "processing"
assert result["data"] == [1, 2, 3]
end
end
end
File Operations
describe "file operations" do
test "repair from file" do
content = "{name: 'Alice', age: 30, active: True}"
path = "/tmp/test_malformed.json"
File.write!(path, content)
{:ok, result} = JsonRemedy.from_file(path)
assert result["name"] == "Alice"
assert result["active"] == true
File.rm!(path)
end
test "large file handling" do
# Generate a large malformed JSON file
users = for i <- 1..1000 do
"{name: 'User#{i}', id: #{i}, active: True}"
end
content = "[" <> Enum.join(users, ",\n") <> "]"
path = "/tmp/large_malformed.json"
File.write!(path, content)
{:ok, result} = JsonRemedy.from_file(path)
assert length(result) == 1000
assert hd(result)["name"] == "User1"
File.rm!(path)
end
end
7. Performance Tests
Benchmarking Different Input Types
describe "performance characteristics" do
test "valid json performance" do
valid_json = Jason.encode!(%{name: "Alice", age: 30})
{time, {:ok, _result}} = :timer.tc(fn ->
JsonRemedy.repair(valid_json)
end)
# Should be very fast (using Jason fast path)
assert time < 10 # Less than 10 microseconds
end
test "simple malformed json performance" do
malformed = "{name: 'Alice', age: 30}"
{time, {:ok, _result}} = :timer.tc(fn ->
JsonRemedy.repair(malformed)
end)
# Should be reasonably fast
assert time < 1000 # Less than 1 millisecond
end
test "complex malformed json performance" do
complex_malformed = """
// Complex malformed JSON
{
users: [
{name: 'Alice', active: True, scores: [1, 2, 3,]},
{name: 'Bob', active: False, scores: [4, 5, 6,]}
],
metadata: {total: 2, generated: None
"""
{time, {:ok, _result}} = :timer.tc(fn ->
JsonRemedy.repair(complex_malformed)
end)
# Should complete in reasonable time
assert time < 5000 # Less than 5 milliseconds
end
test "memory usage stays reasonable" do
large_json = """
{
users: [
""" <>
(for i <- 1..100 do
"{name: 'User#{i}', id: #{i}, active: True}"
end |> Enum.join(",\n")) <>
"""
],
metadata: {total: 100}
"""
:erlang.garbage_collect()
{memory_before, _} = :erlang.process_info(self(), :memory)
{:ok, _result} = JsonRemedy.repair(large_json)
:erlang.garbage_collect()
{memory_after, _} = :erlang.process_info(self(), :memory)
memory_used = memory_after - memory_before
# Should not use excessive memory
assert memory_used < 100_000 # Less than 100KB
end
end
8. Edge Case Tests
Unicode and Encoding
describe "unicode and encoding" do
test "unicode characters in strings" do
inputs = [
"{\"name\": \"JosΓ©\", \"city\": \"SΓ£o Paulo\"}",
"{name: 'εδΊ¬', country: 'δΈε½'}",
"{\"emoji\": \"ππ―β¨\"}"
]
for input <- inputs do
{:ok, result} = JsonRemedy.repair(input)
# Should preserve unicode correctly
assert byte_size(Jason.encode!(result)) > 0
end
end
test "escaped unicode sequences" do
input = "{\"unicode\": \"\\u0048\\u0065\\u006c\\u006c\\u006f\"}" # "Hello"
{:ok, result} = JsonRemedy.repair(input)
assert result["unicode"] == "Hello"
end
test "malformed unicode escapes" do
inputs = [
"{\"bad\": \"\\u004\"}", # Incomplete escape
"{\"bad\": \"\\uGGGG\"}", # Invalid hex
"{\"bad\": \"\\u\"}" # Truncated escape
]
for input <- inputs do
result = JsonRemedy.repair(input)
# Should either repair or fail gracefully
assert match?({:ok, _}, result) or match?({:error, _}, result)
end
end
end
Deeply Nested Structures
describe "deeply nested structures" do
test "deep object nesting" do
deep_json = """
{
level1: {
level2: {
level3: {
level4: {
level5: {
value: "deep",
array: [1, 2, 3,]
}
}
}
}
}
"""
{:ok, result} = JsonRemedy.repair(deep_json)
deep_value = get_in(result, ["level1", "level2", "level3", "level4", "level5", "value"])
assert deep_value == "deep"
end
test "deep array nesting" do
deep_array = "[[[[[1, 2, 3,]]]]]"
{:ok, result} = JsonRemedy.repair(deep_array)
# Should unwrap to the deepest level
unwrapped = result |> hd() |> hd() |> hd() |> hd() |> hd()
assert unwrapped == [1, 2, 3]
end
test "mixed deep nesting with repairs" do
mixed = """
{
data: [
{
nested: {
items: [
{name: 'item1', active: True},
{name: 'item2', active: False}
],
metadata: None
}
}
]
"""
{:ok, result} = JsonRemedy.repair(mixed)
items = get_in(result, ["data"]) |> hd() |> get_in(["nested", "items"])
assert length(items) == 2
assert hd(items)["active"] == true
end
end
Malformed Numbers and Strings
describe "malformed numbers and strings" do
test "number variations" do
inputs = [
"{\"int\": 42}",
"{\"float\": 3.14}",
"{\"exp\": 1.23e4}",
"{\"negative\": -42}",
"{\"leading_zero\": 01}" # Invalid but should be repaired
]
for input <- inputs do
{:ok, result} = JsonRemedy.repair(input)
assert map_size(result) == 1
end
end
test "malformed strings" do
inputs = [
"{\"unclosed\": \"hello}", # Missing closing quote
"{\"escaped\": \"hello\\nworld\"}", # Valid escapes
"{\"bad_escape\": \"hello\\xworld\"}", # Invalid escape
"{\"mixed_quotes\": \"hello'world\"}", # Mixed quotes in string
]
for input <- inputs do
result = JsonRemedy.repair(input)
assert match?({:ok, _}, result) or match?({:error, _}, result)
end
end
test "string with special characters" do
input = "{\"special\": \"tab\\there\\nnewline\\\"quote\"}"
{:ok, result} = JsonRemedy.repair(input)
assert String.contains?(result["special"], "tab")
end
end
9. Real-World Scenario Tests
LLM Output Patterns
describe "llm output patterns" do
test "chatgpt style output" do
chatgpt_output = """
Based on your request, here's the JSON data:
```json
{
"response": {
"status": "success",
"data": {
"users": [
{
"id": 1,
"name": "Alice Johnson",
"email": "[email protected]",
"preferences": {
"theme": "dark",
"notifications": true
}
}
]
}
}
}
```
This structure should work well for your application.
"""
{:ok, result} = JsonRemedy.repair(chatgpt_output)
assert result["response"]["status"] == "success"
assert length(result["response"]["data"]["users"]) == 1
end
test "claude style output with reasoning" do
claude_output = """
I'll help you create that JSON structure. Let me think through the requirements:
1. User information
2. Preferences
3. Metadata
Here's the resulting JSON:
{
"user": {
"name": "Alice",
"age": 30,
"preferences": {
"theme": "dark",
"language": "en"
}
},
"metadata": {
"created": "2024-01-15",
"version": "1.0"
}
}
This should meet your needs!
"""
{:ok, result} = JsonRemedy.repair(claude_output)
assert result["user"]["name"] == "Alice"
assert result["metadata"]["version"] == "1.0"
end
test "truncated llm response" do
truncated = """
{
"users": [
{"name": "Alice", "active": true},
{"name": "Bob", "active":
"""
{:ok, result} = JsonRemedy.repair(truncated)
assert length(result["users"]) >= 1
assert result["users"] |> hd() |> Map.get("name") == "Alice"
end
end
Legacy System Outputs
describe "legacy system outputs" do
test "python pickle-style output" do
python_style = """
{
'users': [
{
'name': 'Alice',
'active': True,
'metadata': None,
'scores': [95, 87, 92]
}
],
'timestamp': '2024-01-15T10:00:00Z',
'success': True
}
"""
{:ok, result} = JsonRemedy.repair(python_style)
assert result["users"] |> hd() |> Map.get("active") == true
assert result["success"] == true
end
test "javascript object literal" do
js_object = """
{
name: "Alice",
getValue: function() { return this.name; },
data: {
items: [1, 2, 3],
active: true
}
}
"""
# Should extract the data parts and ignore functions
{:ok, result} = JsonRemedy.repair(js_object)
assert result["name"] == "Alice"
assert result["data"]["active"] == true
end
test "yaml-like structure" do
yaml_like = """
name: Alice Johnson
age: 30
active: true
preferences:
theme: dark
notifications: true
"""
# May not fully support YAML, but should attempt repair
result = JsonRemedy.repair(yaml_like)
# Should either succeed with partial data or fail gracefully
assert match?({:ok, _}, result) or match?({:error, _}, result)
end
end
API Response Patterns
describe "api response patterns" do
test "rest api with extra wrapper" do
api_response = """
HTTP/1.1 200 OK
Content-Type: application/json
{
"status": "success",
"data": {
"users": [
{"name": "Alice", "id": 1}
]
},
"meta": {
"total": 1,
"page": 1
}
}
"""
{:ok, result} = JsonRemedy.repair(api_response)
assert result["status"] == "success"
assert result["meta"]["total"] == 1
end
test "graphql response with errors" do
graphql_response = """
{
"data": {
"user": {
"name": "Alice",
"email": "[email protected]"
}
},
"errors": [
{
"message": "Field 'phone' is deprecated",
"locations": [{"line": 3, "column": 5}]
}
]
}
"""
{:ok, result} = JsonRemedy.repair(graphql_response)
assert result["data"]["user"]["name"] == "Alice"
assert length(result["errors"]) == 1
end
end
10. Error Handling Tests
Graceful Failure Modes
describe "error handling" do
test "completely invalid input" do
invalid_inputs = [
"", # Empty string
" ", # Whitespace only
"not json at all", # Plain text
"12345 abcdef", # Mixed invalid
"<!DOCTYPE html>", # HTML
"SELECT * FROM users;", # SQL
]
for input <- invalid_inputs do
result = JsonRemedy.repair(input)
assert match?({:error, _reason}, result)
end
end
test "infinite recursion protection" do
# Potentially problematic recursive structures
recursive_attempts = [
String.duplicate("{", 1000) <> "\"key\": \"value\"" <> String.duplicate("}", 1000),
String.duplicate("[", 1000) <> "1" <> String.duplicate("]", 1000)
]
for input <- recursive_attempts do
result = JsonRemedy.repair(input)
# Should either succeed or fail gracefully, not hang
assert match?({:ok, _}, result) or match?({:error, _}, result)
end
end
test "memory exhaustion protection" do
# Very large malformed inputs
huge_input = """
{
"data": [
""" <>
(1..10_000 |> Enum.map(fn i -> "\"item#{i}\"" end) |> Enum.join(",")) <>
"""
]
}
"""
result = JsonRemedy.repair(huge_input)
# Should handle large inputs gracefully
assert match?({:ok, _}, result) or match?({:error, _}, result)
end
test "circular reference detection" do
# This is tricky to create in JSON, but test repair robustness
problematic = """
{
"a": {
"b": {
"c": "see a"
}
}
}
"""
{:ok, result} = JsonRemedy.repair(problematic)
assert result["a"]["b"]["c"] == "see a"
end
end
Logging and Debugging
describe "logging and debugging" do
test "repair logging captures all actions" do
complex_input = """
// Comment at start
{
name: 'Alice', // Unquoted key, single quotes
age: 30, // Valid
active: True, // Python boolean
scores: [1, 2, 3,], // Trailing comma
metadata: None // Python None
// Missing closing brace
"""
{:ok, _result, repairs} = JsonRemedy.repair(complex_input, logging: true)
# Should log all the repairs made
assert is_list(repairs)
assert length(repairs) >= 5
# Check for specific repair types
repair_strings = Enum.join(repairs, " ")
assert String.contains?(repair_strings, "comment")
assert String.contains?(repair_strings, "quote")
assert String.contains?(repair_strings, "boolean") or String.contains?(repair_strings, "True")
assert String.contains?(repair_strings, "comma")
assert String.contains?(repair_strings, "null") or String.contains?(repair_strings, "None")
end
test "repair position tracking" do
input = "{name: 'Alice', age: 30, active: True}"
{:ok, _result, repairs} = JsonRemedy.repair(input, logging: true)
# Repairs should ideally include position information
# (This depends on implementation - may be future enhancement)
assert is_list(repairs)
assert length(repairs) > 0
end
test "layer-specific logging" do
input = """
```json
// This has issues in multiple layers
{name: 'Alice', active: True}
```
"""
{:ok, _result, repairs} = JsonRemedy.repair(input, logging: true)
# Should log repairs from multiple layers
assert length(repairs) >= 3 # Code fence + quotes + boolean
end
end
11. Streaming and Large File Tests
Streaming Support
describe "streaming support" do
test "repair stream of individual json objects" do
json_lines = [
"{name: 'Alice'}",
"{name: 'Bob', active: True}",
"[1, 2, 3,]"
]
results = json_lines
|> Stream.map(&JsonRemedy.repair/1)
|> Enum.to_list()
assert length(results) == 3
assert match?([{:ok, _}, {:ok, _}, {:ok, _}], results)
end
test "repair stream with errors" do
mixed_lines = [
"{\"valid\": true}", # Valid JSON
"{name: 'Alice'}", # Needs repair
"invalid data", # Invalid
"[1, 2, 3,]" # Needs repair
]
results = mixed_lines
|> Stream.map(&JsonRemedy.repair/1)
|> Enum.to_list()
# Should have mix of successes and failures
successes = Enum.count(results, &match?({:ok, _}, &1))
failures = Enum.count(results, &match?({:error, _}, &1))
assert successes >= 3
assert failures >= 1
end
end
Sample JSON Test Data
Comprehensive Test Fixtures
# test/support/json_fixtures.ex
defmodule JsonRemedy.TestFixtures do
# Valid JSON samples
def valid_simple, do: "{\"name\": \"Alice\", \"age\": 30}"
def valid_complex do
"""
{
"users": [
{
"id": 1,
"name": "Alice Johnson",
"email": "[email protected]",
"profile": {
"city": "New York",
"interests": ["coding", "music", "travel"]
},
"settings": {
"theme": "dark",
"notifications": true
}
}
],
"metadata": {
"total": 1,
"page": 1,
"generated": "2024-01-15T10:00:00Z"
}
}
"""
end
# LLM-style outputs
def llm_with_fences do
"""
Here's your JSON data:
```json
{
"result": "success",
"data": {
"users": [
{"name": "Alice", "active": true}
]
}
}
```
Hope this helps!
"""
end
def llm_with_comments do
"""
{
// User information
"name": "Alice",
"age": 30,
/*
* Settings object
*/
"settings": {
"theme": "dark" // Dark mode preferred
}
}
"""
end
def llm_truncated do
"""
{
"users": [
{"name": "Alice", "active": true},
{"name": "Bob", "active":
"""
end
# Legacy system outputs
def python_style do
"""
{
'users': [
{
'name': 'Alice',
'active': True,
'metadata': None,
'scores': [95, 87, 92]
}
],
'success': True
}
"""
end
def javascript_object do
"""
{
name: "Alice",
age: 30,
getData: function() { return this.name; },
preferences: {
theme: "dark",
notifications: true
}
}
"""
end
# Structural issues
def missing_delimiters do
"""
{
"users": [
{"name": "Alice", "profile": {"city": "NYC"
],
"total": 1
"""
end
def extra_delimiters do
"""
{
"name": "Alice"
}}]
"""
end
def mismatched_delimiters do
"""
{
"data": [
{"name": "Alice"}
}
]
"""
end
# Syntax issues
def trailing_commas do
"""
{
"users": [
{"name": "Alice", "age": 30,},
{"name": "Bob", "age": 25,},
],
"total": 2,
}
"""
end
def missing_commas do
"""
{
"name": "Alice"
"age": 30
"preferences": {
"theme": "dark"
"notifications": true
}
}
"""
end
def unquoted_keys do
"""
{
name: "Alice",
age: 30,
user_preferences: {
preferred_theme: "dark",
email_notifications: true
}
}
"""
end
def mixed_quotes do
"""
{
"name": 'Alice',
'age': "30",
"preferences": {
'theme': "dark"
}
}
"""
end
# Complex real-world examples
def api_response_malformed do
"""
HTTP/1.1 200 OK
Content-Type: application/json
{
status: "success",
data: {
users: [
{name: 'Alice', id: 1, active: True},
{name: 'Bob', id: 2, active: False}
],
pagination: {
page: 1,
total: 2,
hasMore: False
}
},
errors: None
"""
end
def config_file_malformed do
"""
// Application configuration
{
database: {
host: "localhost",
port: 5432,
ssl: True,
credentials: {
username: "app_user",
password: "secret123"
}
},
cache: {
enabled: True,
ttl: 3600,
backend: "redis"
},
logging: {
level: "info",
outputs: ["console", "file",]
}
// Missing closing brace
"""
end
def deeply_nested_malformed do
"""
{
level1: {
level2: {
level3: {
level4: {
level5: {
data: [
{item: 1, active: True},
{item: 2, active: False},
{item: 3, active: None}
],
metadata: {
created: "2024-01-15",
version: "1.0"
}
}
}
}
}
}
// Missing all closing braces
"""
end
# Edge cases
def unicode_mixed do
"""
{
name: "JosΓ© MarΓa",
city: 'εδΊ¬',
emoji: "ππ―β¨",
description: "User from SΓ£o Paulo"
}
"""
end
def large_array_malformed do
"""
{
"items": [
""" <>
(1..100 |> Enum.map(fn i ->
"{id: #{i}, name: 'Item#{i}', active: True}"
end) |> Enum.join(",\n")) <>
"""
],
"total": 100
"""
end
def severely_malformed do
"name Alice age 30 active true preferences theme dark notifications true"
end
def json5_style do
"""
{
// JSON5 features
name: 'Alice',
age: 0x1E, // Hexadecimal
pi: 3.14159,
active: true,
metadata: {
/* Block comment */
created: '2024-01-15',
tags: [
'user',
'active',
], // Trailing comma
},
}
"""
end
end
This comprehensive test plan covers all major aspects of the JsonRemedy library:
- Layer-specific testing for each repair stage
- Integration testing for end-to-end scenarios
- Performance testing to ensure reasonable speed
- Edge case testing for robustness
- Real-world scenarios from actual LLM and legacy system outputs
- Error handling for graceful failures
- Comprehensive fixtures with diverse malformed JSON examples
The test suite should provide confidence that the library handles the full spectrum of JSON repair scenarios correctly and efficiently.
Implementation Strategy
Test Organization
test/
βββ json_remedy_test.exs # Main API tests
βββ layers/
β βββ content_cleaning_test.exs # Layer 1 tests
β βββ structural_repair_test.exs # Layer 2 tests
β βββ syntax_normalization_test.exs # Layer 3 tests
β βββ validation_test.exs # Layer 4 tests
β βββ tolerant_parsing_test.exs # Layer 5 tests
βββ integration/
β βββ end_to_end_test.exs # Full scenarios
β βββ real_world_test.exs # LLM/legacy outputs
β βββ streaming_test.exs # Large file handling
βββ performance/
β βββ benchmark_test.exs # Performance validation
β βββ memory_test.exs # Memory usage tests
βββ support/
β βββ json_fixtures.ex # Test data
β βββ test_helper.ex # Test utilities
βββ property/
βββ property_test.exs # Property-based testing
Property-Based Testing Addition
# test/property/property_test.exs
defmodule JsonRemedy.PropertyTest do
use ExUnit.Case
use PropCheck
property "repair is idempotent for valid JSON" do
forall valid_json <- valid_json_generator() do
{:ok, result1} = JsonRemedy.repair(valid_json)
json_string = Jason.encode!(result1)
{:ok, result2} = JsonRemedy.repair(json_string)
result1 == result2
end
end
property "repair preserves data semantics when possible" do
forall malformed_json <- malformed_json_generator() do
case JsonRemedy.repair(malformed_json) do
{:ok, result} ->
# Result should be valid JSON
{:ok, _} = Jason.encode(result)
true
{:error, _} ->
# Acceptable failure for severely malformed input
true
end
end
end
# Generators for property testing
defp valid_json_generator do
oneof([
map(atom(:name), binary()),
list(integer()),
boolean(),
nil,
float()
])
end
defp malformed_json_generator do
oneof([
# Missing quotes
"{name: \"Alice\"}",
# Trailing commas
"[1, 2, 3,]",
# Python booleans
"{\"active\": True}",
# Missing delimiters
"{\"name\": \"Alice\"",
])
end
end
Test Data Categorization
Category 1: Syntax Fixes (High Success Rate Expected)
- Unquoted keys:
{name: "Alice"}
- Single quotes:
{'name': 'Alice'}
- Boolean variants:
{active: True}
- Null variants:
{value: None}
- Trailing commas:
[1, 2, 3,]
Category 2: Structural Repairs (Medium Success Rate)
- Missing braces:
{"name": "Alice"
- Missing brackets:
[1, 2, 3
- Missing commas:
{"a": 1 "b": 2}
- Missing colons:
{"name" "Alice"}
Category 3: Content Cleaning (High Success Rate)
- Code fences:
json {"a": 1}
- Comments:
// comment \n {"a": 1}
- Wrapper text:
Here's JSON: {"a": 1}
Category 4: Complex Scenarios (Variable Success Rate)
- Multiple issues: Code fences + syntax + structure problems
- Deeply nested malformations
- Large files with scattered issues
- Truncated/incomplete JSON
Category 5: Edge Cases (Lower Success Rate Expected)
- Severely malformed:
name Alice age 30
- Binary data mixed in
- Extremely large inputs
- Recursive/circular references
Success Rate Targets by Category
# Expected success rates for test categories
@success_targets %{
syntax_fixes: 0.95, # 95% should succeed
structural_repairs: 0.85, # 85% should succeed
content_cleaning: 0.98, # 98% should succeed
complex_scenarios: 0.75, # 75% should succeed
edge_cases: 0.50 # 50% should succeed (graceful failure ok)
}
Test Execution Strategy
Phase 1: Unit Tests
- Test each layer independently
- Mock dependencies where appropriate
- Focus on specific repair capabilities
Phase 2: Integration Tests
- Test full pipeline with realistic inputs
- Measure end-to-end repair success rates
- Validate logging and error handling
Phase 3: Performance Tests
- Benchmark against targets
- Memory profiling
- Large file handling validation
Phase 4: Property Tests
- Verify invariants hold across random inputs
- Test idempotency and consistency
- Stress test with generated malformed JSON
Phase 5: Real-World Validation
- Test against actual LLM outputs
- Validate legacy system compatibility
- Community feedback integration
This comprehensive test plan ensures JsonRemedy meets its promises of reliable, fast, and intelligent JSON repair across the full spectrum of real-world malformed JSON scenarios.