#!/usr/bin/env bash
# test-all.sh -- Comprehensive CLI test suite for clinical-intelligence.
#
# Usage:
#   bash scripts/test-all.sh               # default: levels 1-3 (~3 min)
#   bash scripts/test-all.sh --level 1     # infrastructure only (~30s)
#   bash scripts/test-all.sh --level 4     # includes agent tests (~30 min)
#   bash scripts/test-all.sh --level 5     # full e2e (~45 min)
#   bash scripts/test-all.sh --test T3.8a  # single test (exact ID match)
#   bash scripts/test-all.sh --verbose     # show full output per test
#
# Notes:
#   * --test matches the test ID exactly. Looped tests carry a letter suffix
#     (T3.5a-f, T3.8a-g, T3.12a-e), so "T3.8" alone selects nothing.
#   * --test does not raise the maximum level: combine it with --level N to
#     select a test that lives in level 4 or 5.
#
# Runs from the DGX host. Requires: openshell CLI on PATH, sandbox running.

# No `-e` here: every run_level* function is a chain of
# `should_run ... && run_test ...` whose non-zero status must not abort the run.
set -uo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"

# test-lib.sh supplies the helpers called below that are not defined in this
# file (run_test, init_test_run, print_summary, _sandbox, _bridge_ip and the
# assert_* names). Without it nothing can work, so fail fast instead of
# emitting a stream of "command not found" errors.
if ! source "$SCRIPT_DIR/test-lib.sh"; then
  echo "Cannot load $SCRIPT_DIR/test-lib.sh" >&2
  exit 1
fi

# Source .env so OLLAMA_PORT/OPENFOLD_PORT overrides reach the curl URLs below.
if [ -f "$REPO_DIR/.env" ]; then
  set -a
  # shellcheck disable=SC1091
  . "$REPO_DIR/.env"
  set +a
fi

MAX_LEVEL=3
SINGLE_TEST=""

while [[ $# -gt 0 ]]; do
  case "$1" in
    --level)
      # Validate before use: the value is later evaluated inside (( )), where
      # a non-numeric string is an error (or worse, an injected expression).
      if [[ $# -lt 2 || ! "$2" =~ ^[0-9]+$ ]]; then
        echo "Option --level requires a non-negative integer argument" >&2
        exit 1
      fi
      # Force base 10 so a value such as "08" is not parsed as invalid octal.
      MAX_LEVEL=$((10#$2))
      shift 2
      ;;
    --test)
      if [[ $# -lt 2 || -z "$2" ]]; then
        echo "Option --test requires a test ID argument (e.g. T3.8a)" >&2
        exit 1
      fi
      SINGLE_TEST="$2"
      shift 2
      ;;
    --verbose)
      # Not read in this file; presumably consumed by test-lib.sh.
      # shellcheck disable=SC2034
      VERBOSE=true
      shift
      ;;
    *)
      echo "Unknown option: $1" >&2
      exit 1
      ;;
  esac
done

init_test_run

BRIDGE_IP=$(_bridge_ip)

# Helper: run only if test matches single-test filter or no filter set.
# Arguments: $1 - test ID (e.g. "T2.4")
# Returns:   0 when the test should run, non-zero otherwise.
should_run() {
  [[ -z "$SINGLE_TEST" ]] || [[ "$1" == "$SINGLE_TEST" ]]
}

# ═══════════════════════════════════════════════════════════════════════
# Level 1: Infrastructure Health (host-side, ~30 seconds)
# ═══════════════════════════════════════════════════════════════════════

# Host-side checks: Ollama, the model, OpenFold3 NIM, the GPU and the public
# FHIR endpoint. Each run_test call passes: ID, description, command string,
# assertion name, failure hint, and (where the assertion needs one) the
# expected value.
run_level1() {
  echo ""
  echo "═══ Level 1: Infrastructure Health ═══"
  echo ""

  should_run "T1.1" && run_test "T1.1" "Ollama alive" \
    "curl -sf http://localhost:${OLLAMA_PORT:-11434}/" \
    assert_exit_0 \
    "Ollama not running. Docker (default): make up. Host Ollama alternative: OLLAMA_HOST=0.0.0.0 ollama serve."

  should_run "T1.2" && run_test "T1.2" "Model available (nemotron-3-super)" \
    "curl -s http://localhost:${OLLAMA_PORT:-11434}/api/tags | python3 -c \"import sys,json; names=[m['name'] for m in json.load(sys.stdin)['models']]; print('FOUND' if any('nemotron-3-super' in n for n in names) else 'MISSING')\"" \
    assert_contains \
    "Model not pulled. Run: ollama pull nemotron-3-super" \
    "FOUND"

  should_run "T1.3" && run_test "T1.3" "Ollama generates text (direct)" \
    "curl -sf -m 30 -X POST http://localhost:${OLLAMA_PORT:-11434}/v1/chat/completions -H 'Content-Type: application/json' -d '{\"model\":\"nemotron-3-super:120b-a12b\",\"messages\":[{\"role\":\"user\",\"content\":\"Say OK\"}],\"max_tokens\":5}' | python3 -c \"import sys,json; c=json.load(sys.stdin).get('choices',[{}])[0].get('message',{}).get('content',''); print(c if c else 'EMPTY')\"" \
    assert_output_not_empty \
    "Ollama can't generate. Check: curl localhost:${OLLAMA_PORT:-11434}/api/ps"

  should_run "T1.5" && run_test "T1.5" "OpenFold3 NIM ready" \
    "curl -sf http://localhost:${OPENFOLD_PORT:-8000}/v1/health/ready" \
    assert_contains \
    "OpenFold3 not ready. Check: docker ps | grep openfold" \
    "ready"

  should_run "T1.6" && run_test "T1.6" "GPU accessible" \
    "nvidia-smi --query-gpu=memory.used,memory.total --format=csv,noheader 2>&1" \
    assert_contains \
    "GPU not accessible. Check NVIDIA driver." \
    "MiB"

  should_run "T1.7" && run_test "T1.7" "FHIR server reachable" \
    "curl -sf -o /dev/null -w '%{http_code}' https://r4.smarthealthit.org/metadata" \
    assert_equals \
    "FHIR unreachable. Check network/DNS." \
    "200"
}

# ═══════════════════════════════════════════════════════════════════════
# Level 2: OpenShell + Sandbox Health (~1 minute)
# ═══════════════════════════════════════════════════════════════════════

# Checks the openshell gateway, sandbox, port forward, and what the sandbox
# is (and is not) allowed to reach.
#
# NOTE(review): the sandbox-side OpenFold3 URLs below hard-code port 8000,
# while the host-side T1.5 honours OPENFOLD_PORT. If the host port is ever
# overridden via .env these probably need ${OPENFOLD_PORT:-8000} too —
# confirm against the sandbox network policy before changing.
run_level2() {
  echo ""
  echo "═══ Level 2: OpenShell + Sandbox Health ═══"
  echo ""

  should_run "T2.1" && run_test "T2.1" "Gateway connected" \
    "openshell status 2>&1" \
    assert_contains \
    "Gateway down. Run: OPENSHELL_K3S_ARGS='--kubelet-arg=cgroup-driver=systemd' openshell gateway start" \
    "Connected"

  should_run "T2.2" && run_test "T2.2" "Sandbox exists and ready" \
    "openshell sandbox list 2>&1" \
    assert_contains \
    "Sandbox not found. Run: bash scripts/setup_sandbox.sh" \
    "Ready"

  should_run "T2.3" && run_test "T2.3" "Forward running on 18789" \
    "openshell forward list 2>&1" \
    assert_contains \
    "Forward dead. Run: openshell forward stop 18789 clinical-sandbox; openshell forward start -d 18789 clinical-sandbox" \
    "running"

  should_run "T2.3b" && run_test "T2.3b" "Gateway HTTP responding on 18789" \
    "curl -sf -m 5 -o /dev/null -w %{http_code} http://127.0.0.1:18789/__openclaw__/health 2>&1 || curl -sf -m 5 -o /dev/null -w %{http_code} http://127.0.0.1:18789/ 2>&1" \
    assert_contains \
    "Gateway HTTP not responding. Forward exists but no listener — re-run scripts/restart_sandbox.sh inside the sandbox or check /tmp/gw.log for the os.networkInterfaces() crash (needs openclaw-os-shim.js loaded via NODE_OPTIONS=--require)." \
    "200"

  should_run "T2.4" && run_test "T2.4" "FHIR from sandbox (curl)" \
    "_sandbox 'curl -sf https://r4.smarthealthit.org/Patient?_count=1 -o /dev/null -w %{http_code}'" \
    assert_equals \
    "FHIR blocked by sandbox policy. Check fhir section + python binary wildcards." \
    "200"

  should_run "T2.5" && run_test "T2.5" "Inference from sandbox" \
    "_sandbox 'curl -sk https://inference.local/v1/models'" \
    assert_contains \
    "Inference not routed. Check: openshell inference get" \
    "nemotron"

  should_run "T2.6" && run_test "T2.6" "OpenFold3 health from sandbox" \
    "_sandbox 'curl -sf http://${BRIDGE_IP}:8000/v1/health/ready'" \
    assert_contains \
    "OpenFold3 health check blocked by sandbox. Check openfold3 policy." \
    "ready"

  should_run "T2.6b" && run_test "T2.6b" "OpenFold3 predict endpoint reachable from sandbox" \
    "_sandbox 'curl -s -o /dev/null -w %{http_code} -X POST -H \"Content-Type: application/json\" -d \"{\\\"dummy\\\": true}\" http://${BRIDGE_IP}:8000/biology/openfold/openfold3/predict'" \
    assert_not_contains \
    "OpenFold3 predict blocked (HTTP 403). Sandbox policy may have L7 rules that break plain HTTP." \
    "403"

  should_run "T2.6c" && run_test "T2.6c" "OpenFold3 predict accepts POST from sandbox" \
    "_sandbox 'curl -s -w \"\n%{http_code}\" -X POST -H \"Content-Type: application/json\" -d \"{\\\"inputs\\\":[{\\\"input_id\\\":\\\"test\\\",\\\"molecules\\\":[{\\\"type\\\":\\\"protein\\\",\\\"id\\\":\\\"A\\\",\\\"sequence\\\":\\\"MKTVRQERLKSIVRI\\\",\\\"msa\\\":{\\\"main\\\":{\\\"a3m\\\":{\\\"alignment\\\":\\\">q\\\\nMKTVRQERLKSIVRI\\\",\\\"format\\\":\\\"a3m\\\"}}}}],\\\"output_format\\\":\\\"pdb\\\"}]}\" http://${BRIDGE_IP}:8000/biology/openfold/openfold3/predict 2>&1 | tail -1'" \
    assert_contains \
    "OpenFold3 predict endpoint rejected POST from sandbox. Check sandbox policy and OpenFold3 NIM status." \
    "200"

  # Passes when curl to an arbitrary external host does NOT exit 0.
  should_run "T2.7" && run_test "T2.7" "Outbound traffic blocked (security)" \
    "_sandbox 'curl --max-time 3 https://google.com 2>&1; echo EXIT_CODE=\$?'" \
    assert_not_contains \
    "SECURITY FAILURE: Outbound traffic NOT blocked!" \
    "EXIT_CODE=0"

  should_run "T2.8" && run_test "T2.8" "Python packages available" \
    "_sandbox 'python -c \"import subprocess, json, pandas, matplotlib; print(\\\"OK\\\")\"'" \
    assert_contains \
    "Python packages missing. Sandbox may need recreation." \
    "OK"
}

# ═══════════════════════════════════════════════════════════════════════
# Level 3: OpenClaw Configuration Correctness (~2 minutes)
# ═══════════════════════════════════════════════════════════════════════

# Verifies the OpenClaw deployment inside the sandbox: gateway process/log,
# model auth profiles, skills, agents, IDENTITY.md, openclaw.json, helper
# scripts, and a final one-prompt smoke test.
run_level3() {
  # Looped tests take their letter suffix from this string by index, so the
  # IDs stay unique however many items a list holds (up to 26).
  local -r suffixes="abcdefghijklmnopqrstuvwxyz"
  local idx id agent skill

  echo ""
  echo "═══ Level 3: OpenClaw Configuration ═══"
  echo ""

  # -- Gateway process and logs --
  echo " --- Gateway ---"

  should_run "T3.1" && run_test "T3.1" "Gateway process alive" \
    "_sandbox 'pgrep -f openclaw-gateway > /dev/null && echo ALIVE || echo DEAD'" \
    assert_contains \
    "OpenClaw gateway not running. Restart it." \
    "ALIVE"

  should_run "T3.2" && run_test "T3.2" "Gateway model correct" \
    "_sandbox 'grep \"agent model\" /tmp/gw.log 2>/dev/null | tail -1'" \
    warn:assert_contains \
    "Wrong model. Check ~/.openclaw/openclaw.json" \
    "local-ollama/nemotron-3-super"

  # Counts error-looking lines in the first 50 log lines, ignoring any that
  # mention apply_patch.
  should_run "T3.3" && run_test "T3.3" "Gateway no errors" \
    "_sandbox 'head -50 /tmp/gw.log 2>/dev/null | grep -iE \"\\[error\\]|\\[fatal\\]|crashed|segfault\" | grep -cv apply_patch'" \
    assert_equals \
    "Gateway has startup errors. Run: _sandbox head -50 /tmp/gw.log" \
    "0"

  # -- Model and auth --
  echo " --- Model & Auth ---"

  should_run "T3.4" && run_test "T3.4" "Model auth OK (not missing)" \
    "_sandbox 'openclaw models list 2>&1 | grep nemotron | grep -c missing; true'" \
    assert_equals \
    "Auth profile missing. Recreate auth-profiles.json for all agents." \
    "0"

  local -a auth_agents=(main patient-data labs-vitals medications analyst molecular)
  for idx in "${!auth_agents[@]}"; do
    agent=${auth_agents[idx]}
    id="T3.5${suffixes:idx:1}"
    should_run "$id" && run_test "$id" "Auth profile exists: $agent" \
      "_sandbox 'test -f ~/.openclaw/agents/${agent}/agent/auth-profiles.json && echo EXISTS || echo MISSING'" \
      assert_contains \
      "Auth profile missing for $agent. Rerun setup step 10." \
      "EXISTS"
  done

  should_run "T3.6" && run_test "T3.6" "Auth profile content valid" \
    "_sandbox 'cat ~/.openclaw/agents/main/agent/auth-profiles.json 2>/dev/null'" \
    assert_contains \
    "Auth profile malformed. Should contain version:1 and provider:local-ollama." \
    "local-ollama"

  # -- Skills --
  echo " --- Skills ---"

  should_run "T3.7" && run_test "T3.7" "Skills count (expect 7)" \
    "_sandbox 'openclaw skills list 2>&1 | grep -c openclaw-workspace'" \
    assert_equals \
    "Not all skills loaded. Redeploy to ~/.openclaw/workspace/skills/" \
    "7"

  local -a skills=(
    analysis-methods
    case-summary
    clinical-delegation
    clinical-knowledge
    cohort-compare
    fhir-basics
    molecular-viz
  )
  for idx in "${!skills[@]}"; do
    skill=${skills[idx]}
    id="T3.8${suffixes:idx:1}"
    should_run "$id" && run_test "$id" "Skill loaded: $skill" \
      "_sandbox 'openclaw skills list 2>&1 | grep ${skill} | grep -c ready'" \
      assert_numeric_gt \
      "Skill $skill not loaded. Check ~/.openclaw/workspace/skills/${skill}/SKILL.md" \
      "0"
  done

  should_run "T3.9" && run_test "T3.9" "analysis-methods uses subprocess" \
    "_sandbox 'grep -c subprocess ~/.openclaw/workspace/skills/analysis-methods/SKILL.md 2>/dev/null'" \
    assert_numeric_gt \
    "analysis-methods skill still uses requests. Redeploy updated version." \
    "0"

  should_run "T3.10" && run_test "T3.10" "fhir-basics uses subprocess" \
    "_sandbox 'grep -c subprocess ~/.openclaw/workspace/skills/fhir-basics/SKILL.md 2>/dev/null'" \
    assert_numeric_gt \
    "fhir-basics skill still uses requests. Redeploy updated version." \
    "0"

  # -- Agents --
  echo " --- Agents ---"

  should_run "T3.11" && run_test "T3.11" "Agents count (expect >= 5)" \
    "_sandbox 'openclaw agents list 2>&1 | grep -c Workspace:'" \
    assert_numeric_gt \
    "Not all agents registered. Rerun setup step 9." \
    "4"

  local -a sub_agents=(patient-data labs-vitals medications analyst molecular)
  for idx in "${!sub_agents[@]}"; do
    agent=${sub_agents[idx]}
    id="T3.12${suffixes:idx:1}"
    should_run "$id" && run_test "$id" "Agent registered: $agent" \
      "_sandbox 'openclaw agents list 2>&1 | grep -c ${agent}'" \
      assert_numeric_gt \
      "Agent $agent not registered." \
      "0"
  done

  # -- IDENTITY.md --
  echo " --- IDENTITY.md ---"

  should_run "T3.13" && run_test "T3.13" "IDENTITY.md exists" \
    "_sandbox 'test -f ~/.openclaw/workspace/IDENTITY.md && echo EXISTS || echo MISSING'" \
    assert_contains \
    "IDENTITY.md not deployed." \
    "EXISTS"

  should_run "T3.14" && run_test "T3.14" "IDENTITY.md header correct" \
    "_sandbox 'head -1 ~/.openclaw/workspace/IDENTITY.md'" \
    assert_contains \
    "IDENTITY.md has wrong header." \
    "Clinical Intelligence" 

  should_run "T3.15" && run_test "T3.15" "IDENTITY.md has molecular delegation" \
    "_sandbox 'grep -c molecular ~/.openclaw/workspace/IDENTITY.md 2>/dev/null'" \
    assert_numeric_gt \
    "IDENTITY.md missing molecular agent delegation." \
    "0"

  # The phrase must be double-quoted inside the single-quoted sandbox command:
  # nesting single quotes would end the outer quote early and hand grep only
  # "How" as its pattern, with "to" and "work" treated as file names.
  should_run "T3.16" && run_test "T3.16" "IDENTITY.md has how-to-work section" \
    "_sandbox 'grep -c \"How to work\" ~/.openclaw/workspace/IDENTITY.md 2>/dev/null'" \
    assert_numeric_gt \
    "IDENTITY.md missing How to work section." \
    "0"

  should_run "T3.17" && run_test "T3.17" "IDENTITY.md has principles" \
    "_sandbox 'grep -c Principles ~/.openclaw/workspace/IDENTITY.md 2>/dev/null'" \
    assert_numeric_gt \
    "IDENTITY.md missing Principles section." \
    "0"

  # -- openclaw.json --
  echo " --- openclaw.json ---"

  should_run "T3.18" && run_test "T3.18" "Model in openclaw.json" \
    "_sandbox 'python3 -c \"import json,os; d=json.load(open(os.path.expanduser(\\\"~/.openclaw/openclaw.json\\\"))); print(d[\\\"agents\\\"][\\\"defaults\\\"][\\\"model\\\"])\"'" \
    assert_contains \
    "Wrong model in openclaw.json." \
    "local-ollama/nemotron-3-super"

  should_run "T3.19" && run_test "T3.19" "allowAgents includes molecular" \
    "_sandbox 'python3 -c \"import json,os; d=json.load(open(os.path.expanduser(\\\"~/.openclaw/openclaw.json\\\"))); a=d[\\\"agents\\\"][\\\"list\\\"][0][\\\"subagents\\\"][\\\"allowAgents\\\"]; print(\\\"OK\\\" if \\\"molecular\\\" in a else \\\"MISSING\\\")\"'" \
    assert_contains \
    "molecular not in allowAgents. Update openclaw.json." \
    "OK"

  # -- Scripts --
  echo " --- Scripts ---"

  should_run "T3.20" && run_test "T3.20" "build_viewer.py exists in sandbox" \
    "_sandbox 'test -f /sandbox/clinical-intelligence/scripts/build_viewer.py && echo EXISTS || echo MISSING'" \
    assert_contains \
    "build_viewer.py not uploaded to sandbox." \
    "EXISTS"

  should_run "T3.21" && run_test "T3.21" "build_viewer.py uses subprocess" \
    "_sandbox 'grep -c subprocess.run /sandbox/clinical-intelligence/scripts/build_viewer.py 2>/dev/null'" \
    assert_numeric_gt \
    "build_viewer.py still uses urllib.request. Deploy updated version." \
    "0"

  should_run "T3.22" && run_test "T3.22" "validate_and_run.py exists" \
    "_sandbox 'test -f /sandbox/clinical-intelligence/scripts/validate_and_run.py && echo EXISTS || echo MISSING'" \
    assert_contains \
    "validate_and_run.py not uploaded to sandbox." \
    "EXISTS"

  # -- Smoke test --
  echo " --- Smoke Test ---"

  # \$\$ is left for the sandbox-side shell to expand, giving a per-run
  # session id.
  should_run "T3.23" && run_test "T3.23" "Agent responds to prompt" \
    "_sandbox 'cd /sandbox/clinical-intelligence && openclaw agent --local --session-id smoke-\$\$ --thinking off --message \"Say OK\" --timeout 60 2>&1 | tail -5'" \
    assert_contains \
    "Agent cannot respond. Check all Level 3 tests above first." \
    "OK"
}

# ═══════════════════════════════════════════════════════════════════════
# Level 4: Agent Functional Tests (~20-30 minutes)
# ═══════════════════════════════════════════════════════════════════════

# Drives the agent with real prompts and exercises the OpenFold3 predict
# endpoint and build_viewer.py from inside the sandbox.
run_level4() {
  echo ""
  echo "═══ Level 4: Agent Functional Tests ═══"
  echo ""

  should_run "T4.1" && run_test "T4.1" "Cohort count (expect 47)" \
    "_sandbox 'cd /sandbox/clinical-intelligence && openclaw agent --local --session-id t41-\$\$ --thinking off --timeout 300 --message \"Find all diabetic patients and count them\" 2>&1 | tail -20'" \
    assert_contains \
    "Agent failed cohort query. Check FHIR access + analysis-methods skill." \
    "47"

  should_run "T4.2" && run_test "T4.2" "Lab retrieval (HbA1c value)" \
    "_sandbox 'cd /sandbox/clinical-intelligence && openclaw agent --local --session-id t42-\$\$ --thinking off --timeout 300 --message \"Get the latest HbA1c for patient 9eb43ac3-7c1e-4e25-94cd-4b2c43f7234e\" 2>&1 | tail -20'" \
    assert_output_not_empty \
    "Agent failed lab retrieval."

  should_run "T4.3" && run_test "T4.3" "Code execution (print 42)" \
    "_sandbox 'cd /sandbox/clinical-intelligence && openclaw agent --local --session-id t43-\$\$ --thinking off --timeout 120 --message \"Write a Python script that prints 42 and execute it\" 2>&1 | tail -10'" \
    assert_contains \
    "Agent cannot execute code." \
    "42"

  should_run "T4.4" && run_test "T4.4" "Molecular visualization" \
    "_sandbox 'cd /sandbox/clinical-intelligence && rm -f ~/.openclaw/canvas/atorvastatin*.html && openclaw agent --local --session-id t44-\$\$ --thinking off --timeout 300 --message \"Show me the 3D structure of atorvastatin bound to its target HMG-CoA reductase\" 2>&1 | tail -10; ls -la ~/.openclaw/canvas/atorvastatin*.html 2>/dev/null | wc -l'" \
    assert_numeric_gt \
    "Molecular viz failed. Check OpenFold3 access + build_viewer.py." \
    "0"

  # -- OpenFold3 / molecular-viz tests --
  echo " --- OpenFold3 / Molecular Viz ---"

  should_run "T4.5" && run_test "T4.5" "OpenFold3 prediction response schema" \
    "_sandbox 'curl -sf --max-time 300 -X POST -H \"Content-Type: application/json\" -d \"{\\\"inputs\\\":[{\\\"input_id\\\":\\\"schema-test\\\",\\\"molecules\\\":[{\\\"type\\\":\\\"protein\\\",\\\"id\\\":\\\"A\\\",\\\"sequence\\\":\\\"FVNQHLCGSHLVEALYLVCGERGFFYTPKT\\\",\\\"msa\\\":{\\\"main\\\":{\\\"a3m\\\":{\\\"alignment\\\":\\\">q\\\\nFVNQHLCGSHLVEALYLVCGERGFFYTPKT\\\",\\\"format\\\":\\\"a3m\\\"}}}}],\\\"output_format\\\":\\\"pdb\\\"}]}\" http://${BRIDGE_IP}:8000/biology/openfold/openfold3/predict | python3 -c \"import sys,json; r=json.load(sys.stdin); o=r[\\\"outputs\\\"][0][\\\"structures_with_scores\\\"][0]; assert \\\"structure\\\" in o, \\\"missing structure\\\"; assert \\\"confidence_score\\\" in o, \\\"missing confidence_score\\\"; assert \\\"complex_plddt_score\\\" in o, \\\"missing plddt\\\"; assert \\\"ptm_score\\\" in o, \\\"missing ptm\\\"; print(\\\"SCHEMA_OK\\\")\"'" \
    assert_contains \
    "OpenFold3 response missing expected fields (structure, confidence_score, plddt, ptm). Check NIM version." \
    "SCHEMA_OK"

  should_run "T4.6" && run_test "T4.6" "OpenFold3 confidence scores are numeric" \
    "_sandbox 'curl -sf --max-time 300 -X POST -H \"Content-Type: application/json\" -d \"{\\\"inputs\\\":[{\\\"input_id\\\":\\\"score-test\\\",\\\"molecules\\\":[{\\\"type\\\":\\\"protein\\\",\\\"id\\\":\\\"A\\\",\\\"sequence\\\":\\\"FVNQHLCGSHLVEALYLVCGERGFFYTPKT\\\",\\\"msa\\\":{\\\"main\\\":{\\\"a3m\\\":{\\\"alignment\\\":\\\">q\\\\nFVNQHLCGSHLVEALYLVCGERGFFYTPKT\\\",\\\"format\\\":\\\"a3m\\\"}}}}],\\\"output_format\\\":\\\"pdb\\\"}]}\" http://${BRIDGE_IP}:8000/biology/openfold/openfold3/predict | python3 -c \"import sys,json; r=json.load(sys.stdin); o=r[\\\"outputs\\\"][0][\\\"structures_with_scores\\\"][0]; plddt=float(o[\\\"complex_plddt_score\\\"]); ptm=float(o[\\\"ptm_score\\\"]); conf=float(o[\\\"confidence_score\\\"]); iptm=float(o.get(\\\"iptm_score\\\",0)); print(f\\\"pLDDT={plddt:.1f} pTM={ptm:.2f} ipTM={iptm:.2f} conf={conf:.2f}\\\"); assert plddt > 0, \\\"pLDDT not positive\\\"; assert ptm >= 0, \\\"pTM negative\\\"; print(\\\"SCORES_OK\\\")\"'" \
    assert_contains \
    "Confidence scores not numeric or out of range. Check OpenFold3 prediction output." \
    "SCORES_OK"

  should_run "T4.7" && run_test "T4.7" "build_viewer.py HTML output valid" \
    "_sandbox 'cd /sandbox/clinical-intelligence && rm -f ~/.openclaw/canvas/metformin_complex.html ~/.openclaw/canvas/metformin_complex.pdb && python3 scripts/build_viewer.py --drug metformin --openfold-host ${BRIDGE_IP} 2>&1; cat ~/.openclaw/canvas/metformin_complex.html 2>/dev/null | python3 -c \"import sys; html=sys.stdin.read(); checks=[\\\"3Dmol\\\" in html, \\\"ATOM\\\" in html or \\\"HETATM\\\" in html, \\\"pLDDT\\\" in html, \\\"confidence\\\" in html.lower()]; print(f\\\"3Dmol={checks[0]} PDB={checks[1]} pLDDT={checks[2]} conf={checks[3]}\\\"); print(\\\"HTML_OK\\\" if all(checks) else \\\"HTML_FAIL\\\")\"'" \
    assert_contains \
    "build_viewer.py HTML missing 3Dmol.js, PDB structure data, or confidence scores. Check script output." \
    "HTML_OK"

  # Depends on the file written by T4.7; prints its size in bytes, or 0 when
  # it is absent.
  should_run "T4.8" && run_test "T4.8" "build_viewer.py creates file in canvas" \
    "_sandbox 'test -f ~/.openclaw/canvas/metformin_complex.html && stat -c %s ~/.openclaw/canvas/metformin_complex.html || echo 0'" \
    assert_numeric_gt \
    "build_viewer.py did not create output file in canvas dir. Run T4.7 first." \
    "1000"
}

# ═══════════════════════════════════════════════════════════════════════
# Level 5: End-to-End Integration (~15 minutes)
# ═══════════════════════════════════════════════════════════════════════

# Single multi-step agent query spanning several conditions and a lab value.
run_level5() {
  echo ""
  echo "═══ Level 5: End-to-End Integration ═══"
  echo ""

  should_run "T5.1" && run_test "T5.1" "Cross-condition analysis (diabetes + hypertension + eGFR)" \
    "_sandbox 'cd /sandbox/clinical-intelligence && openclaw agent --local --session-id t51-\$\$ --thinking off --timeout 600 --message \"Find all diabetic patients that also have hypertension. For the overlap, get their eGFR. Flag anyone with eGFR below 60 as kidney disease risk.\" 2>&1 | tail -30'" \
    assert_contains \
    "Cross-condition query failed. Run Level 4 tests individually to isolate." \
    "24"
}

# ═══════════════════════════════════════════════════════════════════════
# Main
# ═══════════════════════════════════════════════════════════════════════

echo ""
echo "╔════════════════════════════════════════════════╗"
echo "║ Clinical Intelligence Test Suite ║"
echo "║ Max level: $MAX_LEVEL ║"
echo "╚════════════════════════════════════════════════╝"

# Levels are cumulative: --level N runs every level from 1 through N.
(( MAX_LEVEL >= 1 )) && run_level1
(( MAX_LEVEL >= 2 )) && run_level2
(( MAX_LEVEL >= 3 )) && run_level3
(( MAX_LEVEL >= 4 )) && run_level4
(( MAX_LEVEL >= 5 )) && run_level5

print_summary