mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-06-21 21:59:30 +00:00
448 lines
24 KiB
Bash
Executable File
448 lines
24 KiB
Bash
Executable File
#!/usr/bin/env bash
# test-all.sh -- Comprehensive CLI test suite for clinical-intelligence.
#
# Usage:
#   bash scripts/test-all.sh              # default: levels 1-3 (~3 min)
#   bash scripts/test-all.sh --level 1    # infrastructure only (~30s)
#   bash scripts/test-all.sh --level 4    # includes agent tests (~30 min)
#   bash scripts/test-all.sh --level 5    # full e2e (~45 min)
#   bash scripts/test-all.sh --test T3.8  # single test
#   bash scripts/test-all.sh --verbose    # show full output per test
#
# When --test names a test above the default level (e.g. T4.1) and --level is
# not given, the maximum level is raised to the level encoded in the test ID.
#
# Runs from the DGX host. Requires: openshell CLI on PATH, sandbox running.

# Deliberately no -e: a failing probe must not abort the remaining tests.
set -uo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
source "$SCRIPT_DIR/test-lib.sh"

# Source .env so OLLAMA_PORT/OPENFOLD_PORT overrides reach the curl URLs below.
if [ -f "$REPO_DIR/.env" ]; then
    set -a
    # shellcheck disable=SC1091
    . "$REPO_DIR/.env"
    set +a
fi

MAX_LEVEL=3
SINGLE_TEST=""
_level_given=false

while [[ $# -gt 0 ]]; do
    case "$1" in
        --level)
            # Under `set -u` a bare "$2" would die with "unbound variable";
            # report the real problem instead.
            if [[ $# -lt 2 ]]; then
                echo "Option --level requires a value (1-5)" >&2
                exit 1
            fi
            if [[ ! "$2" =~ ^[0-9]+$ ]]; then
                echo "Invalid --level value: $2 (expected a number)" >&2
                exit 1
            fi
            # Force base 10 so values such as "08" are not parsed as octal.
            MAX_LEVEL=$(( 10#$2 ))
            _level_given=true
            shift 2
            ;;
        --test)
            if [[ $# -lt 2 ]]; then
                echo "Option --test requires a test ID (e.g. T3.8)" >&2
                exit 1
            fi
            SINGLE_TEST="$2"
            shift 2
            ;;
        --verbose) VERBOSE=true; shift ;;
        *) echo "Unknown option: $1"; exit 1 ;;
    esac
done

# A --test filter aimed at a level above MAX_LEVEL would otherwise silently run
# nothing. Unless the caller pinned --level, raise MAX_LEVEL to the level
# encoded in the test ID (T4.1 -> 4).
if [[ -n "$SINGLE_TEST" && "$_level_given" == false && "$SINGLE_TEST" =~ ^T([0-9]+)\. ]]; then
    if (( 10#${BASH_REMATCH[1]} > MAX_LEVEL )); then
        MAX_LEVEL=$(( 10#${BASH_REMATCH[1]} ))
    fi
fi

init_test_run

BRIDGE_IP=$(_bridge_ip)

# Helper: decide whether a test ID passes the --test filter.
# Globals:   SINGLE_TEST (read) -- empty means "no filter, run everything".
# Arguments: $1 -- test ID, e.g. T3.8a
# Returns:   0 when the test should run, 1 otherwise.
#
# A filter matches its exact ID and also the same ID followed by one lowercase
# letter, so `--test T3.8` selects the sub-tests T3.8a..T3.8g. The suffix must
# be a letter, so T3.1 does not drag in T3.10..T3.19.
should_run() {
    [[ -z "$SINGLE_TEST" ]] && return 0
    [[ "$1" == "$SINGLE_TEST" || "$1" == "$SINGLE_TEST"[a-z] ]]
}

# ═══════════════════════════════════════════════════════════════════════
# Level 1: Infrastructure Health (host-side, ~30 seconds)
# ═══════════════════════════════════════════════════════════════════════

# Registers the host-side probes: Ollama, the model, OpenFold3, the GPU and the
# public FHIR server. As used here, run_test receives: ID, description, command
# string, assertion name, failure hint, and (for some assertions) an expected
# value. Every probe is gated by should_run so --test can isolate one of them.
run_level1() {
    # Resolve the overridable ports once; .env values win over the defaults.
    local ollama_port="${OLLAMA_PORT:-11434}"
    local openfold_port="${OPENFOLD_PORT:-8000}"
    local ollama_url="http://localhost:${ollama_port}"

    printf '\n%s\n\n' "═══ Level 1: Infrastructure Health ═══"

    should_run "T1.1" && run_test "T1.1" "Ollama alive" \
        "curl -sf ${ollama_url}/" \
        assert_exit_0 \
        "Ollama not running. Docker (default): make up. Host Ollama alternative: OLLAMA_HOST=0.0.0.0 ollama serve."

    # Lists the pulled models and reduces the answer to FOUND / MISSING.
    should_run "T1.2" && run_test "T1.2" "Model available (nemotron-3-super)" \
        "curl -s ${ollama_url}/api/tags | python3 -c \"import sys,json; names=[m['name'] for m in json.load(sys.stdin)['models']]; print('FOUND' if any('nemotron-3-super' in n for n in names) else 'MISSING')\"" \
        assert_contains \
        "Model not pulled. Run: ollama pull nemotron-3-super" \
        "FOUND"

    # Prints the completion text, or the literal EMPTY when the reply has none.
    should_run "T1.3" && run_test "T1.3" "Ollama generates text (direct)" \
        "curl -sf -m 30 -X POST ${ollama_url}/v1/chat/completions -H 'Content-Type: application/json' -d '{\"model\":\"nemotron-3-super:120b-a12b\",\"messages\":[{\"role\":\"user\",\"content\":\"Say OK\"}],\"max_tokens\":5}' | python3 -c \"import sys,json; c=json.load(sys.stdin).get('choices',[{}])[0].get('message',{}).get('content',''); print(c if c else 'EMPTY')\"" \
        assert_output_not_empty \
        "Ollama can't generate. Check: curl localhost:${ollama_port}/api/ps"

    should_run "T1.5" && run_test "T1.5" "OpenFold3 NIM ready" \
        "curl -sf http://localhost:${openfold_port}/v1/health/ready" \
        assert_contains \
        "OpenFold3 not ready. Check: docker ps | grep openfold" \
        "ready"

    should_run "T1.6" && run_test "T1.6" "GPU accessible" \
        "nvidia-smi --query-gpu=memory.used,memory.total --format=csv,noheader 2>&1" \
        assert_contains \
        "GPU not accessible. Check NVIDIA driver." \
        "MiB"

    should_run "T1.7" && run_test "T1.7" "FHIR server reachable" \
        "curl -sf -o /dev/null -w '%{http_code}' https://r4.smarthealthit.org/metadata" \
        assert_equals \
        "FHIR unreachable. Check network/DNS." \
        "200"
}

# ═══════════════════════════════════════════════════════════════════════
# Level 2: OpenShell + Sandbox Health (~1 minute)
# ═══════════════════════════════════════════════════════════════════════

# Registers the OpenShell gateway / sandbox checks. Commands wrapped in
# `_sandbox '...'` are handed to the _sandbox helper (defined outside this
# file section) as one single-quoted string.
# Globals: BRIDGE_IP (read), OPENFOLD_PORT (read, optional, default 8000).
run_level2() {
    # Honour the OPENFOLD_PORT override from .env, as the host-side T1.5 probe
    # does, instead of hard-coding 8000 for the sandbox-side checks.
    local openfold_base="http://${BRIDGE_IP}:${OPENFOLD_PORT:-8000}"

    echo ""
    echo "═══ Level 2: OpenShell + Sandbox Health ═══"
    echo ""

    should_run "T2.1" && run_test "T2.1" "Gateway connected" \
        "openshell status 2>&1" \
        assert_contains \
        "Gateway down. Run: OPENSHELL_K3S_ARGS='--kubelet-arg=cgroup-driver=systemd' openshell gateway start" \
        "Connected"

    should_run "T2.2" && run_test "T2.2" "Sandbox exists and ready" \
        "openshell sandbox list 2>&1" \
        assert_contains \
        "Sandbox not found. Run: bash scripts/setup_sandbox.sh" \
        "Ready"

    should_run "T2.3" && run_test "T2.3" "Forward running on 18789" \
        "openshell forward list 2>&1" \
        assert_contains \
        "Forward dead. Run: openshell forward stop 18789 clinical-sandbox; openshell forward start -d 18789 clinical-sandbox" \
        "running"

    # Tries the health endpoint first and falls back to the root path.
    should_run "T2.3b" && run_test "T2.3b" "Gateway HTTP responding on 18789" \
        "curl -sf -m 5 -o /dev/null -w %{http_code} http://127.0.0.1:18789/__openclaw__/health 2>&1 || curl -sf -m 5 -o /dev/null -w %{http_code} http://127.0.0.1:18789/ 2>&1" \
        assert_contains \
        "Gateway HTTP not responding. Forward exists but no listener — re-run scripts/restart_sandbox.sh inside the sandbox or check /tmp/gw.log for the os.networkInterfaces() crash (needs openclaw-os-shim.js loaded via NODE_OPTIONS=--require)." \
        "200"

    should_run "T2.4" && run_test "T2.4" "FHIR from sandbox (curl)" \
        "_sandbox 'curl -sf https://r4.smarthealthit.org/Patient?_count=1 -o /dev/null -w %{http_code}'" \
        assert_equals \
        "FHIR blocked by sandbox policy. Check fhir section + python binary wildcards." \
        "200"

    should_run "T2.5" && run_test "T2.5" "Inference from sandbox" \
        "_sandbox 'curl -sk https://inference.local/v1/models'" \
        assert_contains \
        "Inference not routed. Check: openshell inference get" \
        "nemotron"

    should_run "T2.6" && run_test "T2.6" "OpenFold3 health from sandbox" \
        "_sandbox 'curl -sf ${openfold_base}/v1/health/ready'" \
        assert_contains \
        "OpenFold3 health check blocked by sandbox. Check openfold3 policy." \
        "ready"

    # Any status other than 403 counts as "reachable"; the payload is a dummy.
    should_run "T2.6b" && run_test "T2.6b" "OpenFold3 predict endpoint reachable from sandbox" \
        "_sandbox 'curl -s -o /dev/null -w %{http_code} -X POST -H \"Content-Type: application/json\" -d \"{\\\"dummy\\\": true}\" ${openfold_base}/biology/openfold/openfold3/predict'" \
        assert_not_contains \
        "OpenFold3 predict blocked (HTTP 403). Sandbox policy may have L7 rules that break plain HTTP." \
        "403"

    # -w appends the status code on its own line; tail -1 keeps only that line.
    should_run "T2.6c" && run_test "T2.6c" "OpenFold3 predict accepts POST from sandbox" \
        "_sandbox 'curl -s -w \"\n%{http_code}\" -X POST -H \"Content-Type: application/json\" -d \"{\\\"inputs\\\":[{\\\"input_id\\\":\\\"test\\\",\\\"molecules\\\":[{\\\"type\\\":\\\"protein\\\",\\\"id\\\":\\\"A\\\",\\\"sequence\\\":\\\"MKTVRQERLKSIVRI\\\",\\\"msa\\\":{\\\"main\\\":{\\\"a3m\\\":{\\\"alignment\\\":\\\">q\\\\nMKTVRQERLKSIVRI\\\",\\\"format\\\":\\\"a3m\\\"}}}}],\\\"output_format\\\":\\\"pdb\\\"}]}\" ${openfold_base}/biology/openfold/openfold3/predict 2>&1 | tail -1'" \
        assert_contains \
        "OpenFold3 predict endpoint rejected POST from sandbox. Check sandbox policy and OpenFold3 NIM status." \
        "200"

    # Security check: the request to an unlisted host must NOT exit 0.
    should_run "T2.7" && run_test "T2.7" "Outbound traffic blocked (security)" \
        "_sandbox 'curl --max-time 3 https://google.com 2>&1; echo EXIT_CODE=\$?'" \
        assert_not_contains \
        "SECURITY FAILURE: Outbound traffic NOT blocked!" \
        "EXIT_CODE=0"

    should_run "T2.8" && run_test "T2.8" "Python packages available" \
        "_sandbox 'python -c \"import subprocess, json, pandas, matplotlib; print(\\\"OK\\\")\"'" \
        assert_contains \
        "Python packages missing. Sandbox may need recreation." \
        "OK"
}

# ═══════════════════════════════════════════════════════════════════════
# Level 3: OpenClaw Configuration Correctness (~2 minutes)
# ═══════════════════════════════════════════════════════════════════════

# Registers the in-sandbox configuration checks: gateway process and log,
# model/auth profiles, skills, agents, IDENTITY.md, openclaw.json, deployed
# scripts, and a final agent smoke test.
#
# Per-item checks (T3.5x, T3.8x, T3.12x) get one lowercase letter per list
# entry, in list order, so the IDs stay stable as long as the lists do.
run_level3() {
    local -a auth_agents=(main patient-data labs-vitals medications analyst molecular)
    local -a skills=(analysis-methods case-summary clinical-delegation clinical-knowledge cohort-compare fhir-basics molecular-viz)
    local -a sub_agents=(patient-data labs-vitals medications analyst molecular)
    local letters="abcdefg"
    # Loop variables are local so they do not leak into the caller's scope.
    local i agent skill test_id

    echo ""
    echo "═══ Level 3: OpenClaw Configuration ═══"
    echo ""

    # -- Gateway process and logs --
    echo " --- Gateway ---"

    should_run "T3.1" && run_test "T3.1" "Gateway process alive" \
        "_sandbox 'pgrep -f openclaw-gateway > /dev/null && echo ALIVE || echo DEAD'" \
        assert_contains \
        "OpenClaw gateway not running. Restart it." \
        "ALIVE"

    should_run "T3.2" && run_test "T3.2" "Gateway model correct" \
        "_sandbox 'grep \"agent model\" /tmp/gw.log 2>/dev/null | tail -1'" \
        warn:assert_contains \
        "Wrong model. Check ~/.openclaw/openclaw.json" \
        "local-ollama/nemotron-3-super"

    # Counts error-looking lines in the first 50 log lines, ignoring apply_patch.
    should_run "T3.3" && run_test "T3.3" "Gateway no errors" \
        "_sandbox 'head -50 /tmp/gw.log 2>/dev/null | grep -iE \"\\[error\\]|\\[fatal\\]|crashed|segfault\" | grep -cv apply_patch'" \
        assert_equals \
        "Gateway has startup errors. Run: _sandbox head -50 /tmp/gw.log" \
        "0"

    # -- Model and auth --
    echo " --- Model & Auth ---"

    should_run "T3.4" && run_test "T3.4" "Model auth OK (not missing)" \
        "_sandbox 'openclaw models list 2>&1 | grep nemotron | grep -c missing; true'" \
        assert_equals \
        "Auth profile missing. Recreate auth-profiles.json for all agents." \
        "0"

    for i in "${!auth_agents[@]}"; do
        agent="${auth_agents[i]}"
        test_id="T3.5${letters:i:1}"
        should_run "$test_id" && run_test "$test_id" "Auth profile exists: $agent" \
            "_sandbox 'test -f ~/.openclaw/agents/${agent}/agent/auth-profiles.json && echo EXISTS || echo MISSING'" \
            assert_contains \
            "Auth profile missing for $agent. Rerun setup step 10." \
            "EXISTS"
    done

    should_run "T3.6" && run_test "T3.6" "Auth profile content valid" \
        "_sandbox 'cat ~/.openclaw/agents/main/agent/auth-profiles.json 2>/dev/null'" \
        assert_contains \
        "Auth profile malformed. Should contain version:1 and provider:local-ollama." \
        "local-ollama"

    # -- Skills --
    echo " --- Skills ---"

    should_run "T3.7" && run_test "T3.7" "Skills count (expect 7)" \
        "_sandbox 'openclaw skills list 2>&1 | grep -c openclaw-workspace'" \
        assert_equals \
        "Not all skills loaded. Redeploy to ~/.openclaw/workspace/skills/" \
        "7"

    for i in "${!skills[@]}"; do
        skill="${skills[i]}"
        test_id="T3.8${letters:i:1}"
        should_run "$test_id" && run_test "$test_id" "Skill loaded: $skill" \
            "_sandbox 'openclaw skills list 2>&1 | grep ${skill} | grep -c ready'" \
            assert_numeric_gt \
            "Skill $skill not loaded. Check ~/.openclaw/workspace/skills/${skill}/SKILL.md" \
            "0"
    done

    should_run "T3.9" && run_test "T3.9" "analysis-methods uses subprocess" \
        "_sandbox 'grep -c subprocess ~/.openclaw/workspace/skills/analysis-methods/SKILL.md 2>/dev/null'" \
        assert_numeric_gt \
        "analysis-methods skill still uses requests. Redeploy updated version." \
        "0"

    should_run "T3.10" && run_test "T3.10" "fhir-basics uses subprocess" \
        "_sandbox 'grep -c subprocess ~/.openclaw/workspace/skills/fhir-basics/SKILL.md 2>/dev/null'" \
        assert_numeric_gt \
        "fhir-basics skill still uses requests. Redeploy updated version." \
        "0"

    # -- Agents --
    echo " --- Agents ---"

    should_run "T3.11" && run_test "T3.11" "Agents count (expect >= 5)" \
        "_sandbox 'openclaw agents list 2>&1 | grep -c Workspace:'" \
        assert_numeric_gt \
        "Not all agents registered. Rerun setup step 9." \
        "4"

    for i in "${!sub_agents[@]}"; do
        agent="${sub_agents[i]}"
        test_id="T3.12${letters:i:1}"
        should_run "$test_id" && run_test "$test_id" "Agent registered: $agent" \
            "_sandbox 'openclaw agents list 2>&1 | grep -c ${agent}'" \
            assert_numeric_gt \
            "Agent $agent not registered." \
            "0"
    done

    # -- IDENTITY.md --
    echo " --- IDENTITY.md ---"

    should_run "T3.13" && run_test "T3.13" "IDENTITY.md exists" \
        "_sandbox 'test -f ~/.openclaw/workspace/IDENTITY.md && echo EXISTS || echo MISSING'" \
        assert_contains \
        "IDENTITY.md not deployed." \
        "EXISTS"

    should_run "T3.14" && run_test "T3.14" "IDENTITY.md header correct" \
        "_sandbox 'head -1 ~/.openclaw/workspace/IDENTITY.md'" \
        assert_contains \
        "IDENTITY.md has wrong header." \
        "Clinical Intelligence"

    should_run "T3.15" && run_test "T3.15" "IDENTITY.md has molecular delegation" \
        "_sandbox 'grep -c molecular ~/.openclaw/workspace/IDENTITY.md 2>/dev/null'" \
        assert_numeric_gt \
        "IDENTITY.md missing molecular agent delegation." \
        "0"

    # The pattern contains spaces, so it is double-quoted inside the
    # single-quoted sandbox command. Nesting single quotes here would close the
    # outer quote and split the command into several words.
    should_run "T3.16" && run_test "T3.16" "IDENTITY.md has how-to-work section" \
        "_sandbox 'grep -c \"How to work\" ~/.openclaw/workspace/IDENTITY.md 2>/dev/null'" \
        assert_numeric_gt \
        "IDENTITY.md missing How to work section." \
        "0"

    should_run "T3.17" && run_test "T3.17" "IDENTITY.md has principles" \
        "_sandbox 'grep -c Principles ~/.openclaw/workspace/IDENTITY.md 2>/dev/null'" \
        assert_numeric_gt \
        "IDENTITY.md missing Principles section." \
        "0"

    # -- openclaw.json --
    echo " --- openclaw.json ---"

    should_run "T3.18" && run_test "T3.18" "Model in openclaw.json" \
        "_sandbox 'python3 -c \"import json,os; d=json.load(open(os.path.expanduser(\\\"~/.openclaw/openclaw.json\\\"))); print(d[\\\"agents\\\"][\\\"defaults\\\"][\\\"model\\\"])\"'" \
        assert_contains \
        "Wrong model in openclaw.json." \
        "local-ollama/nemotron-3-super"

    should_run "T3.19" && run_test "T3.19" "allowAgents includes molecular" \
        "_sandbox 'python3 -c \"import json,os; d=json.load(open(os.path.expanduser(\\\"~/.openclaw/openclaw.json\\\"))); a=d[\\\"agents\\\"][\\\"list\\\"][0][\\\"subagents\\\"][\\\"allowAgents\\\"]; print(\\\"OK\\\" if \\\"molecular\\\" in a else \\\"MISSING\\\")\"'" \
        assert_contains \
        "molecular not in allowAgents. Update openclaw.json." \
        "OK"

    # -- Scripts --
    echo " --- Scripts ---"

    should_run "T3.20" && run_test "T3.20" "build_viewer.py exists in sandbox" \
        "_sandbox 'test -f /sandbox/clinical-intelligence/scripts/build_viewer.py && echo EXISTS || echo MISSING'" \
        assert_contains \
        "build_viewer.py not uploaded to sandbox." \
        "EXISTS"

    should_run "T3.21" && run_test "T3.21" "build_viewer.py uses subprocess" \
        "_sandbox 'grep -c subprocess.run /sandbox/clinical-intelligence/scripts/build_viewer.py 2>/dev/null'" \
        assert_numeric_gt \
        "build_viewer.py still uses urllib.request. Deploy updated version." \
        "0"

    should_run "T3.22" && run_test "T3.22" "validate_and_run.py exists" \
        "_sandbox 'test -f /sandbox/clinical-intelligence/scripts/validate_and_run.py && echo EXISTS || echo MISSING'" \
        assert_contains \
        "validate_and_run.py not uploaded to sandbox." \
        "EXISTS"

    # -- Smoke test --
    echo " --- Smoke Test ---"

    # \$\$ is expanded later, when the command string is executed, not here.
    should_run "T3.23" && run_test "T3.23" "Agent responds to prompt" \
        "_sandbox 'cd /sandbox/clinical-intelligence && openclaw agent --local --session-id smoke-\$\$ --thinking off --message \"Say OK\" --timeout 60 2>&1 | tail -5'" \
        assert_contains \
        "Agent cannot respond. Check all Level 3 tests above first." \
        "OK"
}

# ═══════════════════════════════════════════════════════════════════════
# Level 4: Agent Functional Tests (~20-30 minutes)
# ═══════════════════════════════════════════════════════════════════════

# Registers the agent-driven functional checks (cohort query, lab retrieval,
# code execution, molecular visualisation) and the direct OpenFold3 /
# build_viewer.py checks.
# Globals: BRIDGE_IP (read), OPENFOLD_PORT (read, optional, default 8000).
run_level4() {
    # Honour the OPENFOLD_PORT override from .env, as the host-side T1.5 probe
    # does, instead of hard-coding 8000 for the direct predict calls.
    local openfold_base="http://${BRIDGE_IP}:${OPENFOLD_PORT:-8000}"

    echo ""
    echo "═══ Level 4: Agent Functional Tests ═══"
    echo ""

    should_run "T4.1" && run_test "T4.1" "Cohort count (expect 47)" \
        "_sandbox 'cd /sandbox/clinical-intelligence && openclaw agent --local --session-id t41-\$\$ --thinking off --timeout 300 --message \"Find all diabetic patients and count them\" 2>&1 | tail -20'" \
        assert_contains \
        "Agent failed cohort query. Check FHIR access + analysis-methods skill." \
        "47"

    should_run "T4.2" && run_test "T4.2" "Lab retrieval (HbA1c value)" \
        "_sandbox 'cd /sandbox/clinical-intelligence && openclaw agent --local --session-id t42-\$\$ --thinking off --timeout 300 --message \"Get the latest HbA1c for patient 9eb43ac3-7c1e-4e25-94cd-4b2c43f7234e\" 2>&1 | tail -20'" \
        assert_output_not_empty \
        "Agent failed lab retrieval."

    should_run "T4.3" && run_test "T4.3" "Code execution (print 42)" \
        "_sandbox 'cd /sandbox/clinical-intelligence && openclaw agent --local --session-id t43-\$\$ --thinking off --timeout 120 --message \"Write a Python script that prints 42 and execute it\" 2>&1 | tail -10'" \
        assert_contains \
        "Agent cannot execute code." \
        "42"

    # Removes stale viewer files first, then counts the ones the agent created.
    should_run "T4.4" && run_test "T4.4" "Molecular visualization" \
        "_sandbox 'cd /sandbox/clinical-intelligence && rm -f ~/.openclaw/canvas/atorvastatin*.html && openclaw agent --local --session-id t44-\$\$ --thinking off --timeout 300 --message \"Show me the 3D structure of atorvastatin bound to its target HMG-CoA reductase\" 2>&1 | tail -10; ls -la ~/.openclaw/canvas/atorvastatin*.html 2>/dev/null | wc -l'" \
        assert_numeric_gt \
        "Molecular viz failed. Check OpenFold3 access + build_viewer.py." \
        "0"

    # -- OpenFold3 / molecular-viz tests --
    echo " --- OpenFold3 / Molecular Viz ---"

    # The inline Python asserts the response fields and prints SCHEMA_OK.
    should_run "T4.5" && run_test "T4.5" "OpenFold3 prediction response schema" \
        "_sandbox 'curl -sf --max-time 300 -X POST -H \"Content-Type: application/json\" -d \"{\\\"inputs\\\":[{\\\"input_id\\\":\\\"schema-test\\\",\\\"molecules\\\":[{\\\"type\\\":\\\"protein\\\",\\\"id\\\":\\\"A\\\",\\\"sequence\\\":\\\"FVNQHLCGSHLVEALYLVCGERGFFYTPKT\\\",\\\"msa\\\":{\\\"main\\\":{\\\"a3m\\\":{\\\"alignment\\\":\\\">q\\\\nFVNQHLCGSHLVEALYLVCGERGFFYTPKT\\\",\\\"format\\\":\\\"a3m\\\"}}}}],\\\"output_format\\\":\\\"pdb\\\"}]}\" ${openfold_base}/biology/openfold/openfold3/predict | python3 -c \"import sys,json; r=json.load(sys.stdin); o=r[\\\"outputs\\\"][0][\\\"structures_with_scores\\\"][0]; assert \\\"structure\\\" in o, \\\"missing structure\\\"; assert \\\"confidence_score\\\" in o, \\\"missing confidence_score\\\"; assert \\\"complex_plddt_score\\\" in o, \\\"missing plddt\\\"; assert \\\"ptm_score\\\" in o, \\\"missing ptm\\\"; print(\\\"SCHEMA_OK\\\")\"'" \
        assert_contains \
        "OpenFold3 response missing expected fields (structure, confidence_score, plddt, ptm). Check NIM version." \
        "SCHEMA_OK"

    # The inline Python converts the scores to float and prints SCORES_OK.
    should_run "T4.6" && run_test "T4.6" "OpenFold3 confidence scores are numeric" \
        "_sandbox 'curl -sf --max-time 300 -X POST -H \"Content-Type: application/json\" -d \"{\\\"inputs\\\":[{\\\"input_id\\\":\\\"score-test\\\",\\\"molecules\\\":[{\\\"type\\\":\\\"protein\\\",\\\"id\\\":\\\"A\\\",\\\"sequence\\\":\\\"FVNQHLCGSHLVEALYLVCGERGFFYTPKT\\\",\\\"msa\\\":{\\\"main\\\":{\\\"a3m\\\":{\\\"alignment\\\":\\\">q\\\\nFVNQHLCGSHLVEALYLVCGERGFFYTPKT\\\",\\\"format\\\":\\\"a3m\\\"}}}}],\\\"output_format\\\":\\\"pdb\\\"}]}\" ${openfold_base}/biology/openfold/openfold3/predict | python3 -c \"import sys,json; r=json.load(sys.stdin); o=r[\\\"outputs\\\"][0][\\\"structures_with_scores\\\"][0]; plddt=float(o[\\\"complex_plddt_score\\\"]); ptm=float(o[\\\"ptm_score\\\"]); conf=float(o[\\\"confidence_score\\\"]); iptm=float(o.get(\\\"iptm_score\\\",0)); print(f\\\"pLDDT={plddt:.1f} pTM={ptm:.2f} ipTM={iptm:.2f} conf={conf:.2f}\\\"); assert plddt > 0, \\\"pLDDT not positive\\\"; assert ptm >= 0, \\\"pTM negative\\\"; print(\\\"SCORES_OK\\\")\"'" \
        assert_contains \
        "Confidence scores not numeric or out of range. Check OpenFold3 prediction output." \
        "SCORES_OK"

    # build_viewer.py is only given a host here, so the port is left to it.
    should_run "T4.7" && run_test "T4.7" "build_viewer.py HTML output valid" \
        "_sandbox 'cd /sandbox/clinical-intelligence && rm -f ~/.openclaw/canvas/metformin_complex.html ~/.openclaw/canvas/metformin_complex.pdb && python3 scripts/build_viewer.py --drug metformin --openfold-host ${BRIDGE_IP} 2>&1; cat ~/.openclaw/canvas/metformin_complex.html 2>/dev/null | python3 -c \"import sys; html=sys.stdin.read(); checks=[\\\"3Dmol\\\" in html, \\\"ATOM\\\" in html or \\\"HETATM\\\" in html, \\\"pLDDT\\\" in html, \\\"confidence\\\" in html.lower()]; print(f\\\"3Dmol={checks[0]} PDB={checks[1]} pLDDT={checks[2]} conf={checks[3]}\\\"); print(\\\"HTML_OK\\\" if all(checks) else \\\"HTML_FAIL\\\")\"'" \
        assert_contains \
        "build_viewer.py HTML missing 3Dmol.js, PDB structure data, or confidence scores. Check script output." \
        "HTML_OK"

    # Relies on the file produced by T4.7; reports its size in bytes, or 0.
    should_run "T4.8" && run_test "T4.8" "build_viewer.py creates file in canvas" \
        "_sandbox 'test -f ~/.openclaw/canvas/metformin_complex.html && stat -c %s ~/.openclaw/canvas/metformin_complex.html || echo 0'" \
        assert_numeric_gt \
        "build_viewer.py did not create output file in canvas dir. Run T4.7 first." \
        "1000"
}

# ═══════════════════════════════════════════════════════════════════════
# Level 5: End-to-End Integration (~15 minutes)
# ═══════════════════════════════════════════════════════════════════════

# Registers the single end-to-end scenario: one agent prompt that combines a
# diabetes cohort, a hypertension overlap and an eGFR lookup. The check passes
# when the last 30 lines of the agent output contain "24".
run_level5() {
    # \$\$ stays literal here and is expanded when the command string is run.
    local e2e_cmd="_sandbox 'cd /sandbox/clinical-intelligence && openclaw agent --local --session-id t51-\$\$ --thinking off --timeout 600 --message \"Find all diabetic patients that also have hypertension. For the overlap, get their eGFR. Flag anyone with eGFR below 60 as kidney disease risk.\" 2>&1 | tail -30'"
    local e2e_hint="Cross-condition query failed. Run Level 4 tests individually to isolate."

    printf '\n%s\n\n' "═══ Level 5: End-to-End Integration ═══"

    should_run "T5.1" && run_test "T5.1" \
        "Cross-condition analysis (diabetes + hypertension + eGFR)" \
        "$e2e_cmd" \
        assert_contains \
        "$e2e_hint" \
        "24"
}

# ═══════════════════════════════════════════════════════════════════════
# Main
# ═══════════════════════════════════════════════════════════════════════

# Banner showing the highest level that will run.
printf '%s\n' \
    "" \
    "╔════════════════════════════════════════════════╗" \
    "║ Clinical Intelligence Test Suite ║" \
    "║ Max level: $MAX_LEVEL ║" \
    "╚════════════════════════════════════════════════╝"

# Levels are cumulative: every level up to and including MAX_LEVEL runs, in
# ascending order.
for _lvl in 1 2 3 4 5; do
    (( MAX_LEVEL >= _lvl )) && "run_level${_lvl}"
done

print_summary