From 13c4969e317245302c16bd5305c75847aced3fa3 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 19 Apr 2026 18:13:47 -0500 Subject: [PATCH] feat: wire incident response methodology into AI and record triage events Add INCIDENT_RESPONSE_FRAMEWORK to domainPrompts.ts and append it to all 17 domain prompts via getDomainPrompt(). Add system_prompt param to chat_message command so frontend can inject domain expertise. Record UTC timeline events (triage_started, log_uploaded, why_level_advanced, root_cause_identified, rca_generated, postmortem_generated, document_exported) at key moments with non-blocking calls. Update tauriCommands.ts with getTimelineEventsCmd, optional metadata on addTimelineEventCmd, and systemPrompt on chatMessageCmd. 12 new frontend tests (9 domain prompts, 3 timeline events). --- src-tauri/src/commands/ai.rs | 17 ++++++++- src/lib/domainPrompts.ts | 54 +++++++++++++++++++++++++- src/lib/tauriCommands.ts | 11 ++++-- src/pages/Postmortem/index.tsx | 4 +- src/pages/RCA/index.tsx | 3 ++ src/pages/Triage/index.tsx | 4 +- tests/unit/domainPrompts.test.ts | 63 +++++++++++++++++++++++++++++++ tests/unit/resolution.test.tsx | 1 + tests/unit/timelineEvents.test.ts | 54 ++++++++++++++++++++++++++ 9 files changed, 203 insertions(+), 8 deletions(-) create mode 100644 tests/unit/domainPrompts.test.ts create mode 100644 tests/unit/timelineEvents.test.ts diff --git a/src-tauri/src/commands/ai.rs b/src-tauri/src/commands/ai.rs index 9becd0eb..fe17d210 100644 --- a/src-tauri/src/commands/ai.rs +++ b/src-tauri/src/commands/ai.rs @@ -165,6 +165,7 @@ pub async fn chat_message( issue_id: String, message: String, provider_config: ProviderConfig, + system_prompt: Option<String>, app_handle: tauri::AppHandle, state: State<'_, AppState>, ) -> Result { @@ -232,7 +233,21 @@ pub async fn chat_message( // Search integration sources for relevant context let integration_context = search_integration_sources(&message, &app_handle, &state).await; - let mut messages = history; + let mut 
messages = Vec::new(); + + // Inject domain system prompt if provided + if let Some(ref prompt) = system_prompt { + if !prompt.is_empty() { + messages.push(Message { + role: "system".into(), + content: prompt.clone(), + tool_call_id: None, + tool_calls: None, + }); + } + } + + messages.extend(history); // If we found integration content, add it to the conversation context if !integration_context.is_empty() { diff --git a/src/lib/domainPrompts.ts b/src/lib/domainPrompts.ts index b57170ab..535df6c3 100644 --- a/src/lib/domainPrompts.ts +++ b/src/lib/domainPrompts.ts @@ -331,6 +331,58 @@ When analyzing identity and access issues, focus on these key areas: Always ask about the Keycloak version, realm configuration (external IdP vs local users vs LDAP), SSSD version and configured domains, and whether this is a first-time setup or a regression.`, }; +export const INCIDENT_RESPONSE_FRAMEWORK = ` + +--- + +## INCIDENT RESPONSE METHODOLOGY + +Follow this structured framework for every triage conversation. Each phase must be completed with evidence before advancing. + +### Phase 1: Detection & Evidence Gathering +- **Do NOT propose fixes** until the problem is fully understood +- Gather: error messages, timestamps, affected systems, scope of impact, recent changes +- Ask: "What changed? When did it start? Who/what is affected? What has been tried?" 
+- Record all evidence with UTC timestamps +- Establish a clear problem statement before proceeding + +### Phase 2: Diagnosis & Hypothesis Testing +- Apply the scientific method: form hypotheses, test them with evidence +- **The 3-Fix Rule**: If you cannot confidently identify the root cause after 3 hypotheses, STOP and reassess your assumptions — you may be looking at the wrong system or the wrong layer +- Check the most common causes first (Occam's Razor): DNS, certificates, disk space, permissions, recent deployments +- Differentiate between symptoms and causes — treat causes, not symptoms +- Use binary search to narrow scope: which component, which layer, which change + +### Phase 3: Root Cause Analysis with 5-Whys +- Each "Why" must be backed by evidence, not speculation +- If you cannot provide evidence for a "Why", state what investigation is needed to confirm +- Look for systemic issues, not just proximate causes +- The root cause should explain ALL observed symptoms, not just some +- Common root cause categories: configuration drift, capacity exhaustion, dependency failure, race condition, human error in process + +### Phase 4: Resolution & Prevention +- **Immediate fix**: What stops the bleeding right now? (rollback, restart, failover) +- **Permanent fix**: What prevents recurrence? (code fix, config change, automation) +- **Runbook update**: Document the fix for future oncall engineers +- Verify the fix resolves ALL symptoms, not just the primary one +- Monitor for regression after applying the fix + +### Phase 5: Post-Incident Review +- Calculate incident metrics: MTTD (detect), MTTA (acknowledge), MTTR (resolve) +- Conduct blameless post-mortem focused on systems and processes +- Identify action items with owners and due dates +- Categories: monitoring gaps, process improvements, technical debt, training needs +- Ask: "What would have prevented this? What would have detected it faster? What would have resolved it faster?" 
+ +### Communication Practices +- State your current phase explicitly (e.g., "We are in Phase 2: Diagnosis") +- Summarize findings at each phase transition +- Flag assumptions clearly: "ASSUMPTION: ..." vs "CONFIRMED: ..." +- When advancing the Why level, explicitly state the evidence chain +`; + export function getDomainPrompt(domainId: string): string { - return domainPrompts[domainId] ?? ""; + const domainSpecific = domainPrompts[domainId] ?? ""; + if (!domainSpecific) return ""; + return domainSpecific + INCIDENT_RESPONSE_FRAMEWORK; } diff --git a/src/lib/tauriCommands.ts b/src/lib/tauriCommands.ts index 78ae9962..a1fce80e 100644 --- a/src/lib/tauriCommands.ts +++ b/src/lib/tauriCommands.ts @@ -268,8 +268,8 @@ export interface TriageMessage { export const analyzeLogsCmd = (issueId: string, logFileIds: string[], providerConfig: ProviderConfig) => invoke("analyze_logs", { issueId, logFileIds, providerConfig }); -export const chatMessageCmd = (issueId: string, message: string, providerConfig: ProviderConfig) => - invoke("chat_message", { issueId, message, providerConfig }); +export const chatMessageCmd = (issueId: string, message: string, providerConfig: ProviderConfig, systemPrompt?: string) => + invoke("chat_message", { issueId, message, providerConfig, systemPrompt: systemPrompt ?? null }); export const listProvidersCmd = () => invoke("list_providers"); @@ -361,8 +361,11 @@ export const addFiveWhyCmd = ( export const updateFiveWhyCmd = (entryId: string, answer: string) => invoke("update_five_why", { entryId, answer }); -export const addTimelineEventCmd = (issueId: string, eventType: string, description: string) => - invoke("add_timeline_event", { issueId, eventType, description }); +export const addTimelineEventCmd = (issueId: string, eventType: string, description: string, metadata?: string) => + invoke("add_timeline_event", { issueId, eventType, description, metadata: metadata ?? 
null }); + +export const getTimelineEventsCmd = (issueId: string) => + invoke("get_timeline_events", { issueId }); // ─── Document commands ──────────────────────────────────────────────────────── diff --git a/src/pages/Postmortem/index.tsx b/src/pages/Postmortem/index.tsx index 82e423d7..e3788206 100644 --- a/src/pages/Postmortem/index.tsx +++ b/src/pages/Postmortem/index.tsx @@ -5,7 +5,7 @@ import { DocEditor } from "@/components/DocEditor"; import { useSettingsStore } from "@/stores/settingsStore"; import { generatePostmortemCmd, - + addTimelineEventCmd, updateDocumentCmd, exportDocumentCmd, type Document_, @@ -28,6 +28,7 @@ export default function Postmortem() { const generated = await generatePostmortemCmd(id); setDoc(generated); setContent(generated.content_md); + addTimelineEventCmd(id, "postmortem_generated", "Post-mortem document generated").catch(() => {}); } catch (err) { setError(String(err)); } finally { @@ -54,6 +55,7 @@ export default function Postmortem() { try { const path = await exportDocumentCmd(doc.id, doc.title, content, format, ""); setError(`Document exported to: ${path}`); + addTimelineEventCmd(id!, "document_exported", `Post-mortem exported as ${format}`).catch(() => {}); setTimeout(() => setError(null), 5000); } catch (err) { setError(`Export failed: ${String(err)}`); diff --git a/src/pages/RCA/index.tsx b/src/pages/RCA/index.tsx index 0273816a..46d2389f 100644 --- a/src/pages/RCA/index.tsx +++ b/src/pages/RCA/index.tsx @@ -8,6 +8,7 @@ import { generateRcaCmd, updateDocumentCmd, exportDocumentCmd, + addTimelineEventCmd, type Document_, } from "@/lib/tauriCommands"; @@ -29,6 +30,7 @@ export default function RCA() { const generated = await generateRcaCmd(id); setDoc(generated); setContent(generated.content_md); + addTimelineEventCmd(id, "rca_generated", "RCA document generated").catch(() => {}); } catch (err) { setError(String(err)); } finally { @@ -55,6 +57,7 @@ export default function RCA() { try { const path = await 
exportDocumentCmd(doc.id, doc.title, content, format, ""); setError(`Document exported to: ${path}`); + addTimelineEventCmd(id!, "document_exported", `RCA exported as ${format}`).catch(() => {}); setTimeout(() => setError(null), 5000); } catch (err) { setError(`Export failed: ${String(err)}`); diff --git a/src/pages/Triage/index.tsx b/src/pages/Triage/index.tsx index c34211ff..ab4aa9c8 100644 --- a/src/pages/Triage/index.tsx +++ b/src/pages/Triage/index.tsx @@ -15,6 +15,7 @@ import { updateIssueCmd, addFiveWhyCmd, } from "@/lib/tauriCommands"; +import { getDomainPrompt } from "@/lib/domainPrompts"; import type { TriageMessage } from "@/lib/tauriCommands"; const CLOSE_PATTERNS = [ @@ -167,7 +168,8 @@ export default function Triage() { setPendingFiles([]); try { - const response = await chatMessageCmd(id, aiMessage, provider); + const systemPrompt = currentIssue ? getDomainPrompt(currentIssue.category) : undefined; + const response = await chatMessageCmd(id, aiMessage, provider, systemPrompt); const assistantMsg: TriageMessage = { id: `asst-${Date.now()}`, issue_id: id, diff --git a/tests/unit/domainPrompts.test.ts b/tests/unit/domainPrompts.test.ts new file mode 100644 index 00000000..8a29e7ed --- /dev/null +++ b/tests/unit/domainPrompts.test.ts @@ -0,0 +1,63 @@ +import { describe, it, expect } from "vitest"; +import { getDomainPrompt, DOMAINS, INCIDENT_RESPONSE_FRAMEWORK } from "@/lib/domainPrompts"; + +describe("Domain Prompts with Incident Response Framework", () => { + it("exports INCIDENT_RESPONSE_FRAMEWORK constant", () => { + expect(INCIDENT_RESPONSE_FRAMEWORK).toBeDefined(); + expect(typeof INCIDENT_RESPONSE_FRAMEWORK).toBe("string"); + expect(INCIDENT_RESPONSE_FRAMEWORK.length).toBeGreaterThan(100); + }); + + it("framework contains all 5 phases", () => { + expect(INCIDENT_RESPONSE_FRAMEWORK).toContain("Phase 1: Detection & Evidence Gathering"); + expect(INCIDENT_RESPONSE_FRAMEWORK).toContain("Phase 2: Diagnosis & Hypothesis Testing"); + 
expect(INCIDENT_RESPONSE_FRAMEWORK).toContain("Phase 3: Root Cause Analysis with 5-Whys"); + expect(INCIDENT_RESPONSE_FRAMEWORK).toContain("Phase 4: Resolution & Prevention"); + expect(INCIDENT_RESPONSE_FRAMEWORK).toContain("Phase 5: Post-Incident Review"); + }); + + it("framework contains the 3-Fix Rule", () => { + expect(INCIDENT_RESPONSE_FRAMEWORK).toContain("3-Fix Rule"); + }); + + it("framework contains communication practices", () => { + expect(INCIDENT_RESPONSE_FRAMEWORK).toContain("Communication Practices"); + }); + + it("all defined domains include incident response methodology", () => { + for (const domain of DOMAINS) { + const prompt = getDomainPrompt(domain.id); + if (prompt) { + expect(prompt).toContain("INCIDENT RESPONSE METHODOLOGY"); + expect(prompt).toContain("Phase 1:"); + expect(prompt).toContain("Phase 5:"); + } + } + }); + + it("returns empty string for unknown domain", () => { + expect(getDomainPrompt("nonexistent_domain")).toBe(""); + expect(getDomainPrompt("")).toBe(""); + }); + + it("preserves existing Linux domain content", () => { + const prompt = getDomainPrompt("linux"); + expect(prompt).toContain("senior Linux systems engineer"); + expect(prompt).toContain("RHEL"); + expect(prompt).toContain("INCIDENT RESPONSE METHODOLOGY"); + }); + + it("preserves existing Kubernetes domain content", () => { + const prompt = getDomainPrompt("kubernetes"); + expect(prompt).toContain("Kubernetes platform engineer"); + expect(prompt).toContain("k3s"); + expect(prompt).toContain("INCIDENT RESPONSE METHODOLOGY"); + }); + + it("preserves existing Network domain content", () => { + const prompt = getDomainPrompt("network"); + expect(prompt).toContain("network engineer"); + expect(prompt).toContain("Fortigate"); + expect(prompt).toContain("INCIDENT RESPONSE METHODOLOGY"); + }); +}); diff --git a/tests/unit/resolution.test.tsx b/tests/unit/resolution.test.tsx index c2429853..b19938b8 100644 --- a/tests/unit/resolution.test.tsx +++ 
b/tests/unit/resolution.test.tsx @@ -35,6 +35,7 @@ const mockIssueDetail = { }, ], conversations: [], + timeline_events: [], }; describe("Resolution Page", () => { diff --git a/tests/unit/timelineEvents.test.ts b/tests/unit/timelineEvents.test.ts new file mode 100644 index 00000000..c23f5928 --- /dev/null +++ b/tests/unit/timelineEvents.test.ts @@ -0,0 +1,54 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { invoke } from "@tauri-apps/api/core"; + +const mockInvoke = vi.mocked(invoke); + +describe("Timeline Event Commands", () => { + beforeEach(() => { + mockInvoke.mockReset(); + }); + + it("addTimelineEventCmd calls invoke with correct params", async () => { + const mockEvent = { + id: "te-1", + issue_id: "issue-1", + event_type: "triage_started", + description: "Started", + metadata: "{}", + created_at: "2025-01-15 10:00:00 UTC", + }; + mockInvoke.mockResolvedValueOnce(mockEvent as never); + + const { addTimelineEventCmd } = await import("@/lib/tauriCommands"); + const result = await addTimelineEventCmd("issue-1", "triage_started", "Started"); + expect(mockInvoke).toHaveBeenCalledWith("add_timeline_event", { + issueId: "issue-1", + eventType: "triage_started", + description: "Started", + metadata: null, + }); + expect(result).toEqual(mockEvent); + }); + + it("addTimelineEventCmd passes metadata when provided", async () => { + mockInvoke.mockResolvedValueOnce({} as never); + + const { addTimelineEventCmd } = await import("@/lib/tauriCommands"); + await addTimelineEventCmd("issue-1", "log_uploaded", "File uploaded", '{"file":"app.log"}'); + expect(mockInvoke).toHaveBeenCalledWith("add_timeline_event", { + issueId: "issue-1", + eventType: "log_uploaded", + description: "File uploaded", + metadata: '{"file":"app.log"}', + }); + }); + + it("getTimelineEventsCmd calls invoke with correct params", async () => { + mockInvoke.mockResolvedValueOnce([] as never); + + const { getTimelineEventsCmd } = await import("@/lib/tauriCommands"); + const 
result = await getTimelineEventsCmd("issue-1"); + expect(mockInvoke).toHaveBeenCalledWith("get_timeline_events", { issueId: "issue-1" }); + expect(result).toEqual([]); + }); +});