feat: wire incident response methodology into AI and record triage events
Add INCIDENT_RESPONSE_FRAMEWORK to domainPrompts.ts and append it to all 17 domain prompts via getDomainPrompt(). Add an optional system_prompt parameter to the chat_message command so the frontend can inject domain expertise. Record UTC timeline events (triage_started, log_uploaded, why_level_advanced, root_cause_identified, rca_generated, postmortem_generated, document_exported) at key moments using non-blocking calls. Update tauriCommands.ts with getTimelineEventsCmd, an optional metadata argument on addTimelineEventCmd, and an optional systemPrompt argument on chatMessageCmd. Add 12 new frontend tests (9 for domain prompts, 3 for timeline events).
This commit is contained in:
parent
79a623dbb2
commit
13c4969e31
@ -165,6 +165,7 @@ pub async fn chat_message(
|
|||||||
issue_id: String,
|
issue_id: String,
|
||||||
message: String,
|
message: String,
|
||||||
provider_config: ProviderConfig,
|
provider_config: ProviderConfig,
|
||||||
|
system_prompt: Option<String>,
|
||||||
app_handle: tauri::AppHandle,
|
app_handle: tauri::AppHandle,
|
||||||
state: State<'_, AppState>,
|
state: State<'_, AppState>,
|
||||||
) -> Result<ChatResponse, String> {
|
) -> Result<ChatResponse, String> {
|
||||||
@ -232,7 +233,21 @@ pub async fn chat_message(
|
|||||||
// Search integration sources for relevant context
|
// Search integration sources for relevant context
|
||||||
let integration_context = search_integration_sources(&message, &app_handle, &state).await;
|
let integration_context = search_integration_sources(&message, &app_handle, &state).await;
|
||||||
|
|
||||||
let mut messages = history;
|
let mut messages = Vec::new();
|
||||||
|
|
||||||
|
// Inject domain system prompt if provided
|
||||||
|
if let Some(ref prompt) = system_prompt {
|
||||||
|
if !prompt.is_empty() {
|
||||||
|
messages.push(Message {
|
||||||
|
role: "system".into(),
|
||||||
|
content: prompt.clone(),
|
||||||
|
tool_call_id: None,
|
||||||
|
tool_calls: None,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
messages.extend(history);
|
||||||
|
|
||||||
// If we found integration content, add it to the conversation context
|
// If we found integration content, add it to the conversation context
|
||||||
if !integration_context.is_empty() {
|
if !integration_context.is_empty() {
|
||||||
|
|||||||
@ -331,6 +331,58 @@ When analyzing identity and access issues, focus on these key areas:
|
|||||||
Always ask about the Keycloak version, realm configuration (external IdP vs local users vs LDAP), SSSD version and configured domains, and whether this is a first-time setup or a regression.`,
|
Always ask about the Keycloak version, realm configuration (external IdP vs local users vs LDAP), SSSD version and configured domains, and whether this is a first-time setup or a regression.`,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export const INCIDENT_RESPONSE_FRAMEWORK = `
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## INCIDENT RESPONSE METHODOLOGY
|
||||||
|
|
||||||
|
Follow this structured framework for every triage conversation. Each phase must be completed with evidence before advancing.
|
||||||
|
|
||||||
|
### Phase 1: Detection & Evidence Gathering
|
||||||
|
- **Do NOT propose fixes** until the problem is fully understood
|
||||||
|
- Gather: error messages, timestamps, affected systems, scope of impact, recent changes
|
||||||
|
- Ask: "What changed? When did it start? Who/what is affected? What has been tried?"
|
||||||
|
- Record all evidence with UTC timestamps
|
||||||
|
- Establish a clear problem statement before proceeding
|
||||||
|
|
||||||
|
### Phase 2: Diagnosis & Hypothesis Testing
|
||||||
|
- Apply the scientific method: form hypotheses, test them with evidence
|
||||||
|
- **The 3-Fix Rule**: If you cannot confidently identify the root cause after 3 hypotheses, STOP and reassess your assumptions — you may be looking at the wrong system or the wrong layer
|
||||||
|
- Check the most common causes first (Occam's Razor): DNS, certificates, disk space, permissions, recent deployments
|
||||||
|
- Differentiate between symptoms and causes — treat causes, not symptoms
|
||||||
|
- Use binary search to narrow scope: which component, which layer, which change
|
||||||
|
|
||||||
|
### Phase 3: Root Cause Analysis with 5-Whys
|
||||||
|
- Each "Why" must be backed by evidence, not speculation
|
||||||
|
- If you cannot provide evidence for a "Why", state what investigation is needed to confirm
|
||||||
|
- Look for systemic issues, not just proximate causes
|
||||||
|
- The root cause should explain ALL observed symptoms, not just some
|
||||||
|
- Common root cause categories: configuration drift, capacity exhaustion, dependency failure, race condition, human error in process
|
||||||
|
|
||||||
|
### Phase 4: Resolution & Prevention
|
||||||
|
- **Immediate fix**: What stops the bleeding right now? (rollback, restart, failover)
|
||||||
|
- **Permanent fix**: What prevents recurrence? (code fix, config change, automation)
|
||||||
|
- **Runbook update**: Document the fix for future oncall engineers
|
||||||
|
- Verify the fix resolves ALL symptoms, not just the primary one
|
||||||
|
- Monitor for regression after applying the fix
|
||||||
|
|
||||||
|
### Phase 5: Post-Incident Review
|
||||||
|
- Calculate incident metrics: MTTD (detect), MTTA (acknowledge), MTTR (resolve)
|
||||||
|
- Conduct blameless post-mortem focused on systems and processes
|
||||||
|
- Identify action items with owners and due dates
|
||||||
|
- Categories: monitoring gaps, process improvements, technical debt, training needs
|
||||||
|
- Ask: "What would have prevented this? What would have detected it faster? What would have resolved it faster?"
|
||||||
|
|
||||||
|
### Communication Practices
|
||||||
|
- State your current phase explicitly (e.g., "We are in Phase 2: Diagnosis")
|
||||||
|
- Summarize findings at each phase transition
|
||||||
|
- Flag assumptions clearly: "ASSUMPTION: ..." vs "CONFIRMED: ..."
|
||||||
|
- When advancing the Why level, explicitly state the evidence chain
|
||||||
|
`;
|
||||||
|
|
||||||
export function getDomainPrompt(domainId: string): string {
|
export function getDomainPrompt(domainId: string): string {
|
||||||
return domainPrompts[domainId] ?? "";
|
const domainSpecific = domainPrompts[domainId] ?? "";
|
||||||
|
if (!domainSpecific) return "";
|
||||||
|
return domainSpecific + INCIDENT_RESPONSE_FRAMEWORK;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -268,8 +268,8 @@ export interface TriageMessage {
|
|||||||
export const analyzeLogsCmd = (issueId: string, logFileIds: string[], providerConfig: ProviderConfig) =>
|
export const analyzeLogsCmd = (issueId: string, logFileIds: string[], providerConfig: ProviderConfig) =>
|
||||||
invoke<AnalysisResult>("analyze_logs", { issueId, logFileIds, providerConfig });
|
invoke<AnalysisResult>("analyze_logs", { issueId, logFileIds, providerConfig });
|
||||||
|
|
||||||
export const chatMessageCmd = (issueId: string, message: string, providerConfig: ProviderConfig) =>
|
export const chatMessageCmd = (issueId: string, message: string, providerConfig: ProviderConfig, systemPrompt?: string) =>
|
||||||
invoke<ChatResponse>("chat_message", { issueId, message, providerConfig });
|
invoke<ChatResponse>("chat_message", { issueId, message, providerConfig, systemPrompt: systemPrompt ?? null });
|
||||||
|
|
||||||
export const listProvidersCmd = () => invoke<ProviderInfo[]>("list_providers");
|
export const listProvidersCmd = () => invoke<ProviderInfo[]>("list_providers");
|
||||||
|
|
||||||
@ -361,8 +361,11 @@ export const addFiveWhyCmd = (
|
|||||||
export const updateFiveWhyCmd = (entryId: string, answer: string) =>
|
export const updateFiveWhyCmd = (entryId: string, answer: string) =>
|
||||||
invoke<void>("update_five_why", { entryId, answer });
|
invoke<void>("update_five_why", { entryId, answer });
|
||||||
|
|
||||||
export const addTimelineEventCmd = (issueId: string, eventType: string, description: string) =>
|
export const addTimelineEventCmd = (issueId: string, eventType: string, description: string, metadata?: string) =>
|
||||||
invoke<TimelineEvent>("add_timeline_event", { issueId, eventType, description });
|
invoke<TimelineEvent>("add_timeline_event", { issueId, eventType, description, metadata: metadata ?? null });
|
||||||
|
|
||||||
|
export const getTimelineEventsCmd = (issueId: string) =>
|
||||||
|
invoke<TimelineEvent[]>("get_timeline_events", { issueId });
|
||||||
|
|
||||||
// ─── Document commands ────────────────────────────────────────────────────────
|
// ─── Document commands ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|||||||
@ -5,7 +5,7 @@ import { DocEditor } from "@/components/DocEditor";
|
|||||||
import { useSettingsStore } from "@/stores/settingsStore";
|
import { useSettingsStore } from "@/stores/settingsStore";
|
||||||
import {
|
import {
|
||||||
generatePostmortemCmd,
|
generatePostmortemCmd,
|
||||||
|
addTimelineEventCmd,
|
||||||
updateDocumentCmd,
|
updateDocumentCmd,
|
||||||
exportDocumentCmd,
|
exportDocumentCmd,
|
||||||
type Document_,
|
type Document_,
|
||||||
@ -28,6 +28,7 @@ export default function Postmortem() {
|
|||||||
const generated = await generatePostmortemCmd(id);
|
const generated = await generatePostmortemCmd(id);
|
||||||
setDoc(generated);
|
setDoc(generated);
|
||||||
setContent(generated.content_md);
|
setContent(generated.content_md);
|
||||||
|
addTimelineEventCmd(id, "postmortem_generated", "Post-mortem document generated").catch(() => {});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
setError(String(err));
|
setError(String(err));
|
||||||
} finally {
|
} finally {
|
||||||
@ -54,6 +55,7 @@ export default function Postmortem() {
|
|||||||
try {
|
try {
|
||||||
const path = await exportDocumentCmd(doc.id, doc.title, content, format, "");
|
const path = await exportDocumentCmd(doc.id, doc.title, content, format, "");
|
||||||
setError(`Document exported to: ${path}`);
|
setError(`Document exported to: ${path}`);
|
||||||
|
addTimelineEventCmd(id!, "document_exported", `Post-mortem exported as ${format}`).catch(() => {});
|
||||||
setTimeout(() => setError(null), 5000);
|
setTimeout(() => setError(null), 5000);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
setError(`Export failed: ${String(err)}`);
|
setError(`Export failed: ${String(err)}`);
|
||||||
|
|||||||
@ -8,6 +8,7 @@ import {
|
|||||||
generateRcaCmd,
|
generateRcaCmd,
|
||||||
updateDocumentCmd,
|
updateDocumentCmd,
|
||||||
exportDocumentCmd,
|
exportDocumentCmd,
|
||||||
|
addTimelineEventCmd,
|
||||||
type Document_,
|
type Document_,
|
||||||
} from "@/lib/tauriCommands";
|
} from "@/lib/tauriCommands";
|
||||||
|
|
||||||
@ -29,6 +30,7 @@ export default function RCA() {
|
|||||||
const generated = await generateRcaCmd(id);
|
const generated = await generateRcaCmd(id);
|
||||||
setDoc(generated);
|
setDoc(generated);
|
||||||
setContent(generated.content_md);
|
setContent(generated.content_md);
|
||||||
|
addTimelineEventCmd(id, "rca_generated", "RCA document generated").catch(() => {});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
setError(String(err));
|
setError(String(err));
|
||||||
} finally {
|
} finally {
|
||||||
@ -55,6 +57,7 @@ export default function RCA() {
|
|||||||
try {
|
try {
|
||||||
const path = await exportDocumentCmd(doc.id, doc.title, content, format, "");
|
const path = await exportDocumentCmd(doc.id, doc.title, content, format, "");
|
||||||
setError(`Document exported to: ${path}`);
|
setError(`Document exported to: ${path}`);
|
||||||
|
addTimelineEventCmd(id!, "document_exported", `RCA exported as ${format}`).catch(() => {});
|
||||||
setTimeout(() => setError(null), 5000);
|
setTimeout(() => setError(null), 5000);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
setError(`Export failed: ${String(err)}`);
|
setError(`Export failed: ${String(err)}`);
|
||||||
|
|||||||
@ -15,6 +15,7 @@ import {
|
|||||||
updateIssueCmd,
|
updateIssueCmd,
|
||||||
addFiveWhyCmd,
|
addFiveWhyCmd,
|
||||||
} from "@/lib/tauriCommands";
|
} from "@/lib/tauriCommands";
|
||||||
|
import { getDomainPrompt } from "@/lib/domainPrompts";
|
||||||
import type { TriageMessage } from "@/lib/tauriCommands";
|
import type { TriageMessage } from "@/lib/tauriCommands";
|
||||||
|
|
||||||
const CLOSE_PATTERNS = [
|
const CLOSE_PATTERNS = [
|
||||||
@ -167,7 +168,8 @@ export default function Triage() {
|
|||||||
setPendingFiles([]);
|
setPendingFiles([]);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await chatMessageCmd(id, aiMessage, provider);
|
const systemPrompt = currentIssue ? getDomainPrompt(currentIssue.category) : undefined;
|
||||||
|
const response = await chatMessageCmd(id, aiMessage, provider, systemPrompt);
|
||||||
const assistantMsg: TriageMessage = {
|
const assistantMsg: TriageMessage = {
|
||||||
id: `asst-${Date.now()}`,
|
id: `asst-${Date.now()}`,
|
||||||
issue_id: id,
|
issue_id: id,
|
||||||
|
|||||||
63
tests/unit/domainPrompts.test.ts
Normal file
63
tests/unit/domainPrompts.test.ts
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
import { describe, it, expect } from "vitest";
|
||||||
|
import { getDomainPrompt, DOMAINS, INCIDENT_RESPONSE_FRAMEWORK } from "@/lib/domainPrompts";
|
||||||
|
|
||||||
|
describe("Domain Prompts with Incident Response Framework", () => {
|
||||||
|
it("exports INCIDENT_RESPONSE_FRAMEWORK constant", () => {
|
||||||
|
expect(INCIDENT_RESPONSE_FRAMEWORK).toBeDefined();
|
||||||
|
expect(typeof INCIDENT_RESPONSE_FRAMEWORK).toBe("string");
|
||||||
|
expect(INCIDENT_RESPONSE_FRAMEWORK.length).toBeGreaterThan(100);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("framework contains all 5 phases", () => {
|
||||||
|
expect(INCIDENT_RESPONSE_FRAMEWORK).toContain("Phase 1: Detection & Evidence Gathering");
|
||||||
|
expect(INCIDENT_RESPONSE_FRAMEWORK).toContain("Phase 2: Diagnosis & Hypothesis Testing");
|
||||||
|
expect(INCIDENT_RESPONSE_FRAMEWORK).toContain("Phase 3: Root Cause Analysis with 5-Whys");
|
||||||
|
expect(INCIDENT_RESPONSE_FRAMEWORK).toContain("Phase 4: Resolution & Prevention");
|
||||||
|
expect(INCIDENT_RESPONSE_FRAMEWORK).toContain("Phase 5: Post-Incident Review");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("framework contains the 3-Fix Rule", () => {
|
||||||
|
expect(INCIDENT_RESPONSE_FRAMEWORK).toContain("3-Fix Rule");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("framework contains communication practices", () => {
|
||||||
|
expect(INCIDENT_RESPONSE_FRAMEWORK).toContain("Communication Practices");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("all defined domains include incident response methodology", () => {
|
||||||
|
for (const domain of DOMAINS) {
|
||||||
|
const prompt = getDomainPrompt(domain.id);
|
||||||
|
if (prompt) {
|
||||||
|
expect(prompt).toContain("INCIDENT RESPONSE METHODOLOGY");
|
||||||
|
expect(prompt).toContain("Phase 1:");
|
||||||
|
expect(prompt).toContain("Phase 5:");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it("returns empty string for unknown domain", () => {
|
||||||
|
expect(getDomainPrompt("nonexistent_domain")).toBe("");
|
||||||
|
expect(getDomainPrompt("")).toBe("");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("preserves existing Linux domain content", () => {
|
||||||
|
const prompt = getDomainPrompt("linux");
|
||||||
|
expect(prompt).toContain("senior Linux systems engineer");
|
||||||
|
expect(prompt).toContain("RHEL");
|
||||||
|
expect(prompt).toContain("INCIDENT RESPONSE METHODOLOGY");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("preserves existing Kubernetes domain content", () => {
|
||||||
|
const prompt = getDomainPrompt("kubernetes");
|
||||||
|
expect(prompt).toContain("Kubernetes platform engineer");
|
||||||
|
expect(prompt).toContain("k3s");
|
||||||
|
expect(prompt).toContain("INCIDENT RESPONSE METHODOLOGY");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("preserves existing Network domain content", () => {
|
||||||
|
const prompt = getDomainPrompt("network");
|
||||||
|
expect(prompt).toContain("network engineer");
|
||||||
|
expect(prompt).toContain("Fortigate");
|
||||||
|
expect(prompt).toContain("INCIDENT RESPONSE METHODOLOGY");
|
||||||
|
});
|
||||||
|
});
|
||||||
@ -35,6 +35,7 @@ const mockIssueDetail = {
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
conversations: [],
|
conversations: [],
|
||||||
|
timeline_events: [],
|
||||||
};
|
};
|
||||||
|
|
||||||
describe("Resolution Page", () => {
|
describe("Resolution Page", () => {
|
||||||
|
|||||||
54
tests/unit/timelineEvents.test.ts
Normal file
54
tests/unit/timelineEvents.test.ts
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||||
|
import { invoke } from "@tauri-apps/api/core";
|
||||||
|
|
||||||
|
const mockInvoke = vi.mocked(invoke);
|
||||||
|
|
||||||
|
describe("Timeline Event Commands", () => {
|
||||||
|
beforeEach(() => {
|
||||||
|
mockInvoke.mockReset();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("addTimelineEventCmd calls invoke with correct params", async () => {
|
||||||
|
const mockEvent = {
|
||||||
|
id: "te-1",
|
||||||
|
issue_id: "issue-1",
|
||||||
|
event_type: "triage_started",
|
||||||
|
description: "Started",
|
||||||
|
metadata: "{}",
|
||||||
|
created_at: "2025-01-15 10:00:00 UTC",
|
||||||
|
};
|
||||||
|
mockInvoke.mockResolvedValueOnce(mockEvent as never);
|
||||||
|
|
||||||
|
const { addTimelineEventCmd } = await import("@/lib/tauriCommands");
|
||||||
|
const result = await addTimelineEventCmd("issue-1", "triage_started", "Started");
|
||||||
|
expect(mockInvoke).toHaveBeenCalledWith("add_timeline_event", {
|
||||||
|
issueId: "issue-1",
|
||||||
|
eventType: "triage_started",
|
||||||
|
description: "Started",
|
||||||
|
metadata: null,
|
||||||
|
});
|
||||||
|
expect(result).toEqual(mockEvent);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("addTimelineEventCmd passes metadata when provided", async () => {
|
||||||
|
mockInvoke.mockResolvedValueOnce({} as never);
|
||||||
|
|
||||||
|
const { addTimelineEventCmd } = await import("@/lib/tauriCommands");
|
||||||
|
await addTimelineEventCmd("issue-1", "log_uploaded", "File uploaded", '{"file":"app.log"}');
|
||||||
|
expect(mockInvoke).toHaveBeenCalledWith("add_timeline_event", {
|
||||||
|
issueId: "issue-1",
|
||||||
|
eventType: "log_uploaded",
|
||||||
|
description: "File uploaded",
|
||||||
|
metadata: '{"file":"app.log"}',
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it("getTimelineEventsCmd calls invoke with correct params", async () => {
|
||||||
|
mockInvoke.mockResolvedValueOnce([] as never);
|
||||||
|
|
||||||
|
const { getTimelineEventsCmd } = await import("@/lib/tauriCommands");
|
||||||
|
const result = await getTimelineEventsCmd("issue-1");
|
||||||
|
expect(mockInvoke).toHaveBeenCalledWith("get_timeline_events", { issueId: "issue-1" });
|
||||||
|
expect(result).toEqual([]);
|
||||||
|
});
|
||||||
|
});
|
||||||
Loading…
Reference in New Issue
Block a user