Merge pull request 'feat(ai): add devops-incident-responder agent with domain auto-detection' (#52) from feature/agent-system into master
Some checks failed
Auto Tag / autotag (push) Successful in 7s
Auto Tag / wiki-sync (push) Successful in 7s
Test / rust-fmt-check (push) Has been cancelled
Test / rust-tests (push) Has been cancelled
Test / rust-clippy (push) Has been cancelled
Test / frontend-tests (push) Has been cancelled
Test / frontend-typecheck (push) Has been cancelled
Auto Tag / changelog (push) Successful in 1m4s
Auto Tag / build-macos-arm64 (push) Successful in 3m14s
Auto Tag / build-linux-amd64 (push) Successful in 9m32s
Auto Tag / build-linux-arm64 (push) Successful in 11m50s
Auto Tag / build-windows-amd64 (push) Successful in 11m59s
Some checks failed
Auto Tag / autotag (push) Successful in 7s
Auto Tag / wiki-sync (push) Successful in 7s
Test / rust-fmt-check (push) Has been cancelled
Test / rust-tests (push) Has been cancelled
Test / rust-clippy (push) Has been cancelled
Test / frontend-tests (push) Has been cancelled
Test / frontend-typecheck (push) Has been cancelled
Auto Tag / changelog (push) Successful in 1m4s
Auto Tag / build-macos-arm64 (push) Successful in 3m14s
Auto Tag / build-linux-amd64 (push) Successful in 9m32s
Auto Tag / build-linux-arm64 (push) Successful in 11m50s
Auto Tag / build-windows-amd64 (push) Successful in 11m59s
Reviewed-on: #52
This commit is contained in:
commit
ffb8d15187
@ -103,8 +103,16 @@ jobs:
|
|||||||
set -eu
|
set -eu
|
||||||
git-cliff --config cliff.toml --output CHANGELOG.md
|
git-cliff --config cliff.toml --output CHANGELOG.md
|
||||||
git-cliff --config cliff.toml --latest --strip all > /tmp/release_body.md
|
git-cliff --config cliff.toml --latest --strip all > /tmp/release_body.md
|
||||||
echo "=== Release body preview ==="
|
echo "=== Release body preview (from CHANGELOG) ==="
|
||||||
cat /tmp/release_body.md
|
cat /tmp/release_body.md
|
||||||
|
# If release body is empty, generate from git commits
|
||||||
|
if [ ! -s /tmp/release_body.md ]; then
|
||||||
|
echo "=== Release body is empty, generating from git commits ==="
|
||||||
|
LAST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0.0")
|
||||||
|
git log "${LAST_TAG}..HEAD" --pretty=format:"- %s" > /tmp/release_body.md || true
|
||||||
|
echo "=== Release body preview (from commits) ==="
|
||||||
|
cat /tmp/release_body.md
|
||||||
|
fi
|
||||||
|
|
||||||
- name: Update Gitea release body
|
- name: Update Gitea release body
|
||||||
env:
|
env:
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"name": "tftsr",
|
"name": "tftsr",
|
||||||
"private": true,
|
"private": true,
|
||||||
"version": "0.2.62",
|
"version": "0.2.68",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"dev": "vite",
|
"dev": "vite",
|
||||||
|
|||||||
@ -101,11 +101,25 @@ function updateTOML(path, version) {
|
|||||||
console.log(`✓ Updated ${path} to ${version}`);
|
console.log(`✓ Updated ${path} to ${version}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Set the top-level `version` field of a JSON file located under the project root.
 *
 * The file is parsed, the field is overwritten, and the document is written back
 * serialized with 2-space indentation plus a trailing newline (so the original
 * formatting of the file is not preserved).
 *
 * @param {string} path - File path relative to `projectRoot`.
 * @param {string} version - Version string to store.
 * @throws {Error} When the resolved file does not exist.
 */
function updateJSON(path, version) {
  const target = resolve(projectRoot, path);
  if (!existsSync(target)) {
    throw new Error(`File not found: ${target}`);
  }

  const parsed = JSON.parse(readFileSync(target, 'utf-8'));
  parsed.version = version;

  const serialized = `${JSON.stringify(parsed, null, 2)}\n`;
  writeFileSync(target, serialized, 'utf-8');
  console.log(`✓ Updated ${path} to ${version}`);
}
|
||||||
|
|
||||||
const version = getVersionFromGit();
|
const version = getVersionFromGit();
|
||||||
console.log(`Setting version to: ${version}`);
|
console.log(`Setting version to: ${version}`);
|
||||||
|
|
||||||
updatePackageJson(version);
|
updatePackageJson(version);
|
||||||
updateTOML('src-tauri/Cargo.toml', version);
|
updateTOML('src-tauri/Cargo.toml', version);
|
||||||
updateTOML('src-tauri/tauri.conf.json', version);
|
updateJSON('src-tauri/tauri.conf.json', version);
|
||||||
|
|
||||||
console.log(`✓ All version fields updated to ${version}`);
|
console.log(`✓ All version fields updated to ${version}`);
|
||||||
|
|||||||
2
src-tauri/Cargo.lock
generated
2
src-tauri/Cargo.lock
generated
@ -6139,7 +6139,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "trcaa"
|
name = "trcaa"
|
||||||
version = "0.2.62"
|
version = "0.2.68"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aes-gcm",
|
"aes-gcm",
|
||||||
"aho-corasick",
|
"aho-corasick",
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "trcaa"
|
name = "trcaa"
|
||||||
version = "0.2.62"
|
version = "0.2.68"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[lib]
|
[lib]
|
||||||
@ -57,3 +57,5 @@ strip = true
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
232
src-tauri/src/ai/agents.rs
Normal file
232
src-tauri/src/ai/agents.rs
Normal file
@ -0,0 +1,232 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use crate::ai::Message;
|
||||||
|
|
||||||
|
/// A named AI agent definition: identity, prompt text and optional settings.
#[derive(Clone)]
pub struct Agent {
    /// Unique agent name; `AgentRegistry::add_agent` uses it as the lookup key.
    pub name: String,
    /// Short human-readable summary of what the agent is for.
    pub description: String,
    /// Full prompt text for the agent.
    pub system_prompt: String,
    /// Tool identifiers associated with the agent.
    /// NOTE(review): how these are interpreted is not visible here — confirm with callers.
    pub tools: Vec<String>,
    /// Optional model identifier; `None` when the agent does not name one.
    /// NOTE(review): presumably a per-agent model override — confirm.
    pub model: Option<String>,
    /// Numeric priority.
    /// NOTE(review): ordering semantics (higher vs. lower wins) are not shown here — confirm.
    pub priority: u32,
}
|
||||||
|
|
||||||
|
pub struct AgentRegistry {
|
||||||
|
agents: HashMap<String, Agent>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for AgentRegistry {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AgentRegistry {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
AgentRegistry {
|
||||||
|
agents: HashMap::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add_agent(&mut self, agent: Agent) {
|
||||||
|
self.agents.insert(agent.name.clone(), agent);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get(&self, name: &str) -> Option<&Agent> {
|
||||||
|
self.agents.get(name)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_all(&self) -> Vec<&Agent> {
|
||||||
|
self.agents.values().collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn has_agent(&self, name: &str) -> bool {
|
||||||
|
self.agents.contains_key(name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn create_agent_registry() -> AgentRegistry {
|
||||||
|
let mut registry = AgentRegistry::new();
|
||||||
|
|
||||||
|
let devops_agent = include_str!("agents/devops_incident_responder.md");
|
||||||
|
registry.add_agent(Agent {
|
||||||
|
name: "devops-incident-responder".to_string(),
|
||||||
|
description: "Production incident response, diagnosis, and postmortems".to_string(),
|
||||||
|
system_prompt: devops_agent.to_string(),
|
||||||
|
tools: vec![],
|
||||||
|
model: None,
|
||||||
|
priority: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
registry
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn load_agent(name: &str) -> Option<Agent> {
|
||||||
|
let registry = create_agent_registry();
|
||||||
|
registry.get(name).cloned()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn detect_domain(messages: &[Message]) -> String {
|
||||||
|
let combined_text = messages
|
||||||
|
.iter()
|
||||||
|
.map(|m| m.content.as_str())
|
||||||
|
.collect::<Vec<&str>>()
|
||||||
|
.join(" ");
|
||||||
|
|
||||||
|
let combined_lower = combined_text.to_lowercase();
|
||||||
|
|
||||||
|
let domain_keywords: &[(&str, &[&str])] = &[
|
||||||
|
(
|
||||||
|
"linux",
|
||||||
|
&[
|
||||||
|
"linux", "ubuntu", "debian", "rhel", "centos", "systemd", "kernel", "selinux",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"windows",
|
||||||
|
&[
|
||||||
|
"windows",
|
||||||
|
"windows server",
|
||||||
|
"ad",
|
||||||
|
"active directory",
|
||||||
|
"iis",
|
||||||
|
"gpo",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"network",
|
||||||
|
&[
|
||||||
|
"network",
|
||||||
|
"firewall",
|
||||||
|
"router",
|
||||||
|
"switch",
|
||||||
|
"fortigate",
|
||||||
|
"cisco",
|
||||||
|
"aruba",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"kubernetes",
|
||||||
|
&[
|
||||||
|
"kubernetes",
|
||||||
|
"k8s",
|
||||||
|
"k3s",
|
||||||
|
"helm",
|
||||||
|
"pod",
|
||||||
|
"deployment",
|
||||||
|
"namespace",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"databases",
|
||||||
|
&[
|
||||||
|
"database",
|
||||||
|
"postgresql",
|
||||||
|
"mysql",
|
||||||
|
"redis",
|
||||||
|
"rabbitmq",
|
||||||
|
"sql",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"virtualization",
|
||||||
|
&[
|
||||||
|
"vm",
|
||||||
|
"virtual machine",
|
||||||
|
"vmware",
|
||||||
|
"proxmox",
|
||||||
|
"hyper-v",
|
||||||
|
"kvm",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"hardware",
|
||||||
|
&["hardware", "disk", "raid", "memory", "cpu", "motherboard"],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"observability",
|
||||||
|
&[
|
||||||
|
"monitoring",
|
||||||
|
"grafana",
|
||||||
|
"prometheus",
|
||||||
|
"kibana",
|
||||||
|
"logging",
|
||||||
|
"metrics",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"telephony",
|
||||||
|
&["voip", "sip", "asterisk", "pbx", "telephony", "sbc"],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"security",
|
||||||
|
&[
|
||||||
|
"security",
|
||||||
|
"vault",
|
||||||
|
"encryption",
|
||||||
|
"certificate",
|
||||||
|
"tls",
|
||||||
|
"ssl",
|
||||||
|
"firewall",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"public_safety",
|
||||||
|
&["911", "ng911", "nena", "psap", "cad", "dispatch"],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"application",
|
||||||
|
&["java", "spring", "tomcat", "jvm", "application", "app"],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"automation",
|
||||||
|
&[
|
||||||
|
"ansible",
|
||||||
|
"jenkins",
|
||||||
|
"ci/cd",
|
||||||
|
"automation",
|
||||||
|
"pipeline",
|
||||||
|
"terraform",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"hpe_infra",
|
||||||
|
&["hpe", "oneview", "ilo", "synergy", "dl360", "dl320"],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"dell_hardware",
|
||||||
|
&["dell", "idrac", "poweredge", "perc", "lifecycle controller"],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"identity",
|
||||||
|
&[
|
||||||
|
"identity", "keycloak", "boundary", "sso", "ldap", "ad", "auth",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
];
|
||||||
|
|
||||||
|
let mut scores: std::collections::HashMap<String, u32> = std::collections::HashMap::new();
|
||||||
|
|
||||||
|
for (domain, keywords) in domain_keywords {
|
||||||
|
let mut score = 0;
|
||||||
|
for keyword in *keywords {
|
||||||
|
if combined_lower.contains(keyword) {
|
||||||
|
score += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if score > 0 {
|
||||||
|
scores.insert(domain.to_string(), score);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if scores.is_empty() {
|
||||||
|
return "general".to_string();
|
||||||
|
}
|
||||||
|
|
||||||
|
scores
|
||||||
|
.iter()
|
||||||
|
.max_by_key(|(_, score)| *score)
|
||||||
|
.map(|(domain, _)| domain.clone())
|
||||||
|
.unwrap_or_else(|| "general".to_string())
|
||||||
|
}
|
||||||
280
src-tauri/src/ai/agents/devops_incident_responder.md
Normal file
280
src-tauri/src/ai/agents/devops_incident_responder.md
Normal file
@ -0,0 +1,280 @@
|
|||||||
|
You are a senior DevOps incident responder with expertise in managing critical production incidents, performing rapid diagnostics, and implementing permanent fixes. Your focus spans incident detection, response coordination, root cause analysis, and continuous improvement with emphasis on reducing MTTR and building resilient systems.
|
||||||
|
|
||||||
|
|
||||||
|
When invoked:
|
||||||
|
1. Query context manager for system architecture and incident history
|
||||||
|
2. Review monitoring setup, alerting rules, and response procedures
|
||||||
|
3. Analyze incident patterns, response times, and resolution effectiveness
|
||||||
|
4. Implement solutions improving detection, response, and prevention
|
||||||
|
|
||||||
|
Incident response checklist:
|
||||||
|
- MTTD < 5 minutes achieved
|
||||||
|
- MTTA < 5 minutes maintained
|
||||||
|
- MTTR < 30 minutes sustained
|
||||||
|
- Postmortem within 48 hours completed
|
||||||
|
- Action items tracked systematically
|
||||||
|
- Runbook coverage > 80% verified
|
||||||
|
- On-call rotation automated fully
|
||||||
|
- Learning culture established
|
||||||
|
|
||||||
|
Incident detection:
|
||||||
|
- Monitoring strategy
|
||||||
|
- Alert configuration
|
||||||
|
- Anomaly detection
|
||||||
|
- Synthetic monitoring
|
||||||
|
- User reports
|
||||||
|
- Log correlation
|
||||||
|
- Metric analysis
|
||||||
|
- Pattern recognition
|
||||||
|
|
||||||
|
Rapid diagnosis:
|
||||||
|
- Triage procedures
|
||||||
|
- Impact assessment
|
||||||
|
- Service dependencies
|
||||||
|
- Performance metrics
|
||||||
|
- Log analysis
|
||||||
|
- Distributed tracing
|
||||||
|
- Database queries
|
||||||
|
- Network diagnostics
|
||||||
|
|
||||||
|
Response coordination:
|
||||||
|
- Incident commander
|
||||||
|
- Communication channels
|
||||||
|
- Stakeholder updates
|
||||||
|
- War room setup
|
||||||
|
- Task delegation
|
||||||
|
- Progress tracking
|
||||||
|
- Decision making
|
||||||
|
- External communication
|
||||||
|
|
||||||
|
Emergency procedures:
|
||||||
|
- Rollback strategies
|
||||||
|
- Circuit breakers
|
||||||
|
- Traffic rerouting
|
||||||
|
- Cache clearing
|
||||||
|
- Service restarts
|
||||||
|
- Database failover
|
||||||
|
- Feature disabling
|
||||||
|
- Emergency scaling
|
||||||
|
|
||||||
|
Root cause analysis:
|
||||||
|
- Timeline construction
|
||||||
|
- Data collection
|
||||||
|
- Hypothesis testing
|
||||||
|
- Five whys analysis
|
||||||
|
- Correlation analysis
|
||||||
|
- Reproduction attempts
|
||||||
|
- Evidence documentation
|
||||||
|
- Prevention planning
|
||||||
|
|
||||||
|
Automation development:
|
||||||
|
- Auto-remediation scripts
|
||||||
|
- Health check automation
|
||||||
|
- Rollback triggers
|
||||||
|
- Scaling automation
|
||||||
|
- Alert correlation
|
||||||
|
- Runbook automation
|
||||||
|
- Recovery procedures
|
||||||
|
- Validation scripts
|
||||||
|
|
||||||
|
Communication management:
|
||||||
|
- Status page updates
|
||||||
|
- Customer notifications
|
||||||
|
- Internal updates
|
||||||
|
- Executive briefings
|
||||||
|
- Technical details
|
||||||
|
- Timeline tracking
|
||||||
|
- Impact statements
|
||||||
|
- Resolution updates
|
||||||
|
|
||||||
|
Postmortem process:
|
||||||
|
- Blameless culture
|
||||||
|
- Timeline creation
|
||||||
|
- Impact analysis
|
||||||
|
- Root cause identification
|
||||||
|
- Action item definition
|
||||||
|
- Learning extraction
|
||||||
|
- Process improvement
|
||||||
|
- Knowledge sharing
|
||||||
|
|
||||||
|
Monitoring enhancement:
|
||||||
|
- Coverage gaps
|
||||||
|
- Alert tuning
|
||||||
|
- Dashboard improvement
|
||||||
|
- SLI/SLO refinement
|
||||||
|
- Custom metrics
|
||||||
|
- Correlation rules
|
||||||
|
- Predictive alerts
|
||||||
|
- Capacity planning
|
||||||
|
|
||||||
|
Tool mastery:
|
||||||
|
- APM platforms
|
||||||
|
- Log aggregators
|
||||||
|
- Metric systems
|
||||||
|
- Tracing tools
|
||||||
|
- Alert managers
|
||||||
|
- Communication tools
|
||||||
|
- Automation platforms
|
||||||
|
- Documentation systems
|
||||||
|
|
||||||
|
## Communication Protocol
|
||||||
|
|
||||||
|
### Incident Assessment
|
||||||
|
|
||||||
|
Initialize incident response by understanding system state.
|
||||||
|
|
||||||
|
Incident context query:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"requesting_agent": "devops-incident-responder",
|
||||||
|
"request_type": "get_incident_context",
|
||||||
|
"payload": {
|
||||||
|
"query": "Incident context needed: system architecture, current alerts, recent changes, monitoring coverage, team structure, and historical incidents."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Development Workflow
|
||||||
|
|
||||||
|
Execute incident response through systematic phases:
|
||||||
|
|
||||||
|
### 1. Preparedness Analysis
|
||||||
|
|
||||||
|
Assess incident readiness and identify gaps.
|
||||||
|
|
||||||
|
Analysis priorities:
|
||||||
|
- Monitoring coverage review
|
||||||
|
- Alert quality assessment
|
||||||
|
- Runbook availability
|
||||||
|
- Team readiness
|
||||||
|
- Tool accessibility
|
||||||
|
- Communication plans
|
||||||
|
- Escalation paths
|
||||||
|
- Recovery procedures
|
||||||
|
|
||||||
|
Response evaluation:
|
||||||
|
- Historical incident review
|
||||||
|
- MTTR analysis
|
||||||
|
- Pattern identification
|
||||||
|
- Tool effectiveness
|
||||||
|
- Team performance
|
||||||
|
- Communication gaps
|
||||||
|
- Automation opportunities
|
||||||
|
- Process improvements
|
||||||
|
|
||||||
|
### 2. Implementation Phase
|
||||||
|
|
||||||
|
Build comprehensive incident response capabilities.
|
||||||
|
|
||||||
|
Implementation approach:
|
||||||
|
- Enhance monitoring coverage
|
||||||
|
- Optimize alert rules
|
||||||
|
- Create runbooks
|
||||||
|
- Automate responses
|
||||||
|
- Improve communication
|
||||||
|
- Train responders
|
||||||
|
- Test procedures
|
||||||
|
- Measure effectiveness
|
||||||
|
|
||||||
|
Response patterns:
|
||||||
|
- Detect quickly
|
||||||
|
- Assess impact
|
||||||
|
- Communicate clearly
|
||||||
|
- Diagnose systematically
|
||||||
|
- Fix permanently
|
||||||
|
- Document thoroughly
|
||||||
|
- Learn continuously
|
||||||
|
- Prevent recurrence
|
||||||
|
|
||||||
|
Progress tracking:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"agent": "devops-incident-responder",
|
||||||
|
"status": "improving",
|
||||||
|
"progress": {
|
||||||
|
"mttr": "28min",
|
||||||
|
"runbook_coverage": "85%",
|
||||||
|
"auto_remediation": "42%",
|
||||||
|
"team_confidence": "4.3/5"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Response Excellence
|
||||||
|
|
||||||
|
Achieve world-class incident management.
|
||||||
|
|
||||||
|
Excellence checklist:
|
||||||
|
- Detection automated
|
||||||
|
- Response streamlined
|
||||||
|
- Communication clear
|
||||||
|
- Resolution permanent
|
||||||
|
- Learning captured
|
||||||
|
- Prevention implemented
|
||||||
|
- Team confident
|
||||||
|
- Metrics improved
|
||||||
|
|
||||||
|
Delivery notification:
|
||||||
|
"Incident response system completed. Reduced MTTR from 2 hours to 28 minutes, achieved 85% runbook coverage, and implemented 42% auto-remediation. Established 24/7 on-call rotation, comprehensive monitoring, and blameless postmortem culture."
|
||||||
|
|
||||||
|
On-call management:
|
||||||
|
- Rotation schedules
|
||||||
|
- Escalation policies
|
||||||
|
- Handoff procedures
|
||||||
|
- Documentation access
|
||||||
|
- Tool availability
|
||||||
|
- Training programs
|
||||||
|
- Compensation models
|
||||||
|
- Well-being support
|
||||||
|
|
||||||
|
Chaos engineering:
|
||||||
|
- Failure injection
|
||||||
|
- Game day exercises
|
||||||
|
- Hypothesis testing
|
||||||
|
- Blast radius control
|
||||||
|
- Recovery validation
|
||||||
|
- Learning capture
|
||||||
|
- Tool selection
|
||||||
|
- Safety mechanisms
|
||||||
|
|
||||||
|
Runbook development:
|
||||||
|
- Standardized format
|
||||||
|
- Step-by-step procedures
|
||||||
|
- Decision trees
|
||||||
|
- Verification steps
|
||||||
|
- Rollback procedures
|
||||||
|
- Contact information
|
||||||
|
- Tool commands
|
||||||
|
- Success criteria
|
||||||
|
|
||||||
|
Alert optimization:
|
||||||
|
- Signal-to-noise ratio
|
||||||
|
- Alert fatigue reduction
|
||||||
|
- Correlation rules
|
||||||
|
- Suppression logic
|
||||||
|
- Priority assignment
|
||||||
|
- Routing rules
|
||||||
|
- Escalation timing
|
||||||
|
- Documentation links
|
||||||
|
|
||||||
|
Knowledge management:
|
||||||
|
- Incident database
|
||||||
|
- Solution library
|
||||||
|
- Pattern recognition
|
||||||
|
- Trend analysis
|
||||||
|
- Team training
|
||||||
|
- Documentation updates
|
||||||
|
- Best practices
|
||||||
|
- Lessons learned
|
||||||
|
|
||||||
|
Integration with other agents:
|
||||||
|
- Collaborate with sre-engineer on reliability
|
||||||
|
- Support devops-engineer on monitoring
|
||||||
|
- Work with cloud-architect on resilience
|
||||||
|
- Guide deployment-engineer on rollbacks
|
||||||
|
- Help security-engineer on security incidents
|
||||||
|
- Assist platform-engineer on platform stability
|
||||||
|
- Partner with network-engineer on network issues
|
||||||
|
- Coordinate with database-administrator on data incidents
|
||||||
|
|
||||||
|
Always prioritize rapid resolution, clear communication, and continuous learning while building systems that fail gracefully and recover automatically.
|
||||||
@ -1,3 +1,4 @@
|
|||||||
|
pub mod agents;
|
||||||
pub mod anthropic;
|
pub mod anthropic;
|
||||||
pub mod gemini;
|
pub mod gemini;
|
||||||
pub mod mistral;
|
pub mod mistral;
|
||||||
@ -9,6 +10,8 @@ pub mod tools;
|
|||||||
pub use provider::*;
|
pub use provider::*;
|
||||||
pub use tools::*;
|
pub use tools::*;
|
||||||
|
|
||||||
|
pub use agents::{create_agent_registry, detect_domain, load_agent, Agent, AgentRegistry};
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
|||||||
@ -2,6 +2,7 @@ use rusqlite::OptionalExtension;
|
|||||||
use tauri::{Manager, State};
|
use tauri::{Manager, State};
|
||||||
use tracing::warn;
|
use tracing::warn;
|
||||||
|
|
||||||
|
use crate::ai::agents::create_agent_registry;
|
||||||
use crate::ai::provider::create_provider;
|
use crate::ai::provider::create_provider;
|
||||||
use crate::ai::{AnalysisResult, ChatResponse, Message, ProviderInfo};
|
use crate::ai::{AnalysisResult, ChatResponse, Message, ProviderInfo};
|
||||||
use crate::db::models::{AiConversation, AiMessage, AuditEntry};
|
use crate::db::models::{AiConversation, AiMessage, AuditEntry};
|
||||||
@ -233,8 +234,22 @@ pub async fn chat_message(
|
|||||||
// Search integration sources for relevant context
|
// Search integration sources for relevant context
|
||||||
let integration_context = search_integration_sources(&message, &app_handle, &state).await;
|
let integration_context = search_integration_sources(&message, &app_handle, &state).await;
|
||||||
|
|
||||||
|
// Load agent system
|
||||||
|
let agent_registry = create_agent_registry();
|
||||||
|
let devops_agent = agent_registry.get("devops-incident-responder");
|
||||||
|
|
||||||
let mut messages = Vec::new();
|
let mut messages = Vec::new();
|
||||||
|
|
||||||
|
// Inject devops-incident-responder as primary system prompt (always)
|
||||||
|
if let Some(agent) = devops_agent {
|
||||||
|
messages.push(Message {
|
||||||
|
role: "system".into(),
|
||||||
|
content: agent.system_prompt.clone(),
|
||||||
|
tool_call_id: None,
|
||||||
|
tool_calls: None,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Inject domain system prompt if provided
|
// Inject domain system prompt if provided
|
||||||
if let Some(ref prompt) = system_prompt {
|
if let Some(ref prompt) = system_prompt {
|
||||||
if !prompt.is_empty() {
|
if !prompt.is_empty() {
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"productName": "Troubleshooting and RCA Assistant",
|
"productName": "Troubleshooting and RCA Assistant",
|
||||||
"version": "0.2.50",
|
"version": "0.2.68",
|
||||||
"identifier": "com.trcaa.app",
|
"identifier": "com.trcaa.app",
|
||||||
"build": {
|
"build": {
|
||||||
"frontendDist": "../dist",
|
"frontendDist": "../dist",
|
||||||
@ -26,7 +26,11 @@
|
|||||||
},
|
},
|
||||||
"bundle": {
|
"bundle": {
|
||||||
"active": true,
|
"active": true,
|
||||||
"targets": ["deb", "rpm", "nsis"],
|
"targets": [
|
||||||
|
"deb",
|
||||||
|
"rpm",
|
||||||
|
"nsis"
|
||||||
|
],
|
||||||
"icon": [
|
"icon": [
|
||||||
"icons/32x32.png",
|
"icons/32x32.png",
|
||||||
"icons/128x128.png",
|
"icons/128x128.png",
|
||||||
@ -42,6 +46,3 @@
|
|||||||
"longDescription": "Structured AI-backed assistant for IT troubleshooting, 5-whys root cause analysis, and post-mortem documentation with offline Ollama support."
|
"longDescription": "Structured AI-backed assistant for IT troubleshooting, 5-whys root cause analysis, and post-mortem documentation with offline Ollama support."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -145,11 +145,9 @@ export default function App() {
|
|||||||
|
|
||||||
{/* Version + Theme toggle */}
|
{/* Version + Theme toggle */}
|
||||||
<div className="px-4 py-3 border-t flex items-center justify-between">
|
<div className="px-4 py-3 border-t flex items-center justify-between">
|
||||||
{!collapsed && (
|
|
||||||
<span className="text-xs text-muted-foreground">
|
<span className="text-xs text-muted-foreground">
|
||||||
{appVersion ? `v${appVersion}` : ""}
|
{appVersion ? `v${appVersion}` : ""}
|
||||||
</span>
|
</span>
|
||||||
)}
|
|
||||||
<button
|
<button
|
||||||
onClick={() => setTheme(theme === "dark" ? "light" : "dark")}
|
onClick={() => setTheme(theme === "dark" ? "light" : "dark")}
|
||||||
className="p-1 rounded hover:bg-accent text-muted-foreground"
|
className="p-1 rounded hover:bg-accent text-muted-foreground"
|
||||||
|
|||||||
@ -386,3 +386,48 @@ export function getDomainPrompt(domainId: string): string {
|
|||||||
if (!domainSpecific) return "";
|
if (!domainSpecific) return "";
|
||||||
return domainSpecific + INCIDENT_RESPONSE_FRAMEWORK;
|
return domainSpecific + INCIDENT_RESPONSE_FRAMEWORK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Domain → keyword table used by {@link detectDomain}.
 *
 * Declaration order is significant: when several domains reach the same score,
 * the one listed first wins. Keywords are lowercase because they are matched
 * against lowercased text.
 */
const DOMAIN_DETECTION_KEYWORDS: ReadonlyArray<readonly [string, readonly string[]]> = [
  ["linux", ["linux", "ubuntu", "debian", "rhel", "centos", "systemd", "kernel", "selinux"]],
  ["windows", ["windows", "windows server", "ad", "active directory", "iis", "gpo"]],
  ["network", ["network", "firewall", "router", "switch", "fortigate", "cisco", "aruba"]],
  ["kubernetes", ["kubernetes", "k8s", "k3s", "helm", "pod", "deployment", "namespace"]],
  ["databases", ["database", "postgresql", "mysql", "redis", "rabbitmq", "sql"]],
  ["virtualization", ["vm", "virtual machine", "vmware", "proxmox", "hyper-v", "kvm"]],
  ["hardware", ["hardware", "disk", "raid", "memory", "cpu", "motherboard"]],
  ["observability", ["monitoring", "grafana", "prometheus", "kibana", "logging", "metrics"]],
  ["telephony", ["voip", "sip", "asterisk", "pbx", "telephony", "sbc"]],
  ["security", ["security", "vault", "encryption", "certificate", "tls", "ssl", "firewall"]],
  ["public_safety", ["911", "ng911", "nena", "psap", "cad", "dispatch"]],
  ["application", ["java", "spring", "tomcat", "jvm", "application", "app"]],
  ["automation", ["ansible", "jenkins", "ci/cd", "automation", "pipeline", "terraform"]],
  ["hpe_infra", ["hpe", "oneview", "ilo", "synergy", "dl360", "dl320"]],
  ["dell_hardware", ["dell", "idrac", "poweredge", "perc", "lifecycle controller"]],
  ["identity", ["identity", "keycloak", "boundary", "sso", "ldap", "ad", "auth"]],
];

/** Escapes regular-expression metacharacters so `text` is matched literally. */
function escapeDomainKeyword(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Whole-term matchers compiled once at module load.
 *
 * A keyword only counts when it is not glued to an ASCII letter or digit on
 * either side, so "ad" no longer matches inside "load" or "upload". A single
 * trailing plural "s" is tolerated ("pods" counts for "pod").
 */
const DOMAIN_DETECTION_MATCHERS: ReadonlyArray<readonly [string, readonly RegExp[]]> =
  DOMAIN_DETECTION_KEYWORDS.map(
    ([domain, keywords]): readonly [string, readonly RegExp[]] => [
      domain,
      keywords.map(
        (keyword) => new RegExp(`(?<![a-z0-9])${escapeDomainKeyword(keyword)}s?(?![a-z0-9])`),
      ),
    ],
  );

/**
 * Guesses the technical domain a conversation is about.
 *
 * All messages are joined with spaces and lowercased, then each domain scores
 * one point per keyword that appears as a whole term.
 *
 * @param messages - Message contents to inspect.
 * @returns The highest-scoring domain id; ties go to the domain declared first.
 *   Returns `"general"` when nothing matches (including for an empty array).
 */
export function detectDomain(messages: readonly string[]): string {
  const combinedLower = messages.join(" ").toLowerCase();

  let bestDomain = "general";
  let bestScore = 0;

  for (const [domain, matchers] of DOMAIN_DETECTION_MATCHERS) {
    const score = matchers.filter((matcher) => matcher.test(combinedLower)).length;
    // Strictly-greater comparison keeps the earliest domain on a tie.
    if (score > bestScore) {
      bestDomain = domain;
      bestScore = score;
    }
  }

  return bestDomain;
}
|
||||||
|
|||||||
@ -15,7 +15,7 @@ import {
|
|||||||
updateIssueCmd,
|
updateIssueCmd,
|
||||||
addFiveWhyCmd,
|
addFiveWhyCmd,
|
||||||
} from "@/lib/tauriCommands";
|
} from "@/lib/tauriCommands";
|
||||||
import { getDomainPrompt } from "@/lib/domainPrompts";
|
import { getDomainPrompt, detectDomain } from "@/lib/domainPrompts";
|
||||||
import type { TriageMessage } from "@/lib/tauriCommands";
|
import type { TriageMessage } from "@/lib/tauriCommands";
|
||||||
|
|
||||||
const CLOSE_PATTERNS = [
|
const CLOSE_PATTERNS = [
|
||||||
@ -46,7 +46,7 @@ export default function Triage() {
|
|||||||
const lastUserMsgRef = useRef<string>("");
|
const lastUserMsgRef = useRef<string>("");
|
||||||
const initialized = useRef(false);
|
const initialized = useRef(false);
|
||||||
|
|
||||||
const { currentIssue, messages, currentWhyLevel, startSession, addMessage, setWhyLevel } =
|
const { currentIssue, messages, currentWhyLevel, activeDomain, startSession, addMessage, setWhyLevel, setActiveDomain } =
|
||||||
useSessionStore();
|
useSessionStore();
|
||||||
const { getActiveProvider } = useSettingsStore();
|
const { getActiveProvider } = useSettingsStore();
|
||||||
|
|
||||||
@ -57,6 +57,7 @@ export default function Triage() {
|
|||||||
Promise.all([getIssueCmd(id), getIssueMessagesCmd(id)])
|
Promise.all([getIssueCmd(id), getIssueMessagesCmd(id)])
|
||||||
.then(([detail, pastMessages]) => {
|
.then(([detail, pastMessages]) => {
|
||||||
startSession(detail.issue);
|
startSession(detail.issue);
|
||||||
|
setActiveDomain(detail.issue.category);
|
||||||
|
|
||||||
if (pastMessages.length > 0) {
|
if (pastMessages.length > 0) {
|
||||||
// Restore conversation history from DB
|
// Restore conversation history from DB
|
||||||
@ -168,7 +169,17 @@ export default function Triage() {
|
|||||||
setPendingFiles([]);
|
setPendingFiles([]);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const systemPrompt = currentIssue ? getDomainPrompt(currentIssue.category) : undefined;
|
// Detect domain from conversation messages
|
||||||
|
const messageContents = messages.map((m) => m.content);
|
||||||
|
const detectedDomain = detectDomain(messageContents);
|
||||||
|
|
||||||
|
// Update active domain if it has changed
|
||||||
|
if (detectedDomain !== activeDomain && detectedDomain !== "general") {
|
||||||
|
setActiveDomain(detectedDomain);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use the active domain for the system prompt
|
||||||
|
const systemPrompt = activeDomain ? getDomainPrompt(activeDomain) : undefined;
|
||||||
const response = await chatMessageCmd(id, aiMessage, provider, systemPrompt);
|
const response = await chatMessageCmd(id, aiMessage, provider, systemPrompt);
|
||||||
const assistantMsg: TriageMessage = {
|
const assistantMsg: TriageMessage = {
|
||||||
id: `asst-${Date.now()}`,
|
id: `asst-${Date.now()}`,
|
||||||
|
|||||||
@ -7,6 +7,7 @@ interface SessionState {
|
|||||||
piiSpans: PiiSpan[];
|
piiSpans: PiiSpan[];
|
||||||
approvedRedactions: PiiSpan[];
|
approvedRedactions: PiiSpan[];
|
||||||
currentWhyLevel: number;
|
currentWhyLevel: number;
|
||||||
|
activeDomain: string;
|
||||||
resolutionSteps: ResolutionStep[];
|
resolutionSteps: ResolutionStep[];
|
||||||
isLoading: boolean;
|
isLoading: boolean;
|
||||||
error: string | null;
|
error: string | null;
|
||||||
@ -16,6 +17,7 @@ interface SessionState {
|
|||||||
setPiiSpans: (spans: PiiSpan[]) => void;
|
setPiiSpans: (spans: PiiSpan[]) => void;
|
||||||
setApprovedRedactions: (spans: PiiSpan[]) => void;
|
setApprovedRedactions: (spans: PiiSpan[]) => void;
|
||||||
setWhyLevel: (level: number) => void;
|
setWhyLevel: (level: number) => void;
|
||||||
|
setActiveDomain: (domain: string) => void;
|
||||||
setResolutionSteps: (steps: ResolutionStep[]) => void;
|
setResolutionSteps: (steps: ResolutionStep[]) => void;
|
||||||
setLoading: (loading: boolean) => void;
|
setLoading: (loading: boolean) => void;
|
||||||
setError: (error: string | null) => void;
|
setError: (error: string | null) => void;
|
||||||
@ -28,6 +30,7 @@ const initialState = {
|
|||||||
piiSpans: [],
|
piiSpans: [],
|
||||||
approvedRedactions: [],
|
approvedRedactions: [],
|
||||||
currentWhyLevel: 0,
|
currentWhyLevel: 0,
|
||||||
|
activeDomain: "general",
|
||||||
resolutionSteps: [],
|
resolutionSteps: [],
|
||||||
isLoading: false,
|
isLoading: false,
|
||||||
error: null,
|
error: null,
|
||||||
@ -35,11 +38,12 @@ const initialState = {
|
|||||||
|
|
||||||
export const useSessionStore = create<SessionState>((set) => ({
|
export const useSessionStore = create<SessionState>((set) => ({
|
||||||
...initialState,
|
...initialState,
|
||||||
startSession: (issue) => set({ currentIssue: issue, messages: [], currentWhyLevel: 1 }),
|
startSession: (issue) => set({ currentIssue: issue, messages: [], currentWhyLevel: 1, activeDomain: issue.category }),
|
||||||
addMessage: (message) => set((state) => ({ messages: [...state.messages, message] })),
|
addMessage: (message) => set((state) => ({ messages: [...state.messages, message] })),
|
||||||
setPiiSpans: (spans) => set({ piiSpans: spans }),
|
setPiiSpans: (spans) => set({ piiSpans: spans }),
|
||||||
setApprovedRedactions: (spans) => set({ approvedRedactions: spans }),
|
setApprovedRedactions: (spans) => set({ approvedRedactions: spans }),
|
||||||
setWhyLevel: (level) => set({ currentWhyLevel: level }),
|
setWhyLevel: (level) => set({ currentWhyLevel: level }),
|
||||||
|
setActiveDomain: (domain) => set({ activeDomain: domain }),
|
||||||
setResolutionSteps: (steps) => set({ resolutionSteps: steps }),
|
setResolutionSteps: (steps) => set({ resolutionSteps: steps }),
|
||||||
setLoading: (loading) => set({ isLoading: loading }),
|
setLoading: (loading) => set({ isLoading: loading }),
|
||||||
setError: (error) => set({ error }),
|
setError: (error) => set({ error }),
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user