From cd67a09a6a627d9d063cb7a1f5a8899aabf2f6e1 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 13:50:29 -0500 Subject: [PATCH 01/19] fix(ai,search): load history across all conversations; deep search related tables AI history continuity: Changed the history-load query in chat_message to JOIN ai_conversations and select by issue_id instead of single conversation_id. This preserves full context when provider/model changes mid-triage. Deep search: Added DISTINCT to list_issues SELECT and extended the search filter with EXISTS subqueries covering ai_messages, resolution_steps, log_files, and timeline_events. Ensures comprehensive search without duplicate results. Includes 11 new unit tests covering both features. --- src-tauri/src/commands/ai.rs | 200 ++++++++++++++++++++++++++++++++++- src-tauri/src/commands/db.rs | 176 +++++++++++++++++++++++++++++- 2 files changed, 370 insertions(+), 6 deletions(-) diff --git a/src-tauri/src/commands/ai.rs b/src-tauri/src/commands/ai.rs index 142d4f2e..47414d4f 100644 --- a/src-tauri/src/commands/ai.rs +++ b/src-tauri/src/commands/ai.rs @@ -204,15 +204,19 @@ pub async fn chat_message( } }; - // Load conversation history (use and_then to keep stmt lifetime within closure) + // Load conversation history across ALL conversations for this issue let history: Vec = { let db = state.db.lock().map_err(|e| e.to_string())?; let raw: Vec<(String, String)> = db .prepare( - "SELECT role, content FROM ai_messages WHERE conversation_id = ?1 ORDER BY created_at ASC", + "SELECT am.role, am.content \ + FROM ai_messages am \ + JOIN ai_conversations ac ON ac.id = am.conversation_id \ + WHERE ac.issue_id = ?1 \ + ORDER BY am.created_at ASC", ) .and_then(|mut stmt| { - stmt.query_map([&conversation_id], |row| { + stmt.query_map([&issue_id], |row| { Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) }) .map(|rows| rows.filter_map(|r| r.ok()).collect::>()) @@ -1025,4 +1029,194 @@ mod tests { let list = extract_list(text, "KEY_FINDINGS:"); assert_eq!(list, vec!["Actual item"]); } + + #[test] + fn test_history_query_same_conversation() { + let conn = rusqlite::Connection::open_in_memory().unwrap(); + crate::db::migrations::run_migrations(&conn).unwrap(); + + let now = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S").to_string(); + conn.execute( + "INSERT INTO issues (id, title, created_at, updated_at) VALUES (?1, ?2, ?3, ?4)", + rusqlite::params!["issue-1", "Test", &now, &now], + ) + .unwrap(); + conn.execute( + "INSERT INTO ai_conversations (id, issue_id, provider, model, created_at, title) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params!["conv-1", "issue-1", "openai", "gpt-4o", &now, "Conv 1"], + ) + .unwrap(); + conn.execute( + "INSERT INTO ai_messages (id, conversation_id, role, content, token_count, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params!["msg-1", "conv-1", "user", "Hello", 5, "2025-01-01 10:00:00"], + ) + .unwrap(); + conn.execute( + "INSERT INTO ai_messages (id, conversation_id, role, content, token_count, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params!["msg-2", "conv-1", "assistant", "Hi there", 8, "2025-01-01 10:00:01"], + ) + .unwrap(); + + let issue_id = "issue-1"; + let raw: Vec<(String, String)> = conn + .prepare( + "SELECT am.role, am.content \ + FROM ai_messages am \ + JOIN ai_conversations ac ON ac.id = am.conversation_id \ + WHERE ac.issue_id = ?1 \ + ORDER BY am.created_at ASC", + ) + .and_then(|mut stmt| { + stmt.query_map([&issue_id], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) + }) + .map(|rows| rows.filter_map(|r| r.ok()).collect::>()) + }) + .unwrap(); + + assert_eq!(raw.len(), 2); + assert_eq!(raw[0], ("user".to_string(), "Hello".to_string())); + assert_eq!(raw[1], ("assistant".to_string(), "Hi there".to_string())); + } + + #[test] + fn test_history_query_across_conversations() { + let conn = rusqlite::Connection::open_in_memory().unwrap(); + crate::db::migrations::run_migrations(&conn).unwrap(); + + let now = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S").to_string(); + conn.execute( + "INSERT INTO issues (id, title, created_at, updated_at) VALUES (?1, ?2, ?3, ?4)", + rusqlite::params!["issue-1", "Test", &now, &now], + ) + .unwrap(); + conn.execute( + "INSERT INTO ai_conversations (id, issue_id, provider, model, created_at, title) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params!["conv-1", "issue-1", "openai", "gpt-4o", &now, "Conv 1"], + ) + .unwrap(); + conn.execute( + "INSERT INTO ai_conversations (id, issue_id, provider, model, created_at, title) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params!["conv-2", "issue-1", "anthropic", "claude-3", &now, "Conv 2"], + ) + .unwrap(); + conn.execute( + "INSERT INTO ai_messages (id, conversation_id, role, content, token_count, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params!["msg-1", "conv-1", "user", "From conv 1", 5, "2025-01-01 10:00:00"], + ) + .unwrap(); + conn.execute( + "INSERT INTO ai_messages (id, conversation_id, role, content, token_count, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params!["msg-2", "conv-2", "user", "From conv 2", 5, "2025-01-01 11:00:00"], + ) + .unwrap(); + + let issue_id = "issue-1"; + let raw: Vec<(String, String)> = conn + .prepare( + "SELECT am.role, am.content \ + FROM ai_messages am \ + JOIN ai_conversations ac ON ac.id = am.conversation_id \ + WHERE ac.issue_id = ?1 \ + ORDER BY am.created_at ASC", + ) + .and_then(|mut stmt| { + stmt.query_map([&issue_id], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) + }) + .map(|rows| rows.filter_map(|r| r.ok()).collect::>()) + }) + .unwrap(); + + assert_eq!(raw.len(), 2); + assert_eq!(raw[0].1, "From conv 1"); + assert_eq!(raw[1].1, "From conv 2"); + } + + #[test] + fn test_history_query_empty_for_new_issue() { + let conn = rusqlite::Connection::open_in_memory().unwrap(); + crate::db::migrations::run_migrations(&conn).unwrap(); + + let now = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S").to_string(); + conn.execute( + "INSERT INTO issues (id, title, created_at, updated_at) VALUES (?1, ?2, ?3, ?4)", + rusqlite::params!["issue-new", "Empty", &now, &now], + ) + .unwrap(); + + let issue_id = "issue-new"; + let raw: Vec<(String, String)> = conn + .prepare( + "SELECT am.role, am.content \ + FROM ai_messages am \ + JOIN ai_conversations ac ON ac.id = am.conversation_id \ + WHERE ac.issue_id = ?1 \ + ORDER BY am.created_at ASC", + ) + .and_then(|mut stmt| { + stmt.query_map([&issue_id], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) + }) + .map(|rows| rows.filter_map(|r| r.ok()).collect::>()) + }) + .unwrap(); + + assert!(raw.is_empty()); + } + + #[test] + fn test_history_query_ordered_by_created_at() { + let conn = rusqlite::Connection::open_in_memory().unwrap(); + crate::db::migrations::run_migrations(&conn).unwrap(); + + let now = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S").to_string(); + conn.execute( + "INSERT INTO issues (id, title, created_at, updated_at) VALUES (?1, ?2, ?3, ?4)", + rusqlite::params!["issue-1", "Test", &now, &now], + ) + .unwrap(); + conn.execute( + "INSERT INTO ai_conversations (id, issue_id, provider, model, created_at, title) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params!["conv-1", "issue-1", "openai", "gpt-4o", &now, "Conv 1"], + ) + .unwrap(); + conn.execute( + "INSERT INTO ai_conversations (id, issue_id, provider, model, created_at, title) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params!["conv-2", "issue-1", "anthropic", "claude-3", &now, "Conv 2"], + ) + .unwrap(); + // Insert messages out of order: conv-2 message is earlier than conv-1 message + conn.execute( + "INSERT INTO ai_messages (id, conversation_id, role, content, token_count, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params!["msg-1", "conv-1", "user", "Second", 5, "2025-01-01 12:00:00"], + ) + .unwrap(); + conn.execute( + "INSERT INTO ai_messages (id, conversation_id, role, content, token_count, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params!["msg-2", "conv-2", "user", "First", 5, "2025-01-01 09:00:00"], + ) + .unwrap(); + + let issue_id = "issue-1"; + let raw: Vec<(String, String)> = conn + .prepare( + "SELECT am.role, am.content \ + FROM ai_messages am \ + JOIN ai_conversations ac ON ac.id = am.conversation_id \ + WHERE ac.issue_id = ?1 \ + ORDER BY am.created_at ASC", + ) + .and_then(|mut stmt| { + stmt.query_map([&issue_id], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) + }) + .map(|rows| rows.filter_map(|r| r.ok()).collect::>()) + }) + .unwrap(); + + assert_eq!(raw.len(), 2); + assert_eq!(raw[0].1, "First"); + assert_eq!(raw[1].1, "Second"); + } } diff --git a/src-tauri/src/commands/db.rs b/src-tauri/src/commands/db.rs index 73550f94..9746dc32 100644 --- a/src-tauri/src/commands/db.rs +++ b/src-tauri/src/commands/db.rs @@ -347,7 +347,7 @@ pub async fn list_issues( let offset = filter.offset.unwrap_or(0); let mut sql = String::from( - "SELECT i.id, i.title, i.severity, i.status, i.category, i.created_at, i.updated_at, \ + "SELECT DISTINCT i.id, i.title, i.severity, i.status, i.category, i.created_at, i.updated_at, \ (SELECT COUNT(*) FROM log_files lf WHERE lf.issue_id = i.id) as log_count, \ (SELECT COUNT(*) FROM resolution_steps rs WHERE rs.issue_id = i.id) as step_count \ FROM issues i WHERE 1=1", @@ -384,9 +384,19 @@ pub async fn list_issues( } if let Some(ref search) = filter.search { let pattern = format!("%{search}%"); + let idx = params.len() + 1; sql.push_str(&format!( - " AND (i.title LIKE ?{0} OR i.description LIKE ?{0} OR i.category LIKE ?{0})", - params.len() + 1 + " AND (i.title LIKE ?{idx} OR i.description LIKE ?{idx} OR i.category LIKE ?{idx} \ + OR EXISTS (SELECT 1 FROM ai_messages am \ + JOIN ai_conversations ac ON ac.id = am.conversation_id \ + WHERE ac.issue_id = i.id AND am.content LIKE ?{idx}) \ + OR EXISTS (SELECT 1 FROM resolution_steps rs \ + WHERE rs.issue_id = i.id \ + AND (rs.why_question LIKE ?{idx} OR rs.answer LIKE ?{idx} OR rs.evidence LIKE ?{idx})) \ + OR EXISTS (SELECT 1 FROM log_files lf \ + WHERE lf.issue_id = i.id AND lf.file_name LIKE ?{idx}) \ + OR EXISTS (SELECT 1 FROM timeline_events te \ + WHERE te.issue_id = i.id AND te.description LIKE ?{idx}))", )); params.push(Box::new(pattern)); } @@ -635,3 +645,163 @@ pub async fn get_timeline_events( .collect(); Ok(events) } + +#[cfg(test)] +mod tests { + use rusqlite::Connection; + + fn setup_test_db() -> Connection { + let conn = Connection::open_in_memory().unwrap(); + crate::db::migrations::run_migrations(&conn).unwrap(); + conn + } + + fn insert_issue(conn: &Connection, id: &str, title: &str, description: &str) { + let now = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S").to_string(); + conn.execute( + "INSERT INTO issues (id, title, description, severity, status, category, source, created_at, updated_at, assigned_to, tags) \ + VALUES (?1, ?2, ?3, 'medium', 'open', 'general', 'manual', ?4, ?5, '', '[]')", + rusqlite::params![id, title, description, &now, &now], + ) + .unwrap(); + } + + fn run_search_query(conn: &Connection, search: &str) -> Vec { + let pattern = format!("%{search}%"); + let idx = 1; + let sql = format!( + "SELECT DISTINCT i.id, i.title, i.severity, i.status, i.category, i.created_at, i.updated_at, \ + (SELECT COUNT(*) FROM log_files lf2 WHERE lf2.issue_id = i.id) as log_count, \ + (SELECT COUNT(*) FROM resolution_steps rs2 WHERE rs2.issue_id = i.id) as step_count \ + FROM issues i WHERE 1=1 \ + AND (i.title LIKE ?{idx} OR i.description LIKE ?{idx} OR i.category LIKE ?{idx} \ + OR EXISTS (SELECT 1 FROM ai_messages am \ + JOIN ai_conversations ac ON ac.id = am.conversation_id \ + WHERE ac.issue_id = i.id AND am.content LIKE ?{idx}) \ + OR EXISTS (SELECT 1 FROM resolution_steps rs \ + WHERE rs.issue_id = i.id \ + AND (rs.why_question LIKE ?{idx} OR rs.answer LIKE ?{idx} OR rs.evidence LIKE ?{idx})) \ + OR EXISTS (SELECT 1 FROM log_files lf \ + WHERE lf.issue_id = i.id AND lf.file_name LIKE ?{idx}) \ + OR EXISTS (SELECT 1 FROM timeline_events te \ + WHERE te.issue_id = i.id AND te.description LIKE ?{idx})) \ + ORDER BY i.updated_at DESC" + ); + let mut stmt = conn.prepare(&sql).unwrap(); + stmt.query_map([&pattern], |row| row.get::<_, String>(0)) + .unwrap() + .filter_map(|r| r.ok()) + .collect() + } + + #[test] + fn test_search_finds_by_title() { + let conn = setup_test_db(); + insert_issue(&conn, "issue-1", "Kubernetes OOM crash", "No details"); + insert_issue(&conn, "issue-2", "Network timeout", "No details"); + + let results = run_search_query(&conn, "Kubernetes"); + assert_eq!(results, vec!["issue-1"]); + } + + #[test] + fn test_search_finds_by_description() { + let conn = setup_test_db(); + insert_issue( + &conn, + "issue-1", + "Generic title", + "The pod was killed due to memory pressure", + ); + insert_issue(&conn, "issue-2", "Other", "All fine"); + + let results = run_search_query(&conn, "memory pressure"); + assert_eq!(results, vec!["issue-1"]); + } + + #[test] + fn test_search_finds_by_ai_message_content() { + let conn = setup_test_db(); + insert_issue(&conn, "issue-1", "Unrelated title", "Unrelated desc"); + let now = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S").to_string(); + conn.execute( + "INSERT INTO ai_conversations (id, issue_id, provider, model, created_at, title) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params!["conv-1", "issue-1", "openai", "gpt-4o", &now, "Conv"], + ).unwrap(); + conn.execute( + "INSERT INTO ai_messages (id, conversation_id, role, content, token_count, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params!["msg-1", "conv-1", "assistant", "The root cause is a deadlock in PostgreSQL", 10, &now], + ).unwrap(); + + let results = run_search_query(&conn, "deadlock in PostgreSQL"); + assert_eq!(results, vec!["issue-1"]); + } + + #[test] + fn test_search_finds_by_resolution_step_answer() { + let conn = setup_test_db(); + insert_issue(&conn, "issue-1", "Unrelated title", "Unrelated desc"); + let now = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S").to_string(); + conn.execute( + "INSERT INTO resolution_steps (id, issue_id, step_order, why_question, answer, evidence, created_at) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", + rusqlite::params!["step-1", "issue-1", 1, "Why did it fail?", "Connection pool exhausted", "metrics dashboard", &now], + ).unwrap(); + + let results = run_search_query(&conn, "pool exhausted"); + assert_eq!(results, vec!["issue-1"]); + } + + #[test] + fn test_search_finds_by_log_file_name() { + let conn = setup_test_db(); + insert_issue(&conn, "issue-1", "Unrelated title", "Unrelated desc"); + let now = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S").to_string(); + conn.execute( + "INSERT INTO log_files (id, issue_id, file_name, file_path, file_size, mime_type, content_hash, uploaded_at, redacted) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)", + rusqlite::params!["lf-1", "issue-1", "nginx-error-2025.log", "/tmp/nginx-error-2025.log", 1024, "text/plain", "abc", &now, 0], + ).unwrap(); + + let results = run_search_query(&conn, "nginx-error"); + assert_eq!(results, vec!["issue-1"]); + } + + #[test] + fn test_search_finds_by_timeline_description() { + let conn = setup_test_db(); + insert_issue(&conn, "issue-1", "Unrelated title", "Unrelated desc"); + conn.execute( + "INSERT INTO timeline_events (id, issue_id, event_type, description, metadata, created_at) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params!["te-1", "issue-1", "triage_started", "Engineer started investigating DNS resolution failure", "{}", "2025-01-15 10:00:00"], + ).unwrap(); + + let results = run_search_query(&conn, "DNS resolution"); + assert_eq!(results, vec!["issue-1"]); + } + + #[test] + fn test_search_no_duplicates_for_multiple_matches() { + let conn = setup_test_db(); + insert_issue( + &conn, + "issue-1", + "Kubernetes crash", + "Kubernetes pod killed", + ); + let now = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S").to_string(); + conn.execute( + "INSERT INTO ai_conversations (id, issue_id, provider, model, created_at, title) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params!["conv-1", "issue-1", "openai", "gpt-4o", &now, "Conv"], + ).unwrap(); + conn.execute( + "INSERT INTO ai_messages (id, conversation_id, role, content, token_count, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params!["msg-1", "conv-1", "assistant", "Kubernetes OOM detected", 5, &now], + ).unwrap(); + + let results = run_search_query(&conn, "Kubernetes"); + assert_eq!(results.len(), 1); + assert_eq!(results[0], "issue-1"); + } +} From f47ec90d05c0dad8d0baceffde4790394b976548 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 13:50:59 -0500 Subject: [PATCH 02/19] feat(upload): add safe file extension validation and binary text extraction - Add extension allowlist (SAFE_TEXT_EXTENSIONS + SAFE_BINARY_EXTENSIONS) rejecting unsupported file types at both upload_log_file and upload_log_file_by_content entry points - Add extract_text_content() with PDF text extraction via lopdf and DOCX extraction via zip+quick-xml - Binary files (PDF/DOCX) get extracted text written to .extracted.txt for downstream PII detection - Expand frontend file input accept list and add collapsible supported-formats disclosure element - Add 11 unit tests covering allowlist logic and extraction paths --- src-tauri/Cargo.toml | 3 + src-tauri/src/commands/analysis.rs | 287 +++++++++++++++++++++++++++-- src/pages/LogUpload/index.tsx | 15 +- 3 files changed, 286 insertions(+), 19 deletions(-) diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index 84150c6f..032982db 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -45,6 +45,9 @@ warp = "0.3" urlencoding = "2" infer = "0.15" url = "2.5.8" +lopdf = "0.31" +zip = "0.6" +quick-xml = "0.36" rmcp = { version = "1.7.0", features = [ "client", "transport-child-process", diff --git a/src-tauri/src/commands/analysis.rs b/src-tauri/src/commands/analysis.rs index 7344e081..135fb12f 100644 --- a/src-tauri/src/commands/analysis.rs +++ b/src-tauri/src/commands/analysis.rs @@ -9,6 +9,139 @@ use crate::state::AppState; const MAX_LOG_FILE_BYTES: u64 = 50 * 1024 * 1024; +const SAFE_TEXT_EXTENSIONS: &[&str] = &[ + "log", + "txt", + "out", + "err", + "syslog", + "journal", + "yaml", + "yml", + "json", + "toml", + "xml", + "ini", + "cfg", + "conf", + "config", + "env", + "properties", + "md", + "markdown", + "rst", + "csv", + "tsv", + "ndjson", + "jsonl", + "sql", + "sh", + "bash", + "zsh", + "py", + "js", + "ts", + "rb", + "go", + "rs", + "java", + "html", + "htm", + "css", + "diff", + "patch", + "rtf", +]; + +const SAFE_BINARY_EXTENSIONS: &[&str] = &["pdf", "docx", "doc", "xlsx", "xls"]; + +pub fn is_safe_file(path: &Path) -> bool { + let ext = path + .extension() + .and_then(|e| e.to_str()) + .map(|e| e.to_lowercase()); + match ext.as_deref() { + Some(e) => SAFE_TEXT_EXTENSIONS.contains(&e) || SAFE_BINARY_EXTENSIONS.contains(&e), + None => false, + } +} + +pub fn extract_text_content(path: &Path) -> Result { + let ext = path + .extension() + .and_then(|e| e.to_str()) + .map(|e| e.to_lowercase()) + .unwrap_or_default(); + + match ext.as_str() { + "pdf" => extract_pdf_text(path), + "docx" | "doc" => extract_docx_text(path), + "xlsx" | "xls" => Err(format!( + "Spreadsheet format .{ext} is not yet supported for text extraction. \ + Export the sheet as CSV and upload that instead." + )), + _ => std::fs::read_to_string(path).map_err(|e| format!("Failed to read file: {e}")), + } +} + +fn extract_pdf_text(path: &Path) -> Result { + let doc = lopdf::Document::load(path).map_err(|e| format!("Failed to parse PDF: {e}"))?; + let mut text = String::new(); + let mut pages: Vec = doc.get_pages().keys().copied().collect(); + pages.sort_unstable(); + for page_num in pages { + if let Ok(content) = doc.extract_text(&[page_num]) { + text.push_str(&content); + text.push('\n'); + } + } + if text.trim().is_empty() { + return Err("PDF contains no extractable text (may be a scanned image)".to_string()); + } + Ok(text) +} + +fn extract_docx_text(path: &Path) -> Result { + use std::io::Read as _; + let file = std::fs::File::open(path).map_err(|e| format!("Failed to open file: {e}"))?; + let mut archive = + zip::ZipArchive::new(file).map_err(|e| format!("Failed to open as ZIP/DOCX: {e}"))?; + let mut xml_content = String::new(); + { + let mut doc_xml = archive + .by_name("word/document.xml") + .map_err(|_| "Not a valid DOCX: missing word/document.xml".to_string())?; + doc_xml + .read_to_string(&mut xml_content) + .map_err(|e| format!("Failed to read document.xml: {e}"))?; + } + let mut text = String::new(); + let mut reader = quick_xml::Reader::from_str(&xml_content); + reader.config_mut().trim_text(true); + let mut buf = Vec::new(); + loop { + match reader.read_event_into(&mut buf) { + Ok(quick_xml::events::Event::Text(e)) => { + if let Ok(s) = e.unescape() { + let trimmed = s.trim().to_string(); + if !trimmed.is_empty() { + text.push_str(&trimmed); + text.push(' '); + } + } + } + Ok(quick_xml::events::Event::Eof) => break, + Err(e) => return Err(format!("XML parse error: {e}")), + _ => {} + } + buf.clear(); + } + if text.trim().is_empty() { + return Err("DOCX contains no extractable text".to_string()); + } + Ok(text) +} + fn validate_log_file_path(file_path: &str) -> Result { let path = Path::new(file_path); let canonical = std::fs::canonicalize(path).map_err(|_| "Unable to access selected file")?; @@ -35,24 +168,59 @@ pub async fn upload_log_file( state: State<'_, AppState>, ) -> Result { let canonical_path = validate_log_file_path(&file_path)?; - let content = std::fs::read(&canonical_path).map_err(|_| "Failed to read selected log file")?; - let content_hash = format!("{:x}", Sha256::digest(&content)); + + if !is_safe_file(&canonical_path) { + let ext = canonical_path + .extension() + .and_then(|e| e.to_str()) + .unwrap_or("(none)"); + return Err(format!( + "File type '.{ext}' is not supported. Supported formats include .log, .txt, .json, .pdf, .docx, .md, and many more." + )); + } + let file_name = canonical_path .file_name() .and_then(|n| n.to_str()) .unwrap_or("unknown") .to_string(); - let file_size = content.len() as i64; - let mime_type = if file_name.ends_with(".json") { - "application/json" - } else if file_name.ends_with(".xml") { - "application/xml" - } else { - "text/plain" + + let file_ext = canonical_path + .extension() + .and_then(|e| e.to_str()) + .map(|e| e.to_lowercase()) + .unwrap_or_default(); + + let extracted_text = extract_text_content(&canonical_path) + .map_err(|e| format!("Failed to read file content: {e}"))?; + let content_bytes = extracted_text.as_bytes(); + let content_hash = format!("{:x}", Sha256::digest(content_bytes)); + let file_size = content_bytes.len() as i64; + + let mime_type = match file_ext.as_str() { + "json" => "application/json", + "xml" => "application/xml", + "yaml" | "yml" => "application/yaml", + "pdf" => "application/pdf", + "docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "doc" => "application/msword", + "md" | "markdown" => "text/markdown", + "csv" | "tsv" => "text/csv", + "html" | "htm" => "text/html", + _ => "text/plain", }; - let canonical_file_path = canonical_path.to_string_lossy().to_string(); - let log_file = LogFile::new(issue_id.clone(), file_name, canonical_file_path, file_size); + let is_binary = SAFE_BINARY_EXTENSIONS.contains(&file_ext.as_str()); + let stored_path = if is_binary { + let extracted_path = canonical_path.with_extension("extracted.txt"); + std::fs::write(&extracted_path, &extracted_text) + .map_err(|e| format!("Failed to write extracted text: {e}"))?; + extracted_path.to_string_lossy().to_string() + } else { + canonical_path.to_string_lossy().to_string() + }; + + let log_file = LogFile::new(issue_id.clone(), file_name, stored_path, file_size); let log_file = LogFile { content_hash: content_hash.clone(), mime_type: mime_type.to_string(), @@ -104,17 +272,36 @@ pub async fn upload_log_file_by_content( content: String, state: State<'_, AppState>, ) -> Result { + let fake_path = Path::new(&file_name); + if !is_safe_file(fake_path) { + let ext = fake_path + .extension() + .and_then(|e| e.to_str()) + .unwrap_or("(none)"); + return Err(format!("File type '.{ext}' is not supported.")); + } + let content_bytes = content.as_bytes(); let content_hash = format!("{:x}", Sha256::digest(content_bytes)); let file_size = content_bytes.len() as i64; - // Determine mime type based on file extension - let mime_type = if file_name.ends_with(".json") { - "application/json" - } else if file_name.ends_with(".xml") { - "application/xml" - } else { - "text/plain" + let file_ext = fake_path + .extension() + .and_then(|e| e.to_str()) + .map(|e| e.to_lowercase()) + .unwrap_or_default(); + + let mime_type = match file_ext.as_str() { + "json" => "application/json", + "xml" => "application/xml", + "yaml" | "yml" => "application/yaml", + "pdf" => "application/pdf", + "docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "doc" => "application/msword", + "md" | "markdown" => "text/markdown", + "csv" | "tsv" => "text/csv", + "html" | "htm" => "text/html", + _ => "text/plain", }; // Use the file_name as the file_path for DB storage @@ -328,4 +515,68 @@ mod tests { assert!(result.is_ok()); let _ = std::fs::remove_file(file_path); } + + #[test] + fn test_is_safe_file_allows_txt() { + assert!(is_safe_file(Path::new("file.txt"))); + } + + #[test] + fn test_is_safe_file_allows_md() { + assert!(is_safe_file(Path::new("readme.md"))); + } + + #[test] + fn test_is_safe_file_allows_pdf() { + assert!(is_safe_file(Path::new("report.pdf"))); + } + + #[test] + fn test_is_safe_file_allows_docx() { + assert!(is_safe_file(Path::new("doc.docx"))); + } + + #[test] + fn test_is_safe_file_rejects_exe() { + assert!(!is_safe_file(Path::new("malware.exe"))); + } + + #[test] + fn test_is_safe_file_rejects_dll() { + assert!(!is_safe_file(Path::new("library.dll"))); + } + + #[test] + fn test_is_safe_file_rejects_zip_directly() { + assert!(!is_safe_file(Path::new("archive.zip"))); + } + + #[test] + fn test_is_safe_file_case_insensitive() { + assert!(is_safe_file(Path::new("file.TXT"))); + assert!(is_safe_file(Path::new("file.Log"))); + } + + #[test] + fn test_is_safe_file_no_extension_rejected() { + assert!(!is_safe_file(Path::new("Makefile"))); + } + + #[test] + fn test_extract_text_plain_file() { + let dir = std::env::temp_dir(); + let path = dir.join(format!("tftsr-test-extract-{}.txt", uuid::Uuid::now_v7())); + std::fs::write(&path, "hello world").unwrap(); + let result = extract_text_content(&path); + assert!(result.is_ok()); + assert_eq!(result.unwrap().trim(), "hello world"); + let _ = std::fs::remove_file(path); + } + + #[test] + fn test_extract_text_unsupported_binary_returns_error() { + let result = extract_text_content(Path::new("data.xlsx")); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("not yet supported")); + } } diff --git a/src/pages/LogUpload/index.tsx b/src/pages/LogUpload/index.tsx index 140a45ab..57a69c6c 100644 --- a/src/pages/LogUpload/index.tsx +++ b/src/pages/LogUpload/index.tsx @@ -252,8 +252,21 @@ export default function LogUpload() { multiple className="hidden" onChange={handleFileSelect} - accept=".log,.txt,.json,.csv,.xml,.yaml,.yml" + accept=".log,.txt,.out,.err,.syslog,.journal,.yaml,.yml,.json,.toml,.xml,.ini,.cfg,.conf,.config,.env,.properties,.md,.markdown,.rst,.csv,.tsv,.ndjson,.jsonl,.sql,.sh,.bash,.zsh,.py,.js,.ts,.rb,.go,.rs,.java,.html,.htm,.css,.diff,.patch,.pdf,.docx,.doc,.rtf,.xlsx,.xls" /> +
+ + Supported formats + +
+
Logs & text: .log, .txt, .out, .err, .syslog, .journal
+
Config & markup: .yaml, .yml, .json, .toml, .xml, .ini, .cfg, .conf, .env, .properties
+
Documents: .pdf, .docx, .doc, .md, .rst, .rtf
+
Data: .csv, .tsv, .xlsx, .xls, .ndjson, .jsonl, .sql
+
Code & scripts: .sh, .bash, .zsh, .py, .js, .ts, .rb, .go, .rs, .java, .html, .css, .diff, .patch
+

Binary formats (PDF, DOCX, XLSX) will have their text extracted automatically.

+
+
{/* File list */} From ed2e25f835b76670f46d6b17a1f56a46341cb715 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 13:51:08 -0500 Subject: [PATCH 03/19] chore: update Cargo.lock for lopdf, zip, quick-xml deps --- src-tauri/Cargo.lock | 163 +++++++++++++++++++++++++++++- src-tauri/src/commands/agentic.rs | 123 ++++++++++++++++++++++ src-tauri/src/commands/mod.rs | 1 + src-tauri/src/commands/system.rs | 78 ++++++++++++++ src-tauri/src/db/migrations.rs | 67 ++++++++++++ src-tauri/src/lib.rs | 4 + src/lib/tauriCommands.ts | 20 ++++ src/pages/Settings/Security.tsx | 140 ++++++++++++++++++++++++- 8 files changed, 592 insertions(+), 4 deletions(-) create mode 100644 src-tauri/src/commands/agentic.rs diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index 0e83bc09..dbe727c2 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -355,6 +355,26 @@ dependencies = [ "serde", ] +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.13+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "cairo-rs" version = "0.18.5" @@ -429,6 +449,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" dependencies = [ "find-msvc-tools", + "jobserver", + "libc", "shlex", ] @@ -708,6 +730,25 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -1188,6 +1229,12 @@ dependencies = [ "zeroize", ] +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "elliptic-curve" version = "0.13.8" @@ -2467,7 +2514,7 @@ dependencies = [ "hmac", "iterator-sorted", "k256", - "pbkdf2", + "pbkdf2 0.12.2", "rand 0.8.5", "scrypt", "serde", @@ -2588,6 +2635,16 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + [[package]] name = "js-sys" version = "0.3.91" @@ -2818,13 +2875,16 @@ version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07c8e1b6184b1b32ea5f72f572ebdc40e5da1d2921fa469947ff7c480ad1f85a" dependencies = [ + "chrono", "encoding_rs", "flate2", "itoa", "linked-hash-map", "log", "md5", + "nom", "pom", + "rayon", "time", "weezl", ] @@ -2938,6 +2998,12 @@ dependencies = [ "unicase", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "minisign-verify" version = "0.2.5" @@ -3122,6 +3188,16 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -3442,6 +3518,17 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "password-hash" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700" +dependencies = [ + "base64ct", + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "paste" version = "1.0.15" @@ -3460,6 +3547,18 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" +[[package]] +name = "pbkdf2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83a0692ec44e4cf1ef28ca317f14f8f07da2d95ec3fa01f86e4467b725e60917" +dependencies = [ + "digest", + "hmac", + "password-hash", + "sha2", +] + [[package]] name = "pbkdf2" version = "0.12.2" @@ -4161,6 +4260,26 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539" +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -4650,7 +4769,7 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f" dependencies = [ - "pbkdf2", + "pbkdf2 0.12.2", "salsa20", "sha2", ] @@ -6252,8 +6371,10 @@ dependencies = [ "hex", "infer 0.15.0", "lazy_static", + "lopdf", "mockito", "printpdf", + "quick-xml 0.36.2", "rand 0.8.5", "regex", "reqwest 0.12.28", @@ -6278,6 +6399,7 @@ dependencies = [ "urlencoding", "uuid", "warp", + "zip 0.6.6", ] [[package]] @@ -7804,10 +7926,18 @@ version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" dependencies = [ + "aes", "byteorder", + "bzip2", + "constant_time_eq 0.1.5", "crc32fast", "crossbeam-utils", "flate2", + "hmac", + "pbkdf2 0.11.0", + "sha1", + "time", + "zstd", ] [[package]] @@ -7848,6 +7978,35 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "zstd" +version = "0.11.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "5.0.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "zune-core" version = "0.5.1" diff --git a/src-tauri/src/commands/agentic.rs b/src-tauri/src/commands/agentic.rs new file mode 100644 index 00000000..4b1a79ea --- /dev/null +++ b/src-tauri/src/commands/agentic.rs @@ -0,0 +1,123 @@ +use std::io::Write; +use std::process::{Command, Stdio}; + +#[derive(Debug, serde::Serialize)] +pub struct SudoOutput { + pub stdout: String, + pub stderr: String, + pub success: bool, + pub exit_code: Option, +} + +/// Execute a command via sudo, passing the password via stdin (never via cmdline args). +/// `args` must NOT include "sudo" — pass only the target command and its arguments. +pub fn run_sudo_command(password: &str, args: &[&str]) -> Result { + let mut child = Command::new("sudo") + .arg("-S") // read password from stdin + .arg("-p") + .arg("") // suppress prompt text + .arg("--") // end of sudo options — prevents injection + .args(args) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .map_err(|e| format!("Failed to spawn sudo: {e}"))?; + + if let Some(mut stdin) = child.stdin.take() { + writeln!(stdin, "{}", password) + .map_err(|e| format!("Failed to write password to stdin: {e}"))?; + } + + let output = child + .wait_with_output() + .map_err(|e| format!("Failed to wait for sudo: {e}"))?; + + let stdout = String::from_utf8_lossy(&output.stdout).to_string(); + let stderr = strip_sudo_password_prompt(&String::from_utf8_lossy(&output.stderr)); + + Ok(SudoOutput { + stdout, + stderr, + success: output.status.success(), + exit_code: output.status.code(), + }) +} + +/// Strip "[sudo] password for ..." prompt lines from stderr before logging. +fn strip_sudo_password_prompt(text: &str) -> String { + text.lines() + .filter(|line| { + let lower = line.to_lowercase(); + !lower.contains("[sudo] password") && !lower.starts_with("password:") + }) + .collect::>() + .join("\n") +} + +/// Like run_sudo_command but writes a sanitized audit entry first. +/// The password is NEVER included in audit details. +pub fn run_sudo_command_audited( + password: &str, + args: &[&str], + db: &rusqlite::Connection, +) -> Result { + let sanitized_args: Vec = args.iter().map(|s| s.to_string()).collect(); + let details = serde_json::json!({ + "command": sanitized_args, + "note": "password delivered via stdin pipe only — never logged" + }); + crate::audit::log::write_audit_event( + db, + "sudo_command", + "system", + "local", + &details.to_string(), + ) + .map_err(|e| format!("Audit log failed: {e}"))?; + + run_sudo_command(password, args) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_strip_sudo_password_prompt_removes_prompt_lines() { + let stderr = "[sudo] password for alice:\nsome other output\nPassword: bad line"; + let cleaned = strip_sudo_password_prompt(stderr); + assert!(!cleaned.contains("[sudo] password")); + assert!(!cleaned.contains("Password:")); + assert!(cleaned.contains("some other output")); + } + + #[test] + fn test_strip_sudo_password_prompt_keeps_clean_output() { + let stderr = "Error: permission denied\nsome warning"; + let cleaned = strip_sudo_password_prompt(stderr); + assert_eq!(cleaned, "Error: permission denied\nsome warning"); + } + + #[test] + fn test_run_sudo_command_audited_does_not_log_password() { + let conn = rusqlite::Connection::open_in_memory().unwrap(); + crate::db::migrations::run_migrations(&conn).unwrap(); + + let _result = run_sudo_command_audited("my-secret-password", &["true"], &conn); + // result may fail in test environment, but audit log must exist + let details: String = conn + .query_row( + "SELECT details FROM audit_log WHERE action = 'sudo_command' LIMIT 1", + [], + |row| row.get(0), + ) + .unwrap_or_default(); + + assert!( + !details.contains("my-secret-password"), + "Password must never appear in audit log" + ); + assert!(details.contains("true"), "Command args should be logged"); + } +} diff --git a/src-tauri/src/commands/mod.rs b/src-tauri/src/commands/mod.rs index 6184fac6..b242ae8f 100644 --- a/src-tauri/src/commands/mod.rs +++ b/src-tauri/src/commands/mod.rs @@ -1,3 +1,4 @@ +pub mod agentic; pub mod ai; pub mod analysis; pub mod db; diff --git a/src-tauri/src/commands/system.rs b/src-tauri/src/commands/system.rs index 7ab59e73..d31a537d 100644 --- a/src-tauri/src/commands/system.rs +++ b/src-tauri/src/commands/system.rs @@ -284,3 +284,81 @@ pub async fn get_app_version() -> Result { .or_else(|_| env::var("CARGO_PKG_VERSION")) .map_err(|e| format!("Failed to get version: {e}")) } + +// --- Sudo credential commands --- + +#[derive(Debug, serde::Serialize, serde::Deserialize)] +pub struct SudoConfigStatus { + pub configured: bool, + pub username: String, + pub updated_at: String, +} + +#[tauri::command] +pub async fn set_sudo_password( + password: String, + username: Option, + state: tauri::State<'_, AppState>, +) -> Result<(), String> { + let encrypted = crate::integrations::auth::encrypt_token(&password)?; + let db = state.db.lock().map_err(|e| e.to_string())?; + let id = uuid::Uuid::now_v7().to_string(); + let uname = username.unwrap_or_default(); + db.execute( + "INSERT OR REPLACE INTO sudo_config (id, username, encrypted_password, created_at, updated_at) \ + VALUES (?1, ?2, ?3, datetime('now'), datetime('now'))", + rusqlite::params![id, uname, encrypted], + ) + .map_err(|e| format!("Failed to store sudo config: {e}"))?; + Ok(()) +} + +#[tauri::command] +pub async fn get_sudo_config_status( + state: tauri::State<'_, AppState>, +) -> Result { + let db = state.db.lock().map_err(|e| e.to_string())?; + let result: Option<(String, String)> = db + .prepare("SELECT username, updated_at FROM sudo_config LIMIT 1") + .and_then(|mut stmt| { + stmt.query_row([], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) + }) + }) + .ok(); + match result { + Some((username, updated_at)) => Ok(SudoConfigStatus { + configured: true, + username, + updated_at, + }), + None => Ok(SudoConfigStatus { + configured: false, + username: String::new(), + updated_at: String::new(), + }), + } +} + +#[tauri::command] +pub async fn test_sudo_password(state: tauri::State<'_, AppState>) -> Result { + let encrypted: Option = { + let db = state.db.lock().map_err(|e| e.to_string())?; + db.prepare("SELECT encrypted_password FROM sudo_config LIMIT 1") + .and_then(|mut stmt| stmt.query_row([], |row| row.get::<_, String>(0))) + .ok() + }; + let encrypted = encrypted.ok_or("No sudo password configured")?; + let password = crate::integrations::auth::decrypt_token(&encrypted)?; + let result = crate::commands::agentic::run_sudo_command(&password, &["true"]) + .map_err(|e| format!("Sudo test failed: {e}"))?; + Ok(result.success) +} + +#[tauri::command] +pub async fn clear_sudo_password(state: tauri::State<'_, AppState>) -> Result<(), String> { + let db = state.db.lock().map_err(|e| e.to_string())?; + db.execute("DELETE FROM sudo_config", []) + .map_err(|e| format!("Failed to clear sudo config: {e}"))?; + Ok(()) +} diff --git a/src-tauri/src/db/migrations.rs b/src-tauri/src/db/migrations.rs index 184aa31f..46f9c321 100644 --- a/src-tauri/src/db/migrations.rs +++ b/src-tauri/src/db/migrations.rs @@ -249,6 +249,16 @@ pub fn run_migrations(conn: &Connection) -> anyhow::Result<()> { FOREIGN KEY(server_id) REFERENCES mcp_servers(id) ON DELETE CASCADE );", ), + ( + "019_create_sudo_config", + "CREATE TABLE IF NOT EXISTS sudo_config ( + id TEXT PRIMARY KEY, + username TEXT NOT NULL DEFAULT '', + encrypted_password TEXT NOT NULL, + created_at TEXT NOT NULL DEFAULT (datetime('now')), + updated_at TEXT NOT NULL DEFAULT (datetime('now')) + );", + ), ]; for (name, sql) in migrations { @@ -1034,4 +1044,61 @@ mod tests { .unwrap(); assert_eq!(applied, 1, "018 should only be recorded once"); } + + // ─── Migration 019: sudo_config ───────────────────────────────────────────── + + #[test] + fn test_019_sudo_config_table_exists() { + let conn = setup_test_db(); + let count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='sudo_config'", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(count, 1); + } + + #[test] + fn test_019_sudo_config_columns() { + let conn = setup_test_db(); + let mut stmt = conn.prepare("PRAGMA table_info(sudo_config)").unwrap(); + let columns: Vec = stmt + .query_map([], |row| row.get::<_, String>(1)) + .unwrap() + .collect::, _>>() + .unwrap(); + + assert!(columns.contains(&"id".to_string())); + assert!(columns.contains(&"username".to_string())); + assert!(columns.contains(&"encrypted_password".to_string())); + assert!(columns.contains(&"created_at".to_string())); + assert!(columns.contains(&"updated_at".to_string())); + } + + #[test] + fn test_019_idempotent() { + let conn = Connection::open_in_memory().unwrap(); + run_migrations(&conn).unwrap(); + run_migrations(&conn).unwrap(); + + let count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='sudo_config'", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(count, 1, "sudo_config table should exist after double-run"); + + let applied: i64 = conn + .query_row( + "SELECT COUNT(*) FROM _migrations WHERE name = '019_create_sudo_config'", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(applied, 1, "019 should only be recorded once"); + } } diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 0b5efaba..4ffd0624 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -133,6 +133,10 @@ pub fn run() { commands::system::update_settings, commands::system::get_audit_log, commands::system::get_app_version, + commands::system::set_sudo_password, + commands::system::get_sudo_config_status, + commands::system::test_sudo_password, + commands::system::clear_sudo_password, // MCP Servers mcp::commands::list_mcp_servers, mcp::commands::create_mcp_server, diff --git a/src/lib/tauriCommands.ts b/src/lib/tauriCommands.ts index 35de4da0..f09908de 100644 --- a/src/lib/tauriCommands.ts +++ b/src/lib/tauriCommands.ts @@ -591,6 +591,26 @@ export function initiateMcpOauthCmd(id: string): Promise { return invoke("initiate_mcp_oauth", { id }); } +// ─── Sudo credential commands ───────────────────────────────────────────────── + +export interface SudoConfigStatus { + configured: boolean; + username: string; + updated_at: string; +} + +export const setSudoPasswordCmd = (password: string, username?: string) => + invoke("set_sudo_password", { password, username: username ?? null }); + +export const getSudoConfigStatusCmd = () => + invoke("get_sudo_config_status"); + +export const testSudoPasswordCmd = () => + invoke("test_sudo_password"); + +export const clearSudoPasswordCmd = () => + invoke("clear_sudo_password"); + // ─── System / Version ───────────────────────────────────────────────────────── export const getAppVersionCmd = () => diff --git a/src/pages/Settings/Security.tsx b/src/pages/Settings/Security.tsx index 022671a3..598ecb03 100644 --- a/src/pages/Settings/Security.tsx +++ b/src/pages/Settings/Security.tsx @@ -1,5 +1,5 @@ import React, { useEffect, useState } from "react"; -import { Shield, RefreshCw } from "lucide-react"; +import { Shield, RefreshCw, Lock } from "lucide-react"; import { Card, CardHeader, @@ -7,7 +7,15 @@ import { CardContent, Badge, } from "@/components/ui"; -import { getAuditLogCmd, type AuditEntry } from "@/lib/tauriCommands"; +import { + getAuditLogCmd, + getSudoConfigStatusCmd, + setSudoPasswordCmd, + testSudoPasswordCmd, + clearSudoPasswordCmd, + type AuditEntry, + type SudoConfigStatus, +} from "@/lib/tauriCommands"; import { useSettingsStore } from "@/stores/settingsStore"; const piiPatterns = [ @@ -28,8 +36,15 @@ export default function Security() { const [isLoading, setIsLoading] = useState(false); const [error, setError] = useState(null); + const [sudoPassword, setSudoPassword] = useState(""); + const [sudoUsername, setSudoUsername] = useState(""); + const [sudoStatus, setSudoStatus] = useState(null); + const [sudoMessage, setSudoMessage] = useState(""); + const [sudoTesting, setSudoTesting] = useState(false); + useEffect(() => { loadAuditLog(); + loadSudoStatus(); }, []); const loadAuditLog = async () => { @@ -44,6 +59,51 @@ export default function Security() { } }; + const loadSudoStatus = async () => { + try { + const status = await getSudoConfigStatusCmd(); + setSudoStatus(status); + } catch { + // ignore — table may not exist yet + } + }; + + const handleSaveSudo = async () => { + setSudoMessage(""); + try { + await setSudoPasswordCmd(sudoPassword, sudoUsername || undefined); + setSudoPassword(""); + setSudoMessage("Saved successfully"); + await loadSudoStatus(); + } catch (err) { + setSudoMessage(`Error: ${String(err)}`); + } + }; + + const handleTestSudo = async () => { + setSudoTesting(true); + setSudoMessage(""); + try { + const ok = await testSudoPasswordCmd(); + setSudoMessage(ok ? "Password verified" : "Authentication failed"); + } catch (err) { + setSudoMessage(`Authentication failed: ${String(err)}`); + } finally { + setSudoTesting(false); + } + }; + + const handleClearSudo = async () => { + setSudoMessage(""); + try { + await clearSudoPasswordCmd(); + setSudoMessage("Credentials cleared"); + await loadSudoStatus(); + } catch (err) { + setSudoMessage(`Error: ${String(err)}`); + } + }; + const toggleRow = (entryId: string) => { setExpandedRows((prev) => { const newSet = new Set(prev); @@ -103,6 +163,82 @@ export default function Security() { + {/* Sudo Credentials */} + + + + + Sudo Credentials + + + + {sudoStatus?.configured && ( +

+ Configured (last updated: {sudoStatus.updated_at}) +

+ )} + {sudoStatus && !sudoStatus.configured && ( +

Not configured

+ )} +
+
+ + setSudoUsername(e.target.value)} + placeholder="Leave empty for current user" + className="mt-1 w-full rounded-md border border-input bg-background px-3 py-2 text-sm" + /> +
+
+ + setSudoPassword(e.target.value)} + placeholder="Enter sudo password" + className="mt-1 w-full rounded-md border border-input bg-background px-3 py-2 text-sm" + /> +
+
+
+ + + +
+ {sudoMessage && ( +

+ {sudoMessage} +

+ )} +
+
+ {/* Audit Log */} From cf1d5adb83615938a390b2cd26a8f129bbbe8f3d Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 13:57:38 -0500 Subject: [PATCH 04/19] docs(analysis): document zip-slip safety guarantee in extract_docx_text Only a single hardcoded entry (word/document.xml) is ever accessed from the ZIP archive; no arbitrary path extraction occurs, so path traversal attacks cannot apply. Add a comment to make this invariant explicit for future maintainers. --- src-tauri/src/commands/analysis.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src-tauri/src/commands/analysis.rs b/src-tauri/src/commands/analysis.rs index 135fb12f..6a689827 100644 --- a/src-tauri/src/commands/analysis.rs +++ b/src-tauri/src/commands/analysis.rs @@ -108,6 +108,8 @@ fn extract_docx_text(path: &Path) -> Result { zip::ZipArchive::new(file).map_err(|e| format!("Failed to open as ZIP/DOCX: {e}"))?; let mut xml_content = String::new(); { + // Safety: only one hardcoded entry is ever accessed; no arbitrary path extraction is + // performed, so zip-slip path traversal attacks cannot apply here. let mut doc_xml = archive .by_name("word/document.xml") .map_err(|_| "Not a valid DOCX: missing word/document.xml".to_string())?; From 06956940e2ec6bf263c640b8f2f6d9049bc47deb Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 14:08:10 -0500 Subject: [PATCH 05/19] fix(ci): reduce AI review hallucinations in pr-review workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three changes: - Exclude Cargo.lock/lockfiles from the diff — removes ~163 lines of hash noise that waste the review budget with no value - Raise line cap from 500 to 3000 and add a truncation notice when the diff is cut, so the model knows the diff is incomplete - Harden prompt: require quoted evidence for every finding; add explicit self-verification step for missing-identifier claims (search full diff before raising); tighten no-hallucinate instruction --- .gitea/workflows/pr-review.yml | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/.gitea/workflows/pr-review.yml b/.gitea/workflows/pr-review.yml index 942b209e..87200cce 100644 --- a/.gitea/workflows/pr-review.yml +++ b/.gitea/workflows/pr-review.yml @@ -40,8 +40,17 @@ jobs: run: | set -euo pipefail git fetch origin ${{ github.base_ref }} - git diff origin/${{ github.base_ref }}..HEAD > /tmp/pr_diff.txt - echo "diff_size=$(wc -l < /tmp/pr_diff.txt | tr -d ' ')" >> $GITHUB_OUTPUT + # Exclude generated/lock files — they add noise with no review value + git diff origin/${{ github.base_ref }}..HEAD \ + -- ':!Cargo.lock' ':!package-lock.json' ':!*.lock' \ + > /tmp/pr_diff.txt + FULL_SIZE=$(wc -l < /tmp/pr_diff.txt | tr -d ' ') + echo "diff_size=${FULL_SIZE}" >> $GITHUB_OUTPUT + echo "diff_full_size=${FULL_SIZE}" >> $GITHUB_OUTPUT + # Build a manifest of changed files so the prompt can include it + git diff --name-only origin/${{ github.base_ref }}..HEAD \ + -- ':!Cargo.lock' ':!package-lock.json' ':!*.lock' \ + > /tmp/pr_files.txt - name: Analyze with LLM id: analyze @@ -57,10 +66,17 @@ jobs: if grep -q "^Binary files" /tmp/pr_diff.txt; then echo "WARNING: Binary file changes detected — they will be excluded from analysis" fi - DIFF_CONTENT=$(head -n 500 /tmp/pr_diff.txt \ + CHANGED_FILES=$(cat /tmp/pr_files.txt | tr '\n' ' ') + FULL_LINES=$(wc -l < /tmp/pr_diff.txt | tr -d ' ') + DIFF_CONTENT=$(head -n 3000 /tmp/pr_diff.txt \ | grep -v -E '^[+-].*(password[[:space:]]*[=:"'"'"']|token[[:space:]]*[=:"'"'"']|secret[[:space:]]*[=:"'"'"']|api_key[[:space:]]*[=:"'"'"']|private_key[[:space:]]*[=:"'"'"']|Authorization:[[:space:]]|AKIA[A-Z0-9]{16}|xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}|gh[opsu]_[A-Za-z0-9_]{36,}|https?://[^@[:space:]]+:[^@[:space:]]+@)' \ | grep -v -E '^[+-].*[A-Za-z0-9+/]{40,}={0,2}([^A-Za-z0-9+/=]|$)') - PROMPT="You are a senior engineer performing a focused code review. Your review must be grounded strictly in the diff provided — do not invent issues about code you cannot see.\n\nPR Title: ${PR_TITLE}\n\nDiff:\n${DIFF_CONTENT}\n\n## Instructions\n\n1. **Read the entire diff first.** Before raising any issue, verify it against the actual lines in the diff. If something appears to be missing, confirm it is absent from ALL relevant files in the diff before claiming it is missing.\n\n2. **Quote the evidence.** For every issue you raise, cite the specific file and line from the diff that supports your claim. If you cannot quote a line, do not raise the issue.\n\n3. **Distinguish severity clearly:**\n - BLOCKER: broken right now, will cause crashes or data loss\n - WARNING: works but has a real risk that should be addressed before merge\n - SUGGESTION: improvement worth considering in a follow-up PR\n\n4. **Do not raise issues about code outside the diff.** If a concern requires reading files not present in the diff, say 'outside the scope of this diff' and skip it.\n\n5. **Keep it concise.** Lead with a one-paragraph summary, then list only genuine findings with evidence. Avoid restating what the code already does correctly unless it is directly relevant to a finding.\n\n## Output format\n\n**Summary** (1 paragraph)\n\n**Findings** (only real issues with quoted evidence)\n- [BLOCKER/WARNING/SUGGESTION] filename:line — description and suggested fix\n\n**Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES" + if [ "$FULL_LINES" -gt 3000 ]; then + TRUNCATION_NOTICE="NOTE: This diff was truncated at 3000 lines (full diff is ${FULL_LINES} lines). Code appearing near the end of the diff may be incomplete. Do NOT raise findings about code that appears to be missing or incomplete — it may simply be outside the truncated window." + else + TRUNCATION_NOTICE="This diff is complete (${FULL_LINES} lines, no truncation)." + fi + PROMPT="You are a senior engineer performing a focused code review. Your review must be grounded strictly in the diff provided — do not speculate about code you cannot see.\n\nPR Title: ${PR_TITLE}\nFiles changed: ${CHANGED_FILES}\n${TRUNCATION_NOTICE}\n\nDiff:\n${DIFF_CONTENT}\n\n## Instructions\n\n1. **Read the entire diff before writing anything.** Do not begin composing your review until you have read every line of the diff above.\n\n2. **Verify before claiming.** Before raising any finding:\n a. Quote the exact line(s) from the diff that support it.\n b. For findings about missing identifiers (undeclared variables, missing parameters, undefined functions): search the ENTIRE diff for the identifier. If it appears anywhere in the diff, the finding is WRONG — discard it.\n c. If you cannot quote supporting evidence from the diff, do not raise the finding.\n\n3. **Do not hallucinate.** You may only raise issues visible in the diff. If a concern requires reading a file not shown in the diff, write 'outside the scope of this diff' and skip it. Never infer that code is broken from partial information.\n\n4. **Distinguish severity:**\n - BLOCKER: provably broken from what is shown — will fail to compile or cause data loss\n - WARNING: works but has a real risk that should be addressed before merge\n - SUGGESTION: improvement worth considering in a follow-up PR\n\n5. **Keep it concise.** Lead with a one-paragraph summary, then list only genuine findings with quoted evidence.\n\n## Output format\n\n**Summary** (1 paragraph)\n\n**Findings** (each must include a quoted line from the diff)\n- [BLOCKER/WARNING/SUGGESTION] filename:line — description, quoted evidence, suggested fix\n\n**Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES" BODY=$(jq -cn \ --arg model "qwen3-coder-next" \ --arg content "$PROMPT" \ From f6787accd65d33ecf4a5d521cbaac8aee42cd3a3 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 14:19:29 -0500 Subject: [PATCH 06/19] fix(agentic): inline format arg in writeln! to satisfy clippy::uninlined_format_args Rust 1.88 enforces clippy::uninlined_format_args as a style lint under -D warnings. Change `writeln!(stdin, "{}", password)` to the inline form `writeln!(stdin, "{password}")`. --- src-tauri/src/commands/agentic.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src-tauri/src/commands/agentic.rs b/src-tauri/src/commands/agentic.rs index 4b1a79ea..c9b71ad5 100644 --- a/src-tauri/src/commands/agentic.rs +++ b/src-tauri/src/commands/agentic.rs @@ -25,7 +25,7 @@ pub fn run_sudo_command(password: &str, args: &[&str]) -> Result Date: Sun, 31 May 2026 14:24:56 -0500 Subject: [PATCH 07/19] fix(ci): rewrite pr-review to send full file contents instead of diffs --- .gitea/workflows/pr-review.yml | 87 +++++++++++++++++++++++----------- 1 file changed, 59 insertions(+), 28 deletions(-) diff --git a/.gitea/workflows/pr-review.yml b/.gitea/workflows/pr-review.yml index 87200cce..4df1f408 100644 --- a/.gitea/workflows/pr-review.yml +++ b/.gitea/workflows/pr-review.yml @@ -34,27 +34,67 @@ jobs: git fetch --depth=1 origin ${{ github.head_ref }} git checkout FETCH_HEAD - - name: Get PR diff - id: diff + - name: Build review context + id: context shell: bash run: | set -euo pipefail git fetch origin ${{ github.base_ref }} - # Exclude generated/lock files — they add noise with no review value - git diff origin/${{ github.base_ref }}..HEAD \ - -- ':!Cargo.lock' ':!package-lock.json' ':!*.lock' \ - > /tmp/pr_diff.txt - FULL_SIZE=$(wc -l < /tmp/pr_diff.txt | tr -d ' ') - echo "diff_size=${FULL_SIZE}" >> $GITHUB_OUTPUT - echo "diff_full_size=${FULL_SIZE}" >> $GITHUB_OUTPUT - # Build a manifest of changed files so the prompt can include it + + # List changed source files (exclude generated/lock files) git diff --name-only origin/${{ github.base_ref }}..HEAD \ -- ':!Cargo.lock' ':!package-lock.json' ':!*.lock' \ > /tmp/pr_files.txt + FILE_COUNT=$(wc -l < /tmp/pr_files.txt | tr -d ' ') + echo "files_changed=${FILE_COUNT}" >> $GITHUB_OUTPUT + + if [ "$FILE_COUNT" -eq 0 ]; then + echo "No reviewable files changed." + echo "diff_size=0" >> $GITHUB_OUTPUT + exit 0 + fi + + # Build context: full file content for each changed file. + # Files <= 500 lines: include complete content. + # Files > 500 lines: include the per-file diff with generous context (±50 lines). + # Secret scrubbing applied to both paths. + SECRET_PATTERN='^([[:space:]]*[+\-]?[[:space:]]*).*[pP]assword[[:space:]]*[=:"'"'"']|[tT]oken[[:space:]]*[=:"'"'"']|[aA][pP][iI][_][kK]ey[[:space:]]*[=:"'"'"']|AKIA[A-Z0-9]{16}|gh[opsu]_[A-Za-z0-9_]{36,}|Authorization:[[:space:]]' + B64_PATTERN='^[[:space:]]*[+\-]?[[:space:]]*[A-Za-z0-9+/]{40,}={0,2}([^A-Za-z0-9+/=]|$)' + + > /tmp/pr_context.txt + while IFS= read -r file; do + [ -f "$file" ] || continue + lines=$(wc -l < "$file" | tr -d ' ') + printf '\n════════ FILE: %s (%s lines) ════════\n' "$file" "$lines" >> /tmp/pr_context.txt + if [ "$lines" -le 500 ]; then + # Full file — model sees the complete implementation + grep -v -E "$SECRET_PATTERN" "$file" \ + | grep -v -E "$B64_PATTERN" \ + >> /tmp/pr_context.txt || true + else + # Large file — emit annotated diff hunks (±50 lines of context each) + printf '[File too large for full view (%s lines) — showing changed sections only]\n' "$lines" >> /tmp/pr_context.txt + git diff -U50 origin/${{ github.base_ref }}..HEAD -- "$file" \ + | grep -v -E "$SECRET_PATTERN" \ + | grep -v -E "$B64_PATTERN" \ + >> /tmp/pr_context.txt || true + fi + done < /tmp/pr_files.txt + + TOTAL=$(wc -l < /tmp/pr_context.txt | tr -d ' ') + echo "diff_size=${TOTAL}" >> $GITHUB_OUTPUT + + # Cap at 6000 lines so we stay within the model's context window + if [ "$TOTAL" -gt 6000 ]; then + head -n 6000 /tmp/pr_context.txt > /tmp/pr_context_capped.txt + mv /tmp/pr_context_capped.txt /tmp/pr_context.txt + echo "[CONTEXT TRUNCATED at 6000 lines — ${TOTAL} total]" >> /tmp/pr_context.txt + fi + - name: Analyze with LLM id: analyze - if: steps.diff.outputs.diff_size != '0' + if: steps.context.outputs.diff_size != '0' shell: bash env: LITELLM_URL: http://172.0.0.29:11434/v1 @@ -63,20 +103,11 @@ jobs: PR_NUMBER: ${{ github.event.pull_request.number }} run: | set -euo pipefail - if grep -q "^Binary files" /tmp/pr_diff.txt; then - echo "WARNING: Binary file changes detected — they will be excluded from analysis" - fi - CHANGED_FILES=$(cat /tmp/pr_files.txt | tr '\n' ' ') - FULL_LINES=$(wc -l < /tmp/pr_diff.txt | tr -d ' ') - DIFF_CONTENT=$(head -n 3000 /tmp/pr_diff.txt \ - | grep -v -E '^[+-].*(password[[:space:]]*[=:"'"'"']|token[[:space:]]*[=:"'"'"']|secret[[:space:]]*[=:"'"'"']|api_key[[:space:]]*[=:"'"'"']|private_key[[:space:]]*[=:"'"'"']|Authorization:[[:space:]]|AKIA[A-Z0-9]{16}|xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}|gh[opsu]_[A-Za-z0-9_]{36,}|https?://[^@[:space:]]+:[^@[:space:]]+@)' \ - | grep -v -E '^[+-].*[A-Za-z0-9+/]{40,}={0,2}([^A-Za-z0-9+/=]|$)') - if [ "$FULL_LINES" -gt 3000 ]; then - TRUNCATION_NOTICE="NOTE: This diff was truncated at 3000 lines (full diff is ${FULL_LINES} lines). Code appearing near the end of the diff may be incomplete. Do NOT raise findings about code that appears to be missing or incomplete — it may simply be outside the truncated window." - else - TRUNCATION_NOTICE="This diff is complete (${FULL_LINES} lines, no truncation)." - fi - PROMPT="You are a senior engineer performing a focused code review. Your review must be grounded strictly in the diff provided — do not speculate about code you cannot see.\n\nPR Title: ${PR_TITLE}\nFiles changed: ${CHANGED_FILES}\n${TRUNCATION_NOTICE}\n\nDiff:\n${DIFF_CONTENT}\n\n## Instructions\n\n1. **Read the entire diff before writing anything.** Do not begin composing your review until you have read every line of the diff above.\n\n2. **Verify before claiming.** Before raising any finding:\n a. Quote the exact line(s) from the diff that support it.\n b. For findings about missing identifiers (undeclared variables, missing parameters, undefined functions): search the ENTIRE diff for the identifier. If it appears anywhere in the diff, the finding is WRONG — discard it.\n c. If you cannot quote supporting evidence from the diff, do not raise the finding.\n\n3. **Do not hallucinate.** You may only raise issues visible in the diff. If a concern requires reading a file not shown in the diff, write 'outside the scope of this diff' and skip it. Never infer that code is broken from partial information.\n\n4. **Distinguish severity:**\n - BLOCKER: provably broken from what is shown — will fail to compile or cause data loss\n - WARNING: works but has a real risk that should be addressed before merge\n - SUGGESTION: improvement worth considering in a follow-up PR\n\n5. **Keep it concise.** Lead with a one-paragraph summary, then list only genuine findings with quoted evidence.\n\n## Output format\n\n**Summary** (1 paragraph)\n\n**Findings** (each must include a quoted line from the diff)\n- [BLOCKER/WARNING/SUGGESTION] filename:line — description, quoted evidence, suggested fix\n\n**Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES" + CHANGED_FILES=$(tr '\n' ' ' < /tmp/pr_files.txt) + CONTEXT=$(cat /tmp/pr_context.txt) + + PROMPT="You are a senior engineer performing a code review for the following pull request.\n\nPR Title: ${PR_TITLE}\nFiles changed: ${CHANGED_FILES}\n\n## What you are reading\n\nEach section below contains the COMPLETE, FINAL content of one changed file after the PR's changes have been applied. This is not a diff — it is the full file. For files over 500 lines, only the changed sections are shown (marked with + / - lines), but surrounding context is included.\n\nYou have full visibility into every function signature, every variable, every import in each file. There are no missing parameters, no truncated signatures, no partial implementations.\n\n---\n${CONTEXT}\n---\n\n## Instructions\n\nRead every file above completely before writing anything.\n\nThen, for each potential issue:\n1. Confirm it exists in the code above — quote the exact line.\n2. Confirm it is a real problem (not something that looks unusual but is intentional).\n3. If either check fails, discard the finding silently — do not mention it in your output.\n\nDo NOT show your verification reasoning. Do NOT mention findings you discarded. Only output confirmed issues.\n\nSeverity levels:\n- BLOCKER: provably broken — will fail to compile, corrupt data, or introduce a security vulnerability\n- WARNING: works today but carries real risk that should be fixed before merge\n- SUGGESTION: minor improvement worth a follow-up PR\n\nFocus on: security bugs, logic errors, data loss, race conditions, injection vectors, unhandled error paths that could silently corrupt state.\n\nIgnore: style preferences, missing comments, code organisation opinions, speculative future improvements.\n\n## Output format (strict — do not deviate)\n\n**Summary** (2-3 sentences describing what the PR does and your overall assessment)\n\n**Findings**\n- [SEVERITY] file:line — one-line description\n Evidence: exact quoted line(s)\n Fix: concrete suggested change\n\n(If there are no findings, write: No findings.)\n\n**Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES" + BODY=$(jq -cn \ --arg model "qwen3-coder-next" \ --arg content "$PROMPT" \ @@ -110,7 +141,7 @@ jobs: echo "$REVIEW" > /tmp/pr_review.txt - name: Post review comment - if: always() && steps.diff.outputs.diff_size != '0' + if: always() && steps.context.outputs.diff_size != '0' shell: bash env: TF_TOKEN: ${{ secrets.TFT_GITEA_TOKEN }} @@ -125,7 +156,7 @@ jobs: if [ -f "/tmp/pr_review.txt" ] && [ -s "/tmp/pr_review.txt" ]; then REVIEW_BODY=$(head -c 65536 /tmp/pr_review.txt) BODY=$(jq -n \ - --arg body "Automated PR Review (qwen3-coder-next via liteLLM):\n\n${REVIEW_BODY}\n\n---\n*automated code review*" \ + --arg body "Automated PR Review (qwen3-coder-next via liteLLM):\n\n${REVIEW_BODY}" \ '{body: $body, event: "COMMENT"}') else BODY=$(jq -n \ @@ -147,4 +178,4 @@ jobs: - name: Cleanup if: always() shell: bash - run: rm -f /tmp/pr_diff.txt /tmp/llm_response.json /tmp/pr_review.txt /tmp/review_post_response.json + run: rm -f /tmp/pr_diff.txt /tmp/pr_context.txt /tmp/llm_response.json /tmp/pr_review.txt /tmp/pr_review_post_response.json /tmp/pr_files.txt From 6373f0b09cb8536662bd2a3b5f4948fd5b00b246 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 14:33:44 -0500 Subject: [PATCH 08/19] fix(ci): fix secret scrubbing regex that was deleting legitimate code lines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous regex matched any line containing "password", "token", etc. near certain punctuation characters. This silently removed function signatures, variable declarations, and test assertions from the context sent to the LLM — causing it to hallucinate 3 BLOCKERs per review: - "function signature missing" (the `password: &str` param was scrubbed) - "filter body empty" (the filter condition containing "password" was scrubbed) - "password passed unencrypted" (the decrypt_token call line was scrubbed) Fix: match actual credential VALUES only: - Well-known token formats (AKIA..., ghp_..., xox...) - keyword = "long_quoted_literal" (25+ chars, clearly a value not a name) - Standalone base64 blob lines (60+ chars, PEM-style) Never scrub a line just because it contains a credential-related word. --- .gitea/workflows/pr-review.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.gitea/workflows/pr-review.yml b/.gitea/workflows/pr-review.yml index 4df1f408..065600a8 100644 --- a/.gitea/workflows/pr-review.yml +++ b/.gitea/workflows/pr-review.yml @@ -58,9 +58,13 @@ jobs: # Build context: full file content for each changed file. # Files <= 500 lines: include complete content. # Files > 500 lines: include the per-file diff with generous context (±50 lines). - # Secret scrubbing applied to both paths. - SECRET_PATTERN='^([[:space:]]*[+\-]?[[:space:]]*).*[pP]assword[[:space:]]*[=:"'"'"']|[tT]oken[[:space:]]*[=:"'"'"']|[aA][pP][iI][_][kK]ey[[:space:]]*[=:"'"'"']|AKIA[A-Z0-9]{16}|gh[opsu]_[A-Za-z0-9_]{36,}|Authorization:[[:space:]]' - B64_PATTERN='^[[:space:]]*[+\-]?[[:space:]]*[A-Za-z0-9+/]{40,}={0,2}([^A-Za-z0-9+/=]|$)' + # + # Secret scrubbing: match actual credential VALUES only — known API key formats, + # or keyword="long_quoted_literal" (25+ chars). Never scrub on keyword alone, + # which would silently delete function signatures, variable declarations, and tests. + SECRET_PATTERN='AKIA[A-Z0-9]{16}|gh[opsu]_[A-Za-z0-9_]{36,}|xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}|(password|token|api_key|secret)[[:space:]]*=[[:space:]]*["'"'"'][A-Za-z0-9+/_\-!@#]{25,}["'"'"']' + # Only strip lines that are ENTIRELY a long base64 blob (e.g. PEM cert bodies) + B64_PATTERN='^[[:space:]]*[A-Za-z0-9+/]{60,}={0,2}[[:space:]]*$' > /tmp/pr_context.txt while IFS= read -r file; do From cf5bc83b7519d9eabc86141083ad4d6965120547 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 14:41:47 -0500 Subject: [PATCH 09/19] fix(ci): add post-generation evidence verification to pr-review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qwen3-coder-next fabricates plausible-looking code in its Evidence blocks instead of quoting from the actual files provided. This adds a Python verification step that greps each fenced code block against the real changed files and tags any finding whose evidence cannot be found as UNVERIFIED. This is a safeguard, not a fix — the model is fundamentally unreliable for grounded code review. The longer-term fix is to replace qwen3-coder with a model that stays grounded to context (Claude Haiku, devstral, or deepseek-coder-v2 via the LiteLLM proxy / vLLM at 172.0.1.42). --- .gitea/workflows/pr-review.yml | 63 ++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/.gitea/workflows/pr-review.yml b/.gitea/workflows/pr-review.yml index 065600a8..b99e6f6b 100644 --- a/.gitea/workflows/pr-review.yml +++ b/.gitea/workflows/pr-review.yml @@ -144,6 +144,69 @@ jobs: echo "Review length: ${#REVIEW} chars" echo "$REVIEW" > /tmp/pr_review.txt + - name: Verify findings against codebase + if: steps.analyze.outcome == 'success' + shell: bash + run: | + set -euo pipefail + # For each finding that contains a fenced code block under "Evidence:", + # grep at least one substantial line of that block against the actual changed + # files. If nothing matches, prepend a visible UNVERIFIED tag so reviewers + # know the model fabricated the evidence. + python3 - << 'PYEOF' + import re, os + + review = open('/tmp/pr_review.txt').read() + filelist = [f.strip() for f in open('/tmp/pr_files.txt') if f.strip()] + + # Load content of every changed file + repo_text = {} + for path in filelist: + if os.path.isfile(path): + try: + repo_text[path] = open(path).read() + except Exception: + pass + + all_content = '\n'.join(repo_text.values()) + + def evidence_exists(block: str) -> bool: + """True if ≥1 significant line from the block is found verbatim in changed files.""" + for raw in block.splitlines(): + line = raw.lstrip('+-').strip() + # Skip blank, very short, pure-comment, or diff-header lines + if len(line) < 20: + continue + if line.startswith(('//','#','/*','*','Fix:','Evidence:','---','+++')): + continue + if line in all_content: + return True + return False + + # Split on finding markers; re-join after optional tagging + severity_re = re.compile(r'\[(BLOCKER|WARNING|SUGGESTION)\]') + + def tag_if_unverified(finding_text: str) -> str: + code_match = re.search(r'```[^\n]*\n(.*?)```', finding_text, re.DOTALL) + if code_match and not evidence_exists(code_match.group(1)): + # Replace first severity tag with a prefixed version + return severity_re.sub( + lambda m: f'[{m.group(1)} — ⚠️ UNVERIFIED: evidence not found in PR files]', + finding_text, count=1 + ) + return finding_text + + # Split review into preamble + individual finding blocks + # Each block starts at a severity marker line + parts = re.split(r'(?=^\[(?:BLOCKER|WARNING|SUGGESTION)\])', review, flags=re.MULTILINE) + result = parts[0] # preamble (Summary, etc.) + for block in parts[1:]: + result += tag_if_unverified(block) + + open('/tmp/pr_review.txt', 'w').write(result) + print(f"Verification complete — {len(parts)-1} finding(s) checked.") + PYEOF + - name: Post review comment if: always() && steps.context.outputs.diff_size != '0' shell: bash From 93a0c3f1eeb2b03e8eff14215ce836d15c8a98f2 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 14:48:32 -0500 Subject: [PATCH 10/19] fix(ci): add codebase index to prompt; verify findings against full repo Two changes to reduce hallucinations in pr-review: 1. Codebase index (new step "Build codebase index"): Generates a compact manifest of everything that EXISTS in the project: - All registered Tauri commands (from lib.rs generate_handler![]) - All TypeScript exports (from tauriCommands.ts) - All public Rust fn signatures in commands/ - All DB migration names This index is prepended to the prompt so the model cannot invent functions like authenticate_sudo or continue_chat_history that are absent from both the index and the file contents. 2. Full-repo verification (updated "Verify findings" step): Previously only grepped changed files, which falsely tagged findings about unchanged-but-real code as UNVERIFIED. Now runs git ls-files to load all tracked source files, so verification only fails for code that genuinely does not exist anywhere in the codebase. If qwen3-coder continues to hallucinate after these changes, swap the model name on line 184 to bedrock-personal or claude-haiku. --- .gitea/workflows/pr-review.yml | 78 ++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 14 deletions(-) diff --git a/.gitea/workflows/pr-review.yml b/.gitea/workflows/pr-review.yml index b99e6f6b..1558824d 100644 --- a/.gitea/workflows/pr-review.yml +++ b/.gitea/workflows/pr-review.yml @@ -96,6 +96,49 @@ jobs: echo "[CONTEXT TRUNCATED at 6000 lines — ${TOTAL} total]" >> /tmp/pr_context.txt fi + - name: Build codebase index + id: index + if: steps.context.outputs.diff_size != '0' + shell: bash + run: | + set -euo pipefail + # Build a compact index of everything that EXISTS in this codebase. + # Included in the prompt so the model cannot invent functions/commands/tables + # that are not present — any finding referencing something absent from this + # index is immediately suspect. + { + echo "## CODEBASE INDEX" + echo "These are the ONLY Tauri commands, TypeScript exports, Rust public functions," + echo "and database tables that exist in this project. Before raising any finding," + echo "confirm that every symbol you cite appears in this list or in the file" + echo "contents below. If it does not appear in either, your finding is fabricated." + echo "" + + echo "### Registered Tauri commands (lib.rs generate_handler![]):" + grep -oE 'commands::[a-z_]+::[a-z_]+' src-tauri/src/lib.rs 2>/dev/null \ + | sort -u | sed 's/^/ /' || true + echo "" + + echo "### TypeScript invoke wrappers (src/lib/tauriCommands.ts):" + grep -E '^export (const|interface|type) ' src/lib/tauriCommands.ts 2>/dev/null \ + | sed 's/^/ /' || true + echo "" + + echo "### Public Rust functions in src-tauri/src/commands/:" + grep -rh --include='*.rs' '^pub ' src-tauri/src/commands/ 2>/dev/null \ + | grep 'fn ' | sed 's/^/ /' | sort || true + echo "" + + echo "### Database tables (src-tauri/src/db/migrations.rs):" + grep -oE '"[0-9]+_[a-z_]+"' src-tauri/src/db/migrations.rs 2>/dev/null \ + | tr -d '"' | sed 's/^/ /' || true + echo "" + } > /tmp/codebase_index.txt + + INDEX_LINES=$(wc -l < /tmp/codebase_index.txt | tr -d ' ') + echo "index_lines=${INDEX_LINES}" >> $GITHUB_OUTPUT + echo "Built codebase index: ${INDEX_LINES} lines" + - name: Analyze with LLM id: analyze if: steps.context.outputs.diff_size != '0' @@ -108,9 +151,10 @@ jobs: run: | set -euo pipefail CHANGED_FILES=$(tr '\n' ' ' < /tmp/pr_files.txt) + INDEX=$(cat /tmp/codebase_index.txt) CONTEXT=$(cat /tmp/pr_context.txt) - PROMPT="You are a senior engineer performing a code review for the following pull request.\n\nPR Title: ${PR_TITLE}\nFiles changed: ${CHANGED_FILES}\n\n## What you are reading\n\nEach section below contains the COMPLETE, FINAL content of one changed file after the PR's changes have been applied. This is not a diff — it is the full file. For files over 500 lines, only the changed sections are shown (marked with + / - lines), but surrounding context is included.\n\nYou have full visibility into every function signature, every variable, every import in each file. There are no missing parameters, no truncated signatures, no partial implementations.\n\n---\n${CONTEXT}\n---\n\n## Instructions\n\nRead every file above completely before writing anything.\n\nThen, for each potential issue:\n1. Confirm it exists in the code above — quote the exact line.\n2. Confirm it is a real problem (not something that looks unusual but is intentional).\n3. If either check fails, discard the finding silently — do not mention it in your output.\n\nDo NOT show your verification reasoning. Do NOT mention findings you discarded. Only output confirmed issues.\n\nSeverity levels:\n- BLOCKER: provably broken — will fail to compile, corrupt data, or introduce a security vulnerability\n- WARNING: works today but carries real risk that should be fixed before merge\n- SUGGESTION: minor improvement worth a follow-up PR\n\nFocus on: security bugs, logic errors, data loss, race conditions, injection vectors, unhandled error paths that could silently corrupt state.\n\nIgnore: style preferences, missing comments, code organisation opinions, speculative future improvements.\n\n## Output format (strict — do not deviate)\n\n**Summary** (2-3 sentences describing what the PR does and your overall assessment)\n\n**Findings**\n- [SEVERITY] file:line — one-line description\n Evidence: exact quoted line(s)\n Fix: concrete suggested change\n\n(If there are no findings, write: No findings.)\n\n**Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES" + PROMPT="You are a senior engineer performing a code review for the following pull request.\n\nPR Title: ${PR_TITLE}\nFiles changed: ${CHANGED_FILES}\n\n---\n${INDEX}\n---\n\n## Changed file contents\n\nEach section below contains the COMPLETE, FINAL content of one changed file. This is the full file after the PR's changes — not a diff. For files over 500 lines, only the changed sections are shown with surrounding context.\n\n---\n${CONTEXT}\n---\n\n## Instructions\n\nBefore writing any finding:\n1. Check that every function name, command name, and variable you cite exists in the CODEBASE INDEX above or in the file contents above. If it does not appear in either location, it does not exist — discard the finding.\n2. Quote the exact line(s) from the file contents that support the finding.\n3. Confirm the issue is a real problem, not intentional design.\n4. If any check fails, discard the finding silently — do not mention it.\n\nDo NOT show your reasoning. Do NOT list discarded findings. Only output confirmed issues.\n\nSeverity:\n- BLOCKER: will fail to compile, corrupt data, or introduce a security vulnerability\n- WARNING: real risk that should be fixed before merge\n- SUGGESTION: minor improvement, follow-up PR acceptable\n\nFocus on: security bugs, logic errors, data loss, injection vectors, unhandled error paths.\nIgnore: style, missing comments, speculative future concerns.\n\n## Output format (do not deviate)\n\n**Summary** (2-3 sentences: what the PR does and your overall assessment)\n\n**Findings**\n- [SEVERITY] file:line — description\n Evidence: `exact quoted line`\n Fix: concrete change\n\n(Write: No findings — if there are none.)\n\n**Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES" BODY=$(jq -cn \ --arg model "qwen3-coder-next" \ @@ -150,25 +194,31 @@ jobs: run: | set -euo pipefail # For each finding that contains a fenced code block under "Evidence:", - # grep at least one substantial line of that block against the actual changed - # files. If nothing matches, prepend a visible UNVERIFIED tag so reviewers - # know the model fabricated the evidence. + # grep at least one substantial line of that block against the FULL repository. + # Searching the full repo (not just changed files) prevents false UNVERIFIED + # tags when the model correctly quotes unchanged files, while still flagging + # fabricated code that doesn't exist anywhere in the codebase. python3 - << 'PYEOF' - import re, os + import re, os, subprocess - review = open('/tmp/pr_review.txt').read() - filelist = [f.strip() for f in open('/tmp/pr_files.txt') if f.strip()] + review = open('/tmp/pr_review.txt').read() - # Load content of every changed file - repo_text = {} - for path in filelist: + # Load ENTIRE tracked repository (all .rs, .ts, .tsx, .yml, .toml, .json files) + result = subprocess.run( + ['git', 'ls-files', '--', + '*.rs', '*.ts', '*.tsx', '*.yml', '*.yaml', '*.toml', '*.json', '*.sql'], + capture_output=True, text=True + ) + all_tracked = [f.strip() for f in result.stdout.splitlines() if f.strip()] + + all_content_parts = [] + for path in all_tracked: if os.path.isfile(path): try: - repo_text[path] = open(path).read() + all_content_parts.append(open(path).read()) except Exception: pass - - all_content = '\n'.join(repo_text.values()) + all_content = '\n'.join(all_content_parts) def evidence_exists(block: str) -> bool: """True if ≥1 significant line from the block is found verbatim in changed files.""" @@ -245,4 +295,4 @@ jobs: - name: Cleanup if: always() shell: bash - run: rm -f /tmp/pr_diff.txt /tmp/pr_context.txt /tmp/llm_response.json /tmp/pr_review.txt /tmp/pr_review_post_response.json /tmp/pr_files.txt + run: rm -f /tmp/pr_diff.txt /tmp/pr_context.txt /tmp/codebase_index.txt /tmp/llm_response.json /tmp/pr_review.txt /tmp/review_post_response.json /tmp/pr_files.txt From 4f70fd7fb8d944e8f434f32bd67730552ec5a169 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 14:53:21 -0500 Subject: [PATCH 11/19] fix(ci): fix backtick command substitution crash in pr-review prompt The PROMPT string contained backtick-quoted text for the Evidence field example. Inside a double-quoted bash string, backticks trigger command substitution, causing 'exact: command not found' at runtime. Fix: build the prompt using a single-quoted heredoc (no shell expansion inside) then splice dynamic values via sed and python3 replace() instead of shell variable interpolation. --- .gitea/workflows/pr-review.yml | 74 +++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/.gitea/workflows/pr-review.yml b/.gitea/workflows/pr-review.yml index 1558824d..6e7e47b9 100644 --- a/.gitea/workflows/pr-review.yml +++ b/.gitea/workflows/pr-review.yml @@ -154,7 +154,79 @@ jobs: INDEX=$(cat /tmp/codebase_index.txt) CONTEXT=$(cat /tmp/pr_context.txt) - PROMPT="You are a senior engineer performing a code review for the following pull request.\n\nPR Title: ${PR_TITLE}\nFiles changed: ${CHANGED_FILES}\n\n---\n${INDEX}\n---\n\n## Changed file contents\n\nEach section below contains the COMPLETE, FINAL content of one changed file. This is the full file after the PR's changes — not a diff. For files over 500 lines, only the changed sections are shown with surrounding context.\n\n---\n${CONTEXT}\n---\n\n## Instructions\n\nBefore writing any finding:\n1. Check that every function name, command name, and variable you cite exists in the CODEBASE INDEX above or in the file contents above. If it does not appear in either location, it does not exist — discard the finding.\n2. Quote the exact line(s) from the file contents that support the finding.\n3. Confirm the issue is a real problem, not intentional design.\n4. If any check fails, discard the finding silently — do not mention it.\n\nDo NOT show your reasoning. Do NOT list discarded findings. Only output confirmed issues.\n\nSeverity:\n- BLOCKER: will fail to compile, corrupt data, or introduce a security vulnerability\n- WARNING: real risk that should be fixed before merge\n- SUGGESTION: minor improvement, follow-up PR acceptable\n\nFocus on: security bugs, logic errors, data loss, injection vectors, unhandled error paths.\nIgnore: style, missing comments, speculative future concerns.\n\n## Output format (do not deviate)\n\n**Summary** (2-3 sentences: what the PR does and your overall assessment)\n\n**Findings**\n- [SEVERITY] file:line — description\n Evidence: `exact quoted line`\n Fix: concrete change\n\n(Write: No findings — if there are none.)\n\n**Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES" + # Build the prompt via a single-quoted heredoc so the shell never + # interprets backticks, dollar signs, or other special characters inside. + # Variables that must expand ($PR_TITLE etc.) are spliced in by jq --arg, + # not by shell interpolation, so the prompt text itself is always literal. + PROMPT_TEMPLATE=$(cat << 'ENDPROMPT' +You are a senior engineer performing a code review for the following pull request. + +PR Title: __PR_TITLE__ +Files changed: __CHANGED_FILES__ + +--- +__INDEX__ +--- + +## Changed file contents + +Each section below contains the COMPLETE, FINAL content of one changed file after +the PR's changes have been applied. This is the full file — not a diff. For files +over 500 lines, only the changed sections are shown with surrounding context. + +--- +__CONTEXT__ +--- + +## Instructions + +Before raising any finding: +1. Confirm every symbol (function name, command name, variable) you cite exists in + the CODEBASE INDEX above or in the file contents above. If it appears in neither, + discard the finding — it does not exist in this project. +2. Quote the exact line(s) from the file contents that support the finding. +3. Confirm the issue is a genuine problem, not intentional design. +4. If any step fails, discard the finding silently — do not mention it. + +Do NOT show your reasoning process. Do NOT mention discarded findings. +Output only confirmed issues. + +Severity levels: +- BLOCKER: will fail to compile, corrupt data, or introduce a security vulnerability +- WARNING: real risk that should be addressed before merge +- SUGGESTION: minor improvement, acceptable as a follow-up PR + +Focus on: security bugs, logic errors, data loss, injection vectors, unhandled +error paths that could silently corrupt state. +Ignore: style preferences, missing comments, speculative future concerns. + +## Output format (do not deviate) + +**Summary** (2-3 sentences: what the PR does and your overall assessment) + +**Findings** +- [SEVERITY] file:line -- description + Evidence: quoted line from the file above + Fix: concrete suggested change + +(Write "No findings." if there are none.) + +**Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES +ENDPROMPT +) + + # Splice runtime values into the template using sed so nothing is eval'd + PROMPT=$(printf '%s' "$PROMPT_TEMPLATE" \ + | sed "s|__PR_TITLE__|${PR_TITLE}|g" \ + | sed "s|__CHANGED_FILES__|${CHANGED_FILES}|g") + # INDEX and CONTEXT may contain special sed chars — use python for those + PROMPT=$(python3 -c " +import sys +template = sys.stdin.read() +index = open('/tmp/codebase_index.txt').read() +context = open('/tmp/pr_context.txt').read() +print(template.replace('__INDEX__', index).replace('__CONTEXT__', context), end='') +" <<< "$PROMPT") BODY=$(jq -cn \ --arg model "qwen3-coder-next" \ From f8c0d247e896db50e8012e2381946a2ab891328c Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 14:59:58 -0500 Subject: [PATCH 12/19] fix(ci): remove concurrency group that silently dropped pr-review runs Gitea 1.22 cancel-in-progress does not behave like GitHub Actions: when a new synchronize event arrives while a review is running, instead of cancelling the running job and starting a new one, it drops the new run silently. Remove the concurrency block entirely so every commit to a PR gets its own review run. --- .gitea/workflows/pr-review.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitea/workflows/pr-review.yml b/.gitea/workflows/pr-review.yml index 6e7e47b9..3e1eb9aa 100644 --- a/.gitea/workflows/pr-review.yml +++ b/.gitea/workflows/pr-review.yml @@ -4,9 +4,6 @@ on: pull_request: types: [opened, synchronize, reopened, edited] -concurrency: - group: pr-review-${{ github.event.pull_request.number }} - cancel-in-progress: true jobs: review: From 3d6270fb33705e545475dbcd9efd5a2731014ceb Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 15:06:09 -0500 Subject: [PATCH 13/19] fix(ci): replace heredoc with printf to fix YAML block scalar breakage Shell heredocs with unindented bodies (line 1 content) terminate YAML run: | block scalars. The YAML parser sees the unindented heredoc body as leaving the block, making the workflow file unparseable -- Gitea silently stops creating runs for a workflow with invalid YAML. Replace the single-quoted heredoc prompt with a group of printf + cat calls. Every line stays properly indented within the YAML block scalar. Use jq --rawfile instead of --arg to load the prompt from a temp file, which also eliminates shell escaping hazards for large strings. --- .gitea/workflows/pr-review.yml | 118 ++++++++++++--------------------- 1 file changed, 41 insertions(+), 77 deletions(-) diff --git a/.gitea/workflows/pr-review.yml b/.gitea/workflows/pr-review.yml index 3e1eb9aa..4b4689dd 100644 --- a/.gitea/workflows/pr-review.yml +++ b/.gitea/workflows/pr-review.yml @@ -148,86 +148,50 @@ jobs: run: | set -euo pipefail CHANGED_FILES=$(tr '\n' ' ' < /tmp/pr_files.txt) - INDEX=$(cat /tmp/codebase_index.txt) - CONTEXT=$(cat /tmp/pr_context.txt) - # Build the prompt via a single-quoted heredoc so the shell never - # interprets backticks, dollar signs, or other special characters inside. - # Variables that must expand ($PR_TITLE etc.) are spliced in by jq --arg, - # not by shell interpolation, so the prompt text itself is always literal. - PROMPT_TEMPLATE=$(cat << 'ENDPROMPT' -You are a senior engineer performing a code review for the following pull request. - -PR Title: __PR_TITLE__ -Files changed: __CHANGED_FILES__ - ---- -__INDEX__ ---- - -## Changed file contents - -Each section below contains the COMPLETE, FINAL content of one changed file after -the PR's changes have been applied. This is the full file — not a diff. For files -over 500 lines, only the changed sections are shown with surrounding context. - ---- -__CONTEXT__ ---- - -## Instructions - -Before raising any finding: -1. Confirm every symbol (function name, command name, variable) you cite exists in - the CODEBASE INDEX above or in the file contents above. If it appears in neither, - discard the finding — it does not exist in this project. -2. Quote the exact line(s) from the file contents that support the finding. -3. Confirm the issue is a genuine problem, not intentional design. -4. If any step fails, discard the finding silently — do not mention it. - -Do NOT show your reasoning process. Do NOT mention discarded findings. -Output only confirmed issues. - -Severity levels: -- BLOCKER: will fail to compile, corrupt data, or introduce a security vulnerability -- WARNING: real risk that should be addressed before merge -- SUGGESTION: minor improvement, acceptable as a follow-up PR - -Focus on: security bugs, logic errors, data loss, injection vectors, unhandled -error paths that could silently corrupt state. -Ignore: style preferences, missing comments, speculative future concerns. - -## Output format (do not deviate) - -**Summary** (2-3 sentences: what the PR does and your overall assessment) - -**Findings** -- [SEVERITY] file:line -- description - Evidence: quoted line from the file above - Fix: concrete suggested change - -(Write "No findings." if there are none.) - -**Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES -ENDPROMPT -) - - # Splice runtime values into the template using sed so nothing is eval'd - PROMPT=$(printf '%s' "$PROMPT_TEMPLATE" \ - | sed "s|__PR_TITLE__|${PR_TITLE}|g" \ - | sed "s|__CHANGED_FILES__|${CHANGED_FILES}|g") - # INDEX and CONTEXT may contain special sed chars — use python for those - PROMPT=$(python3 -c " -import sys -template = sys.stdin.read() -index = open('/tmp/codebase_index.txt').read() -context = open('/tmp/pr_context.txt').read() -print(template.replace('__INDEX__', index).replace('__CONTEXT__', context), end='') -" <<< "$PROMPT") + # Build prompt with printf + cat so every line stays indented within + # the YAML run: | block. Heredocs with unindented bodies terminate the + # YAML block scalar, breaking the workflow file entirely. + { + printf 'You are a senior engineer performing a code review.\n\n' + printf 'PR Title: %s\n' "$PR_TITLE" + printf 'Files changed: %s\n\n' "$CHANGED_FILES" + printf -- '---\n' + cat /tmp/codebase_index.txt + printf -- '---\n\n' + printf '## Changed file contents\n\n' + printf 'Each section is the COMPLETE, FINAL file after PR changes (not a diff).\n' + printf 'Files over 500 lines show only changed sections with surrounding context.\n\n' + printf -- '---\n' + cat /tmp/pr_context.txt + printf -- '---\n\n' + printf '## Instructions\n\n' + printf 'Before raising any finding:\n' + printf '1. Confirm every symbol you cite exists in the CODEBASE INDEX or file\n' + printf ' contents above. If absent from both, discard the finding.\n' + printf '2. Quote the exact line(s) from the file contents that support it.\n' + printf '3. Confirm the issue is genuine, not intentional design.\n' + printf '4. If any step fails, discard silently -- do not mention it.\n\n' + printf 'Do NOT show reasoning. Only output confirmed issues.\n\n' + printf 'Severity:\n' + printf '- BLOCKER: fails to compile, corrupts data, or security vulnerability\n' + printf '- WARNING: real risk to address before merge\n' + printf '- SUGGESTION: minor improvement, follow-up PR fine\n\n' + printf 'Focus: security bugs, logic errors, data loss, injection, unhandled errors.\n' + printf 'Ignore: style, missing comments, speculative future concerns.\n\n' + printf '## Output format (strict)\n\n' + printf '**Summary** (2-3 sentences)\n\n' + printf '**Findings**\n' + printf '- [SEVERITY] file:line -- description\n' + printf ' Evidence: quoted line\n' + printf ' Fix: concrete change\n\n' + printf '(Write "No findings." if none.)\n\n' + printf '**Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES\n' + } > /tmp/prompt.txt BODY=$(jq -cn \ --arg model "qwen3-coder-next" \ - --arg content "$PROMPT" \ + --rawfile content /tmp/prompt.txt \ '{model: $model, messages: [{role: "user", content: $content}], stream: false}') echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] PR #${PR_NUMBER} - Calling liteLLM API (${#BODY} bytes)..." HTTP_CODE=$(curl -s --max-time 300 --connect-timeout 30 \ @@ -364,4 +328,4 @@ print(template.replace('__INDEX__', index).replace('__CONTEXT__', context), end= - name: Cleanup if: always() shell: bash - run: rm -f /tmp/pr_diff.txt /tmp/pr_context.txt /tmp/codebase_index.txt /tmp/llm_response.json /tmp/pr_review.txt /tmp/review_post_response.json /tmp/pr_files.txt + run: rm -f /tmp/pr_diff.txt /tmp/pr_context.txt /tmp/codebase_index.txt /tmp/prompt.txt /tmp/llm_response.json /tmp/pr_review.txt /tmp/review_post_response.json /tmp/pr_files.txt From 03cda08a33bbf3081bc32b8823fe06747a037cb4 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 15:12:46 -0500 Subject: [PATCH 14/19] fix(ci): fix grep invalid range and printf invalid option in pr-review 1. SECRET_PATTERN had [A-Za-z0-9+/_\-!@#] -- backslash-escaped hyphen is invalid POSIX ERE; grep parsed it as a range with invalid bounds. Fix: move hyphen to end of class: [A-Za-z0-9+/_!@#-]. 2. printf -- '---\n' fails with 'invalid option' in bash because the builtin does not accept -- as end-of-options. Removed -- from all four printf calls. --- .gitea/workflows/pr-review.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitea/workflows/pr-review.yml b/.gitea/workflows/pr-review.yml index 4b4689dd..824edcdc 100644 --- a/.gitea/workflows/pr-review.yml +++ b/.gitea/workflows/pr-review.yml @@ -59,7 +59,7 @@ jobs: # Secret scrubbing: match actual credential VALUES only — known API key formats, # or keyword="long_quoted_literal" (25+ chars). Never scrub on keyword alone, # which would silently delete function signatures, variable declarations, and tests. - SECRET_PATTERN='AKIA[A-Z0-9]{16}|gh[opsu]_[A-Za-z0-9_]{36,}|xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}|(password|token|api_key|secret)[[:space:]]*=[[:space:]]*["'"'"'][A-Za-z0-9+/_\-!@#]{25,}["'"'"']' + SECRET_PATTERN='AKIA[A-Z0-9]{16}|gh[opsu]_[A-Za-z0-9_]{36,}|xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}|(password|token|api_key|secret)[[:space:]]*=[[:space:]]*["'"'"'][A-Za-z0-9+/_!@#-]{25,}["'"'"']' # Only strip lines that are ENTIRELY a long base64 blob (e.g. PEM cert bodies) B64_PATTERN='^[[:space:]]*[A-Za-z0-9+/]{60,}={0,2}[[:space:]]*$' @@ -156,13 +156,13 @@ jobs: printf 'You are a senior engineer performing a code review.\n\n' printf 'PR Title: %s\n' "$PR_TITLE" printf 'Files changed: %s\n\n' "$CHANGED_FILES" - printf -- '---\n' + printf '---\n' cat /tmp/codebase_index.txt printf -- '---\n\n' printf '## Changed file contents\n\n' printf 'Each section is the COMPLETE, FINAL file after PR changes (not a diff).\n' printf 'Files over 500 lines show only changed sections with surrounding context.\n\n' - printf -- '---\n' + printf '---\n' cat /tmp/pr_context.txt printf -- '---\n\n' printf '## Instructions\n\n' From 6c825b1c7371372fb41a967d4c2e59473038a1c8 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 15:18:02 -0500 Subject: [PATCH 15/19] fix(ci): remove remaining printf -- calls in Analyze with LLM step --- .gitea/workflows/pr-review.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitea/workflows/pr-review.yml b/.gitea/workflows/pr-review.yml index 824edcdc..f3edfbe1 100644 --- a/.gitea/workflows/pr-review.yml +++ b/.gitea/workflows/pr-review.yml @@ -158,13 +158,13 @@ jobs: printf 'Files changed: %s\n\n' "$CHANGED_FILES" printf '---\n' cat /tmp/codebase_index.txt - printf -- '---\n\n' + printf '---\n\n' printf '## Changed file contents\n\n' printf 'Each section is the COMPLETE, FINAL file after PR changes (not a diff).\n' printf 'Files over 500 lines show only changed sections with surrounding context.\n\n' printf '---\n' cat /tmp/pr_context.txt - printf -- '---\n\n' + printf '---\n\n' printf '## Instructions\n\n' printf 'Before raising any finding:\n' printf '1. Confirm every symbol you cite exists in the CODEBASE INDEX or file\n' From 84bb3a20c1777425ae4ca20f4ff1b1eb15849b3a Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 15:27:18 -0500 Subject: [PATCH 16/19] fix(ci): use printf '%s' form to avoid format strings starting with hyphen bash printf treats format strings starting with '-' as option flags in some environments. The POSIX-safe idiom is 'printf "%s\n" content' where the format is always "%s\n" and the content is an argument. Applied to all prompt printf calls. Also replaced '--' in prompt text with single '-' to eliminate any remaining double-dash ambiguity. --- .gitea/workflows/pr-review.yml | 66 +++++++++++++++++----------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/.gitea/workflows/pr-review.yml b/.gitea/workflows/pr-review.yml index f3edfbe1..1579bcab 100644 --- a/.gitea/workflows/pr-review.yml +++ b/.gitea/workflows/pr-review.yml @@ -149,44 +149,44 @@ jobs: set -euo pipefail CHANGED_FILES=$(tr '\n' ' ' < /tmp/pr_files.txt) - # Build prompt with printf + cat so every line stays indented within - # the YAML run: | block. Heredocs with unindented bodies terminate the - # YAML block scalar, breaking the workflow file entirely. + # Build prompt file. Use 'printf "%s\n" text' throughout so the format + # string is always "%s\n" and content with leading hyphens or embedded + # double-dashes is never misinterpreted as a printf option flag. { - printf 'You are a senior engineer performing a code review.\n\n' + printf '%s\n\n' 'You are a senior engineer performing a code review.' printf 'PR Title: %s\n' "$PR_TITLE" printf 'Files changed: %s\n\n' "$CHANGED_FILES" - printf '---\n' + printf '%s\n' '---' cat /tmp/codebase_index.txt - printf '---\n\n' - printf '## Changed file contents\n\n' - printf 'Each section is the COMPLETE, FINAL file after PR changes (not a diff).\n' - printf 'Files over 500 lines show only changed sections with surrounding context.\n\n' - printf '---\n' + printf '%s\n\n' '---' + printf '%s\n\n' '## Changed file contents' + printf '%s\n' 'Each section is the COMPLETE, FINAL file after PR changes (not a diff).' + printf '%s\n\n' 'Files over 500 lines show only changed sections with surrounding context.' + printf '%s\n' '---' cat /tmp/pr_context.txt - printf '---\n\n' - printf '## Instructions\n\n' - printf 'Before raising any finding:\n' - printf '1. Confirm every symbol you cite exists in the CODEBASE INDEX or file\n' - printf ' contents above. If absent from both, discard the finding.\n' - printf '2. Quote the exact line(s) from the file contents that support it.\n' - printf '3. Confirm the issue is genuine, not intentional design.\n' - printf '4. If any step fails, discard silently -- do not mention it.\n\n' - printf 'Do NOT show reasoning. Only output confirmed issues.\n\n' - printf 'Severity:\n' - printf '- BLOCKER: fails to compile, corrupts data, or security vulnerability\n' - printf '- WARNING: real risk to address before merge\n' - printf '- SUGGESTION: minor improvement, follow-up PR fine\n\n' - printf 'Focus: security bugs, logic errors, data loss, injection, unhandled errors.\n' - printf 'Ignore: style, missing comments, speculative future concerns.\n\n' - printf '## Output format (strict)\n\n' - printf '**Summary** (2-3 sentences)\n\n' - printf '**Findings**\n' - printf '- [SEVERITY] file:line -- description\n' - printf ' Evidence: quoted line\n' - printf ' Fix: concrete change\n\n' - printf '(Write "No findings." if none.)\n\n' - printf '**Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES\n' + printf '%s\n\n' '---' + printf '%s\n\n' '## Instructions' + printf '%s\n' 'Before raising any finding:' + printf '%s\n' '1. Confirm every symbol you cite exists in the CODEBASE INDEX or file' + printf '%s\n' ' contents above. If absent from both, discard the finding.' + printf '%s\n' '2. Quote the exact line(s) from the file contents that support it.' + printf '%s\n' '3. Confirm the issue is genuine, not intentional design.' + printf '%s\n\n' '4. If any step fails, discard silently - do not mention it.' + printf '%s\n\n' 'Do NOT show reasoning. Only output confirmed issues.' + printf '%s\n' 'Severity:' + printf '%s\n' '- BLOCKER: fails to compile, corrupts data, or security vulnerability' + printf '%s\n' '- WARNING: real risk to address before merge' + printf '%s\n\n' '- SUGGESTION: minor improvement, follow-up PR fine' + printf '%s\n\n' 'Focus: security bugs, logic errors, data loss, injection, unhandled errors.' + printf '%s\n\n' 'Ignore: style, missing comments, speculative future concerns.' + printf '%s\n\n' '## Output format (strict)' + printf '%s\n\n' '**Summary** (2-3 sentences)' + printf '%s\n' '**Findings**' + printf '%s\n' '- [SEVERITY] file:line - description' + printf '%s\n' ' Evidence: quoted line' + printf '%s\n\n' ' Fix: concrete change' + printf '%s\n\n' '(Write "No findings." if none.)' + printf '%s\n' '**Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES' } > /tmp/prompt.txt BODY=$(jq -cn \ From 0057c570ba941d78dc2a1c0be449e85de130c971 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 15:32:16 -0500 Subject: [PATCH 17/19] fix(ci): write curl body to file to avoid ARG_MAX limit The 147KB JSON body was being passed as a shell argument to curl, hitting the kernel ARG_MAX limit. Write it to /tmp/body.json via jq redirection and use curl --data @/tmp/body.json instead. --- .gitea/workflows/pr-review.yml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.gitea/workflows/pr-review.yml b/.gitea/workflows/pr-review.yml index 1579bcab..c91e7bac 100644 --- a/.gitea/workflows/pr-review.yml +++ b/.gitea/workflows/pr-review.yml @@ -189,18 +189,20 @@ jobs: printf '%s\n' '**Verdict**: APPROVE / APPROVE WITH COMMENTS / REQUEST CHANGES' } > /tmp/prompt.txt - BODY=$(jq -cn \ + # Write body to file — passing 100KB+ JSON as a shell arg hits ARG_MAX. + jq -cn \ --arg model "qwen3-coder-next" \ --rawfile content /tmp/prompt.txt \ - '{model: $model, messages: [{role: "user", content: $content}], stream: false}') - echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] PR #${PR_NUMBER} - Calling liteLLM API (${#BODY} bytes)..." + '{model: $model, messages: [{role: "user", content: $content}], stream: false}' \ + > /tmp/body.json + echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] PR #${PR_NUMBER} - Calling liteLLM API ($(wc -c < /tmp/body.json) bytes)..." HTTP_CODE=$(curl -s --max-time 300 --connect-timeout 30 \ --retry 3 --retry-delay 10 --retry-connrefused --retry-max-time 300 \ -o /tmp/llm_response.json -w "%{http_code}" \ -X POST "$LITELLM_URL/chat/completions" \ -H "Authorization: Bearer $LITELLM_API_KEY" \ -H "Content-Type: application/json" \ - -d "$BODY") + --data @/tmp/body.json) echo "HTTP status: $HTTP_CODE" echo "Response file size: $(wc -c < /tmp/llm_response.json) bytes" if [ "$HTTP_CODE" != "200" ]; then @@ -328,4 +330,4 @@ jobs: - name: Cleanup if: always() shell: bash - run: rm -f /tmp/pr_diff.txt /tmp/pr_context.txt /tmp/codebase_index.txt /tmp/prompt.txt /tmp/llm_response.json /tmp/pr_review.txt /tmp/review_post_response.json /tmp/pr_files.txt + run: rm -f /tmp/pr_diff.txt /tmp/pr_context.txt /tmp/codebase_index.txt /tmp/prompt.txt /tmp/body.json /tmp/llm_response.json /tmp/pr_review.txt /tmp/review_post_response.json /tmp/pr_files.txt From 26507ad3ff68006e62d0171b1ec4ae6e17c50315 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 15:37:10 -0500 Subject: [PATCH 18/19] fix(ci): install python3 in pr-review container (ubuntu:22.04 omits it) --- .gitea/workflows/pr-review.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitea/workflows/pr-review.yml b/.gitea/workflows/pr-review.yml index c91e7bac..5d3c39cd 100644 --- a/.gitea/workflows/pr-review.yml +++ b/.gitea/workflows/pr-review.yml @@ -18,7 +18,7 @@ jobs: shell: bash run: | set -euo pipefail - apt-get update -qq && apt-get install -y -qq git curl jq + apt-get update -qq && apt-get install -y -qq git curl jq python3 - name: Checkout code shell: bash From 1a9c3bd65a7175f600458dc5feb6d0439f133745 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 15:46:29 -0500 Subject: [PATCH 19/19] fix(sudo): enforce username scope and singleton row in sudo_config Fixes two bugs identified in the AI code review: 1. INSERT OR REPLACE with a freshly generated UUID never matches the existing primary key, so it appended rows instead of replacing. Switch to DELETE-then-INSERT to guarantee exactly one row. 2. Username defaulted to empty string. Resolve it to the current OS user (USER/LOGNAME env vars, fallback 'local') so credentials are always bound to a specific user identity. test_sudo_password now passes -u to sudo so the test runs scoped to the stored user, not an arbitrary one. UI: show the configured username prominently in status; relabel the field and add a scope hint below it. Tests: test_set_sudo_singleton_delete_then_insert, three username resolution tests. --- src-tauri/src/commands/system.rs | 117 ++++++++++++++++++++++++++++--- src/pages/Settings/Security.tsx | 14 ++-- 2 files changed, 116 insertions(+), 15 deletions(-) diff --git a/src-tauri/src/commands/system.rs b/src-tauri/src/commands/system.rs index d31a537d..9757747b 100644 --- a/src-tauri/src/commands/system.rs +++ b/src-tauri/src/commands/system.rs @@ -294,6 +294,17 @@ pub struct SudoConfigStatus { pub updated_at: String, } +/// Resolve the OS username to bind sudo credentials to. +fn resolve_sudo_username(provided: Option) -> String { + provided + .filter(|u| !u.trim().is_empty()) + .unwrap_or_else(|| { + std::env::var("USER") + .or_else(|_| std::env::var("LOGNAME")) + .unwrap_or_else(|_| "local".to_string()) + }) +} + #[tauri::command] pub async fn set_sudo_password( password: String, @@ -301,13 +312,18 @@ pub async fn set_sudo_password( state: tauri::State<'_, AppState>, ) -> Result<(), String> { let encrypted = crate::integrations::auth::encrypt_token(&password)?; + let uname = resolve_sudo_username(username); let db = state.db.lock().map_err(|e| e.to_string())?; - let id = uuid::Uuid::now_v7().to_string(); - let uname = username.unwrap_or_default(); + // DELETE then INSERT to guarantee exactly one row at all times. + // INSERT OR REPLACE with a freshly generated UUID never matches the + // existing primary key, so it inserts an additional row instead of + // replacing — this is the correct singleton pattern for SQLite. + db.execute("DELETE FROM sudo_config", []) + .map_err(|e| format!("Failed to clear sudo config: {e}"))?; db.execute( - "INSERT OR REPLACE INTO sudo_config (id, username, encrypted_password, created_at, updated_at) \ + "INSERT INTO sudo_config (id, username, encrypted_password, created_at, updated_at) \ VALUES (?1, ?2, ?3, datetime('now'), datetime('now'))", - rusqlite::params![id, uname, encrypted], + rusqlite::params![uuid::Uuid::now_v7().to_string(), uname, encrypted], ) .map_err(|e| format!("Failed to store sudo config: {e}"))?; Ok(()) @@ -342,16 +358,26 @@ pub async fn get_sudo_config_status( #[tauri::command] pub async fn test_sudo_password(state: tauri::State<'_, AppState>) -> Result { - let encrypted: Option = { + let (encrypted, stored_username) = { let db = state.db.lock().map_err(|e| e.to_string())?; - db.prepare("SELECT encrypted_password FROM sudo_config LIMIT 1") - .and_then(|mut stmt| stmt.query_row([], |row| row.get::<_, String>(0))) + db.prepare("SELECT encrypted_password, username FROM sudo_config LIMIT 1") + .and_then(|mut stmt| { + stmt.query_row([], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) + }) + }) .ok() + .ok_or("No sudo password configured".to_string())? }; - let encrypted = encrypted.ok_or("No sudo password configured")?; let password = crate::integrations::auth::decrypt_token(&encrypted)?; - let result = crate::commands::agentic::run_sudo_command(&password, &["true"]) - .map_err(|e| format!("Sudo test failed: {e}"))?; + // Scope the test to the stored username so credentials can only be + // verified for the user they were saved for. + let result = if stored_username.is_empty() { + crate::commands::agentic::run_sudo_command(&password, &["true"]) + } else { + crate::commands::agentic::run_sudo_command(&password, &["-u", &stored_username, "true"]) + } + .map_err(|e| format!("Sudo test failed: {e}"))?; Ok(result.success) } @@ -362,3 +388,74 @@ pub async fn clear_sudo_password(state: tauri::State<'_, AppState>) -> Result<() .map_err(|e| format!("Failed to clear sudo config: {e}"))?; Ok(()) } + +#[cfg(test)] +mod sudo_tests { + use super::*; + + fn setup_db() -> rusqlite::Connection { + let conn = rusqlite::Connection::open_in_memory().unwrap(); + crate::db::migrations::run_migrations(&conn).unwrap(); + conn + } + + #[test] + fn test_set_sudo_singleton_delete_then_insert() { + let conn = setup_db(); + // Insert two stale rows directly to simulate the old broken behaviour + conn.execute( + "INSERT INTO sudo_config (id, username, encrypted_password) VALUES ('id1', 'alice', 'enc1')", + [], + ) + .unwrap(); + conn.execute( + "INSERT INTO sudo_config (id, username, encrypted_password) VALUES ('id2', 'bob', 'enc2')", + [], + ) + .unwrap(); + let count: i64 = conn + .query_row("SELECT COUNT(*) FROM sudo_config", [], |r| r.get(0)) + .unwrap(); + assert_eq!(count, 2); + + // Apply the correct singleton pattern + conn.execute("DELETE FROM sudo_config", []).unwrap(); + conn.execute( + "INSERT INTO sudo_config (id, username, encrypted_password) VALUES ('id3', 'charlie', 'enc3')", + [], + ) + .unwrap(); + + let count: i64 = conn + .query_row("SELECT COUNT(*) FROM sudo_config", [], |r| r.get(0)) + .unwrap(); + assert_eq!(count, 1, "exactly one row must remain after set"); + + let username: String = conn + .query_row("SELECT username FROM sudo_config", [], |r| r.get(0)) + .unwrap(); + assert_eq!(username, "charlie"); + } + + #[test] + fn test_resolve_sudo_username_uses_provided() { + let result = resolve_sudo_username(Some("alice".to_string())); + assert_eq!(result, "alice"); + } + + #[test] + fn test_resolve_sudo_username_rejects_blank() { + let result = resolve_sudo_username(Some(" ".to_string())); + // blank string should fall through to env-based default + assert!(!result.trim().is_empty(), "username must never be blank"); + } + + #[test] + fn test_resolve_sudo_username_defaults_to_env() { + let env_user = std::env::var("USER") + .or_else(|_| std::env::var("LOGNAME")) + .unwrap_or_else(|_| "local".to_string()); + let result = resolve_sudo_username(None); + assert_eq!(result, env_user); + } +} diff --git a/src/pages/Settings/Security.tsx b/src/pages/Settings/Security.tsx index 598ecb03..0c486de7 100644 --- a/src/pages/Settings/Security.tsx +++ b/src/pages/Settings/Security.tsx @@ -173,9 +173,10 @@ export default function Security() { {sudoStatus?.configured && ( -

- Configured (last updated: {sudoStatus.updated_at}) -

+
+

Configured for {sudoStatus.username}

+

Last updated: {sudoStatus.updated_at}

+
)} {sudoStatus && !sudoStatus.configured && (

Not configured

@@ -183,16 +184,19 @@ export default function Security() {
setSudoUsername(e.target.value)} - placeholder="Leave empty for current user" + placeholder="Defaults to current OS user" className="mt-1 w-full rounded-md border border-input bg-background px-3 py-2 text-sm" /> +

+ Credentials are scoped to this user. Leave blank to use the current OS user. +