From a04d6fc8f573215ecc2fa0de01c42db5d1a00573 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 19:36:44 -0500 Subject: [PATCH] fix(security): backend-only PII redaction; fix fmt CI failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves all three findings from the second automated review and fixes the cargo fmt --check CI failure (formatting drift in analysis.rs from a prior merge). [BLOCKER 1 + BLOCKER 2 + WARNING] Frontend no longer performs any PII scanning or redaction. All three concerns stemmed from the same root cause: outMessage was derived on the frontend and used for display, DB storage (via lastUserMsgRef and the chat bubble), and the AI payload — causing the original message to be silently replaced before the backend received it. Fix: frontend sends the original message verbatim. Backend is now the sole authority. chat_message auto-redacts the typed message text using PiiDetector + apply_redactions() before building the full payload, logs the PII types via tracing::warn, and stores only the redacted form in ai_messages and the audit log. The redacted form is returned to the caller as ChatResponse.user_message (Option<String>, absent from direct provider calls). Frontend uses message (original) for the chat bubble and lastUserMsgRef — resolution steps show natural language, not [Password] tokens. The AI and DB see only the redacted version. CI fix: cargo fmt applied to analysis.rs; all format checks now pass. 
--- src-tauri/src/ai/anthropic.rs | 1 + src-tauri/src/ai/gemini.rs | 1 + src-tauri/src/ai/mistral.rs | 1 + src-tauri/src/ai/mod.rs | 4 ++++ src-tauri/src/ai/ollama.rs | 1 + src-tauri/src/ai/openai.rs | 2 ++ src-tauri/src/commands/ai.rs | 33 +++++++++++++++++++++++++++++---- src/lib/tauriCommands.ts | 2 ++ src/pages/Triage/index.tsx | 29 +++++------------------------ 9 files changed, 46 insertions(+), 28 deletions(-) diff --git a/src-tauri/src/ai/anthropic.rs b/src-tauri/src/ai/anthropic.rs index cc329cf4..97f2f817 100644 --- a/src-tauri/src/ai/anthropic.rs +++ b/src-tauri/src/ai/anthropic.rs @@ -116,6 +116,7 @@ impl Provider for AnthropicProvider { content, model, usage, + user_message: None, tool_calls: None, }) } diff --git a/src-tauri/src/ai/gemini.rs b/src-tauri/src/ai/gemini.rs index 27cc8422..3555e6fe 100644 --- a/src-tauri/src/ai/gemini.rs +++ b/src-tauri/src/ai/gemini.rs @@ -119,6 +119,7 @@ impl Provider for GeminiProvider { content, model: config.model.clone(), usage, + user_message: None, tool_calls: None, }) } diff --git a/src-tauri/src/ai/mistral.rs b/src-tauri/src/ai/mistral.rs index f1a6745c..5df2f809 100644 --- a/src-tauri/src/ai/mistral.rs +++ b/src-tauri/src/ai/mistral.rs @@ -84,6 +84,7 @@ impl Provider for MistralProvider { content, model: config.model.clone(), usage, + user_message: None, tool_calls: None, }) } diff --git a/src-tauri/src/ai/mod.rs b/src-tauri/src/ai/mod.rs index 3492dd97..092cd6b9 100644 --- a/src-tauri/src/ai/mod.rs +++ b/src-tauri/src/ai/mod.rs @@ -30,6 +30,10 @@ pub struct ChatResponse { pub content: String, pub model: String, pub usage: Option, + /// The user message as it was stored in the DB (may be auto-redacted). + /// Set by chat_message; absent from direct provider calls. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub user_message: Option, #[serde(skip_serializing_if = "Option::is_none")] pub tool_calls: Option>, } diff --git a/src-tauri/src/ai/ollama.rs b/src-tauri/src/ai/ollama.rs index 54b3a331..1bde9746 100644 --- a/src-tauri/src/ai/ollama.rs +++ b/src-tauri/src/ai/ollama.rs @@ -100,6 +100,7 @@ impl Provider for OllamaProvider { content, model: config.model.clone(), usage, + user_message: None, tool_calls: None, }) } diff --git a/src-tauri/src/ai/openai.rs b/src-tauri/src/ai/openai.rs index 73fc12b4..4d83bb31 100644 --- a/src-tauri/src/ai/openai.rs +++ b/src-tauri/src/ai/openai.rs @@ -197,6 +197,7 @@ impl OpenAiProvider { content, model: config.model.clone(), usage, + user_message: None, tool_calls, }) } @@ -397,6 +398,7 @@ impl OpenAiProvider { content, model: config.model.clone(), usage: None, // This custom REST contract doesn't provide token usage in response + user_message: None, tool_calls, }) } diff --git a/src-tauri/src/commands/ai.rs b/src-tauri/src/commands/ai.rs index ddbc7757..a3545fec 100644 --- a/src-tauri/src/commands/ai.rs +++ b/src-tauri/src/commands/ai.rs @@ -234,9 +234,29 @@ pub async fn chat_message( .collect() }; - // Load attachment files from DB, scan for PII, and embed clean content into the message. - // File content never passes through the frontend — the backend is the single source of truth. + // Auto-redact PII in both the typed message and any file attachments. + // The backend is the sole authority for redaction; the frontend sends original content. let full_message = { + // Step 1: redact the typed user message text. 
+ let base = { + let spans = crate::pii::PiiDetector::new().detect(&message); + if spans.is_empty() { + message.clone() + } else { + let types: std::collections::HashSet<&str> = + spans.iter().map(|s| s.pii_type.as_str()).collect(); + let mut type_list: Vec<&str> = types.into_iter().collect(); + type_list.sort_unstable(); + warn!( + pii_types = ?type_list, + pii_count = spans.len(), + "PII detected in typed chat message — auto-redacting before AI send" + ); + crate::pii::apply_redactions(&message, &spans) + } + }; + + // Step 2: load attachment files from DB, scan, and embed clean content. let files: Vec<(String, String)> = if let Some(ref ids) = log_file_ids { let db = state.db.lock().map_err(|e| e.to_string())?; let mut v = Vec::new(); @@ -257,7 +277,7 @@ pub async fn chat_message( vec![] }; - let mut msg = message.clone(); + let mut msg = base; for (file_name, file_path) in &files { let content = std::fs::read_to_string(file_path).unwrap_or_default(); let preview = &content[..content.len().min(8000)]; @@ -409,9 +429,11 @@ pub async fn chat_message( } // Save both user message and response to DB + let stored_user_message; { let db = state.db.lock().map_err(|e| e.to_string())?; let user_msg = AiMessage::new(conversation_id.clone(), "user".to_string(), full_message); + stored_user_message = user_msg.content.clone(); let asst_msg = AiMessage::new( conversation_id, "assistant".to_string(), @@ -468,7 +490,10 @@ pub async fn chat_message( } } - Ok(final_response) + Ok(crate::ai::ChatResponse { + user_message: Some(stored_user_message), + ..final_response + }) } #[tauri::command] diff --git a/src/lib/tauriCommands.ts b/src/lib/tauriCommands.ts index 014049f5..62583d09 100644 --- a/src/lib/tauriCommands.ts +++ b/src/lib/tauriCommands.ts @@ -34,6 +34,8 @@ export interface ChatResponse { content: string; model: string; usage?: TokenUsage; + /** What was stored in the DB — may be auto-redacted. Use this for display and history. 
*/ + user_message?: string; } export interface AnalysisResult { diff --git a/src/pages/Triage/index.tsx b/src/pages/Triage/index.tsx index 26e160c9..cfbe0daf 100644 --- a/src/pages/Triage/index.tsx +++ b/src/pages/Triage/index.tsx @@ -8,7 +8,6 @@ import { useSessionStore } from "@/stores/sessionStore"; import { useSettingsStore } from "@/stores/settingsStore"; import { chatMessageCmd, - scanTextForPiiCmd, getIssueCmd, getIssueMessagesCmd, uploadLogFileCmd, @@ -135,29 +134,10 @@ export default function Triage() { setIsLoading(true); setError(null); - // Auto-redact PII in typed message text before sending to AI. - // Spans are replaced in reverse-start-offset order to preserve byte positions. - let outMessage = message; - if (message.trim()) { - try { - const textResult = await scanTextForPiiCmd(message); - if (textResult.total_pii_found > 0) { - const sorted = [...textResult.detections].sort((a, b) => b.start - a.start); - let redacted = message; - for (const span of sorted) { - redacted = redacted.slice(0, span.start) + span.replacement + redacted.slice(span.end); - } - outMessage = redacted; - } - } catch { - // Non-fatal: if the scan fails, send original - } - } - const displayContent = pendingFiles.length > 0 - ? `${outMessage}${outMessage ? "\n" : ""}📎 ${pendingFiles.map((f) => f.name).join(", ")}` - : outMessage; + ? `${message}${message ? "\n" : ""}📎 ${pendingFiles.map((f) => f.name).join(", ")}` + : message; const userMsg: TriageMessage = { id: `user-${Date.now()}`, @@ -167,7 +147,7 @@ export default function Triage() { why_level: currentWhyLevel, created_at: Date.now(), }; - lastUserMsgRef.current = outMessage; + lastUserMsgRef.current = message; addMessage(userMsg); const logFileIds = pendingFiles.map((f) => f.logFileId); setPendingFiles([]); @@ -184,7 +164,8 @@ export default function Triage() { // Use the active domain for the system prompt const systemPrompt = activeDomain ? 
getDomainPrompt(activeDomain) : undefined; - const response = await chatMessageCmd(id, outMessage, logFileIds, provider, systemPrompt); + // Backend auto-redacts PII in both message text and attachments before sending to AI. + const response = await chatMessageCmd(id, message, logFileIds, provider, systemPrompt); const assistantMsg: TriageMessage = { id: `asst-${Date.now()}`, issue_id: id,