From f05b9542502905879126a7fb4b4e38bcbe2039e6 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 19:20:46 -0500 Subject: [PATCH] =?UTF-8?q?fix(security):=20address=20PR=20review=20?= =?UTF-8?q?=E2=80=94=20move=20attachment=20handling=20to=20backend,=20auto?= =?UTF-8?q?-redact=20PII?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves all four findings from the automated review: [BLOCKER 1] Attachment PII scan error path left pendingFiles intact, allowing retry with stale file references. Fix: file content is no longer held in frontend state at all — PendingFile drops the content field entirely. logFileIds are captured before setPendingFiles([]) and passed directly to the backend. [BLOCKER 2] Raw file content stored in PendingFile.content created a UI-visible PII surface and a data-residency risk. Fix: frontend never reads or stores file content. The backend loads file data from disk, auto-redacts PII in-memory using pii::apply_redactions(), and embeds the clean text into the AI message. No PII ever touches the frontend. [WARNING 1] String-based attachment header parsing was fragile and bypassable. Fix: parsing is gone — backend identifies attachments by log_file_id, reads them directly from the DB/disk path, and applies redaction at that level. [WARNING 2] Error message disclosed PII type list to the caller. Fix: PII types are logged via tracing::warn only; no type details in the user-facing error or API response. Additionally: typed chat messages are now auto-redacted rather than blocked. scanTextForPiiCmd runs on the typed text; detected spans are replaced in reverse-offset order before the message is sent to the AI and stored in the DB. The user sees the redacted form in their chat bubble. Architecture: - chat_message now accepts log_file_ids: Option> - Backend reads file → detects PII → redacts in memory → embeds - Frontend: no readTextFile, no content field, no frontend PII gate --- src-tauri/src/commands/ai.rs | 91 ++++++++++++++++++++---------------- src/lib/tauriCommands.ts | 16 ++++++- src/pages/Triage/index.tsx | 86 ++++++++-------------------------- 3 files changed, 85 insertions(+), 108 deletions(-) diff --git a/src-tauri/src/commands/ai.rs b/src-tauri/src/commands/ai.rs index 0989eaf4..ddbc7757 100644 --- a/src-tauri/src/commands/ai.rs +++ b/src-tauri/src/commands/ai.rs @@ -165,6 +165,7 @@ fn extract_list(text: &str, header: &str) -> Vec { pub async fn chat_message( issue_id: String, message: String, + log_file_ids: Option>, provider_config: ProviderConfig, system_prompt: Option, app_handle: tauri::AppHandle, @@ -233,6 +234,54 @@ pub async fn chat_message( .collect() }; + // Load attachment files from DB, scan for PII, and embed clean content into the message. + // File content never passes through the frontend — the backend is the single source of truth. + let full_message = { + let files: Vec<(String, String)> = if let Some(ref ids) = log_file_ids { + let db = state.db.lock().map_err(|e| e.to_string())?; + let mut v = Vec::new(); + for file_id in ids { + if let Ok((name, path)) = db + .prepare("SELECT file_name, file_path FROM log_files WHERE id = ?1") + .and_then(|mut s| { + s.query_row([file_id], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) + }) + }) + { + v.push((name, path)); + } + } + v + } else { + vec![] + }; + + let mut msg = message.clone(); + for (file_name, file_path) in &files { + let content = std::fs::read_to_string(file_path).unwrap_or_default(); + let preview = &content[..content.len().min(8000)]; + let spans = crate::pii::PiiDetector::new().detect(preview); + let body = if spans.is_empty() { + preview.to_string() + } else { + let types: std::collections::HashSet<&str> = + spans.iter().map(|s| s.pii_type.as_str()).collect(); + let mut type_list: Vec<&str> = types.into_iter().collect(); + type_list.sort_unstable(); + warn!( + file_name = %file_name, + pii_types = ?type_list, + pii_count = spans.len(), + "PII detected in chat attachment — auto-redacting before AI send" + ); + crate::pii::apply_redactions(preview, &spans) + }; + msg.push_str(&format!("\n\n--- Attached: {} ---\n{}", file_name, body)); + } + msg + }; + let provider = create_provider(&provider_config); // Search integration sources for relevant context @@ -288,47 +337,9 @@ pub async fn chat_message( messages.push(context_message); } - // Defence-in-depth: reject messages containing unredacted file attachment content. - // The frontend gates this too, but we enforce it here as a hard stop. - if message.contains("--- Attached:") { - let mut in_attachment = false; - let mut body = String::new(); - for line in message.lines() { - if line.starts_with("--- Attached:") { - in_attachment = true; - } else if in_attachment { - body.push_str(line); - body.push('\n'); - } - } - - if !body.is_empty() { - let detector = crate::pii::PiiDetector::new(); - let spans = detector.detect(&body); - if !spans.is_empty() { - let mut types: Vec<&str> = { - use std::collections::HashSet; - spans - .iter() - .map(|s| s.pii_type.as_str()) - .collect::>() - .into_iter() - .collect() - }; - types.sort_unstable(); - return Err(format!( - "PII detected in attached file content ({} items: {}). \ - Use Log Analysis to redact before attaching to AI messages.", - spans.len(), - types.join(", ") - )); - } - } - } - messages.push(Message { role: "user".into(), - content: message.clone(), + content: full_message.clone(), tool_call_id: None, tool_calls: None, }); @@ -400,7 +411,7 @@ pub async fn chat_message( // Save both user message and response to DB { let db = state.db.lock().map_err(|e| e.to_string())?; - let user_msg = AiMessage::new(conversation_id.clone(), "user".to_string(), message); + let user_msg = AiMessage::new(conversation_id.clone(), "user".to_string(), full_message); let asst_msg = AiMessage::new( conversation_id, "assistant".to_string(), diff --git a/src/lib/tauriCommands.ts b/src/lib/tauriCommands.ts index 136ad7c5..014049f5 100644 --- a/src/lib/tauriCommands.ts +++ b/src/lib/tauriCommands.ts @@ -271,8 +271,20 @@ export interface TriageMessage { export const analyzeLogsCmd = (issueId: string, logFileIds: string[], providerConfig: ProviderConfig) => invoke("analyze_logs", { issueId, logFileIds, providerConfig }); -export const chatMessageCmd = (issueId: string, message: string, providerConfig: ProviderConfig, systemPrompt?: string) => - invoke("chat_message", { issueId, message, providerConfig, systemPrompt: systemPrompt ?? null }); +export const chatMessageCmd = ( + issueId: string, + message: string, + logFileIds: string[], + providerConfig: ProviderConfig, + systemPrompt?: string +) => + invoke("chat_message", { + issueId, + message, + logFileIds: logFileIds.length > 0 ? logFileIds : null, + providerConfig, + systemPrompt: systemPrompt ?? null, + }); export const listProvidersCmd = () => invoke("list_providers"); diff --git a/src/pages/Triage/index.tsx b/src/pages/Triage/index.tsx index 26055180..26e160c9 100644 --- a/src/pages/Triage/index.tsx +++ b/src/pages/Triage/index.tsx @@ -2,14 +2,12 @@ import { useEffect, useRef, useState } from "react"; import { useParams, useNavigate } from "react-router-dom"; import { CheckCircle, ChevronRight } from "lucide-react"; import { open } from "@tauri-apps/plugin-dialog"; -import { readTextFile } from "@tauri-apps/plugin-fs"; import { ChatWindow } from "@/components/ChatWindow"; import { TriageProgress } from "@/components/TriageProgress"; import { useSessionStore } from "@/stores/sessionStore"; import { useSettingsStore } from "@/stores/settingsStore"; import { chatMessageCmd, - detectPiiCmd, scanTextForPiiCmd, getIssueCmd, getIssueMessagesCmd, @@ -36,7 +34,7 @@ function isCloseIntent(message: string): boolean { return CLOSE_PATTERNS.some((p) => lower.includes(p)); } -type PendingFile = { name: string; content: string | null; logFileId: string }; +type PendingFile = { name: string; logFileId: string }; export default function Triage() { const { id } = useParams<{ id: string }>(); @@ -47,10 +45,6 @@ export default function Triage() { // Track the last user message so we can save it as a resolution step when why level advances const lastUserMsgRef = useRef(""); const initialized = useRef(false); - // PII warning state: if the user sends a message that triggers a PII warning, - // store it here. Sending the same message a second time bypasses the warning - // (explicit acknowledgment). Cleared after each successful send. - const piiWarnedMessageRef = useRef(""); const { currentIssue, messages, currentWhyLevel, activeDomain, startSession, addMessage, setWhyLevel, setActiveDomain } = useSessionStore(); @@ -107,14 +101,7 @@ export default function Triage() { const paths = Array.isArray(selected) ? selected : [selected]; for (const filePath of paths) { const logFile = await uploadLogFileCmd(id, filePath); - let content: string | null = null; - try { - const raw = await readTextFile(filePath); - content = raw.slice(0, 8000); // cap at 8 KB to keep context manageable - } catch { - // Binary file (image) — include filename only as context - } - setPendingFiles((prev) => [...prev, { name: logFile.file_name, content, logFileId: logFile.id }]); + setPendingFiles((prev) => [...prev, { name: logFile.file_name, logFileId: logFile.id }]); } } catch (e) { setError(`Attachment failed: ${String(e)}`); @@ -148,62 +135,29 @@ export default function Triage() { setIsLoading(true); setError(null); - // PII gate: scan each text attachment before it is sent to an AI provider. - // Images (content === null) have no text to scan. - for (const f of pendingFiles) { - if (f.content === null) continue; - try { - const result = await detectPiiCmd(f.logFileId); - if (result.total_pii_found > 0) { - const types = [...new Set(result.detections.map((d) => d.pii_type))].join(", "); - setError( - `PII detected in "${f.name}" (${result.total_pii_found} item${result.total_pii_found !== 1 ? "s" : ""}: ${types}). ` + - `Open Log Analysis to redact before attaching.` - ); - setIsLoading(false); - return; - } - } catch (e) { - setError(`PII scan failed for "${f.name}": ${String(e)}`); - setIsLoading(false); - return; - } - } - - // PII warning for typed message text. Unlike attachments (hard block), typed - // messages only warn once — sending the same text a second time is treated as - // explicit acknowledgment and proceeds. - if (message.trim() && message !== piiWarnedMessageRef.current) { + // Auto-redact PII in typed message text before sending to AI. + // Spans are replaced in reverse-start-offset order to preserve byte positions. + let outMessage = message; + if (message.trim()) { try { const textResult = await scanTextForPiiCmd(message); if (textResult.total_pii_found > 0) { - const types = [...new Set(textResult.detections.map((d) => d.pii_type))].join(", "); - piiWarnedMessageRef.current = message; - setError( - `Your message may contain sensitive data (${types} detected). ` + - `Edit your message to remove it, or send again to proceed.` - ); - setIsLoading(false); - return; + const sorted = [...textResult.detections].sort((a, b) => b.start - a.start); + let redacted = message; + for (const span of sorted) { + redacted = redacted.slice(0, span.start) + span.replacement + redacted.slice(span.end); + } + outMessage = redacted; } } catch { - // Non-fatal: if the scan fails, do not block the send + // Non-fatal: if the scan fails, send original } } - // Build AI context: user text + file contents; show only text + filenames in chat UI - const fileContext = pendingFiles - .map((f) => - f.content - ? `\n\n--- Attached: ${f.name} ---\n${f.content}` - : `\n\n[Image attached: ${f.name} — describe what you see if relevant]` - ) - .join(""); - const aiMessage = pendingFiles.length > 0 ? `${message}${fileContext}` : message; const displayContent = pendingFiles.length > 0 - ? `${message}${message ? "\n" : ""}📎 ${pendingFiles.map((f) => f.name).join(", ")}` - : message; + ? `${outMessage}${outMessage ? "\n" : ""}📎 ${pendingFiles.map((f) => f.name).join(", ")}` + : outMessage; const userMsg: TriageMessage = { id: `user-${Date.now()}`, @@ -213,23 +167,24 @@ export default function Triage() { why_level: currentWhyLevel, created_at: Date.now(), }; - lastUserMsgRef.current = message; + lastUserMsgRef.current = outMessage; addMessage(userMsg); + const logFileIds = pendingFiles.map((f) => f.logFileId); setPendingFiles([]); try { // Detect domain from conversation messages const messageContents = messages.map((m) => m.content); const detectedDomain = detectDomain(messageContents); - + // Update active domain if it has changed if (detectedDomain !== activeDomain && detectedDomain !== "general") { setActiveDomain(detectedDomain); } - + // Use the active domain for the system prompt const systemPrompt = activeDomain ? getDomainPrompt(activeDomain) : undefined; - const response = await chatMessageCmd(id, aiMessage, provider, systemPrompt); + const response = await chatMessageCmd(id, outMessage, logFileIds, provider, systemPrompt); const assistantMsg: TriageMessage = { id: `asst-${Date.now()}`, issue_id: id, @@ -263,7 +218,6 @@ export default function Triage() { setError(String(e)); } finally { setIsLoading(false); - piiWarnedMessageRef.current = ""; } };