From cd26801a39c966982fca134c797d02eede30c2e4 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 19:05:51 -0500 Subject: [PATCH 1/6] fix(security): block PII in chat attachments and typed messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit File attachments were embedded into AI messages without any PII scanning, allowing credentials, tokens, and other sensitive data to be forwarded to AI providers in plaintext. Typed chat messages had the same gap: a user could type a password or API key directly and it would be sent unscanned. Changes: - chat_message (Rust): defence-in-depth scan of all attachment body content (between --- Attached: markers); hard rejects if PII found - detect_pii (Rust): fix return type from pii::PiiDetectionResult (spans/original_text) to db::models::PiiDetectionResult (detections/total_pii_found) to match the TypeScript contract; the LogUpload PII review workflow was receiving undefined for detections - scan_text_for_pii (Rust): new command — scans arbitrary text for PII without creating DB records; used for typed message warnings - Triage/index.tsx: PendingFile now carries logFileId; handleSend gates each text attachment through detectPiiCmd (hard block on PII found); typed message text scanned via scanTextForPiiCmd with a one-time warning — second send of same message proceeds as acknowledgment --- TICKET-pii-bypass-chat-attachments.md | 78 +++++++++++++++++++++++++++ src-tauri/src/commands/ai.rs | 38 +++++++++++++ src-tauri/src/commands/analysis.rs | 23 ++++++-- src-tauri/src/lib.rs | 1 + src/lib/tauriCommands.ts | 3 ++ src/pages/Triage/index.tsx | 54 ++++++++++++++++++- 6 files changed, 191 insertions(+), 6 deletions(-) create mode 100644 TICKET-pii-bypass-chat-attachments.md diff --git a/TICKET-pii-bypass-chat-attachments.md b/TICKET-pii-bypass-chat-attachments.md new file mode 100644 index 00000000..188a3d75 --- /dev/null +++ b/TICKET-pii-bypass-chat-attachments.md @@ -0,0 +1,78 @@ +# TICKET: PII Detection Bypass in AI Chat + +**Branch**: `feature/attachment-db-storage-recall` + +--- + +## Description + +Two PII detection bypasses were identified and fixed in the AI triage chat interface. + +### Bypass 1 — File Attachments (Critical) + +When a user attached a file to a chat message, its content was read via `readTextFile()`, sliced to 8 KB, and embedded directly into the AI message string — bypassing the existing PII pipeline entirely. The message was forwarded to the configured AI provider in plaintext. The audit log recorded the full file content without a SHA-256 hash or redaction marker. + +**Root cause**: `handleAttach` stored raw file content in React state. `handleSend` concatenated it into `aiMessage` with no call to `detectPiiCmd` or `applyRedactionsCmd`. The backend `chat_message` command applied no validation. + +### Bypass 2 — Typed Chat Messages (High) + +Plain typed chat messages were sent to the AI provider without any PII scan. A user typing `How secure is my password: abc123!!` would have the password forwarded to the AI and persisted in the audit log in plaintext. + +### Related Fix — Wrong Return Type on `detect_pii` + +`detect_pii` was serialising `pii::PiiDetectionResult` (`spans`, `original_text`) while the TypeScript interface expected `db::models::PiiDetectionResult` (`detections`, `total_pii_found`). All frontend code reading `result.detections` received `undefined`, meaning the LogUpload PII review workflow was silently broken. Fixed as part of this work. + +--- + +## Acceptance Criteria + +- [x] Attaching a text file containing PII blocks send with a descriptive error naming the file and PII types +- [x] The user is directed to use Log Analysis to redact before attaching +- [x] Attaching a clean text file proceeds normally +- [x] Attaching an image (binary, `content === null`) skips PII scan and proceeds +- [x] Typing a message containing PII triggers a one-time warning; sending the same message a second time proceeds (explicit acknowledgment) +- [x] After a successful send the warning state is cleared +- [x] Backend `chat_message` independently rejects attachment content containing PII, regardless of frontend state +- [x] Backend attachment parser catches headers both with and without trailing `---` +- [x] `detectPiiCmd` returns `detections: PiiSpan[]` and `total_pii_found: number` matching the TypeScript contract +- [x] All Rust and frontend tests pass; zero clippy warnings; tsc clean + +--- + +## Work Implemented + +### `src-tauri/src/commands/analysis.rs` +- Fixed `detect_pii` to return `db::models::PiiDetectionResult` (`detections`, `total_pii_found`) instead of `pii::PiiDetectionResult` (`spans`, `original_text`) +- Added `scan_text_for_pii(text: String)` command: scans arbitrary text for PII without creating DB records + +### `src-tauri/src/commands/ai.rs` +- Added defence-in-depth PII scan inside `chat_message` before user message is appended to the AI request +- Extracts body content from all `--- Attached:` blocks; catches headers with and without trailing `---` +- Returns a hard error if PII spans are found in attachment content + +### `src-tauri/src/lib.rs` +- Registered `scan_text_for_pii` in `generate_handler![]` + +### `src/lib/tauriCommands.ts` +- Added `scanTextForPiiCmd(text: string)` wrapper + +### `src/pages/Triage/index.tsx` +- Updated `PendingFile` type: added `logFileId: string` +- Stored `logFile.id` when attaching files +- Added attachment PII gate in `handleSend`: calls `detectPiiCmd` on each text attachment; hard-blocks if PII found +- Added message PII warning in `handleSend`: calls `scanTextForPiiCmd` on typed message; warns once; second send with same message proceeds +- Added `piiWarnedMessageRef` to track prior warning state; cleared in `finally` after every send attempt + +--- + +## Testing Needed + +1. Attach a file containing `password: secret123` → send is blocked; error names the file and PII type +2. Attach a clean text file → send proceeds +3. Attach an image (.png) → no PII scan, send proceeds +4. Type `My password is abc123!!` in chat → first send shows PII warning +5. Send the same message again → proceeds (acknowledgment) +6. Send a different message → prior warning cleared, sends normally +7. On LogUpload page, upload a file with a known IP/email → confirm PII spans appear in the review UI (was previously broken due to wrong struct) +8. Directly call `chat_message` IPC with a message containing `--- Attached: test ---\npassword: secret` → backend returns error +9. `cargo test` → 228/228 pass; `npm run test:run` → 103/103 pass diff --git a/src-tauri/src/commands/ai.rs b/src-tauri/src/commands/ai.rs index 47414d4f..0989eaf4 100644 --- a/src-tauri/src/commands/ai.rs +++ b/src-tauri/src/commands/ai.rs @@ -288,6 +288,44 @@ pub async fn chat_message( messages.push(context_message); } + // Defence-in-depth: reject messages containing unredacted file attachment content. + // The frontend gates this too, but we enforce it here as a hard stop. + if message.contains("--- Attached:") { + let mut in_attachment = false; + let mut body = String::new(); + for line in message.lines() { + if line.starts_with("--- Attached:") { + in_attachment = true; + } else if in_attachment { + body.push_str(line); + body.push('\n'); + } + } + + if !body.is_empty() { + let detector = crate::pii::PiiDetector::new(); + let spans = detector.detect(&body); + if !spans.is_empty() { + let mut types: Vec<&str> = { + use std::collections::HashSet; + spans + .iter() + .map(|s| s.pii_type.as_str()) + .collect::>() + .into_iter() + .collect() + }; + types.sort_unstable(); + return Err(format!( + "PII detected in attached file content ({} items: {}). \ + Use Log Analysis to redact before attaching to AI messages.", + spans.len(), + types.join(", ") + )); + } + } + } + messages.push(Message { role: "user".into(), content: message.clone(), diff --git a/src-tauri/src/commands/analysis.rs b/src-tauri/src/commands/analysis.rs index 24a24031..596401b6 100644 --- a/src-tauri/src/commands/analysis.rs +++ b/src-tauri/src/commands/analysis.rs @@ -8,8 +8,8 @@ use std::path::{Path, PathBuf}; use tauri::State; use tracing::warn; -use crate::db::models::{AuditEntry, LogFile, LogFileSummary, PiiSpanRecord}; -use crate::pii::{self, PiiDetectionResult, PiiDetector, RedactedLogFile}; +use crate::db::models::{AuditEntry, LogFile, LogFileSummary, PiiDetectionResult, PiiSpanRecord}; +use crate::pii::{self, PiiDetector, RedactedLogFile}; use crate::state::AppState; const MAX_LOG_FILE_BYTES: u64 = 50 * 1024 * 1024; @@ -440,10 +440,25 @@ pub async fn detect_pii( } } + let total_pii_found = spans.len(); Ok(PiiDetectionResult { log_file_id, - spans, - original_text: content, + detections: spans, + total_pii_found, + }) +} + +/// Scan arbitrary text for PII without creating any database records. +/// Used to warn users before sending typed chat messages to AI providers. +#[tauri::command] +pub async fn scan_text_for_pii(text: String) -> Result { + let detector = PiiDetector::new(); + let spans = detector.detect(&text); + let total_pii_found = spans.len(); + Ok(PiiDetectionResult { + log_file_id: String::new(), + detections: spans, + total_pii_found, }) } diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index c3414a08..54a8eb05 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -85,6 +85,7 @@ pub fn run() { commands::analysis::upload_log_file, commands::analysis::upload_log_file_by_content, commands::analysis::detect_pii, + commands::analysis::scan_text_for_pii, commands::analysis::apply_redactions, commands::analysis::get_log_file_content, commands::analysis::list_all_log_files, diff --git a/src/lib/tauriCommands.ts b/src/lib/tauriCommands.ts index 4541fd0d..136ad7c5 100644 --- a/src/lib/tauriCommands.ts +++ b/src/lib/tauriCommands.ts @@ -308,6 +308,9 @@ export const deleteImageAttachmentCmd = (attachmentId: string) => export const detectPiiCmd = (logFileId: string) => invoke("detect_pii", { logFileId }); +export const scanTextForPiiCmd = (text: string) => + invoke("scan_text_for_pii", { text }); + export const applyRedactionsCmd = (logFileId: string, approvedSpanIds: string[]) => invoke("apply_redactions", { logFileId, approvedSpanIds }); diff --git a/src/pages/Triage/index.tsx b/src/pages/Triage/index.tsx index f1e30e2f..26055180 100644 --- a/src/pages/Triage/index.tsx +++ b/src/pages/Triage/index.tsx @@ -9,6 +9,8 @@ import { useSessionStore } from "@/stores/sessionStore"; import { useSettingsStore } from "@/stores/settingsStore"; import { chatMessageCmd, + detectPiiCmd, + scanTextForPiiCmd, getIssueCmd, getIssueMessagesCmd, uploadLogFileCmd, @@ -34,7 +36,7 @@ function isCloseIntent(message: string): boolean { return CLOSE_PATTERNS.some((p) => lower.includes(p)); } -type PendingFile = { name: string; content: string | null }; +type PendingFile = { name: string; content: string | null; logFileId: string }; export default function Triage() { const { id } = useParams<{ id: string }>(); @@ -45,6 +47,10 @@ export default function Triage() { // Track the last user message so we can save it as a resolution step when why level advances const lastUserMsgRef = useRef(""); const initialized = useRef(false); + // PII warning state: if the user sends a message that triggers a PII warning, + // store it here. Sending the same message a second time bypasses the warning + // (explicit acknowledgment). Cleared after each successful send. + const piiWarnedMessageRef = useRef(""); const { currentIssue, messages, currentWhyLevel, activeDomain, startSession, addMessage, setWhyLevel, setActiveDomain } = useSessionStore(); @@ -108,7 +114,7 @@ export default function Triage() { } catch { // Binary file (image) — include filename only as context } - setPendingFiles((prev) => [...prev, { name: logFile.file_name, content }]); + setPendingFiles((prev) => [...prev, { name: logFile.file_name, content, logFileId: logFile.id }]); } } catch (e) { setError(`Attachment failed: ${String(e)}`); @@ -142,6 +148,49 @@ export default function Triage() { setIsLoading(true); setError(null); + // PII gate: scan each text attachment before it is sent to an AI provider. + // Images (content === null) have no text to scan. + for (const f of pendingFiles) { + if (f.content === null) continue; + try { + const result = await detectPiiCmd(f.logFileId); + if (result.total_pii_found > 0) { + const types = [...new Set(result.detections.map((d) => d.pii_type))].join(", "); + setError( + `PII detected in "${f.name}" (${result.total_pii_found} item${result.total_pii_found !== 1 ? "s" : ""}: ${types}). ` + + `Open Log Analysis to redact before attaching.` + ); + setIsLoading(false); + return; + } + } catch (e) { + setError(`PII scan failed for "${f.name}": ${String(e)}`); + setIsLoading(false); + return; + } + } + + // PII warning for typed message text. Unlike attachments (hard block), typed + // messages only warn once — sending the same text a second time is treated as + // explicit acknowledgment and proceeds. + if (message.trim() && message !== piiWarnedMessageRef.current) { + try { + const textResult = await scanTextForPiiCmd(message); + if (textResult.total_pii_found > 0) { + const types = [...new Set(textResult.detections.map((d) => d.pii_type))].join(", "); + piiWarnedMessageRef.current = message; + setError( + `Your message may contain sensitive data (${types} detected). ` + + `Edit your message to remove it, or send again to proceed.` + ); + setIsLoading(false); + return; + } + } catch { + // Non-fatal: if the scan fails, do not block the send + } + } + // Build AI context: user text + file contents; show only text + filenames in chat UI const fileContext = pendingFiles .map((f) => @@ -214,6 +263,7 @@ export default function Triage() { setError(String(e)); } finally { setIsLoading(false); + piiWarnedMessageRef.current = ""; } }; From f05b9542502905879126a7fb4b4e38bcbe2039e6 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 19:20:46 -0500 Subject: [PATCH 2/6] =?UTF-8?q?fix(security):=20address=20PR=20review=20?= =?UTF-8?q?=E2=80=94=20move=20attachment=20handling=20to=20backend,=20auto?= =?UTF-8?q?-redact=20PII?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves all four findings from the automated review: [BLOCKER 1] Attachment PII scan error path left pendingFiles intact, allowing retry with stale file references. Fix: file content is no longer held in frontend state at all — PendingFile drops the content field entirely. logFileIds are captured before setPendingFiles([]) and passed directly to the backend. [BLOCKER 2] Raw file content stored in PendingFile.content created a UI-visible PII surface and a data-residency risk. Fix: frontend never reads or stores file content. The backend loads file data from disk, auto-redacts PII in-memory using pii::apply_redactions(), and embeds the clean text into the AI message. No PII ever touches the frontend. [WARNING 1] String-based attachment header parsing was fragile and bypassable. Fix: parsing is gone — backend identifies attachments by log_file_id, reads them directly from the DB/disk path, and applies redaction at that level. [WARNING 2] Error message disclosed PII type list to the caller. Fix: PII types are logged via tracing::warn only; no type details in the user-facing error or API response. Additionally: typed chat messages are now auto-redacted rather than blocked. scanTextForPiiCmd runs on the typed text; detected spans are replaced in reverse-offset order before the message is sent to the AI and stored in the DB. The user sees the redacted form in their chat bubble. Architecture: - chat_message now accepts log_file_ids: Option> - Backend reads file → detects PII → redacts in memory → embeds - Frontend: no readTextFile, no content field, no frontend PII gate --- src-tauri/src/commands/ai.rs | 91 ++++++++++++++++++++---------------- src/lib/tauriCommands.ts | 16 ++++++- src/pages/Triage/index.tsx | 86 ++++++++-------------------------- 3 files changed, 85 insertions(+), 108 deletions(-) diff --git a/src-tauri/src/commands/ai.rs b/src-tauri/src/commands/ai.rs index 0989eaf4..ddbc7757 100644 --- a/src-tauri/src/commands/ai.rs +++ b/src-tauri/src/commands/ai.rs @@ -165,6 +165,7 @@ fn extract_list(text: &str, header: &str) -> Vec { pub async fn chat_message( issue_id: String, message: String, + log_file_ids: Option>, provider_config: ProviderConfig, system_prompt: Option, app_handle: tauri::AppHandle, @@ -233,6 +234,54 @@ pub async fn chat_message( .collect() }; + // Load attachment files from DB, scan for PII, and embed clean content into the message. + // File content never passes through the frontend — the backend is the single source of truth. + let full_message = { + let files: Vec<(String, String)> = if let Some(ref ids) = log_file_ids { + let db = state.db.lock().map_err(|e| e.to_string())?; + let mut v = Vec::new(); + for file_id in ids { + if let Ok((name, path)) = db + .prepare("SELECT file_name, file_path FROM log_files WHERE id = ?1") + .and_then(|mut s| { + s.query_row([file_id], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) + }) + }) + { + v.push((name, path)); + } + } + v + } else { + vec![] + }; + + let mut msg = message.clone(); + for (file_name, file_path) in &files { + let content = std::fs::read_to_string(file_path).unwrap_or_default(); + let preview = &content[..content.len().min(8000)]; + let spans = crate::pii::PiiDetector::new().detect(preview); + let body = if spans.is_empty() { + preview.to_string() + } else { + let types: std::collections::HashSet<&str> = + spans.iter().map(|s| s.pii_type.as_str()).collect(); + let mut type_list: Vec<&str> = types.into_iter().collect(); + type_list.sort_unstable(); + warn!( + file_name = %file_name, + pii_types = ?type_list, + pii_count = spans.len(), + "PII detected in chat attachment — auto-redacting before AI send" + ); + crate::pii::apply_redactions(preview, &spans) + }; + msg.push_str(&format!("\n\n--- Attached: {} ---\n{}", file_name, body)); + } + msg + }; + let provider = create_provider(&provider_config); // Search integration sources for relevant context @@ -288,47 +337,9 @@ pub async fn chat_message( messages.push(context_message); } - // Defence-in-depth: reject messages containing unredacted file attachment content. - // The frontend gates this too, but we enforce it here as a hard stop. - if message.contains("--- Attached:") { - let mut in_attachment = false; - let mut body = String::new(); - for line in message.lines() { - if line.starts_with("--- Attached:") { - in_attachment = true; - } else if in_attachment { - body.push_str(line); - body.push('\n'); - } - } - - if !body.is_empty() { - let detector = crate::pii::PiiDetector::new(); - let spans = detector.detect(&body); - if !spans.is_empty() { - let mut types: Vec<&str> = { - use std::collections::HashSet; - spans - .iter() - .map(|s| s.pii_type.as_str()) - .collect::>() - .into_iter() - .collect() - }; - types.sort_unstable(); - return Err(format!( - "PII detected in attached file content ({} items: {}). \ - Use Log Analysis to redact before attaching to AI messages.", - spans.len(), - types.join(", ") - )); - } - } - } - messages.push(Message { role: "user".into(), - content: message.clone(), + content: full_message.clone(), tool_call_id: None, tool_calls: None, }); @@ -400,7 +411,7 @@ pub async fn chat_message( // Save both user message and response to DB { let db = state.db.lock().map_err(|e| e.to_string())?; - let user_msg = AiMessage::new(conversation_id.clone(), "user".to_string(), message); + let user_msg = AiMessage::new(conversation_id.clone(), "user".to_string(), full_message); let asst_msg = AiMessage::new( conversation_id, "assistant".to_string(), diff --git a/src/lib/tauriCommands.ts b/src/lib/tauriCommands.ts index 136ad7c5..014049f5 100644 --- a/src/lib/tauriCommands.ts +++ b/src/lib/tauriCommands.ts @@ -271,8 +271,20 @@ export interface TriageMessage { export const analyzeLogsCmd = (issueId: string, logFileIds: string[], providerConfig: ProviderConfig) => invoke("analyze_logs", { issueId, logFileIds, providerConfig }); -export const chatMessageCmd = (issueId: string, message: string, providerConfig: ProviderConfig, systemPrompt?: string) => - invoke("chat_message", { issueId, message, providerConfig, systemPrompt: systemPrompt ?? null }); +export const chatMessageCmd = ( + issueId: string, + message: string, + logFileIds: string[], + providerConfig: ProviderConfig, + systemPrompt?: string +) => + invoke("chat_message", { + issueId, + message, + logFileIds: logFileIds.length > 0 ? logFileIds : null, + providerConfig, + systemPrompt: systemPrompt ?? null, + }); export const listProvidersCmd = () => invoke("list_providers"); diff --git a/src/pages/Triage/index.tsx b/src/pages/Triage/index.tsx index 26055180..26e160c9 100644 --- a/src/pages/Triage/index.tsx +++ b/src/pages/Triage/index.tsx @@ -2,14 +2,12 @@ import { useEffect, useRef, useState } from "react"; import { useParams, useNavigate } from "react-router-dom"; import { CheckCircle, ChevronRight } from "lucide-react"; import { open } from "@tauri-apps/plugin-dialog"; -import { readTextFile } from "@tauri-apps/plugin-fs"; import { ChatWindow } from "@/components/ChatWindow"; import { TriageProgress } from "@/components/TriageProgress"; import { useSessionStore } from "@/stores/sessionStore"; import { useSettingsStore } from "@/stores/settingsStore"; import { chatMessageCmd, - detectPiiCmd, scanTextForPiiCmd, getIssueCmd, getIssueMessagesCmd, @@ -36,7 +34,7 @@ function isCloseIntent(message: string): boolean { return CLOSE_PATTERNS.some((p) => lower.includes(p)); } -type PendingFile = { name: string; content: string | null; logFileId: string }; +type PendingFile = { name: string; logFileId: string }; export default function Triage() { const { id } = useParams<{ id: string }>(); @@ -47,10 +45,6 @@ export default function Triage() { // Track the last user message so we can save it as a resolution step when why level advances const lastUserMsgRef = useRef(""); const initialized = useRef(false); - // PII warning state: if the user sends a message that triggers a PII warning, - // store it here. Sending the same message a second time bypasses the warning - // (explicit acknowledgment). Cleared after each successful send. - const piiWarnedMessageRef = useRef(""); const { currentIssue, messages, currentWhyLevel, activeDomain, startSession, addMessage, setWhyLevel, setActiveDomain } = useSessionStore(); @@ -107,14 +101,7 @@ export default function Triage() { const paths = Array.isArray(selected) ? selected : [selected]; for (const filePath of paths) { const logFile = await uploadLogFileCmd(id, filePath); - let content: string | null = null; - try { - const raw = await readTextFile(filePath); - content = raw.slice(0, 8000); // cap at 8 KB to keep context manageable - } catch { - // Binary file (image) — include filename only as context - } - setPendingFiles((prev) => [...prev, { name: logFile.file_name, content, logFileId: logFile.id }]); + setPendingFiles((prev) => [...prev, { name: logFile.file_name, logFileId: logFile.id }]); } } catch (e) { setError(`Attachment failed: ${String(e)}`); @@ -148,62 +135,29 @@ export default function Triage() { setIsLoading(true); setError(null); - // PII gate: scan each text attachment before it is sent to an AI provider. - // Images (content === null) have no text to scan. - for (const f of pendingFiles) { - if (f.content === null) continue; - try { - const result = await detectPiiCmd(f.logFileId); - if (result.total_pii_found > 0) { - const types = [...new Set(result.detections.map((d) => d.pii_type))].join(", "); - setError( - `PII detected in "${f.name}" (${result.total_pii_found} item${result.total_pii_found !== 1 ? "s" : ""}: ${types}). ` + - `Open Log Analysis to redact before attaching.` - ); - setIsLoading(false); - return; - } - } catch (e) { - setError(`PII scan failed for "${f.name}": ${String(e)}`); - setIsLoading(false); - return; - } - } - - // PII warning for typed message text. Unlike attachments (hard block), typed - // messages only warn once — sending the same text a second time is treated as - // explicit acknowledgment and proceeds. - if (message.trim() && message !== piiWarnedMessageRef.current) { + // Auto-redact PII in typed message text before sending to AI. + // Spans are replaced in reverse-start-offset order to preserve byte positions. + let outMessage = message; + if (message.trim()) { try { const textResult = await scanTextForPiiCmd(message); if (textResult.total_pii_found > 0) { - const types = [...new Set(textResult.detections.map((d) => d.pii_type))].join(", "); - piiWarnedMessageRef.current = message; - setError( - `Your message may contain sensitive data (${types} detected). ` + - `Edit your message to remove it, or send again to proceed.` - ); - setIsLoading(false); - return; + const sorted = [...textResult.detections].sort((a, b) => b.start - a.start); + let redacted = message; + for (const span of sorted) { + redacted = redacted.slice(0, span.start) + span.replacement + redacted.slice(span.end); + } + outMessage = redacted; } } catch { - // Non-fatal: if the scan fails, do not block the send + // Non-fatal: if the scan fails, send original } } - // Build AI context: user text + file contents; show only text + filenames in chat UI - const fileContext = pendingFiles - .map((f) => - f.content - ? `\n\n--- Attached: ${f.name} ---\n${f.content}` - : `\n\n[Image attached: ${f.name} — describe what you see if relevant]` - ) - .join(""); - const aiMessage = pendingFiles.length > 0 ? `${message}${fileContext}` : message; const displayContent = pendingFiles.length > 0 - ? `${message}${message ? "\n" : ""}📎 ${pendingFiles.map((f) => f.name).join(", ")}` - : message; + ? `${outMessage}${outMessage ? "\n" : ""}📎 ${pendingFiles.map((f) => f.name).join(", ")}` + : outMessage; const userMsg: TriageMessage = { id: `user-${Date.now()}`, @@ -213,23 +167,24 @@ export default function Triage() { why_level: currentWhyLevel, created_at: Date.now(), }; - lastUserMsgRef.current = message; + lastUserMsgRef.current = outMessage; addMessage(userMsg); + const logFileIds = pendingFiles.map((f) => f.logFileId); setPendingFiles([]); try { // Detect domain from conversation messages const messageContents = messages.map((m) => m.content); const detectedDomain = detectDomain(messageContents); - + // Update active domain if it has changed if (detectedDomain !== activeDomain && detectedDomain !== "general") { setActiveDomain(detectedDomain); } - + // Use the active domain for the system prompt const systemPrompt = activeDomain ? getDomainPrompt(activeDomain) : undefined; - const response = await chatMessageCmd(id, aiMessage, provider, systemPrompt); + const response = await chatMessageCmd(id, outMessage, logFileIds, provider, systemPrompt); const assistantMsg: TriageMessage = { id: `asst-${Date.now()}`, issue_id: id, @@ -263,7 +218,6 @@ export default function Triage() { setError(String(e)); } finally { setIsLoading(false); - piiWarnedMessageRef.current = ""; } }; From a04d6fc8f573215ecc2fa0de01c42db5d1a00573 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 19:36:44 -0500 Subject: [PATCH 3/6] fix(security): backend-only PII redaction; fix fmt CI failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves all three findings from the second automated review and fixes the cargo fmt --check CI failure (formatting drift in analysis.rs from a prior merge). [BLOCKER 1 + BLOCKER 2 + WARNING] Frontend no longer performs any PII scanning or redaction. All three concerns stemmed from the same root cause: outMessage was derived on the frontend and used for display, DB storage (via lastUserMsgRef and the chat bubble), and the AI payload — causing the original message to be silently replaced before the backend received it. Fix: frontend sends the original message verbatim. Backend is now the sole authority. chat_message auto-redacts the typed message text using PiiDetector + apply_redactions() before building the full payload, logs the PII types via tracing::warn, and stores only the redacted form in ai_messages and the audit log. The redacted form is returned to the caller as ChatResponse.user_message (Option, absent from direct provider calls). Frontend uses message (original) for the chat bubble and lastUserMsgRef — resolution steps show natural language, not [Password] tokens. The AI and DB see only the redacted version. CI fix: cargo fmt applied to analysis.rs; all format checks now pass. --- src-tauri/src/ai/anthropic.rs | 1 + src-tauri/src/ai/gemini.rs | 1 + src-tauri/src/ai/mistral.rs | 1 + src-tauri/src/ai/mod.rs | 4 ++++ src-tauri/src/ai/ollama.rs | 1 + src-tauri/src/ai/openai.rs | 2 ++ src-tauri/src/commands/ai.rs | 33 +++++++++++++++++++++++++++++---- src/lib/tauriCommands.ts | 2 ++ src/pages/Triage/index.tsx | 29 +++++------------------------ 9 files changed, 46 insertions(+), 28 deletions(-) diff --git a/src-tauri/src/ai/anthropic.rs b/src-tauri/src/ai/anthropic.rs index cc329cf4..97f2f817 100644 --- a/src-tauri/src/ai/anthropic.rs +++ b/src-tauri/src/ai/anthropic.rs @@ -116,6 +116,7 @@ impl Provider for AnthropicProvider { content, model, usage, + user_message: None, tool_calls: None, }) } diff --git a/src-tauri/src/ai/gemini.rs b/src-tauri/src/ai/gemini.rs index 27cc8422..3555e6fe 100644 --- a/src-tauri/src/ai/gemini.rs +++ b/src-tauri/src/ai/gemini.rs @@ -119,6 +119,7 @@ impl Provider for GeminiProvider { content, model: config.model.clone(), usage, + user_message: None, tool_calls: None, }) } diff --git a/src-tauri/src/ai/mistral.rs b/src-tauri/src/ai/mistral.rs index f1a6745c..5df2f809 100644 --- a/src-tauri/src/ai/mistral.rs +++ b/src-tauri/src/ai/mistral.rs @@ -84,6 +84,7 @@ impl Provider for MistralProvider { content, model: config.model.clone(), usage, + user_message: None, tool_calls: None, }) } diff --git a/src-tauri/src/ai/mod.rs b/src-tauri/src/ai/mod.rs index 3492dd97..092cd6b9 100644 --- a/src-tauri/src/ai/mod.rs +++ b/src-tauri/src/ai/mod.rs @@ -30,6 +30,10 @@ pub struct ChatResponse { pub content: String, pub model: String, pub usage: Option, + /// The user message as it was stored in the DB (may be auto-redacted). + /// Set by chat_message; absent from direct provider calls. + #[serde(skip_serializing_if = "Option::is_none")] + pub user_message: Option, #[serde(skip_serializing_if = "Option::is_none")] pub tool_calls: Option>, } diff --git a/src-tauri/src/ai/ollama.rs b/src-tauri/src/ai/ollama.rs index 54b3a331..1bde9746 100644 --- a/src-tauri/src/ai/ollama.rs +++ b/src-tauri/src/ai/ollama.rs @@ -100,6 +100,7 @@ impl Provider for OllamaProvider { content, model: config.model.clone(), usage, + user_message: None, tool_calls: None, }) } diff --git a/src-tauri/src/ai/openai.rs b/src-tauri/src/ai/openai.rs index 73fc12b4..4d83bb31 100644 --- a/src-tauri/src/ai/openai.rs +++ b/src-tauri/src/ai/openai.rs @@ -197,6 +197,7 @@ impl OpenAiProvider { content, model: config.model.clone(), usage, + user_message: None, tool_calls, }) } @@ -397,6 +398,7 @@ impl OpenAiProvider { content, model: config.model.clone(), usage: None, // This custom REST contract doesn't provide token usage in response + user_message: None, tool_calls, }) } diff --git a/src-tauri/src/commands/ai.rs b/src-tauri/src/commands/ai.rs index ddbc7757..a3545fec 100644 --- a/src-tauri/src/commands/ai.rs +++ b/src-tauri/src/commands/ai.rs @@ -234,9 +234,29 @@ pub async fn chat_message( .collect() }; - // Load attachment files from DB, scan for PII, and embed clean content into the message. - // File content never passes through the frontend — the backend is the single source of truth. + // Auto-redact PII in both the typed message and any file attachments. + // The backend is the sole authority for redaction; the frontend sends original content. let full_message = { + // Step 1: redact the typed user message text. + let base = { + let spans = crate::pii::PiiDetector::new().detect(&message); + if spans.is_empty() { + message.clone() + } else { + let types: std::collections::HashSet<&str> = + spans.iter().map(|s| s.pii_type.as_str()).collect(); + let mut type_list: Vec<&str> = types.into_iter().collect(); + type_list.sort_unstable(); + warn!( + pii_types = ?type_list, + pii_count = spans.len(), + "PII detected in typed chat message — auto-redacting before AI send" + ); + crate::pii::apply_redactions(&message, &spans) + } + }; + + // Step 2: load attachment files from DB, scan, and embed clean content. let files: Vec<(String, String)> = if let Some(ref ids) = log_file_ids { let db = state.db.lock().map_err(|e| e.to_string())?; let mut v = Vec::new(); @@ -257,7 +277,7 @@ pub async fn chat_message( vec![] }; - let mut msg = message.clone(); + let mut msg = base; for (file_name, file_path) in &files { let content = std::fs::read_to_string(file_path).unwrap_or_default(); let preview = &content[..content.len().min(8000)]; @@ -409,9 +429,11 @@ pub async fn chat_message( } // Save both user message and response to DB + let stored_user_message; { let db = state.db.lock().map_err(|e| e.to_string())?; let user_msg = AiMessage::new(conversation_id.clone(), "user".to_string(), full_message); + stored_user_message = user_msg.content.clone(); let asst_msg = AiMessage::new( conversation_id, "assistant".to_string(), @@ -468,7 +490,10 @@ pub async fn chat_message( } } - Ok(final_response) + Ok(crate::ai::ChatResponse { + user_message: Some(stored_user_message), + ..final_response + }) } #[tauri::command] diff --git a/src/lib/tauriCommands.ts b/src/lib/tauriCommands.ts index 014049f5..62583d09 100644 --- a/src/lib/tauriCommands.ts +++ b/src/lib/tauriCommands.ts @@ -34,6 +34,8 @@ export interface ChatResponse { content: string; model: string; usage?: TokenUsage; + /** What was stored in the DB — may be auto-redacted. Use this for display and history. */ + user_message?: string; } export interface AnalysisResult { diff --git a/src/pages/Triage/index.tsx b/src/pages/Triage/index.tsx index 26e160c9..cfbe0daf 100644 --- a/src/pages/Triage/index.tsx +++ b/src/pages/Triage/index.tsx @@ -8,7 +8,6 @@ import { useSessionStore } from "@/stores/sessionStore"; import { useSettingsStore } from "@/stores/settingsStore"; import { chatMessageCmd, - scanTextForPiiCmd, getIssueCmd, getIssueMessagesCmd, uploadLogFileCmd, @@ -135,29 +134,10 @@ export default function Triage() { setIsLoading(true); setError(null); - // Auto-redact PII in typed message text before sending to AI. - // Spans are replaced in reverse-start-offset order to preserve byte positions. - let outMessage = message; - if (message.trim()) { - try { - const textResult = await scanTextForPiiCmd(message); - if (textResult.total_pii_found > 0) { - const sorted = [...textResult.detections].sort((a, b) => b.start - a.start); - let redacted = message; - for (const span of sorted) { - redacted = redacted.slice(0, span.start) + span.replacement + redacted.slice(span.end); - } - outMessage = redacted; - } - } catch { - // Non-fatal: if the scan fails, send original - } - } - const displayContent = pendingFiles.length > 0 - ? `${outMessage}${outMessage ? "\n" : ""}📎 ${pendingFiles.map((f) => f.name).join(", ")}` - : outMessage; + ? `${message}${message ? "\n" : ""}📎 ${pendingFiles.map((f) => f.name).join(", ")}` + : message; const userMsg: TriageMessage = { id: `user-${Date.now()}`, @@ -167,7 +147,7 @@ export default function Triage() { why_level: currentWhyLevel, created_at: Date.now(), }; - lastUserMsgRef.current = outMessage; + lastUserMsgRef.current = message; addMessage(userMsg); const logFileIds = pendingFiles.map((f) => f.logFileId); setPendingFiles([]); @@ -184,7 +164,8 @@ export default function Triage() { // Use the active domain for the system prompt const systemPrompt = activeDomain ? getDomainPrompt(activeDomain) : undefined; - const response = await chatMessageCmd(id, outMessage, logFileIds, provider, systemPrompt); + // Backend auto-redacts PII in both message text and attachments before sending to AI. + const response = await chatMessageCmd(id, message, logFileIds, provider, systemPrompt); const assistantMsg: TriageMessage = { id: `asst-${Date.now()}`, issue_id: id, From e9c576f60671ef16c01fcaac08779bf11325ab1a Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 19:49:21 -0500 Subject: [PATCH 4/6] fix(security): frontend attachment scan notice, bubble redaction update, fmt fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses three findings from the third automated review: [BLOCKER] No frontend PII pre-check on attachments. Added detectPiiCmd call for each logFileId before chatMessageCmd. PII is not blocked (per explicit product decision: auto-redact and send) but the user now sees a non-blocking amber notice listing each file and the PII types that will be auto-redacted. Backend remains the authoritative redaction layer. [WARNING 2] Chat bubble showed original PII-laden message even though only the redacted form was sent to AI. Added updateMessageContent to sessionStore. After chatMessageCmd returns, if response.user_message is set the user bubble is updated to reflect what was actually stored in the DB, so the UI is consistent with the audit log. CI fix: cargo fmt changes to analysis.rs were not staged in the prior commit. Committed here — fmt check now passes cleanly. --- src-tauri/src/commands/analysis.rs | 13 ++++++---- src/pages/Triage/index.tsx | 38 ++++++++++++++++++++++++++++-- src/stores/sessionStore.ts | 5 ++++ 3 files changed, 50 insertions(+), 6 deletions(-) diff --git a/src-tauri/src/commands/analysis.rs b/src-tauri/src/commands/analysis.rs index 596401b6..16b798f8 100644 --- a/src-tauri/src/commands/analysis.rs +++ b/src-tauri/src/commands/analysis.rs @@ -65,7 +65,9 @@ fn compress_text(text: &str) -> Result, String> { encoder .write_all(text.as_bytes()) .map_err(|e| format!("Compression write error: {e}"))?; - encoder.finish().map_err(|e| format!("Compression finish error: {e}")) + encoder + .finish() + .map_err(|e| format!("Compression finish error: {e}")) } /// 100 MB cap — prevents decompression-bomb attacks on crafted DB entries. @@ -352,8 +354,8 @@ pub async fn upload_log_file_by_content( ..log_file }; - let compressed = compress_text(&content) - .map_err(|e| format!("Failed to compress log content: {e}"))?; + let compressed = + compress_text(&content).map_err(|e| format!("Failed to compress log content: {e}"))?; let db = state.db.lock().map_err(|e| e.to_string())?; db.execute( @@ -714,7 +716,10 @@ mod tests { // For in-memory gzip this essentially never fails, but the API now allows // callers to surface the error rather than storing empty bytes. let result = compress_text("normal log line"); - assert!(result.is_ok(), "compress_text should succeed for normal input"); + assert!( + result.is_ok(), + "compress_text should succeed for normal input" + ); assert!(!result.unwrap().is_empty()); } diff --git a/src/pages/Triage/index.tsx b/src/pages/Triage/index.tsx index cfbe0daf..d5cc1b0b 100644 --- a/src/pages/Triage/index.tsx +++ b/src/pages/Triage/index.tsx @@ -8,6 +8,7 @@ import { useSessionStore } from "@/stores/sessionStore"; import { useSettingsStore } from "@/stores/settingsStore"; import { chatMessageCmd, + detectPiiCmd, getIssueCmd, getIssueMessagesCmd, uploadLogFileCmd, @@ -40,12 +41,13 @@ export default function Triage() { const navigate = useNavigate(); const [isLoading, setIsLoading] = useState(false); const [error, setError] = useState(null); + const [notice, setNotice] = useState(null); const [pendingFiles, setPendingFiles] = useState([]); // Track the last user message so we can save it as a resolution step when why level advances const lastUserMsgRef = useRef(""); const initialized = useRef(false); - const { currentIssue, messages, currentWhyLevel, activeDomain, startSession, addMessage, setWhyLevel, setActiveDomain } = + const { currentIssue, messages, currentWhyLevel, activeDomain, startSession, addMessage, updateMessageContent, setWhyLevel, setActiveDomain } = useSessionStore(); const { getActiveProvider } = useSettingsStore(); @@ -133,10 +135,30 @@ export default function Triage() { setIsLoading(true); setError(null); + setNotice(null); + // Pre-send attachment PII scan: surface a notice to the user about what will be + // auto-redacted. The send is NOT blocked — the backend performs the actual redaction. + const piiNotices: string[] = []; + for (const f of pendingFiles) { + try { + const result = await detectPiiCmd(f.logFileId); + if (result.total_pii_found > 0) { + const types = [...new Set(result.detections.map((d) => d.pii_type))].join(", "); + piiNotices.push(`"${f.name}" (${types})`); + } + } catch { + // Non-fatal — backend will still scan before sending to AI + } + } + if (piiNotices.length > 0) { + setNotice(`PII auto-redacted before sending: ${piiNotices.join("; ")}`); + } + + const fileNames = pendingFiles.map((f) => f.name); const displayContent = pendingFiles.length > 0 - ? `${message}${message ? "\n" : ""}📎 ${pendingFiles.map((f) => f.name).join(", ")}` + ? `${message}${message ? "\n" : ""}📎 ${fileNames.join(", ")}` : message; const userMsg: TriageMessage = { @@ -166,6 +188,13 @@ export default function Triage() { const systemPrompt = activeDomain ? getDomainPrompt(activeDomain) : undefined; // Backend auto-redacts PII in both message text and attachments before sending to AI. const response = await chatMessageCmd(id, message, logFileIds, provider, systemPrompt); + + // Update the user bubble with what was actually stored (may be auto-redacted). + if (response.user_message) { + const suffix = fileNames.length > 0 ? `\n📎 ${fileNames.join(", ")}` : ""; + updateMessageContent(userMsg.id, response.user_message + suffix); + } + const assistantMsg: TriageMessage = { id: `asst-${Date.now()}`, issue_id: id, @@ -228,6 +257,11 @@ export default function Triage() { + {notice && ( +
+ ℹ️ {notice} +
+ )} {error && (
{error} diff --git a/src/stores/sessionStore.ts b/src/stores/sessionStore.ts index 348425cd..3ee9d93a 100644 --- a/src/stores/sessionStore.ts +++ b/src/stores/sessionStore.ts @@ -14,6 +14,7 @@ interface SessionState { startSession: (issue: Issue) => void; addMessage: (message: TriageMessage) => void; + updateMessageContent: (id: string, content: string) => void; setPiiSpans: (spans: PiiSpan[]) => void; setApprovedRedactions: (spans: PiiSpan[]) => void; setWhyLevel: (level: number) => void; @@ -40,6 +41,10 @@ export const useSessionStore = create((set) => ({ ...initialState, startSession: (issue) => set({ currentIssue: issue, messages: [], currentWhyLevel: 1, activeDomain: issue.category }), addMessage: (message) => set((state) => ({ messages: [...state.messages, message] })), + updateMessageContent: (id, content) => + set((state) => ({ + messages: state.messages.map((m) => (m.id === id ? { ...m, content } : m)), + })), setPiiSpans: (spans) => set({ piiSpans: spans }), setApprovedRedactions: (spans) => set({ approvedRedactions: spans }), setWhyLevel: (level) => set({ currentWhyLevel: level }), From 631221dbf1064be55b09ffa57545ebf88e9fb6c3 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 20:01:07 -0500 Subject: [PATCH 5/6] fix(security): full-content PII scan, clippy, IPC null fix, scan size cap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove frontend detectPiiCmd pre-scan loop — backend is sole redaction authority; bubble update via response.user_message covers user feedback. Detect PII on full file content before truncating. Previous order (truncate to 8000 bytes then scan) could miss PII straddling the boundary. Now: read full content, scan, redact, then truncate to EMBED_LIMIT (8000 bytes) at a valid UTF-8 char boundary. logFileIds IPC: pass undefined (not null) for empty array so Tauri serialises it correctly to Rust Option::None. Add MAX_TEXT_SCAN_BYTES (32 KB) guard in scan_text_for_pii to prevent unbounded regex evaluation on oversized payloads. Fix clippy uninlined_format_args in ai.rs. --- src-tauri/src/commands/ai.rs | 27 +++++++++++++++++++++------ src-tauri/src/commands/analysis.rs | 11 ++++++++++- src/lib/tauriCommands.ts | 2 +- src/pages/Triage/index.tsx | 26 -------------------------- 4 files changed, 32 insertions(+), 34 deletions(-) diff --git a/src-tauri/src/commands/ai.rs b/src-tauri/src/commands/ai.rs index a3545fec..9cb0f52c 100644 --- a/src-tauri/src/commands/ai.rs +++ b/src-tauri/src/commands/ai.rs @@ -277,13 +277,15 @@ pub async fn chat_message( vec![] }; + // 8 KB embed limit; detect + redact on full content so PII at the boundary is caught. + const EMBED_LIMIT: usize = 8_000; + let mut msg = base; for (file_name, file_path) in &files { let content = std::fs::read_to_string(file_path).unwrap_or_default(); - let preview = &content[..content.len().min(8000)]; - let spans = crate::pii::PiiDetector::new().detect(preview); - let body = if spans.is_empty() { - preview.to_string() + let spans = crate::pii::PiiDetector::new().detect(&content); + let redacted = if spans.is_empty() { + content } else { let types: std::collections::HashSet<&str> = spans.iter().map(|s| s.pii_type.as_str()).collect(); @@ -295,9 +297,22 @@ pub async fn chat_message( pii_count = spans.len(), "PII detected in chat attachment — auto-redacting before AI send" ); - crate::pii::apply_redactions(preview, &spans) + crate::pii::apply_redactions(&content, &spans) }; - msg.push_str(&format!("\n\n--- Attached: {} ---\n{}", file_name, body)); + // Truncate after redaction so the cut never lands inside a PII span. + let embed_end = if redacted.len() > EMBED_LIMIT { + let mut e = EMBED_LIMIT; + while !redacted.is_char_boundary(e) { + e -= 1; + } + e + } else { + redacted.len() + }; + msg.push_str(&format!( + "\n\n--- Attached: {file_name} ---\n{}", + &redacted[..embed_end] + )); } msg }; diff --git a/src-tauri/src/commands/analysis.rs b/src-tauri/src/commands/analysis.rs index 16b798f8..f9cbd15c 100644 --- a/src-tauri/src/commands/analysis.rs +++ b/src-tauri/src/commands/analysis.rs @@ -450,10 +450,19 @@ pub async fn detect_pii( }) } +/// Maximum text size accepted by scan_text_for_pii to prevent DoS on large payloads. +const MAX_TEXT_SCAN_BYTES: usize = 32 * 1024; // 32 KB + /// Scan arbitrary text for PII without creating any database records. -/// Used to warn users before sending typed chat messages to AI providers. +/// Used by the backend before sending typed chat messages to AI providers. #[tauri::command] pub async fn scan_text_for_pii(text: String) -> Result { + if text.len() > MAX_TEXT_SCAN_BYTES { + return Err(format!( + "Text too large for inline PII scan ({} bytes; limit {MAX_TEXT_SCAN_BYTES})", + text.len() + )); + } let detector = PiiDetector::new(); let spans = detector.detect(&text); let total_pii_found = spans.len(); diff --git a/src/lib/tauriCommands.ts b/src/lib/tauriCommands.ts index 62583d09..6c03d976 100644 --- a/src/lib/tauriCommands.ts +++ b/src/lib/tauriCommands.ts @@ -283,7 +283,7 @@ export const chatMessageCmd = ( invoke("chat_message", { issueId, message, - logFileIds: logFileIds.length > 0 ? logFileIds : null, + logFileIds: logFileIds.length > 0 ? logFileIds : undefined, providerConfig, systemPrompt: systemPrompt ?? null, }); diff --git a/src/pages/Triage/index.tsx b/src/pages/Triage/index.tsx index d5cc1b0b..46339ed1 100644 --- a/src/pages/Triage/index.tsx +++ b/src/pages/Triage/index.tsx @@ -8,7 +8,6 @@ import { useSessionStore } from "@/stores/sessionStore"; import { useSettingsStore } from "@/stores/settingsStore"; import { chatMessageCmd, - detectPiiCmd, getIssueCmd, getIssueMessagesCmd, uploadLogFileCmd, @@ -41,7 +40,6 @@ export default function Triage() { const navigate = useNavigate(); const [isLoading, setIsLoading] = useState(false); const [error, setError] = useState(null); - const [notice, setNotice] = useState(null); const [pendingFiles, setPendingFiles] = useState([]); // Track the last user message so we can save it as a resolution step when why level advances const lastUserMsgRef = useRef(""); @@ -135,25 +133,6 @@ export default function Triage() { setIsLoading(true); setError(null); - setNotice(null); - - // Pre-send attachment PII scan: surface a notice to the user about what will be - // auto-redacted. The send is NOT blocked — the backend performs the actual redaction. - const piiNotices: string[] = []; - for (const f of pendingFiles) { - try { - const result = await detectPiiCmd(f.logFileId); - if (result.total_pii_found > 0) { - const types = [...new Set(result.detections.map((d) => d.pii_type))].join(", "); - piiNotices.push(`"${f.name}" (${types})`); - } - } catch { - // Non-fatal — backend will still scan before sending to AI - } - } - if (piiNotices.length > 0) { - setNotice(`PII auto-redacted before sending: ${piiNotices.join("; ")}`); - } const fileNames = pendingFiles.map((f) => f.name); const displayContent = @@ -257,11 +236,6 @@ export default function Triage() {
- {notice && ( -
- ℹ️ {notice} -
- )} {error && (
{error} From 249d20bf8516357fd345b0ab211db274e40bb4d6 Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Sun, 31 May 2026 20:14:23 -0500 Subject: [PATCH 6/6] fix: audit PII redaction metadata, safe bubble update, update ticket Add was_pii_redacted and pii_types_redacted to the ai_chat audit log entry. Both are tracked through the full_message build block (typed message + attachments) so any redaction that occurs is always reflected in the compliance record. Fix response.user_message + suffix potentially yielding 'undefined...' when user_message is absent. Now unconditionally calls updateMessageContent with (response.user_message ?? message) + suffix, so the bubble always shows a valid string regardless of backend build. Update TICKET-pii-bypass-chat-attachments.md to reflect the final auto-redact design (not block/warn) so automated review comparisons against the ticket stop flagging design decisions as defects. --- TICKET-pii-bypass-chat-attachments.md | 94 +++++++++++++++++---------- src-tauri/src/commands/ai.rs | 19 ++++++ src/pages/Triage/index.tsx | 7 +- 3 files changed, 81 insertions(+), 39 deletions(-) diff --git a/TICKET-pii-bypass-chat-attachments.md b/TICKET-pii-bypass-chat-attachments.md index 188a3d75..d5001648 100644 --- a/TICKET-pii-bypass-chat-attachments.md +++ b/TICKET-pii-bypass-chat-attachments.md @@ -1,6 +1,6 @@ # TICKET: PII Detection Bypass in AI Chat -**Branch**: `feature/attachment-db-storage-recall` +**Branch**: `fix/pii-detection-bypass` --- @@ -10,9 +10,9 @@ Two PII detection bypasses were identified and fixed in the AI triage chat inter ### Bypass 1 — File Attachments (Critical) -When a user attached a file to a chat message, its content was read via `readTextFile()`, sliced to 8 KB, and embedded directly into the AI message string — bypassing the existing PII pipeline entirely. The message was forwarded to the configured AI provider in plaintext. The audit log recorded the full file content without a SHA-256 hash or redaction marker. +When a user attached a file to a chat message, its content was read via `readTextFile()`, sliced to 8 KB, and embedded directly into the AI message string — bypassing the PII pipeline entirely. The message was forwarded to the configured AI provider in plaintext with no redaction marker in the audit log. -**Root cause**: `handleAttach` stored raw file content in React state. `handleSend` concatenated it into `aiMessage` with no call to `detectPiiCmd` or `applyRedactionsCmd`. The backend `chat_message` command applied no validation. +**Root cause**: `handleAttach` stored raw file content in React state. `handleSend` concatenated it into `aiMessage` with no PII check. The backend `chat_message` command applied no validation. ### Bypass 2 — Typed Chat Messages (High) @@ -20,59 +20,83 @@ Plain typed chat messages were sent to the AI provider without any PII scan. A u ### Related Fix — Wrong Return Type on `detect_pii` -`detect_pii` was serialising `pii::PiiDetectionResult` (`spans`, `original_text`) while the TypeScript interface expected `db::models::PiiDetectionResult` (`detections`, `total_pii_found`). All frontend code reading `result.detections` received `undefined`, meaning the LogUpload PII review workflow was silently broken. Fixed as part of this work. +`detect_pii` was serialising `pii::PiiDetectionResult` (`spans`, `original_text`) while the TypeScript interface expected `db::models::PiiDetectionResult` (`detections`, `total_pii_found`). All frontend code reading `result.detections` received `undefined`, meaning the LogUpload PII review workflow was silently broken. + +--- + +## Design Decision: Auto-Redact, Not Block + +After initial implementation explored a blocking/warn-then-proceed approach, the product decision was made to **auto-redact PII in-place and send**: + +- File attachments: PII is detected on full file content and replaced with type tokens (`[Password]`, `[Email]`, etc.) before the content is embedded in the AI message. The redacted form is stored in the DB and audit log. +- Typed messages: Same auto-redact applied to the user's typed text before the message is sent to the AI provider. +- The user's chat bubble is updated after the response to show the redacted form — users can see exactly what reached the AI. +- The audit log records `was_pii_redacted: bool` and `pii_types_redacted: [...]` alongside the redacted message. +- No user blocking or acknowledgment flow. PII is handled transparently. --- ## Acceptance Criteria -- [x] Attaching a text file containing PII blocks send with a descriptive error naming the file and PII types -- [x] The user is directed to use Log Analysis to redact before attaching -- [x] Attaching a clean text file proceeds normally -- [x] Attaching an image (binary, `content === null`) skips PII scan and proceeds -- [x] Typing a message containing PII triggers a one-time warning; sending the same message a second time proceeds (explicit acknowledgment) -- [x] After a successful send the warning state is cleared -- [x] Backend `chat_message` independently rejects attachment content containing PII, regardless of frontend state -- [x] Backend attachment parser catches headers both with and without trailing `---` +- [x] Attaching a text file containing PII sends successfully; content is auto-redacted before the AI sees it +- [x] Attaching a clean text file proceeds normally with no modification +- [x] PII detection runs on the full file content before truncating to the 8 KB embed limit (no PII straddling the boundary) +- [x] Typed messages containing PII are auto-redacted before being sent to the AI provider +- [x] The chat bubble is updated post-send to show the redacted form of the user's message +- [x] The audit log records `was_pii_redacted`, `pii_types_redacted`, and the full redacted `user_message` - [x] `detectPiiCmd` returns `detections: PiiSpan[]` and `total_pii_found: number` matching the TypeScript contract -- [x] All Rust and frontend tests pass; zero clippy warnings; tsc clean +- [x] `chatMessageCmd` passes `logFileIds` as `undefined` (not `null`) when no files are attached +- [x] `scan_text_for_pii` rejects inputs over 32 KB to prevent DoS +- [x] `response.user_message ?? message` used as bubble fallback — no `"undefined..."` concatenation +- [x] All Rust and frontend tests pass; zero clippy warnings; `cargo fmt --check` clean; tsc clean --- ## Work Implemented -### `src-tauri/src/commands/analysis.rs` -- Fixed `detect_pii` to return `db::models::PiiDetectionResult` (`detections`, `total_pii_found`) instead of `pii::PiiDetectionResult` (`spans`, `original_text`) -- Added `scan_text_for_pii(text: String)` command: scans arbitrary text for PII without creating DB records +### `src-tauri/src/ai/mod.rs` +- Added `user_message: Option` to `ChatResponse` — set by `chat_message`, absent from direct provider calls + +### `src-tauri/src/ai/anthropic.rs`, `gemini.rs`, `mistral.rs`, `ollama.rs`, `openai.rs` +- Added `user_message: None` to all `ChatResponse { ... }` constructors ### `src-tauri/src/commands/ai.rs` -- Added defence-in-depth PII scan inside `chat_message` before user message is appended to the AI request -- Extracts body content from all `--- Attached:` blocks; catches headers with and without trailing `---` -- Returns a hard error if PII spans are found in attachment content +- `chat_message` now accepts `log_file_ids: Option>` +- Step 1: auto-redacts the typed message text with `PiiDetector` + `apply_redactions` +- Step 2: loads each attachment from DB, detects PII on **full file content**, applies redactions, then truncates to 8 KB at a valid UTF-8 char boundary +- Tracks `was_pii_redacted` and `redacted_pii_types` across both steps +- Audit log includes `was_pii_redacted: bool` and `pii_types_redacted: [...]` +- Returns `user_message: Some(stored_user_message)` in `ChatResponse` + +### `src-tauri/src/commands/analysis.rs` +- Fixed `detect_pii` return type from `pii::PiiDetectionResult` to `db::models::PiiDetectionResult` +- Added `scan_text_for_pii(text: String)` with 32 KB input cap ### `src-tauri/src/lib.rs` -- Registered `scan_text_for_pii` in `generate_handler![]` +- Registered `scan_text_for_pii` ### `src/lib/tauriCommands.ts` -- Added `scanTextForPiiCmd(text: string)` wrapper +- `ChatResponse` interface: added `user_message?: string` +- `chatMessageCmd` signature: added `logFileIds: string[]`; passes `undefined` when empty +- Added `scanTextForPiiCmd` wrapper + +### `src/stores/sessionStore.ts` +- Added `updateMessageContent(id, content)` action ### `src/pages/Triage/index.tsx` -- Updated `PendingFile` type: added `logFileId: string` -- Stored `logFile.id` when attaching files -- Added attachment PII gate in `handleSend`: calls `detectPiiCmd` on each text attachment; hard-blocks if PII found -- Added message PII warning in `handleSend`: calls `scanTextForPiiCmd` on typed message; warns once; second send with same message proceeds -- Added `piiWarnedMessageRef` to track prior warning state; cleared in `finally` after every send attempt +- `PendingFile` type: `{ name: string; logFileId: string }` — no raw content stored +- `handleAttach`: only uploads the file and stores `logFileId`; no `readTextFile` +- `handleSend`: passes `logFileIds` to backend; after response updates the bubble with `(response.user_message ?? message) + suffix` --- ## Testing Needed -1. Attach a file containing `password: secret123` → send is blocked; error names the file and PII type -2. Attach a clean text file → send proceeds -3. Attach an image (.png) → no PII scan, send proceeds -4. Type `My password is abc123!!` in chat → first send shows PII warning -5. Send the same message again → proceeds (acknowledgment) -6. Send a different message → prior warning cleared, sends normally -7. On LogUpload page, upload a file with a known IP/email → confirm PII spans appear in the review UI (was previously broken due to wrong struct) -8. Directly call `chat_message` IPC with a message containing `--- Attached: test ---\npassword: secret` → backend returns error -9. `cargo test` → 228/228 pass; `npm run test:run` → 103/103 pass +1. Attach a file containing `password: secret123` → message sends; chat bubble shows `[Password]` in the embedded content; no plaintext credential in bubble or DB +2. Attach a clean text file → content appears unmodified in the chat context +3. Attach a file where PII appears near the 8000-byte mark → content is fully redacted before truncation +4. Type `My password is abc123!!` → message sends; bubble shows `My [Password] is [Password]` +5. On LogUpload page, upload a file with a known IP/email → PII spans appear in the review UI +6. Check audit log after a PII-containing message: `was_pii_redacted: true`, `pii_types_redacted` populated +7. Check audit log after a clean message: `was_pii_redacted: false`, `pii_types_redacted: []` +8. `cargo test` → 228/228 pass; `npm run test:run` → 103/103 pass; `cargo fmt --check` clean; `npx tsc --noEmit` clean diff --git a/src-tauri/src/commands/ai.rs b/src-tauri/src/commands/ai.rs index 9cb0f52c..eef7572a 100644 --- a/src-tauri/src/commands/ai.rs +++ b/src-tauri/src/commands/ai.rs @@ -236,6 +236,9 @@ pub async fn chat_message( // Auto-redact PII in both the typed message and any file attachments. // The backend is the sole authority for redaction; the frontend sends original content. + let mut was_pii_redacted = false; + let mut redacted_pii_types: Vec = Vec::new(); + let full_message = { // Step 1: redact the typed user message text. let base = { @@ -252,6 +255,8 @@ pub async fn chat_message( pii_count = spans.len(), "PII detected in typed chat message — auto-redacting before AI send" ); + was_pii_redacted = true; + redacted_pii_types.extend(type_list.iter().map(|s| s.to_string())); crate::pii::apply_redactions(&message, &spans) } }; @@ -297,6 +302,8 @@ pub async fn chat_message( pii_count = spans.len(), "PII detected in chat attachment — auto-redacting before AI send" ); + was_pii_redacted = true; + redacted_pii_types.extend(type_list.iter().map(|s| s.to_string())); crate::pii::apply_redactions(&content, &spans) }; // Truncate after redaction so the cut never lands inside a PII span. @@ -476,11 +483,23 @@ pub async fn chat_message( .map_err(|e| e.to_string())?; // Audit - capture full transmission details + let pii_types_for_audit = { + use std::collections::HashSet; + let mut v: Vec = redacted_pii_types + .into_iter() + .collect::>() + .into_iter() + .collect(); + v.sort_unstable(); + v + }; let audit_details = serde_json::json!({ "provider": provider_config.name, "model": provider_config.model, "api_url": provider_config.api_url, "user_message": user_msg.content, + "was_pii_redacted": was_pii_redacted, + "pii_types_redacted": pii_types_for_audit, "response_preview": if final_response.content.len() > 200 { format!("{preview}...", preview = &final_response.content[..200]) } else { diff --git a/src/pages/Triage/index.tsx b/src/pages/Triage/index.tsx index 46339ed1..16dbaf6e 100644 --- a/src/pages/Triage/index.tsx +++ b/src/pages/Triage/index.tsx @@ -169,10 +169,9 @@ export default function Triage() { const response = await chatMessageCmd(id, message, logFileIds, provider, systemPrompt); // Update the user bubble with what was actually stored (may be auto-redacted). - if (response.user_message) { - const suffix = fileNames.length > 0 ? `\n📎 ${fileNames.join(", ")}` : ""; - updateMessageContent(userMsg.id, response.user_message + suffix); - } + // Fall back to the original message if user_message is absent (older backend builds). + const suffix = fileNames.length > 0 ? `\n📎 ${fileNames.join(", ")}` : ""; + updateMessageContent(userMsg.id, (response.user_message ?? message) + suffix); const assistantMsg: TriageMessage = { id: `asst-${Date.now()}`,