diff --git a/TICKET-pii-bypass-chat-attachments.md b/TICKET-pii-bypass-chat-attachments.md new file mode 100644 index 00000000..d5001648 --- /dev/null +++ b/TICKET-pii-bypass-chat-attachments.md @@ -0,0 +1,102 @@ +# TICKET: PII Detection Bypass in AI Chat + +**Branch**: `fix/pii-detection-bypass` + +--- + +## Description + +Two PII detection bypasses were identified and fixed in the AI triage chat interface. + +### Bypass 1 — File Attachments (Critical) + +When a user attached a file to a chat message, its content was read via `readTextFile()`, sliced to 8 KB, and embedded directly into the AI message string — bypassing the PII pipeline entirely. The message was forwarded to the configured AI provider in plaintext with no redaction marker in the audit log. + +**Root cause**: `handleAttach` stored raw file content in React state. `handleSend` concatenated it into `aiMessage` with no PII check. The backend `chat_message` command applied no validation. + +### Bypass 2 — Typed Chat Messages (High) + +Plain typed chat messages were sent to the AI provider without any PII scan. A user typing `How secure is my password: abc123!!` would have the password forwarded to the AI and persisted in the audit log in plaintext. + +### Related Fix — Wrong Return Type on `detect_pii` + +`detect_pii` was serialising `pii::PiiDetectionResult` (`spans`, `original_text`) while the TypeScript interface expected `db::models::PiiDetectionResult` (`detections`, `total_pii_found`). All frontend code reading `result.detections` received `undefined`, meaning the LogUpload PII review workflow was silently broken. + +--- + +## Design Decision: Auto-Redact, Not Block + +After initial implementation explored a blocking/warn-then-proceed approach, the product decision was made to **auto-redact PII in-place and send**: + +- File attachments: PII is detected on full file content and replaced with type tokens (`[Password]`, `[Email]`, etc.) before the content is embedded in the AI message. The redacted form is stored in the DB and audit log. +- Typed messages: Same auto-redact applied to the user's typed text before the message is sent to the AI provider. +- The user's chat bubble is updated after the response to show the redacted form — users can see exactly what reached the AI. +- The audit log records `was_pii_redacted: bool` and `pii_types_redacted: [...]` alongside the redacted message. +- No user blocking or acknowledgment flow. PII is handled transparently. + +--- + +## Acceptance Criteria + +- [x] Attaching a text file containing PII sends successfully; content is auto-redacted before the AI sees it +- [x] Attaching a clean text file proceeds normally with no modification +- [x] PII detection runs on the full file content before truncating to the 8 KB embed limit (no PII straddling the boundary) +- [x] Typed messages containing PII are auto-redacted before being sent to the AI provider +- [x] The chat bubble is updated post-send to show the redacted form of the user's message +- [x] The audit log records `was_pii_redacted`, `pii_types_redacted`, and the full redacted `user_message` +- [x] `detectPiiCmd` returns `detections: PiiSpan[]` and `total_pii_found: number` matching the TypeScript contract +- [x] `chatMessageCmd` passes `logFileIds` as `undefined` (not `null`) when no files are attached +- [x] `scan_text_for_pii` rejects inputs over 32 KB to prevent DoS +- [x] `response.user_message ?? message` used as bubble fallback — no `"undefined..."` concatenation +- [x] All Rust and frontend tests pass; zero clippy warnings; `cargo fmt --check` clean; tsc clean + +--- + +## Work Implemented + +### `src-tauri/src/ai/mod.rs` +- Added `user_message: Option` to `ChatResponse` — set by `chat_message`, absent from direct provider calls + +### `src-tauri/src/ai/anthropic.rs`, `gemini.rs`, `mistral.rs`, `ollama.rs`, `openai.rs` +- Added `user_message: None` to all `ChatResponse { ... }` constructors + +### `src-tauri/src/commands/ai.rs` +- `chat_message` now accepts `log_file_ids: Option>` +- Step 1: auto-redacts the typed message text with `PiiDetector` + `apply_redactions` +- Step 2: loads each attachment from DB, detects PII on **full file content**, applies redactions, then truncates to 8 KB at a valid UTF-8 char boundary +- Tracks `was_pii_redacted` and `redacted_pii_types` across both steps +- Audit log includes `was_pii_redacted: bool` and `pii_types_redacted: [...]` +- Returns `user_message: Some(stored_user_message)` in `ChatResponse` + +### `src-tauri/src/commands/analysis.rs` +- Fixed `detect_pii` return type from `pii::PiiDetectionResult` to `db::models::PiiDetectionResult` +- Added `scan_text_for_pii(text: String)` with 32 KB input cap + +### `src-tauri/src/lib.rs` +- Registered `scan_text_for_pii` + +### `src/lib/tauriCommands.ts` +- `ChatResponse` interface: added `user_message?: string` +- `chatMessageCmd` signature: added `logFileIds: string[]`; passes `undefined` when empty +- Added `scanTextForPiiCmd` wrapper + +### `src/stores/sessionStore.ts` +- Added `updateMessageContent(id, content)` action + +### `src/pages/Triage/index.tsx` +- `PendingFile` type: `{ name: string; logFileId: string }` — no raw content stored +- `handleAttach`: only uploads the file and stores `logFileId`; no `readTextFile` +- `handleSend`: passes `logFileIds` to backend; after response updates the bubble with `(response.user_message ?? message) + suffix` + +--- + +## Testing Needed + +1. Attach a file containing `password: secret123` → message sends; chat bubble shows `[Password]` in the embedded content; no plaintext credential in bubble or DB +2. Attach a clean text file → content appears unmodified in the chat context +3. Attach a file where PII appears near the 8000-byte mark → content is fully redacted before truncation +4. Type `My password is abc123!!` → message sends; bubble shows `My [Password] is [Password]` +5. On LogUpload page, upload a file with a known IP/email → PII spans appear in the review UI +6. Check audit log after a PII-containing message: `was_pii_redacted: true`, `pii_types_redacted` populated +7. Check audit log after a clean message: `was_pii_redacted: false`, `pii_types_redacted: []` +8. `cargo test` → 228/228 pass; `npm run test:run` → 103/103 pass; `cargo fmt --check` clean; `npx tsc --noEmit` clean diff --git a/src-tauri/src/ai/anthropic.rs b/src-tauri/src/ai/anthropic.rs index cc329cf4..97f2f817 100644 --- a/src-tauri/src/ai/anthropic.rs +++ b/src-tauri/src/ai/anthropic.rs @@ -116,6 +116,7 @@ impl Provider for AnthropicProvider { content, model, usage, + user_message: None, tool_calls: None, }) } diff --git a/src-tauri/src/ai/gemini.rs b/src-tauri/src/ai/gemini.rs index 27cc8422..3555e6fe 100644 --- a/src-tauri/src/ai/gemini.rs +++ b/src-tauri/src/ai/gemini.rs @@ -119,6 +119,7 @@ impl Provider for GeminiProvider { content, model: config.model.clone(), usage, + user_message: None, tool_calls: None, }) } diff --git a/src-tauri/src/ai/mistral.rs b/src-tauri/src/ai/mistral.rs index f1a6745c..5df2f809 100644 --- a/src-tauri/src/ai/mistral.rs +++ b/src-tauri/src/ai/mistral.rs @@ -84,6 +84,7 @@ impl Provider for MistralProvider { content, model: config.model.clone(), usage, + user_message: None, tool_calls: None, }) } diff --git a/src-tauri/src/ai/mod.rs b/src-tauri/src/ai/mod.rs index 3492dd97..092cd6b9 100644 --- a/src-tauri/src/ai/mod.rs +++ b/src-tauri/src/ai/mod.rs @@ -30,6 +30,10 @@ pub struct ChatResponse { pub content: String, pub model: String, pub usage: Option, + /// The user message as it was stored in the DB (may be auto-redacted). + /// Set by chat_message; absent from direct provider calls. + #[serde(skip_serializing_if = "Option::is_none")] + pub user_message: Option, #[serde(skip_serializing_if = "Option::is_none")] pub tool_calls: Option>, } diff --git a/src-tauri/src/ai/ollama.rs b/src-tauri/src/ai/ollama.rs index 54b3a331..1bde9746 100644 --- a/src-tauri/src/ai/ollama.rs +++ b/src-tauri/src/ai/ollama.rs @@ -100,6 +100,7 @@ impl Provider for OllamaProvider { content, model: config.model.clone(), usage, + user_message: None, tool_calls: None, }) } diff --git a/src-tauri/src/ai/openai.rs b/src-tauri/src/ai/openai.rs index 73fc12b4..4d83bb31 100644 --- a/src-tauri/src/ai/openai.rs +++ b/src-tauri/src/ai/openai.rs @@ -197,6 +197,7 @@ impl OpenAiProvider { content, model: config.model.clone(), usage, + user_message: None, tool_calls, }) } @@ -397,6 +398,7 @@ impl OpenAiProvider { content, model: config.model.clone(), usage: None, // This custom REST contract doesn't provide token usage in response + user_message: None, tool_calls, }) } diff --git a/src-tauri/src/commands/ai.rs b/src-tauri/src/commands/ai.rs index 47414d4f..eef7572a 100644 --- a/src-tauri/src/commands/ai.rs +++ b/src-tauri/src/commands/ai.rs @@ -165,6 +165,7 @@ fn extract_list(text: &str, header: &str) -> Vec { pub async fn chat_message( issue_id: String, message: String, + log_file_ids: Option>, provider_config: ProviderConfig, system_prompt: Option, app_handle: tauri::AppHandle, @@ -233,6 +234,96 @@ pub async fn chat_message( .collect() }; + // Auto-redact PII in both the typed message and any file attachments. + // The backend is the sole authority for redaction; the frontend sends original content. + let mut was_pii_redacted = false; + let mut redacted_pii_types: Vec = Vec::new(); + + let full_message = { + // Step 1: redact the typed user message text. + let base = { + let spans = crate::pii::PiiDetector::new().detect(&message); + if spans.is_empty() { + message.clone() + } else { + let types: std::collections::HashSet<&str> = + spans.iter().map(|s| s.pii_type.as_str()).collect(); + let mut type_list: Vec<&str> = types.into_iter().collect(); + type_list.sort_unstable(); + warn!( + pii_types = ?type_list, + pii_count = spans.len(), + "PII detected in typed chat message — auto-redacting before AI send" + ); + was_pii_redacted = true; + redacted_pii_types.extend(type_list.iter().map(|s| s.to_string())); + crate::pii::apply_redactions(&message, &spans) + } + }; + + // Step 2: load attachment files from DB, scan, and embed clean content. + let files: Vec<(String, String)> = if let Some(ref ids) = log_file_ids { + let db = state.db.lock().map_err(|e| e.to_string())?; + let mut v = Vec::new(); + for file_id in ids { + if let Ok((name, path)) = db + .prepare("SELECT file_name, file_path FROM log_files WHERE id = ?1") + .and_then(|mut s| { + s.query_row([file_id], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) + }) + }) + { + v.push((name, path)); + } + } + v + } else { + vec![] + }; + + // 8 KB embed limit; detect + redact on full content so PII at the boundary is caught. + const EMBED_LIMIT: usize = 8_000; + + let mut msg = base; + for (file_name, file_path) in &files { + let content = std::fs::read_to_string(file_path).unwrap_or_default(); + let spans = crate::pii::PiiDetector::new().detect(&content); + let redacted = if spans.is_empty() { + content + } else { + let types: std::collections::HashSet<&str> = + spans.iter().map(|s| s.pii_type.as_str()).collect(); + let mut type_list: Vec<&str> = types.into_iter().collect(); + type_list.sort_unstable(); + warn!( + file_name = %file_name, + pii_types = ?type_list, + pii_count = spans.len(), + "PII detected in chat attachment — auto-redacting before AI send" + ); + was_pii_redacted = true; + redacted_pii_types.extend(type_list.iter().map(|s| s.to_string())); + crate::pii::apply_redactions(&content, &spans) + }; + // Truncate after redaction so the cut never lands inside a PII span. + let embed_end = if redacted.len() > EMBED_LIMIT { + let mut e = EMBED_LIMIT; + while !redacted.is_char_boundary(e) { + e -= 1; + } + e + } else { + redacted.len() + }; + msg.push_str(&format!( + "\n\n--- Attached: {file_name} ---\n{}", + &redacted[..embed_end] + )); + } + msg + }; + let provider = create_provider(&provider_config); // Search integration sources for relevant context @@ -290,7 +381,7 @@ pub async fn chat_message( messages.push(Message { role: "user".into(), - content: message.clone(), + content: full_message.clone(), tool_call_id: None, tool_calls: None, }); @@ -360,9 +451,11 @@ pub async fn chat_message( } // Save both user message and response to DB + let stored_user_message; { let db = state.db.lock().map_err(|e| e.to_string())?; - let user_msg = AiMessage::new(conversation_id.clone(), "user".to_string(), message); + let user_msg = AiMessage::new(conversation_id.clone(), "user".to_string(), full_message); + stored_user_message = user_msg.content.clone(); let asst_msg = AiMessage::new( conversation_id, "assistant".to_string(), @@ -390,11 +483,23 @@ pub async fn chat_message( .map_err(|e| e.to_string())?; // Audit - capture full transmission details + let pii_types_for_audit = { + use std::collections::HashSet; + let mut v: Vec = redacted_pii_types + .into_iter() + .collect::>() + .into_iter() + .collect(); + v.sort_unstable(); + v + }; let audit_details = serde_json::json!({ "provider": provider_config.name, "model": provider_config.model, "api_url": provider_config.api_url, "user_message": user_msg.content, + "was_pii_redacted": was_pii_redacted, + "pii_types_redacted": pii_types_for_audit, "response_preview": if final_response.content.len() > 200 { format!("{preview}...", preview = &final_response.content[..200]) } else { @@ -419,7 +524,10 @@ pub async fn chat_message( } } - Ok(final_response) + Ok(crate::ai::ChatResponse { + user_message: Some(stored_user_message), + ..final_response + }) } #[tauri::command] diff --git a/src-tauri/src/commands/analysis.rs b/src-tauri/src/commands/analysis.rs index 24a24031..f9cbd15c 100644 --- a/src-tauri/src/commands/analysis.rs +++ b/src-tauri/src/commands/analysis.rs @@ -8,8 +8,8 @@ use std::path::{Path, PathBuf}; use tauri::State; use tracing::warn; -use crate::db::models::{AuditEntry, LogFile, LogFileSummary, PiiSpanRecord}; -use crate::pii::{self, PiiDetectionResult, PiiDetector, RedactedLogFile}; +use crate::db::models::{AuditEntry, LogFile, LogFileSummary, PiiDetectionResult, PiiSpanRecord}; +use crate::pii::{self, PiiDetector, RedactedLogFile}; use crate::state::AppState; const MAX_LOG_FILE_BYTES: u64 = 50 * 1024 * 1024; @@ -65,7 +65,9 @@ fn compress_text(text: &str) -> Result, String> { encoder .write_all(text.as_bytes()) .map_err(|e| format!("Compression write error: {e}"))?; - encoder.finish().map_err(|e| format!("Compression finish error: {e}")) + encoder + .finish() + .map_err(|e| format!("Compression finish error: {e}")) } /// 100 MB cap — prevents decompression-bomb attacks on crafted DB entries. @@ -352,8 +354,8 @@ pub async fn upload_log_file_by_content( ..log_file }; - let compressed = compress_text(&content) - .map_err(|e| format!("Failed to compress log content: {e}"))?; + let compressed = + compress_text(&content).map_err(|e| format!("Failed to compress log content: {e}"))?; let db = state.db.lock().map_err(|e| e.to_string())?; db.execute( @@ -440,10 +442,34 @@ pub async fn detect_pii( } } + let total_pii_found = spans.len(); Ok(PiiDetectionResult { log_file_id, - spans, - original_text: content, + detections: spans, + total_pii_found, + }) +} + +/// Maximum text size accepted by scan_text_for_pii to prevent DoS on large payloads. +const MAX_TEXT_SCAN_BYTES: usize = 32 * 1024; // 32 KB + +/// Scan arbitrary text for PII without creating any database records. +/// Used by the backend before sending typed chat messages to AI providers. +#[tauri::command] +pub async fn scan_text_for_pii(text: String) -> Result { + if text.len() > MAX_TEXT_SCAN_BYTES { + return Err(format!( + "Text too large for inline PII scan ({} bytes; limit {MAX_TEXT_SCAN_BYTES})", + text.len() + )); + } + let detector = PiiDetector::new(); + let spans = detector.detect(&text); + let total_pii_found = spans.len(); + Ok(PiiDetectionResult { + log_file_id: String::new(), + detections: spans, + total_pii_found, }) } @@ -699,7 +725,10 @@ mod tests { // For in-memory gzip this essentially never fails, but the API now allows // callers to surface the error rather than storing empty bytes. let result = compress_text("normal log line"); - assert!(result.is_ok(), "compress_text should succeed for normal input"); + assert!( + result.is_ok(), + "compress_text should succeed for normal input" + ); assert!(!result.unwrap().is_empty()); } diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index c3414a08..54a8eb05 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -85,6 +85,7 @@ pub fn run() { commands::analysis::upload_log_file, commands::analysis::upload_log_file_by_content, commands::analysis::detect_pii, + commands::analysis::scan_text_for_pii, commands::analysis::apply_redactions, commands::analysis::get_log_file_content, commands::analysis::list_all_log_files, diff --git a/src/lib/tauriCommands.ts b/src/lib/tauriCommands.ts index 4541fd0d..6c03d976 100644 --- a/src/lib/tauriCommands.ts +++ b/src/lib/tauriCommands.ts @@ -34,6 +34,8 @@ export interface ChatResponse { content: string; model: string; usage?: TokenUsage; + /** What was stored in the DB — may be auto-redacted. Use this for display and history. */ + user_message?: string; } export interface AnalysisResult { @@ -271,8 +273,20 @@ export interface TriageMessage { export const analyzeLogsCmd = (issueId: string, logFileIds: string[], providerConfig: ProviderConfig) => invoke("analyze_logs", { issueId, logFileIds, providerConfig }); -export const chatMessageCmd = (issueId: string, message: string, providerConfig: ProviderConfig, systemPrompt?: string) => - invoke("chat_message", { issueId, message, providerConfig, systemPrompt: systemPrompt ?? null }); +export const chatMessageCmd = ( + issueId: string, + message: string, + logFileIds: string[], + providerConfig: ProviderConfig, + systemPrompt?: string +) => + invoke("chat_message", { + issueId, + message, + logFileIds: logFileIds.length > 0 ? logFileIds : undefined, + providerConfig, + systemPrompt: systemPrompt ?? null, + }); export const listProvidersCmd = () => invoke("list_providers"); @@ -308,6 +322,9 @@ export const deleteImageAttachmentCmd = (attachmentId: string) => export const detectPiiCmd = (logFileId: string) => invoke("detect_pii", { logFileId }); +export const scanTextForPiiCmd = (text: string) => + invoke("scan_text_for_pii", { text }); + export const applyRedactionsCmd = (logFileId: string, approvedSpanIds: string[]) => invoke("apply_redactions", { logFileId, approvedSpanIds }); diff --git a/src/pages/Triage/index.tsx b/src/pages/Triage/index.tsx index f1e30e2f..16dbaf6e 100644 --- a/src/pages/Triage/index.tsx +++ b/src/pages/Triage/index.tsx @@ -2,7 +2,6 @@ import { useEffect, useRef, useState } from "react"; import { useParams, useNavigate } from "react-router-dom"; import { CheckCircle, ChevronRight } from "lucide-react"; import { open } from "@tauri-apps/plugin-dialog"; -import { readTextFile } from "@tauri-apps/plugin-fs"; import { ChatWindow } from "@/components/ChatWindow"; import { TriageProgress } from "@/components/TriageProgress"; import { useSessionStore } from "@/stores/sessionStore"; @@ -34,7 +33,7 @@ function isCloseIntent(message: string): boolean { return CLOSE_PATTERNS.some((p) => lower.includes(p)); } -type PendingFile = { name: string; content: string | null }; +type PendingFile = { name: string; logFileId: string }; export default function Triage() { const { id } = useParams<{ id: string }>(); @@ -46,7 +45,7 @@ export default function Triage() { const lastUserMsgRef = useRef(""); const initialized = useRef(false); - const { currentIssue, messages, currentWhyLevel, activeDomain, startSession, addMessage, setWhyLevel, setActiveDomain } = + const { currentIssue, messages, currentWhyLevel, activeDomain, startSession, addMessage, updateMessageContent, setWhyLevel, setActiveDomain } = useSessionStore(); const { getActiveProvider } = useSettingsStore(); @@ -101,14 +100,7 @@ export default function Triage() { const paths = Array.isArray(selected) ? selected : [selected]; for (const filePath of paths) { const logFile = await uploadLogFileCmd(id, filePath); - let content: string | null = null; - try { - const raw = await readTextFile(filePath); - content = raw.slice(0, 8000); // cap at 8 KB to keep context manageable - } catch { - // Binary file (image) — include filename only as context - } - setPendingFiles((prev) => [...prev, { name: logFile.file_name, content }]); + setPendingFiles((prev) => [...prev, { name: logFile.file_name, logFileId: logFile.id }]); } } catch (e) { setError(`Attachment failed: ${String(e)}`); @@ -142,18 +134,10 @@ export default function Triage() { setIsLoading(true); setError(null); - // Build AI context: user text + file contents; show only text + filenames in chat UI - const fileContext = pendingFiles - .map((f) => - f.content - ? `\n\n--- Attached: ${f.name} ---\n${f.content}` - : `\n\n[Image attached: ${f.name} — describe what you see if relevant]` - ) - .join(""); - const aiMessage = pendingFiles.length > 0 ? `${message}${fileContext}` : message; + const fileNames = pendingFiles.map((f) => f.name); const displayContent = pendingFiles.length > 0 - ? `${message}${message ? "\n" : ""}📎 ${pendingFiles.map((f) => f.name).join(", ")}` + ? `${message}${message ? "\n" : ""}📎 ${fileNames.join(", ")}` : message; const userMsg: TriageMessage = { @@ -166,21 +150,29 @@ export default function Triage() { }; lastUserMsgRef.current = message; addMessage(userMsg); + const logFileIds = pendingFiles.map((f) => f.logFileId); setPendingFiles([]); try { // Detect domain from conversation messages const messageContents = messages.map((m) => m.content); const detectedDomain = detectDomain(messageContents); - + // Update active domain if it has changed if (detectedDomain !== activeDomain && detectedDomain !== "general") { setActiveDomain(detectedDomain); } - + // Use the active domain for the system prompt const systemPrompt = activeDomain ? getDomainPrompt(activeDomain) : undefined; - const response = await chatMessageCmd(id, aiMessage, provider, systemPrompt); + // Backend auto-redacts PII in both message text and attachments before sending to AI. + const response = await chatMessageCmd(id, message, logFileIds, provider, systemPrompt); + + // Update the user bubble with what was actually stored (may be auto-redacted). + // Fall back to the original message if user_message is absent (older backend builds). + const suffix = fileNames.length > 0 ? `\n📎 ${fileNames.join(", ")}` : ""; + updateMessageContent(userMsg.id, (response.user_message ?? message) + suffix); + const assistantMsg: TriageMessage = { id: `asst-${Date.now()}`, issue_id: id, diff --git a/src/stores/sessionStore.ts b/src/stores/sessionStore.ts index 348425cd..3ee9d93a 100644 --- a/src/stores/sessionStore.ts +++ b/src/stores/sessionStore.ts @@ -14,6 +14,7 @@ interface SessionState { startSession: (issue: Issue) => void; addMessage: (message: TriageMessage) => void; + updateMessageContent: (id: string, content: string) => void; setPiiSpans: (spans: PiiSpan[]) => void; setApprovedRedactions: (spans: PiiSpan[]) => void; setWhyLevel: (level: number) => void; @@ -40,6 +41,10 @@ export const useSessionStore = create((set) => ({ ...initialState, startSession: (issue) => set({ currentIssue: issue, messages: [], currentWhyLevel: 1, activeDomain: issue.category }), addMessage: (message) => set((state) => ({ messages: [...state.messages, message] })), + updateMessageContent: (id, content) => + set((state) => ({ + messages: state.messages.map((m) => (m.id === id ? { ...m, content } : m)), + })), setPiiSpans: (spans) => set({ piiSpans: spans }), setApprovedRedactions: (spans) => set({ approvedRedactions: spans }), setWhyLevel: (level) => set({ currentWhyLevel: level }),