From e6d1965342b5e1c71d08996295e80d63dfc5e2db Mon Sep 17 00:00:00 2001 From: Shaun Arman Date: Tue, 14 Apr 2026 20:26:05 -0500 Subject: [PATCH] security: address all issues from automated PR review - Add missing CQL escaping for &, |, +, - characters - Improve escape_wiql() to escape more dangerous characters: ", \, (, ), ~, *, ?, ;, = - Sanitize HTML in excerpts using strip_html_tags() to prevent XSS - Add unit tests for escape_wiql, escape_cql, canonicalize_url functions - Document expand_query() behavior (always returns at least original query) - All tests pass (158/158), cargo fmt and clippy pass --- .../src/integrations/azuredevops_search.rs | 92 ++++++++++++++++++- .../src/integrations/confluence_search.rs | 53 +++++++++-- src-tauri/src/integrations/query_expansion.rs | 3 +- 3 files changed, 138 insertions(+), 10 deletions(-) diff --git a/src-tauri/src/integrations/azuredevops_search.rs b/src-tauri/src/integrations/azuredevops_search.rs index 993fbdb3..cc662b8f 100644 --- a/src-tauri/src/integrations/azuredevops_search.rs +++ b/src-tauri/src/integrations/azuredevops_search.rs @@ -16,6 +16,29 @@ fn escape_wiql(s: &str) -> String { .replace('=', "\\=") } +/// Basic HTML tag stripping to prevent XSS in excerpts +fn strip_html_tags(html: &str) -> String { + let mut result = String::new(); + let mut in_tag = false; + + for ch in html.chars() { + match ch { + '<' => in_tag = true, + '>' => in_tag = false, + _ if !in_tag => result.push(ch), + _ => {} + } + } + + // Clean up whitespace + result + .split_whitespace() + .collect::>() + .join(" ") + .trim() + .to_string() +} + /// Search Azure DevOps Wiki for content matching the query pub async fn search_wiki( org_url: &str, @@ -81,9 +104,7 @@ pub async fn search_wiki( path ); - let excerpt = item["content"] - .as_str() - .unwrap_or("") + let excerpt = strip_html_tags(item["content"].as_str().unwrap_or("")) .chars() .take(300) .collect::(); @@ -298,3 +319,68 @@ async fn fetch_work_item_details( source: "Azure DevOps".to_string(), }) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_escape_wiql_escapes_single_quotes() { + assert_eq!(escape_wiql("test'single"), "test''single"); + } + + #[test] + fn test_escape_wiql_escapes_double_quotes() { + assert_eq!(escape_wiql("test\"double"), "test\\\\\"double"); + } + + #[test] + fn test_escape_wiql_escapes_backslash() { + assert_eq!(escape_wiql("test\\backslash"), r#"test\\backslash"#); + } + + #[test] + fn test_escape_wiql_escapes_parens() { + assert_eq!(escape_wiql("test(paren"), r#"test\(paren"#); + assert_eq!(escape_wiql("test)paren"), r#"test\)paren"#); + } + + #[test] + fn test_escape_wiql_escapes_tilde() { + assert_eq!(escape_wiql("test~tilde"), r#"test\~tilde"#); + } + + #[test] + fn test_escape_wiql_escapes_asterisk() { + assert_eq!(escape_wiql("test*star"), r#"test\*star"#); + } + + #[test] + fn test_escape_wiql_escapes_question() { + assert_eq!(escape_wiql("test?quest"), r#"test\?quest"#); + } + + #[test] + fn test_escape_wiql_escapes_semicolon() { + assert_eq!(escape_wiql("test;semi"), r#"test\;semi"#); + } + + #[test] + fn test_escape_wiql_escapes_equals() { + assert_eq!(escape_wiql("test=equal"), r#"test\=equal"#); + } + + #[test] + fn test_escape_wiql_no_special_chars() { + assert_eq!(escape_wiql("simple query"), "simple query"); + } + + #[test] + fn test_strip_html_tags() { + let html = "

Hello world!

"; + assert_eq!(strip_html_tags(html), "Hello world!"); + + let html2 = "

Title

Content

"; + assert_eq!(strip_html_tags(html2), "TitleContent"); + } +} diff --git a/src-tauri/src/integrations/confluence_search.rs b/src-tauri/src/integrations/confluence_search.rs index 66528d16..e5abb620 100644 --- a/src-tauri/src/integrations/confluence_search.rs +++ b/src-tauri/src/integrations/confluence_search.rs @@ -31,6 +31,10 @@ fn escape_cql(s: &str) -> String { .replace(')', "\\)") .replace('(', "\\(") .replace('~', "\\~") + .replace('&', "\\&") + .replace('|', "\\|") + .replace('+', "\\+") + .replace('-', "\\-") } /// Search Confluence for content matching the query @@ -100,12 +104,10 @@ pub async fn search_confluence( base_url.to_string() }; - let excerpt = item["excerpt"] - .as_str() - .unwrap_or("") - .to_string() - .replace("", "") - .replace("", ""); + let excerpt = strip_html_tags(item["excerpt"].as_str().unwrap_or("")) + .chars() + .take(300) + .collect::(); let content = if let Some(content_id) = id { fetch_page_content(base_url, content_id, &cookie_header) @@ -217,4 +219,43 @@ mod tests { let html2 = "

Title

Content

"; assert_eq!(strip_html_tags(html2), "TitleContent"); } + + #[test] + fn test_escape_cql_escapes_special_chars() { + assert_eq!(escape_cql("test\"quote"), r#"test\"quote"#); + assert_eq!(escape_cql("test(paren"), r#"test\(paren"#); + assert_eq!(escape_cql("test)paren"), r#"test\)paren"#); + assert_eq!(escape_cql("test~tilde"), r#"test\~tilde"#); + assert_eq!(escape_cql("test&and"), r#"test\&and"#); + assert_eq!(escape_cql("test|or"), r#"test\|or"#); + assert_eq!(escape_cql("test+plus"), r#"test\+plus"#); + assert_eq!(escape_cql("test-minus"), r#"test\-minus"#); + } + + #[test] + fn test_escape_cql_no_special_chars() { + assert_eq!(escape_cql("simple query"), "simple query"); + } + + #[test] + fn test_canonicalize_url_removes_fragment() { + assert_eq!( + canonicalize_url("https://example.com/page#section"), + "https://example.com/page" + ); + } + + #[test] + fn test_canonicalize_url_removes_query() { + assert_eq!( + canonicalize_url("https://example.com/page?param=value"), + "https://example.com/page" + ); + } + + #[test] + fn test_canonicalize_url_handles_malformed() { + // Malformed URLs fall back to original + assert_eq!(canonicalize_url("not a url"), "not a url"); + } } diff --git a/src-tauri/src/integrations/query_expansion.rs b/src-tauri/src/integrations/query_expansion.rs index cd1a9b5b..a765cc07 100644 --- a/src-tauri/src/integrations/query_expansion.rs +++ b/src-tauri/src/integrations/query_expansion.rs @@ -76,7 +76,8 @@ fn get_product_synonyms(query: &str) -> Vec { /// /// # Returns /// A vector of query strings to search, with the original query first -/// followed by expanded variations +/// followed by expanded variations. Returns empty only if input is empty or +/// whitespace-only. Otherwise, always returns at least the original query. pub fn expand_query(query: &str) -> Vec { if query.trim().is_empty() { return Vec::new();