All checks were successful
Test / rust-fmt-check (pull_request) Successful in 1m20s
Test / frontend-typecheck (pull_request) Successful in 1m37s
Test / frontend-tests (pull_request) Successful in 1m35s
Test / rust-clippy (pull_request) Successful in 3m11s
PR Review Automation / review (pull_request) Successful in 4m22s
Test / rust-tests (pull_request) Successful in 4m28s
Two credential patterns were missing from the PiiDetector, confirmed by live audit log showing was_pii_redacted: false with plaintext creds: 1. Abbreviated key form (pass: abc123!!): the pattern only matched password|passwd|pwd. Added pass, passphrase, secret with a word boundary to prevent substring false positives (bypass:, compass:). 2. Natural language form (Is the password password123 good): added a second Password sub-pattern for keyword-adjacent values without a key separator. Value must contain a digit or special char to avoid flagging plain words (password strength, password policy). 5 new regression tests added. 233/233 Rust tests pass.
187 lines
5.7 KiB
Rust
187 lines
5.7 KiB
Rust
use crate::pii::patterns::get_patterns;
|
|
use crate::pii::{PiiSpan, PiiType};
|
|
use regex::Regex;
|
|
|
|
pub struct PiiDetector {
|
|
patterns: Vec<(PiiType, Regex)>,
|
|
}
|
|
|
|
impl PiiDetector {
|
|
pub fn new() -> Self {
|
|
PiiDetector {
|
|
patterns: get_patterns(),
|
|
}
|
|
}
|
|
|
|
pub fn detect(&self, text: &str) -> Vec<PiiSpan> {
|
|
let mut spans: Vec<PiiSpan> = Vec::new();
|
|
|
|
for (pii_type, regex) in &self.patterns {
|
|
for mat in regex.find_iter(text) {
|
|
spans.push(PiiSpan::new(
|
|
pii_type.clone(),
|
|
mat.start(),
|
|
mat.end(),
|
|
mat.as_str().to_string(),
|
|
));
|
|
}
|
|
}
|
|
|
|
// Sort by start position
|
|
spans.sort_by_key(|s| s.start);
|
|
|
|
// Remove overlapping spans (keep longer one)
|
|
let mut filtered: Vec<PiiSpan> = Vec::new();
|
|
for span in spans {
|
|
if let Some(last) = filtered.last() {
|
|
if span.start < last.end {
|
|
// Overlap: keep the longer one
|
|
if span.end - span.start > last.end - last.start {
|
|
filtered.pop();
|
|
filtered.push(span);
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
filtered.push(span);
|
|
}
|
|
|
|
filtered
|
|
}
|
|
}
|
|
|
|
impl Default for PiiDetector {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
|
|
#[test]
|
|
fn test_detect_ipv4() {
|
|
let detector = PiiDetector::new();
|
|
let text = "Connected to 192.168.1.100 from 10.0.0.1";
|
|
let spans = detector.detect(text);
|
|
let ipv4_spans: Vec<_> = spans.iter().filter(|s| s.pii_type == "IPv4").collect();
|
|
assert!(!ipv4_spans.is_empty());
|
|
assert!(ipv4_spans.iter().any(|s| s.original == "192.168.1.100"));
|
|
}
|
|
|
|
#[test]
|
|
fn test_detect_email() {
|
|
let detector = PiiDetector::new();
|
|
let text = "User admin@example.com logged in";
|
|
let spans = detector.detect(text);
|
|
assert!(spans.iter().any(|s| s.pii_type == "Email"));
|
|
}
|
|
|
|
#[test]
|
|
fn test_detect_bearer_token() {
|
|
let detector = PiiDetector::new();
|
|
let text = "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.test";
|
|
let spans = detector.detect(text);
|
|
assert!(spans.iter().any(|s| s.pii_type == "Bearer"));
|
|
}
|
|
|
|
#[test]
|
|
fn test_detect_password_keyword() {
|
|
let detector = PiiDetector::new();
|
|
// Full keyword forms
|
|
assert!(detector
|
|
.detect("password: hunter2")
|
|
.iter()
|
|
.any(|s| s.pii_type == "Password"));
|
|
assert!(detector
|
|
.detect("passwd=hunter2")
|
|
.iter()
|
|
.any(|s| s.pii_type == "Password"));
|
|
assert!(detector
|
|
.detect("pwd: hunter2")
|
|
.iter()
|
|
.any(|s| s.pii_type == "Password"));
|
|
}
|
|
|
|
#[test]
|
|
fn test_detect_pass_abbreviation() {
|
|
let detector = PiiDetector::new();
|
|
// Abbreviated form used in credential files (was the failing case)
|
|
let text = "user: alpha\npass: abc123!!";
|
|
let spans = detector.detect(text);
|
|
assert!(
|
|
spans.iter().any(|s| s.pii_type == "Password"),
|
|
"Expected Password span for 'pass: abc123!!' — got: {spans:?}"
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn test_detect_secret_keyword() {
|
|
let detector = PiiDetector::new();
|
|
assert!(detector
|
|
.detect("secret: mysecretvalue")
|
|
.iter()
|
|
.any(|s| s.pii_type == "Password"));
|
|
assert!(detector
|
|
.detect("passphrase: correct horse battery staple")
|
|
.iter()
|
|
.any(|s| s.pii_type == "Password"));
|
|
}
|
|
|
|
#[test]
|
|
fn test_detect_password_natural_language() {
|
|
let detector = PiiDetector::new();
|
|
// Direct juxtaposition: "password <value>" (was the second failing case)
|
|
let spans = detector.detect("Is the password password123 good");
|
|
assert!(
|
|
spans.iter().any(|s| s.pii_type == "Password"),
|
|
"Expected Password span for natural-language 'password password123' — got: {spans:?}"
|
|
);
|
|
// "password is X"
|
|
assert!(detector
|
|
.detect("my password is hunter2")
|
|
.iter()
|
|
.any(|s| s.pii_type == "Password"));
|
|
// Value must have digit or special — plain words should not trigger
|
|
assert!(
|
|
!detector
|
|
.detect("password strength")
|
|
.iter()
|
|
.any(|s| s.pii_type == "Password"),
|
|
"False positive: 'password strength' should not match"
|
|
);
|
|
assert!(
|
|
!detector
|
|
.detect("password policy")
|
|
.iter()
|
|
.any(|s| s.pii_type == "Password"),
|
|
"False positive: 'password policy' should not match"
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn test_password_no_false_positive_bypass() {
|
|
let detector = PiiDetector::new();
|
|
// "bypass" contains "pass" as a substring — must NOT match
|
|
let spans = detector.detect("bypass: enabled");
|
|
assert!(
|
|
!spans.iter().any(|s| s.pii_type == "Password"),
|
|
"False positive: 'bypass:' should not match Password pattern"
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn test_no_overlap() {
|
|
let detector = PiiDetector::new();
|
|
let text = "IP: 192.168.1.1 user: test@test.com";
|
|
let spans = detector.detect(text);
|
|
// Verify no two spans overlap
|
|
for i in 0..spans.len() {
|
|
for j in (i + 1)..spans.len() {
|
|
assert!(spans[i].end <= spans[j].start, "Spans overlap!");
|
|
}
|
|
}
|
|
}
|
|
}
|