tftsr-devops_investigation/src-tauri/src/pii/patterns.rs
Shaun Arman 0a25ca7692 fix(pii): remove lookahead from hostname regex, fix fmt in analysis test
Rust's `regex` crate does not support lookaround assertions. The hostname
pattern `(?=.{1,253}\b)` caused a panic on every `PiiDetector::new()` call,
failing all four PII detector tests in CI (rust-fmt-check, rust-clippy,
rust-tests). Removed the lookahead; the remaining pattern correctly matches
valid FQDNs without the RFC 1035 length pre-check.

Also reformatted analysis.rs:253 to satisfy `rustfmt` (line break after `=`).

All 127 Rust tests pass and `cargo fmt --check` and `cargo clippy -- -D
warnings` are clean.
2026-04-05 09:59:19 -05:00

85 lines
2.8 KiB
Rust

use crate::pii::PiiType;
use regex::Regex;
/// Returns the `(PiiType, Regex)` pairs for every supported PII pattern.
///
/// The entries are listed in a deliberate order: the inline notes record which
/// patterns are meant to be checked before others so that a broader pattern
/// does not claim part of a more specific match (URL credentials before email,
/// SSN before phone, IPv6 before IPv4).
///
/// All patterns avoid lookaround assertions, which the `regex` crate does not
/// support.
///
/// # Panics
///
/// Panics if any pattern fails to compile. The patterns are fixed literals, so
/// a panic here means one of them was edited into an invalid expression.
pub fn get_patterns() -> Vec<(PiiType, Regex)> {
    vec![
        // URL with credentials (check before email to avoid partial matches)
        (
            PiiType::UrlWithCreds,
            Regex::new(r"[a-z][a-z0-9+\-.]*://[^:@/\s]+:[^@/\s]+@").unwrap(),
        ),
        // Bearer token
        (
            PiiType::BearerToken,
            Regex::new(r"(?i)bearer\s+[A-Za-z0-9\-._~+/]+=*").unwrap(),
        ),
        // API key
        (
            PiiType::ApiKey,
            Regex::new(
                r"(?i)(?:api[_\-]?key|apikey|access[_\-]?token)\s*[=:]\s*[A-Za-z0-9\-._~+/]{16,}",
            )
            .unwrap(),
        ),
        // Password
        (
            PiiType::Password,
            Regex::new(r"(?i)(?:password|passwd|pwd)\s*[=:]\s*\S+").unwrap(),
        ),
        // SSN (check before phone to avoid partial matches)
        (
            PiiType::Ssn,
            Regex::new(r"\b\d{3}-\d{2}-\d{4}\b").unwrap(),
        ),
        // Credit card
        (
            PiiType::CreditCard,
            Regex::new(
                r"\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13}|6(?:011|5[0-9]{2})[0-9]{12}|3(?:0[0-5]|[68][0-9])[0-9]{11}|35(?:2[89]|[3-8][0-9])[0-9]{12})\b",
            )
            .unwrap(),
        ),
        // Email
        (
            PiiType::Email,
            Regex::new(r"\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}\b").unwrap(),
        ),
        // MAC address
        (
            PiiType::MacAddress,
            Regex::new(r"\b(?:[0-9A-Fa-f]{2}[:\-]){5}[0-9A-Fa-f]{2}\b").unwrap(),
        ),
        // IPv6 (check before IPv4 since IPv6 can contain IPv4-like segments)
        //
        // The regex crate picks the first alternative that matches at a given
        // position, so the alternatives that consume a complete address come
        // before the bare trailing-`::` form; otherwise `fe80::1` would be
        // reported as just `fe80::`. No `\b` is placed next to a `::`, because
        // `:` is not a word character and a boundary there would require an
        // unrelated word character on the other side.
        //
        // The total number of groups around `::` is not capped at eight; for
        // redaction purposes over-matching a malformed address is harmless.
        (
            PiiType::Ipv6,
            Regex::new(concat!(
                // Full form: eight colon-separated hex groups.
                r"\b(?:[0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4}\b",
                // Compressed with groups on both sides of `::`, optionally
                // ending in a dotted-quad tail (e.g. `64:ff9b::192.0.2.33`).
                r"|\b(?:[0-9A-Fa-f]{1,4}:){1,7}(?::[0-9A-Fa-f]{1,4}){1,6}(?:(?:\.[0-9]{1,3}){3})?\b",
                // Leading `::` (e.g. `::1`, `::ffff:10.0.0.1`).
                r"|::(?:[0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4}(?:(?:\.[0-9]{1,3}){3})?\b",
                // Trailing `::` (e.g. `fe80::`).
                r"|\b(?:[0-9A-Fa-f]{1,4}:){1,7}:",
            ))
            .unwrap(),
        ),
        // IPv4
        (
            PiiType::Ipv4,
            Regex::new(
                r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b",
            )
            .unwrap(),
        ),
        // Phone number
        (
            PiiType::PhoneNumber,
            Regex::new(r"\b(?:\+?1[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}\b")
                .unwrap(),
        ),
        // Hostname / FQDN
        //
        // Each label is limited to 63 characters; the overall 253-character
        // name limit is not enforced because that would need a lookahead.
        (
            PiiType::Hostname,
            Regex::new(
                r"\b(?:[A-Za-z0-9](?:[A-Za-z0-9\-]{0,61}[A-Za-z0-9])?\.)+[A-Za-z]{2,63}\b",
            )
            .unwrap(),
        ),
    ]
}