tftsr-devops_investigation/node_modules/micromark-util-sanitize-uri/index.js
Shaun Arman 8839075805 feat: initial implementation of TFTSR IT Triage & RCA application
Implements Phases 1-8 of the TFTSR implementation plan.

Rust backend (Tauri 2.x, src-tauri/):
- Multi-provider AI: OpenAI-compatible, Anthropic, Gemini, Mistral, Ollama
- PII detection engine: 11 regex patterns with overlap resolution
- SQLCipher AES-256 encrypted database with 10 versioned migrations
- 28 Tauri IPC commands for triage, analysis, document, and system ops
- Ollama: hardware probe, model recommendations, pull/delete with events
- RCA and blameless post-mortem Markdown document generators
- PDF export via printpdf
- Audit log: SHA-256 hash of every external data send
- Integration stubs for Confluence, ServiceNow, Azure DevOps (v0.2)

Frontend (React 18 + TypeScript + Vite, src/):
- 9 pages: full triage workflow NewIssue→LogUpload→Triage→Resolution→RCA→Postmortem→History+Settings
- 7 components: ChatWindow, TriageProgress, PiiDiffViewer, DocEditor, HardwareReport, ModelSelector, UI primitives
- 3 Zustand stores: session, settings (persisted), history
- Type-safe tauriCommands.ts matching Rust backend types exactly
- 8 IT domain system prompts (Linux, Windows, Network, K8s, DB, Virt, HW, Obs)

DevOps:
- .woodpecker/test.yml: rustfmt, clippy, cargo test, tsc, vitest on every push
- .woodpecker/release.yml: linux/amd64 + linux/arm64 builds, Gogs release upload

Verified:
- cargo check: zero errors
- tsc --noEmit: zero errors
- vitest run: 13/13 unit tests passing

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
2026-03-14 22:36:25 -05:00

107 lines
3.1 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { asciiAlphanumeric } from 'micromark-util-character';
import { encode } from 'micromark-util-encode';
/**
 * Turn a value into something safe to inject as a URL.
 *
 * The input is first run through `normalizeUri` (percent-encodes unsafe
 * characters, leaving already encoded sequences alone) and then through
 * `encode` from `micromark-util-encode` (documented as encoding further
 * unsafe characters as character references).
 *
 * When a `protocol` regex is given, the URL is additionally checked against
 * it.
 * For example, `/^(https?|ircs?|mailto|xmpp)$/i` can be used for `a[href]`, or
 * `/^https?$/i` for `img[src]` (this is what `github.com` allows).
 * A URL whose protocol is not matched by `protocol` (such as the dangerous
 * `javascript:`) is dropped: an empty string is returned instead.
 * URLs without a protocol (relative URLs) are always kept.
 *
 * @param {string | null | undefined} url
 *   URI to sanitize; nullish or empty input is treated as `''`.
 * @param {RegExp | null | undefined} [protocol]
 *   Allowed protocols; tested against the text before the first `:`.
 * @returns {string}
 *   Sanitized URI, or `''` when the protocol is not allowed.
 */
export function sanitizeUri(url, protocol) {
  const safe = encode(normalizeUri(url || ''));

  // Without an allow-list there is nothing more to check.
  if (!protocol) {
    return safe;
  }

  const colonAt = safe.indexOf(':');

  // No colon at all: there is no protocol, so it's a relative URL.
  if (colonAt < 0) {
    return safe;
  }

  // A `/`, `?`, or `#` before the first colon means the colon sits in the
  // path, query, or fragment: it does not terminate a protocol.
  const delimiterBeforeColon = ['/', '?', '#'].some((delimiter) => {
    const delimiterAt = safe.indexOf(delimiter);
    return delimiterAt > -1 && delimiterAt < colonAt;
  });

  if (delimiterBeforeColon) {
    return safe;
  }

  // There is a protocol: keep the URL only when it is an allowed one.
  return protocol.test(safe.slice(0, colonAt)) ? safe : '';
}
/**
 * Normalize a URL.
 *
 * Walks the string one UTF-16 code unit at a time and percent-encodes (via
 * `encodeURIComponent`) everything that is not considered safe, while leaving
 * sequences that already look percent-encoded untouched.
 *
 * @param {string} value
 *   URI to normalize.
 * @returns {string}
 *   Normalized URI.
 */
export function normalizeUri(value) {
  // ASCII characters that may appear in the output as-is.
  const safeAscii = /[!#$&-;=?-Z_a-z~]/;
  let output = '';
  // Index of the first character not yet copied into `output`.
  let copiedUpTo = 0;
  let position = 0;

  while (position < value.length) {
    const unit = value.charCodeAt(position);
    // Number of additional code units consumed together with this one.
    let extra = 0;
    /** @type {string} */
    let unsafe = '';

    if (
      unit === 0x25 /* `%` */ &&
      asciiAlphanumeric(value.charCodeAt(position + 1)) &&
      asciiAlphanumeric(value.charCodeAt(position + 2))
    ) {
      // Treated as an existing percent-encoded value: keep all three
      // characters verbatim.
      // NOTE(review): the check is "alphanumeric", not "hex digit", so a
      // sequence such as `%zz` is presumably passed through too — confirm
      // against `micromark-util-character` whether that is intended.
      extra = 2;
    } else if (unit < 0x80) {
      // ASCII: encode only what is outside the safe set.
      const character = String.fromCharCode(unit);

      if (!safeAscii.test(character)) {
        unsafe = character;
      }
    } else if (unit >= 0xd800 && unit <= 0xdfff) {
      // Surrogate: encode a well-formed high+low pair as a single unit.
      const following = value.charCodeAt(position + 1);

      if (unit <= 0xdbff && following >= 0xdc00 && following <= 0xdfff) {
        unsafe = String.fromCharCode(unit, following);
        extra = 1;
      } else {
        // A lone surrogate cannot be encoded; substitute U+FFFD.
        unsafe = "\uFFFD";
      }
    } else {
      // Any other non-ASCII code unit.
      unsafe = String.fromCharCode(unit);
    }

    if (unsafe) {
      // Flush the untouched run before this character, then its encoding.
      output += value.slice(copiedUpTo, position) + encodeURIComponent(unsafe);
      copiedUpTo = position + extra + 1;
    }

    position += extra + 1;
  }

  return output + value.slice(copiedUpTo);
}