mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-06-20 05:09:30 +00:00
158 lines
7.6 KiB
Python
158 lines
7.6 KiB
Python
"""Nemotron Mini client for recommendation explanations.

Talks to a local Ollama server (the one started by assets/setup.sh) using
the `nemotron-mini` model. Returns short natural-language explanations of
why a set of items was recommended for a user.

The serving stack matches the one configured by setup.sh and used by
assets/app.py (the web UI) — Ollama on port 11434, model `nemotron-mini`.
Both can be overridden with OLLAMA_URL and NEMOTRON_MODEL.
"""
|
|
|
|
import os
|
|
from typing import Optional
|
|
|
|
import requests
|
|
|
|
# Base URL of the Ollama server; the OLLAMA_URL environment variable wins
# over the local default.
OLLAMA_URL = os.getenv('OLLAMA_URL', 'http://localhost:11434')
# Model name sent in every chat request; override with NEMOTRON_MODEL.
NEMOTRON_MODEL = os.getenv('NEMOTRON_MODEL', 'nemotron-mini')
|
|
|
|
|
|
def build_explanation_prompt(user_history: list[dict], recommendations: list[dict]) -> str:
    """Build the user prompt asking Nemotron for three numbered style observations.

    The prompt lists the titles of the customer's five most recent history
    items and of every recommended item, then instructs the model to emit
    exactly three lines prefixed ``[1]``, ``[2]``, ``[3]``. Each line is
    either a bare observation about the history (Format A) or an observation
    followed by a Unicode arrow and how the recommendations match (Format B).
    The rules section demands an impersonal voice and forbids preambles,
    occasions, item titles, prices and vague phrasing.

    Args:
        user_history: Item dicts, oldest first; only the last five are used
            and only the ``'title'`` key is read.
        recommendations: Item dicts for the recommended items; only the
            ``'title'`` key is read.

    Returns:
        The complete prompt text.
    """
    def _title(item: dict) -> str:
        # A missing, None or empty title all render as 'Unknown' so the
        # prompt never contains a literal "- None" or a blank bullet.
        return item.get('title') or 'Unknown'

    history_titles = [_title(h) for h in user_history[-5:]]
    rec_titles = [_title(r) for r in recommendations]
    history_str = "\n".join(f"- {t}" for t in history_titles)
    recs_str = "\n".join(f"- {t}" for t in rec_titles)

    return (
        "Customer's recent dresses:\n"
        f"{history_str}\n\n"
        "Recommended dresses:\n"
        f"{recs_str}\n\n"
        "Output 3 numbered lines. Each line states a SPECIFIC style pattern "
        "observed across the customer's history and (optionally) shows how the "
        "recommendations match that pattern. The voice is impersonal — describe "
        "the pattern factually, do not address 'you' or 'your'.\n\n"
        "Two acceptable line formats:\n"
        " Format A — observation only:\n"
        " <history pattern>\n"
        " Format B — observation + match:\n"
        " <history pattern> → <how the recs match>\n\n"
        "Use the literal Unicode arrow → (NOT '->' or '=>' or hyphens) for the "
        "transition in Format B.\n\n"
        "Use SPECIFIC style values, not the abstract category word. Examples of "
        "allowed values: A-line, sheath, wrap, bodycon, fit-and-flare, maxi, midi, "
        "mini, shift, V-neck, scoop, halter, off-shoulder, square neck, flutter "
        "sleeve, cap sleeve, puff sleeve, short sleeve, lace, chiffon, satin, "
        "smocked, ruffled, tiered, pleated, floral, polka-dot, striped, tie-dye, "
        "color-block, solid.\n\n"
        "EXAMPLES of the format — these are SAMPLE bullets only. You MUST "
        "generate DIFFERENT content tailored to the actual items above:\n"
        " [1] Short flowy A-line cuts are common across history → recs continue this\n"
        " [2] V-neck and square-neck necklines dominate past picks\n"
        " [3] Floral and ruffle detailing recur → 3 of 4 recs share both\n"
        " [1] Maxi lengths appear repeatedly → recs lean short and midi\n"
        " [2] Past purchases skew tie-dye → 2 picks feature similar prints\n"
        " [3] Smocked bodices and elastic waists recurring → all 4 recs match\n"
        " [1] Polka-dot and solid blacks recur in history → 3 of 4 recs share\n"
        " [2] Halter and off-shoulder dominant in past picks\n"
        " [3] Cap-sleeve and short-sleeve recur → recs match in 4 of 4\n\n"
        "Rules:\n"
        " - Output EXACTLY 3 lines. Not 2, not 4 — exactly 3.\n"
        " - Each line starts with [1], [2], or [3] respectively. Nothing else.\n"
        " - Each line follows Format A (observation only) or Format B (observation → match).\n"
        " - Mix the formats across the 3 lines for variety.\n"
        " - Vary the wording per call — do NOT repeat the exact example bullets.\n"
        " - When using Format B, use the Unicode arrow → (never '->' or '=>').\n"
        " - 8-15 words per line. Concise and factual.\n"
        " - NEVER use 'you' or 'your'. Stay impersonal.\n"
        " - Identify patterns that GENUINELY appear in the customer's history.\n"
        " - Use specific values (e.g. 'A-line' not 'silhouette').\n"
        " - No greetings, no 'Sure', no 'Here', no preamble.\n"
        " - DO NOT mention occasions/events/vibes — no 'casual', 'work', "
        "'cocktail', 'brunch', 'wedding', 'summer', 'evening', 'perfect for' etc.\n"
        " - No vague phrases ('matches your style', 'fits your taste').\n"
        " - NEVER wrap item names in square brackets [].\n"
        " - DO NOT include item titles in the bullets (no 'on dresses like XYZ').\n"
        " - DO NOT meta-reference the data — no 'in the customer's history and "
        "recommendations', 'across both collections', 'in customer history'.\n"
        " - NEVER output words like 'Cut/silhouette:', 'Pairing verb:', "
        "'Trait A:' — those are prompt structure, not output.\n"
        " - No trailing colons.\n"
        " - No price, ratings, shopping logistics.\n"
        " - Tailor every line to the actual items above — don't reuse the example traits.\n"
    )
|
|
|
|
|
|
def generate_explanation(
    user_history: list[dict],
    recommendations: list[dict],
    ollama_url: str = OLLAMA_URL,
    model: str = NEMOTRON_MODEL,
    timeout: float = 10.0,
) -> Optional[str]:
    """Generate a recommendation explanation via Ollama / Nemotron Mini.

    Sends one non-streaming chat request to ``<ollama_url>/api/chat`` whose
    user message is the prompt from ``build_explanation_prompt``.

    Args:
        user_history: Item dicts for the customer's past items.
        recommendations: Item dicts for the recommended items.
        ollama_url: Base URL of the Ollama server; a trailing slash is
            tolerated.
        model: Model name placed in the request payload.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The stripped ``message.content`` text of the response on success.
        ``None`` when the request fails with a ``requests`` ConnectionError,
        so the caller can fall back to a template. For any other failure
        (HTTP error status, unexpected response shape, ...) a string of the
        form ``"[Explanation unavailable: <error>]"``.
    """
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are a concise fashion recommendation assistant."},
            {"role": "user", "content": build_explanation_prompt(user_history, recommendations)},
        ],
        "stream": False,
        # 0.5 was too rigid — model locked to specific example bullets and
        # truncated to 2 lines. 0.65 keeps format adherence + enough variety
        # to vary content per call. num_predict caps output so the model
        # finishes all 3 bullets rather than stopping mid-stream.
        "options": {"temperature": 0.65, "top_p": 0.9, "num_predict": 300},
    }
    try:
        # Strip a trailing slash so a base URL such as
        # "http://localhost:11434/" does not produce ".../​/api/chat".
        # Done inside the try so a bad URL value still yields the error
        # string instead of raising.
        endpoint = f"{ollama_url.rstrip('/')}/api/chat"
        resp = requests.post(endpoint, json=payload, timeout=timeout)
        resp.raise_for_status()
        return resp.json()["message"]["content"].strip()
    except requests.exceptions.ConnectionError:
        return None  # Ollama not running — caller falls back to template
    except Exception as e:
        # Any other failure is surfaced as a visible marker string.
        return f"[Explanation unavailable: {e}]"
|
|
|
|
|
|
def get_explanation_or_fallback(
    user_history: list[dict],
    recommendations: list[dict],
    fallback: str,
) -> tuple[str, bool]:
    """Try Nemotron via Ollama; fall back to the caller-provided template.

    Args:
        user_history: Item dicts for the customer's past items.
        recommendations: Item dicts for the recommended items.
        fallback: Text returned when no explanation could be obtained.

    Returns:
        ``(explanation, is_llm_generated)``. The flag is False, and the
        explanation is ``fallback``, when ``generate_explanation`` returns
        ``None`` or an empty string.
    """
    explanation = generate_explanation(user_history, recommendations)
    # Treat an empty reply like "no reply": a blank explanation is of no use
    # to the caller, so the template is preferred.
    # NOTE(review): "[Explanation unavailable: ...]" strings produced by
    # generate_explanation are still returned with is_llm_generated=True —
    # confirm whether callers want those replaced by the fallback too.
    if explanation:
        return explanation, True
    return fallback, False
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: send a small hard-coded history and recommendation
    # list to the configured server and print whatever comes back.
    sample_history = [
        {"title": title, "price": price}
        for title, price in (
            ("Women's Floral Summer Maxi Dress", 34.99),
            ("Elegant Lace Cocktail Dress", 52.00),
            ("Casual V-Neck T-Shirt Dress", 24.99),
        )
    ]
    sample_recs = [
        {"title": title, "price": price, "score": score}
        for title, price, score in (
            ("Boho Beach Wrap Dress", 29.99, 0.85),
            ("Vintage A-Line Swing Dress", 39.99, 0.82),
            ("Sleeveless Midi Sundress", 32.00, 0.79),
        )
    ]

    print(f"Calling {NEMOTRON_MODEL} at {OLLAMA_URL}...")
    result = generate_explanation(sample_history, sample_recs)

    # None (or an empty reply) means nothing usable came back.
    if not result:
        print(f"\nOllama not reachable at {OLLAMA_URL}. Start it with `ollama serve`.")
    else:
        print(f"\nExplanation:\n{result}")
|