"""Enterprise Fashion Recommender — FastAPI + vanilla HTML/CSS

Serves real HLLM-trained recommendations via FAISS nearest-neighbor search,
re-ranked by the LightGBM lambdarank model trained in Step 5.

Item prices shown in the UI are the PPO pricing agent's optimized prices by
default; pass --static-prices to display the original catalog (MSRP) prices
instead.

Default: python app.py (HLLM retrieval + LightGBM rerank, PPO-optimized prices)
Retriever only: python app.py --retriever-only (HLLM retrieval, no rerank — for ablation)
Static prices: python app.py --static-prices (display original MSRP instead of PPO-optimized prices)

Opens at http://localhost:7860
"""
import argparse
import os
import subprocess
import time
from datetime import datetime
from pathlib import Path

import faiss
import lightgbm as lgb
import numpy as np
import pandas as pd
import uvicorn
from fastapi import FastAPI
from fastapi.responses import HTMLResponse

from prepare_data import looks_non_dress


def _parse_args() -> argparse.Namespace:
    """Parse the command-line flags that configure the server.

    The parser description is the first paragraph of the module docstring
    (everything before the first blank line).

    Returns:
        Namespace with ``retriever_only``, ``static_prices``, ``host`` and
        ``port`` attributes.
    """
    summary = __doc__.partition("\n\n")[0]
    parser = argparse.ArgumentParser(description=summary)
    parser.add_argument(
        "--retriever-only",
        action="store_true",
        help="Skip the LightGBM re-ranker. Returns FAISS top-K directly.",
    )
    parser.add_argument(
        "--static-prices",
        action="store_true",
        help=(
            "Show the original MSRP from the catalog instead of "
            "the PPO pricing agent's optimized prices."
        ),
    )
    parser.add_argument("--host", default="0.0.0.0")
    parser.add_argument("--port", type=int, default=7860)
    return parser.parse_args()


# Flags are parsed once at import time; the two booleans below are the
# positive-sense switches the rest of the module branches on.
ARGS = _parse_args()
USE_RERANKER = not ARGS.retriever_only
USE_OPTIMIZED_PRICES = not ARGS.static_prices

# Workspace root comes from $PLAYBOOK_WORKSPACE, defaulting to the home dir.
WORKSPACE = os.environ.get("PLAYBOOK_WORKSPACE", os.path.expanduser("~"))
DATA_DIR = os.path.join(WORKSPACE, "data")
MODELS_DIR = Path(WORKSPACE) / "models"

# Optional Nemotron client (gracefully degrades if server not running)
try:
    from nemotron_client import get_explanation_or_fallback
except ImportError:
    HAS_NEMOTRON_CLIENT = False
else:
    HAS_NEMOTRON_CLIENT = True

# --- Load item metadata ---
print("Loading data...")
item_meta = pd.read_parquet(
    os.path.join(DATA_DIR, "processed", "dress_metadata.parquet")
)
interactions = pd.read_parquet(
    os.path.join(DATA_DIR, "processed", "dress_interactions.parquet")
)
print(f" Raw: {len(item_meta):,} items, {len(interactions):,} interactions")

# Serve-time non-dress filter. The upstream `prepare_data.py` filter is a
# loose substring match that admits ~244 items obviously not dresses
# (socks, jackets, men's pants — see findings.md). Trust the upstream
# positive selection; just subtract obvious false-positives here.
# Drop catalog rows whose title looks like a non-dress item, then keep only
# the interactions that still point at a surviving item. Interactions are
# sorted by (user, timestamp) so each per-user history below is chronological.
_drop_mask = item_meta['title'].fillna('').apply(looks_non_dress)
_n_dropped = int(_drop_mask.sum())
item_meta = item_meta.loc[~_drop_mask].reset_index(drop=True)
_valid_ids = set(item_meta['item_id'])
interactions = interactions[interactions['item_id'].isin(_valid_ids)]
interactions = interactions.sort_values(['user_id', 'timestamp'])
print(f" Filtered: dropped {_n_dropped:,} non-dress items; "
      f"{len(item_meta):,} items, {len(interactions):,} interactions remain")

# --- PPO-optimized prices (default) -----------------------------------------
# The pricing agent's trained PPO policy chooses a per-item price multiplier;
# we evaluate it once at day=0 on a fresh inventory state and cache the result
# so /api/recommend stays a cheap read. --static-prices skips this entirely
# and falls back to the catalog's MSRP.
optimized_prices: dict[str, float] = {}
if USE_OPTIMIZED_PRICES:
    from pricing_agent import (
        PRICING_CONFIG,
        InventoryState,
        PPOPolicy,
        _build_actor_critic,
        load_amazon_dresses_catalog,
        load_checkpoint,
    )

    PPO_CKPT = MODELS_DIR / "pricing_ppo" / "policy.pt"
    if not PPO_CKPT.exists():
        raise FileNotFoundError(
            f"PPO pricing checkpoint not found at {PPO_CKPT}.\n"
            "Train it first (`bash assets/pricing_agent.sh`) "
            "or launch with --static-prices to display the original MSRP."
        )
    print(f"Loading PPO pricing policy from {PPO_CKPT}...")
    import torch  # local; pricing_agent imports torch lazily

    _ppo_ckpt = load_checkpoint(PPO_CKPT)
    _multipliers = np.asarray(_ppo_ckpt["multipliers"], dtype=np.float64)
    _ppo_catalog = load_amazon_dresses_catalog(n_items=0, seed=0)
    _ppo_state = InventoryState.initialize(_ppo_catalog, PRICING_CONFIG, seed=0)
    _ppo_device = "cuda" if torch.cuda.is_available() else "cpu"
    _ppo_net = _build_actor_critic(len(_multipliers)).to(_ppo_device)
    _ppo_net.load_state_dict(_ppo_ckpt["state_dict"])
    _ppo_policy = PPOPolicy(
        net=_ppo_net,
        multipliers=_multipliers,
        device=_ppo_device,
        horizon=_ppo_ckpt["config"]["horizon"],
        price_norm=float(_ppo_state.base_prices.max()),
        inv_norm=float(_ppo_state._initial_inventories.max()),
        greedy=True,
    )
    _ppo_day0 = _ppo_policy.select_prices(_ppo_state, day=0)
    # Keys are stringified so the lookup below is insensitive to id dtype.
    optimized_prices = {
        str(iid): float(p)
        for iid, p in zip(_ppo_catalog["item_id"].tolist(), _ppo_day0)
    }
    print(f" Optimized prices computed for {len(optimized_prices):,} items")
else:
    print("Static prices mode (--static-prices): showing catalog MSRP.")


def _text_or_empty(value) -> str:
    """Return ``str(value)``, or ``''`` when *value* is missing.

    Plain ``str()`` would turn a missing cell (None / NaN / pd.NA) into the
    literal text ``'None'`` / ``'nan'``, which would then be served as a
    title or image URL and would defeat empty-title checks downstream.
    """
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return ''
    return str(value)


# item_id -> display record used by every API endpoint.
#   'price'      : optimized price when one was computed, else the catalog price
#   'base_price' : catalog price (None when missing)
item_lookup = {}
for _, row in item_meta.iterrows():
    iid = row['item_id']
    base = float(row['price']) if pd.notna(row.get('price')) else None
    # optimized_prices is empty in --static-prices mode, so this falls back
    # to the catalog price exactly as an explicit mode check would.
    display_price = optimized_prices.get(str(iid), base)
    item_lookup[iid] = {
        'title': _text_or_empty(row.get('title'))[:55],
        'price': display_price,
        'base_price': base,
        'image_url': _text_or_empty(row.get('image_url')),
    }

# user_id -> chronological list of interaction records (one dict per row).
user_histories = {}
for uid, group in interactions.groupby('user_id'):
    user_histories[uid] = group.to_dict('records')

# The 200 most active users (at least 10 interactions), most active first.
good_users_raw = sorted(
    [(uid, len(h)) for uid, h in user_histories.items() if len(h) >= 10],
    key=lambda x: -x[1]
)[:200]

# Each of the 200 displayed users gets a stable, UNIQUE women's name
# (no hash collisions). Assigned by alphabetical user_id so it's
# deterministic across restarts regardless of interaction-count ties.
# Display names handed out to the showcased users, one per user.
NAME_POOL = [
    'Aaliyah', 'Abigail', 'Adelina', 'Aisha', 'Alaia',
    'Alessia', 'Alina', 'Amara', 'Amaya', 'Amelia',
    'Amira', 'Anais', 'Andrea', 'Anika', 'Anya',
    'Aoife', 'Aria', 'Ariana', 'Astrid', 'Aurora',
    'Ava', 'Aya', 'Ayla', 'Azure', 'Beatrix',
    'Belen', 'Bianca', 'Briar', 'Brigitte', 'Calla',
    'Camila', 'Carmen', 'Cassia', 'Celeste', 'Charlotte',
    'Chiara', 'Chloe', 'Claire', 'Clara', 'Constance',
    'Coralie', 'Dahlia', 'Daniela', 'Daphne', 'Delfina',
    'Devi', 'Dunja', 'Eden', 'Edith', 'Elena',
    'Eliana', 'Elif', 'Elin', 'Elisa', 'Eloise',
    'Elsa', 'Ember', 'Emilia', 'Emma', 'Esme',
    'Esra', 'Estelle', 'Eva', 'Evelyn', 'Farah',
    'Fatima', 'Faye', 'Felicia', 'Fiona', 'Florence',
    'Freya', 'Frida', 'Gabriela', 'Genevieve', 'Giulia',
    'Greta', 'Gwen', 'Hadley', 'Hana', 'Hannah',
    'Harlow', 'Hazel', 'Helena', 'Hira', 'Iara',
    'Ida', 'Imani', 'Imogen', 'Inara', 'Ingrid',
    'Iris', 'Isabella', 'Isadora', 'Isla', 'Ivy',
    'Iza', 'Jade', 'Jana', 'Jasmine', 'Josephine',
    'Juliette', 'June', 'Juno', 'Kaia', 'Kalia',
    'Kara', 'Karina', 'Katia', 'Kavya', 'Khloe',
    'Kira', 'Kiri', 'Lailah', 'Lana', 'Larisa',
    'Lavinia', 'Layla', 'Leila', 'Lena', 'Lila',
    'Liliana', 'Lily', 'Linnea', 'Lior', 'Liv',
    'Liya', 'Lola', 'Lorena', 'Luna', 'Lyla',
    'Maeve', 'Magnolia', 'Maja', 'Malia', 'Manon',
    'Mara', 'Marcela', 'Margot', 'Maria', 'Marina',
    'Marisol', 'Marlowe', 'Maya', 'Mei', 'Melisa',
    'Mia', 'Mila', 'Mira', 'Miriam', 'Moana',
    'Nadia', 'Naia', 'Naomi', 'Nara', 'Natasha',
    'Nia', 'Niamh', 'Nika', 'Nina', 'Nisha',
    'Nora', 'Noor', 'Nova', 'Oksana', 'Olivia',
    'Ophelia', 'Paloma', 'Penelope', 'Petra', 'Phoebe',
    'Priya', 'Quinn', 'Quintessa', 'Rachel', 'Raina',
    'Raisa', 'Reema', 'Renata', 'Rhea', 'Riya',
    'Romy', 'Rosalie', 'Ruby', 'Saanvi', 'Saba',
    'Saoirse', 'Sasha', 'Selena', 'Selma', 'Senna',
    'Serena', 'Shira', 'Sienna', 'Simone', 'Sloan',
    'Sofia', 'Soleil', 'Sophia', 'Stella', 'Talia',
]
# Sanity checks on the literal above: exactly 200 names, all distinct.
assert len(NAME_POOL) == 200, f"NAME_POOL must have exactly 200 entries (has {len(NAME_POOL)})"
assert len(set(NAME_POOL)) == len(NAME_POOL), "NAME_POOL has duplicates"

# Pair user ids, in sorted order, with pool entries in list order. At most
# 200 users are showcased, so every user id receives a name.
_sorted_uids = sorted(entry[0] for entry in good_users_raw)
_uid_to_name = dict(zip(_sorted_uids, NAME_POOL))
good_users = [(uid, count, _uid_to_name[uid]) for uid, count in good_users_raw] item_popularity = interactions['item_id'].value_counts().index.tolist() # --- Load trained HLLM embeddings + FAISS index --- print("Loading HLLM embeddings...") hllm_embeddings = np.load(os.path.join(DATA_DIR, "processed", "hllm_item_embeddings.npy")).astype(np.float32) hllm_id_map = np.load(os.path.join(DATA_DIR, "processed", "hllm_item_id_map.npy"), allow_pickle=True) # Build item_id → HLLM index mapping (skip padding at index 0) item_to_hllm_idx = {str(iid): i for i, iid in enumerate(hllm_id_map) if iid != '[PAD]'} hllm_idx_to_item = {i: str(iid) for i, iid in enumerate(hllm_id_map) if iid != '[PAD]'} # Build FAISS index over item embeddings (skip padding row 0) # Use all rows including padding for index consistency (HLLM uses 1-indexed) print("Building FAISS index...") faiss_index = faiss.IndexFlatIP(hllm_embeddings.shape[1]) faiss_index.add(hllm_embeddings) print(f" FAISS index: {faiss_index.ntotal:,} vectors, {hllm_embeddings.shape[1]} dims") # Build user embeddings from purchase history (mean of item embeddings) print("Building user embeddings...") user_embeddings = {} for uid, records in user_histories.items(): item_ids = [r['item_id'] for r in records] idxs = [item_to_hllm_idx[iid] for iid in item_ids if iid in item_to_hllm_idx] if idxs: emb = hllm_embeddings[idxs].mean(axis=0) emb = emb / (np.linalg.norm(emb) + 1e-8) user_embeddings[uid] = emb print(f" {len(user_embeddings):,} user embeddings built") # --- Re-ranker (default: LightGBM; bypass with --retriever-only) --- RERANKER = None ITEM_FEAT_ARR = None USER_SCALARS = {} RETRIEVAL_TOP_K = 100 # candidates pulled from FAISS before rerank FINAL_TOP_K = 4 # items returned to the UI (top-4 gives recs more room beside the sidebar) if USE_RERANKER: from train_reranker_lightgbm import ( FEATURE_COLS, build_user_samples, compute_item_stats, compute_user_stats, ) LGBM_PATH = MODELS_DIR / "reranker_lightgbm" / "reranker_lightgbm.txt" 
if not LGBM_PATH.exists(): raise FileNotFoundError( f"LightGBM re-ranker checkpoint not found at {LGBM_PATH}.\n" "Train it first (`bash assets/train_reranker.sh`) " "or launch with --retriever-only to skip the re-ranker." ) print(f"Loading LightGBM re-ranker from {LGBM_PATH}...") RERANKER = lgb.Booster(model_file=str(LGBM_PATH)) print("Precomputing item / user feature tables for re-ranker...") _t = time.time() _item_stats = compute_item_stats(interactions, item_meta) _item_idx_to_id = [str(i) for i in hllm_id_map] _item_feat_cols = [ 'item_total_purchases', 'item_unique_buyers', 'item_pop_30d', 'item_pop_90d', 'item_pop_180d', 'item_trend', 'item_recency_days', 'item_age_days', 'log_price', 'title_length', 'desc_length', 'has_image', ] ITEM_FEAT_ARR = ( _item_stats.reindex(_item_idx_to_id).fillna(0)[_item_feat_cols] .to_numpy(dtype=np.float32) ) # Build user samples in the format compute_user_stats expects. For inference # we use the FULL history (no leave-last-out), so hist_idxs == idxs. 
_item_to_idx = {str(iid): i for i, iid in enumerate(hllm_id_map) if iid != '[PAD]'} _samples_for_inference = [] for _uid, _records in user_histories.items(): _seq = [r['item_id'] for r in _records] _ts = [int(r['timestamp']) for r in _records] _idxs = [_item_to_idx[i] for i in _seq if i in _item_to_idx] _ts_kept = [t for i, t in zip(_seq, _ts) if i in _item_to_idx] if _idxs: _samples_for_inference.append((_uid, _idxs, _idxs[-1], _ts_kept)) USER_SCALARS = compute_user_stats(_samples_for_inference, item_meta, _item_idx_to_id) _LOG_PRICE_COL = _item_feat_cols.index('log_price') _USER_HISTORY_HLLM_IDX = { uid: _idxs for (uid, _idxs, _, _) in _samples_for_inference } print(f" Precompute done in {time.time()-_t:.1f}s " f"({RERANKER.num_trees()} trees, {len(FEATURE_COLS)} features, " f"{len(USER_SCALARS):,} user scalar rows)") else: print("Re-ranker disabled (--retriever-only).") def _rerank_candidates( user_id: str, cand_hllm_idxs: np.ndarray, cand_scores: np.ndarray, ) -> np.ndarray: """Score candidates with LightGBM. Returns indices into ``cand_hllm_idxs`` sorted by descending model score. Caller filters / takes top-K. Vectorized over candidates only (one user per request, K~=100). 
""" history = _USER_HISTORY_HLLM_IDX.get(user_id, []) K = len(cand_hllm_idxs) F = len(FEATURE_COLS) f2p = {c: i for i, c in enumerate(FEATURE_COLS)} X = np.zeros((K, F), dtype=np.float32) # HLLM signals X[:, f2p['hllm_dot_product']] = cand_scores if history: recent = history[-10:] hist_emb = hllm_embeddings[recent] # (h, dim) cand_emb = hllm_embeddings[cand_hllm_idxs] # (K, dim) sims = cand_emb @ hist_emb.T # (K, h) X[:, f2p['hllm_max_hist_sim']] = sims.max(axis=1) X[:, f2p['hllm_avg_hist_sim']] = sims.mean(axis=1) # Item-side item_block = ITEM_FEAT_ARR[cand_hllm_idxs] # (K, 12) for c, name in enumerate(_item_feat_cols): X[:, f2p[name]] = item_block[:, c] # User-side us = USER_SCALARS.get(user_id, {}) for col in ('user_total_purchases', 'user_unique_items', 'user_avg_price', 'user_price_std', 'user_recency_days'): X[:, f2p[col]] = us.get(col, 0.0) # Cross cand_price = np.expm1(item_block[:, _LOG_PRICE_COL]) u_avg = us.get('user_avg_price', 0.0) X[:, f2p['price_ratio']] = cand_price / (u_avg + 1e-8) X[:, f2p['price_diff']] = cand_price - u_avg hist_set = set(history) X[:, f2p['is_repurchase']] = np.fromiter( (1.0 if int(c) in hist_set else 0.0 for c in cand_hllm_idxs), dtype=np.float32, count=K, ) scores = RERANKER.predict(X) return np.argsort(-scores), scores app = FastAPI() _gpu_cache = {'data': None, 'ts': 0} def gpu_stats(): """GPU stats with 5-second cache to avoid nvidia-smi overhead on every request.""" now = time.time() if _gpu_cache['data'] and now - _gpu_cache['ts'] < 5: return _gpu_cache['data'] try: r = subprocess.run( ['nvidia-smi', '--query-gpu=utilization.gpu,memory.used,memory.total,temperature.gpu,power.draw', '--format=csv,noheader,nounits', '-i=1'], capture_output=True, text=True, timeout=5) p = r.stdout.strip().split(', ') data = {'util': float(p[0]), 'vram_used': float(p[1])/1024, 'vram_total': float(p[2])/1024, 'temp': float(p[3]), 'power': float(p[4])} except: data = {'util': 0, 'vram_used': 0, 'vram_total': 252, 'temp': 0, 'power': 0} 
_gpu_cache['data'] = data _gpu_cache['ts'] = now return data @app.get("/api/users") def api_users(): return [{"id": uid, "count": c, "name": name} for uid, c, name in good_users] @app.get("/api/history/{user_id}") def api_history(user_id: str): history = user_histories.get(user_id, [])[-5:] return [{ 'item_id': h['item_id'], 'title': item_lookup.get(h['item_id'], {}).get('title', ''), 'price': item_lookup.get(h['item_id'], {}).get('price'), 'base_price': item_lookup.get(h['item_id'], {}).get('base_price'), 'image_url': item_lookup.get(h['item_id'], {}).get('image_url', ''), 'date': datetime.fromtimestamp(int(h['timestamp'])).strftime('%b %Y'), } for h in history] @app.get("/api/recommend/{user_id}") def api_recommend(user_id: str): t0 = time.time() history = user_histories.get(user_id, []) user_items = set(h['item_id'] for h in history) lat_retriever_ms = 0.0 lat_reranker_ms = 0.0 lat_explainer_ms = 0.0 user_emb = user_embeddings.get(user_id) if user_emb is not None: t = time.time() query = user_emb.reshape(1, -1).astype(np.float32) distances, indices = faiss_index.search(query, RETRIEVAL_TOP_K) lat_retriever_ms = (time.time() - t) * 1000 cand_hllm_idxs = indices[0] cand_scores = distances[0].astype(np.float32) if USE_RERANKER: t = time.time() order, rerank_scores = _rerank_candidates(user_id, cand_hllm_idxs, cand_scores) lat_reranker_ms = (time.time() - t) * 1000 display_scores = rerank_scores method = "HLLM + LightGBM" else: order = np.arange(len(cand_hllm_idxs)) display_scores = cand_scores method = "HLLM" candidates = [] scores = [] for rank in order: idx = int(cand_hllm_idxs[rank]) item_id = hllm_idx_to_item.get(idx) # Require item to be in the filtered metadata (drops non-dress # items that the unfiltered FAISS index still surfaces) and not # already in the user's history. 
if item_id and item_id in item_lookup and item_id not in user_items: candidates.append(item_id) scores.append(float(display_scores[rank])) if len(candidates) >= FINAL_TOP_K: break else: # Cold-start fallback: popularity (re-ranker requires a user embedding) candidates = [i for i in item_popularity if i not in user_items][:FINAL_TOP_K] scores = [0.97 - i * 0.04 for i in range(len(candidates))] method = "Popularity" recs = [] for item_id, score in zip(candidates, scores): info = item_lookup.get(item_id, {}) recs.append({ 'item_id': item_id, 'title': info.get('title', ''), 'price': info.get('price'), 'base_price': info.get('base_price'), 'image_url': info.get('image_url', ''), 'score': round(score, 3), }) # Build style-focused fallback (used only if Nemotron unreachable) recent_titles = [item_lookup.get(h['item_id'], {}).get('title', '') for h in history[-3:]] recent_titles = [t for t in recent_titles if t] if recent_titles: fallback = ( "These dresses share style characteristics with your recent picks — " "similar cuts, silhouettes, and visual themes identified by the HLLM retriever " "and ranked by the LightGBM re-ranker." ) else: fallback = "Showing popular dresses. Purchase a few items for personalized style matches." 
# Try Nemotron explanation (graceful fallback if Ollama unreachable) llm_generated = False if HAS_NEMOTRON_CLIENT and recent_titles: hist_for_llm = [{'title': item_lookup.get(h['item_id'], {}).get('title', ''), 'price': item_lookup.get(h['item_id'], {}).get('price')} for h in history[-5:]] t = time.time() explanation, llm_generated = get_explanation_or_fallback(hist_for_llm, recs, fallback) lat_explainer_ms = (time.time() - t) * 1000 else: explanation = fallback gpu = gpu_stats() return { 'recommendations': recs, 'explanation': explanation, 'llm_generated': llm_generated, 'method': method, 'latency_ms': round((time.time() - t0) * 1000), 'latency_retriever_ms': round(lat_retriever_ms, 1), 'latency_reranker_ms': round(lat_reranker_ms, 1), 'latency_explainer_ms': round(lat_explainer_ms), 'reranker_active': USE_RERANKER, 'gpu': gpu, } @app.get("/", response_class=HTMLResponse) def index(): return PAGE PAGE = r"""