dgx-spark-playbooks/nvidia/station-rec-sys/assets/setup.sh

504 lines
20 KiB
Bash
Raw Normal View History

2026-05-26 18:25:53 +00:00
#!/usr/bin/env bash
# setup.sh — Automated setup for the standalone recommender playbook.
#
# Installs all dependencies, downloads models and data, and prepares the
# environment to run the E2E HLLM recommendation pipeline.
# Idempotent: safe to re-run if interrupted — skips completed steps.
#
# USAGE EXAMPLES:
#
# # Default setup — stores data/models under $HOME
# bash setup.sh
#
# # Preview what would be done without executing
# bash setup.sh --dry-run
#
# # Verify prerequisites only
# bash setup.sh --check
#
# # Store large files on a different mount (SSD, NVMe, RAID array, etc.)
# bash setup.sh --workspace /raid/recsys-playbook
# bash setup.sh --workspace /mnt/nvme/recsys-playbook
# bash setup.sh --workspace /data/recsys-playbook
# bash setup.sh --data-dir /raid/recsys-playbook
#
# # Use environment variable instead of CLI flag
# export PLAYBOOK_WORKSPACE=/raid
# bash setup.sh
#
# # Show all options
# bash setup.sh --help
#
# WHAT IT INSTALLS under $PLAYBOOK_WORKSPACE (~80 GB total):
# - uv (Python project manager) — ~/.local/bin/
# - Ollama (LLM serving) — /usr/local/bin/ (requires sudo)
# - Nemotron Mini 4B (Ollama) — ~/.ollama/models/ (~3 GB)
# - Python deps (uv venv) — this repo's .venv/
# - HLLM code (ByteDance) — $WORKSPACE/hllm-code/ (~1 GB)
# - TinyLlama-1.1B model — $WORKSPACE/models/ (~2 GB, HF cache stays at default)
# - Amazon Clothing dataset — $WORKSPACE/data/ (~46 GB)
# - Training checkpoints — $WORKSPACE/checkpoints/ (~20-30 GB after training)
#
# REQUIRES: NVIDIA GPU with drivers installed, internet access.
# SUDO: Required only for Ollama install (can be pre-installed by an admin).
set -euo pipefail

# Global settings; parse_args may override them from the command line.
DRY_RUN=false
CHECK_ONLY=false
WORKSPACE="${PLAYBOOK_WORKSPACE:-$HOME}"

# print_help — write the usage text to stdout.
# The checkpoint size reads "~20-30 GB" so that the listed sizes add up to the
# "~80 GB total" stated in the same text.
print_help() {
cat <<'HELPEOF'
Usage: bash setup.sh [OPTIONS]
Options:
(no flags) Run full setup
--dry-run Print commands without executing
--check Validate prerequisites only
--workspace PATH Root directory for all playbook artifacts
(default: $HOME, or $PLAYBOOK_WORKSPACE if set)
Example: bash setup.sh --workspace /raid/recsys-playbook
--data-dir PATH Alias for --workspace
-h, --help Show this help
Environment:
PLAYBOOK_WORKSPACE Same as --workspace (CLI flag takes precedence)
What gets created under WORKSPACE (~80 GB total):
WORKSPACE/
├── station-rec-sys/ The playbook repo + uv venv
├── hllm-code/ ByteDance HLLM (~1 GB)
├── data/ Amazon dataset — raw + processed (~46 GB)
├── models/ TinyLlama-1.1B (~2 GB)
└── checkpoints/ HLLM training output (~20-30 GB after training)
HuggingFace and Ollama caches stay at their default locations (~/.cache/huggingface,
~/.ollama) so they can be shared with other projects on the machine.
HELPEOF
}

# parse_args ARGS... — apply command-line options to the global settings.
# Globals:  DRY_RUN, CHECK_ONLY, WORKSPACE (written)
# Exits:    0 after printing help; 1 on an unknown option or when
#           --workspace/--data-dir is missing its PATH (message on stderr).
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dry-run)
        DRY_RUN=true
        ;;
      --check)
        CHECK_ONLY=true
        ;;
      --workspace=* | --data-dir=*)
        WORKSPACE="${1#*=}"
        if [[ -z "$WORKSPACE" ]]; then
          echo "Option ${1%%=*} requires a non-empty PATH (run --help for usage)" >&2
          exit 1
        fi
        ;;
      --workspace | --data-dir)
        # A missing value, or a following flag, used to be ignored silently and
        # the default workspace was used instead. Fail loudly.
        if [[ $# -lt 2 || -z "${2:-}" || "${2:-}" == -* ]]; then
          echo "Option $1 requires a PATH argument (run --help for usage)" >&2
          exit 1
        fi
        WORKSPACE="$2"
        shift
        ;;
      -h | --help)
        print_help
        exit 0
        ;;
      *)
        echo "Unknown option: $1 (run --help for usage)" >&2
        exit 1
        ;;
    esac
    shift
  done
  WORKSPACE="${WORKSPACE%/}" # strip trailing slash if any
}

parse_args "$@"

# Paths derived from the script location and the chosen workspace.
REPO_DIR="$(cd "$(dirname "$0")/.." && pwd)"
HLLM_CODE_DIR="$WORKSPACE/hllm-code"
DATA_DIR="$WORKSPACE/data"
MODELS_DIR="$WORKSPACE/models"
CHECKPOINTS_DIR="$WORKSPACE/checkpoints"
# nearest_existing_dir PATH — print PATH, or its closest ancestor that exists.
# Lets the disk check work for a workspace that has not been created yet.
nearest_existing_dir() {
  local probe="${1:-/}"
  while [[ ! -e "$probe" && "$probe" != "/" && "$probe" != "." ]]; do
    probe=$(dirname -- "$probe")
  done
  printf '%s\n' "$probe"
}

# --check mode: report prerequisites and exit without installing anything.
# Exit status is 1 when any check is [FAIL], otherwise 0 (warnings do not fail).
# Every probe is guarded so a failing tool is reported as a result instead of
# aborting the script through `set -e` / `pipefail`.
if "${CHECK_ONLY:-false}"; then
  echo "Checking pre-requisites..."
  echo ""
  GREEN='\033[0;32m'
  YELLOW='\033[0;33m'
  RED='\033[0;31m'
  NC='\033[0m'
  ok=0
  warn=0
  fail=0

  # GPU / driver
  if command -v nvidia-smi &>/dev/null; then
    gpu_name=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1) || gpu_name=""
    gpu_mem_mib=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits 2>/dev/null | head -1) || gpu_mem_mib=""
    gpu_mem_mib="${gpu_mem_mib//[[:space:]]/}"
    if [[ -z "$gpu_name" ]]; then
      echo -e " ${RED}[FAIL]${NC} nvidia-smi found but could not query the GPU — is the driver loaded?"
      fail=$((fail + 1))
    else
      # memory.total may be a non-numeric placeholder; only do arithmetic on digits.
      if [[ "$gpu_mem_mib" =~ ^[0-9]+$ ]]; then
        gpu_mem_desc="$((gpu_mem_mib / 1024)) GB"
      else
        gpu_mem_desc="memory size not reported"
      fi
      echo -e " ${GREEN}[OK]${NC} GPU: $gpu_name ($gpu_mem_desc)"
      ok=$((ok + 1))
    fi
  else
    echo -e " ${RED}[FAIL]${NC} nvidia-smi not found — NVIDIA driver not installed"
    fail=$((fail + 1))
  fi

  # CUDA toolkit
  if command -v nvcc &>/dev/null; then
    cuda_ver=$(nvcc --version 2>/dev/null | grep "release" | sed -e 's/.*release //' -e 's/,.*//') || cuda_ver=""
    echo -e " ${GREEN}[OK]${NC} CUDA: ${cuda_ver:-unknown}"
    ok=$((ok + 1))
  else
    echo -e " ${YELLOW}[WARN]${NC} nvcc not found — CUDA toolkit not on PATH (driver CUDA may still work)"
    warn=$((warn + 1))
  fi

  # Free disk space, measured on the closest existing ancestor of the workspace.
  disk_probe=$(nearest_existing_dir "${WORKSPACE:-/}")
  avail_gb=$(df -BG "$disk_probe" 2>/dev/null | tail -1 | awk '{print $4}' | tr -d 'G') || avail_gb=""
  [[ "$avail_gb" =~ ^[0-9]+$ ]] || avail_gb=""
  if [[ -n "$avail_gb" ]] && [ "$avail_gb" -ge 80 ]; then
    echo -e " ${GREEN}[OK]${NC} Disk: ${avail_gb} GB available at $WORKSPACE (need ~80 GB)"
    ok=$((ok + 1))
  else
    echo -e " ${YELLOW}[WARN]${NC} Disk: ${avail_gb:-?} GB available at $WORKSPACE (need ~80 GB)"
    echo " Set PLAYBOOK_WORKSPACE to a path with more space, or use --workspace"
    warn=$((warn + 1))
  fi

  # Passwordless sudo
  if command -v sudo &>/dev/null && sudo -n true 2>/dev/null; then
    echo -e " ${GREEN}[OK]${NC} sudo: available (needed for Ollama install)"
    ok=$((ok + 1))
  else
    echo -e " ${YELLOW}[WARN]${NC} sudo: not available or requires password"
    echo " Ollama install needs sudo — ask an admin to pre-install if needed"
    warn=$((warn + 1))
  fi

  # Command-line tools used by the setup steps
  for tool in git wget curl; do
    if command -v "$tool" &>/dev/null; then
      echo -e " ${GREEN}[OK]${NC} $tool: $(command -v "$tool")"
      ok=$((ok + 1))
    else
      echo -e " ${RED}[FAIL]${NC} $tool: not found"
      fail=$((fail + 1))
    fi
  done

  echo ""
  echo -e "Result: ${GREEN}$ok passed${NC}, ${YELLOW}$warn warnings${NC}, ${RED}$fail failed${NC}"
  if [ "$fail" -gt 0 ]; then
    echo "Fix failures above before running setup."
    exit 1
  elif [ "$warn" -gt 0 ]; then
    echo "Review warnings above before running setup."
  else
    echo "All checks passed. Ready to run: bash assets/setup.sh"
  fi
  exit 0
fi
# run CMD [ARGS...] — log the command line, then execute it.
# When the global DRY_RUN evaluates as a succeeding command (e.g. "true"),
# the command is only logged and run returns 0.
# Returns: the exit status of CMD when it is executed.
run() {
  printf ' -> %s\n' "$*"
  if $DRY_RUN; then
    return 0
  fi
  "$@"
}
# section TITLE — print a blank line followed by TITLE framed between two
# horizontal rules, to separate the setup steps in the output.
section() {
  local rule="============================================================"
  printf '\n%s\n %s\n%s\n' "$rule" "$1" "$rule"
}
echo "Workspace: $WORKSPACE"
echo "Repo: $REPO_DIR"
# ---------------------------------------------------------------
section "Step 1: System tools (uv, Ollama)"
# ---------------------------------------------------------------
# Both installers are fetched with curl and piped into sh. The inner shell
# enables pipefail so a failed download fails the step; without it the
# pipeline's status is that of `sh`, which exits 0 on empty input.
if ! command -v uv &>/dev/null; then
  echo "Installing uv..."
  run bash -c 'set -o pipefail; curl -LsSf https://astral.sh/uv/install.sh | sh'
  # Make uv visible to the rest of this script.
  export PATH="$HOME/.local/bin:$PATH"
else
  echo "uv already installed: $(uv --version)"
fi
if ! command -v ollama &>/dev/null; then
  echo "Installing Ollama..."
  run bash -c 'set -o pipefail; curl -fsSL https://ollama.com/install.sh | sh'
else
  echo "Ollama already installed: $(ollama --version)"
fi
# ---------------------------------------------------------------
section "Step 2: Python environment (uv)"
# ---------------------------------------------------------------
cd "$REPO_DIR"
if [ -d ".venv" ]; then
  echo "Project venv already exists. Syncing dependencies..."
else
  echo "Creating project venv and installing all dependencies..."
fi
run uv sync --inexact

if $DRY_RUN; then
  # The checks below call `uv run` directly, which would execute for real
  # (and can create/sync the project environment), so skip them in a dry run.
  echo ""
  echo "Dry run: skipping package verification and W&B login."
else
  echo ""
  echo "Verifying key packages..."
  # Import failures are reported as warnings; they do not abort the setup.
  CUDA_VISIBLE_DEVICES=0 uv run python -c "import torch; print(f' PyTorch {torch.__version__}, CUDA: {torch.version.cuda}, GPU available: {torch.cuda.is_available()}')" || echo " WARNING: PyTorch import failed"
  uv run python -c "import faiss; print(f' FAISS installed')" || echo " WARNING: faiss not found"
  uv run python -c "import transformers; print(f' transformers {transformers.__version__}')" || echo " WARNING: transformers import failed"
  CUDA_VISIBLE_DEVICES=0 uv run python -c "import deepspeed; print(f' deepspeed {deepspeed.__version__}')" || echo " WARNING: deepspeed import failed"
  uv run python -c "import peft; print(f' peft {peft.__version__}')" || echo " WARNING: peft import failed"
  echo ""
  echo "Optional W&B login..."
  if uv run wandb status 2>/dev/null | grep -q "Logged in"; then
    echo " W&B already logged in."
  elif [ -t 0 ]; then
    # Treat EOF on the prompt as "no" instead of aborting under `set -e`.
    read -r -p " Log in to Weights & Biases now? [y/N] " wandb_confirm || wandb_confirm=""
    if [[ "$wandb_confirm" =~ ^[Yy]$ ]]; then
      run uv run wandb login
    else
      echo " Skipping W&B login. Run 'uv run wandb login' later to enable training dashboards."
    fi
  else
    echo " Non-interactive shell; skipping W&B login prompt."
    echo " Run 'uv run wandb login' later to enable training dashboards."
  fi
fi
# ---------------------------------------------------------------
section "Step 3: flash-attn (source build for target GPU)"
# ---------------------------------------------------------------
# flash-attn must be built from source to target specific GPU architectures.
# Pre-built wheels only cover older CUDA versions.
# Note: uv sync --inexact (Step 2) preserves the editable install, so
# re-runs skip this step entirely once flash-attn is built.
FLASH_ATTN_REPO="$HOME/dev/flash-attention"
FLASH_ATTN_PY="$REPO_DIR/.venv/bin/python"
FLASH_ATTN_EDITABLE_WHEEL="${FLASH_ATTN_EDITABLE_WHEEL:-}"

# flash_attn_python ARGS... — run Python in the project environment.
# In a dry run it uses the venv interpreter directly (and fails if the venv
# does not exist) so that probing never goes through `uv run`.
flash_attn_python() {
  if $DRY_RUN; then
    [ -x "$FLASH_ATTN_PY" ] && "$FLASH_ATTN_PY" "$@"
  else
    uv run python "$@"
  fi
}

if flash_attn_python -c "import flash_attn" 2>/dev/null; then
  echo "flash-attn already installed: $(flash_attn_python -c 'import flash_attn; print(flash_attn.__version__)')"
else
  # NOTE(review): the extension and wheel names below are pinned to CPython 3.13
  # on aarch64 and flash-attn 2.8.4; other environments fall through to a full build.
  if [ -f "$FLASH_ATTN_REPO/flash_attn_2_cuda.cpython-313-aarch64-linux-gnu.so" ]; then
    echo "Found existing flash-attn source build at $FLASH_ATTN_REPO"
    if [ -z "$FLASH_ATTN_EDITABLE_WHEEL" ]; then
      FLASH_ATTN_EDITABLE_WHEEL=$(find "$HOME/.cache/uv/sdists-v9/editable" \
        -path "*/flash_attn-2.8.4-0.editable-cp313-cp313-linux_aarch64.whl" \
        -print -quit 2>/dev/null || true)
    fi
    if [ -n "$FLASH_ATTN_EDITABLE_WHEEL" ] && [ -f "$FLASH_ATTN_EDITABLE_WHEEL" ]; then
      echo " Installing editable flash-attn wheel without rebuilding:"
      echo " $FLASH_ATTN_EDITABLE_WHEEL"
      run uv pip install --python "$FLASH_ATTN_PY" "$FLASH_ATTN_EDITABLE_WHEEL"
    else
      echo " Existing compiled extension found, but no editable wheel was found in uv cache."
    fi
  fi
  if flash_attn_python -c "import flash_attn, flash_attn_2_cuda; print(f' flash-attn {flash_attn.__version__} from {flash_attn.__file__}')" 2>/dev/null; then
    echo " Reused existing flash-attn source build."
  else
    # Full source compilation required (~20 min first time)
    # Detect CUDA toolkit — prefer 13.1, fall back to whatever /usr/local/cuda points to
    if [ -d "/usr/local/cuda-13.1" ]; then
      export CUDA_HOME="/usr/local/cuda-13.1"
    elif [ -d "/usr/local/cuda" ]; then
      export CUDA_HOME="/usr/local/cuda"
    fi
    echo " CUDA_HOME: ${CUDA_HOME:-not set}"
    # Target GPU architectures — auto-detect from nvidia-smi so we only compile
    # kernels for GPUs actually present. Two env vars matter:
    # - TORCH_CUDA_ARCH_LIST: dot-notation (e.g. "10.3"), used by torch's C++
    #   extension builder. Accepts the raw compute_cap string.
    # - FLASH_ATTN_CUDA_ARCHS: flash-attn's own var (setup.py:72). Semicolon-
    #   separated family integers (80;90;100;110;120). Ignores
    #   TORCH_CUDA_ARCH_LIST entirely, so it must be set separately.
    # Mapping: "10.3" -> major=10 -> "100" (the sm_100f family, forward-
    # compatible with sm_101/102/103).
    #
    # A missing or failing nvidia-smi must leave detected_cc empty so the
    # defaults below apply; without the `||` guard, pipefail + `set -e` would
    # abort the script here. Only lines shaped like "MAJOR.MINOR" are kept.
    detected_cc=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null \
      | awk '{ gsub(/[[:space:]]/, "") } /^[0-9]+\.[0-9]+$/' \
      | sort -u | tr '\n' ';' | sed 's/;$//') || detected_cc=""
    if [ -z "${TORCH_CUDA_ARCH_LIST:-}" ]; then
      export TORCH_CUDA_ARCH_LIST="${detected_cc:-10.3}"
    fi
    if [ -z "${FLASH_ATTN_CUDA_ARCHS:-}" ]; then
      # The awk pattern skips empty input; the unguarded form printed "0" for an
      # empty line, which defeated the "100" default.
      fa_archs=$(printf '%s' "$detected_cc" | tr ';' '\n' \
        | awk -F. '$1 ~ /^[0-9]+$/ { print $1 * 10 }' \
        | sort -n -u | tr '\n' ';' | sed 's/;$//') || fa_archs=""
      export FLASH_ATTN_CUDA_ARCHS="${fa_archs:-100}"
    fi
    echo " Detected compute_cap: ${detected_cc:-unknown}"
    echo " TORCH_CUDA_ARCH_LIST: $TORCH_CUDA_ARCH_LIST (for torch extensions)"
    echo " FLASH_ATTN_CUDA_ARCHS: $FLASH_ATTN_CUDA_ARCHS (flash-attn family targets)"
    if [ -d "$FLASH_ATTN_REPO" ]; then
      echo " Building flash-attn from local repo: $FLASH_ATTN_REPO"
      echo " First-time build compiles CUDA kernels from source using all CPU cores."
      echo " This may take up to 30 minutes. Subsequent runs are near-instant."
      cd "$FLASH_ATTN_REPO"
      run git pull
    else
      echo " Cloning flash-attention repo and building from source..."
      echo " First-time build compiles CUDA kernels from source using all CPU cores."
      echo " This may take up to 30 minutes. Subsequent runs are near-instant."
      run mkdir -p "$HOME/dev"
      run git clone https://github.com/Dao-AILab/flash-attention.git "$FLASH_ATTN_REPO"
      # In a dry run the clone was only printed, so the directory does not exist.
      $DRY_RUN || cd "$FLASH_ATTN_REPO"
    fi
    # Install into the project venv (--python points uv to the right env)
    run uv pip install --python "$FLASH_ATTN_PY" -e . --no-build-isolation
    cd "$REPO_DIR"
    # Verify
    if $DRY_RUN; then
      echo " Dry run: flash-attn build not executed; skipping verification."
    elif flash_attn_python -c "import flash_attn; print(f' flash-attn {flash_attn.__version__}')" 2>/dev/null; then
      echo " flash-attn installed successfully."
    else
      echo " WARNING: flash-attn build failed. Training may still work without it (slower attention)."
    fi
  fi
fi
# ---------------------------------------------------------------
section "Step 4: Clone and patch HLLM"
# ---------------------------------------------------------------
if [ ! -d "$HLLM_CODE_DIR/REC" ]; then
  echo "Cloning HLLM from github.com/bytedance/HLLM..."
  run mkdir -p "$WORKSPACE"
  UPSTREAM_TMP="$WORKSPACE/.hllm-upstream"
  # An interrupted earlier run can leave the temporary clone behind, and
  # `git clone` refuses a non-empty destination — clear it first.
  run rm -rf "$UPSTREAM_TMP"
  run git clone https://github.com/bytedance/HLLM.git "$UPSTREAM_TMP"
  # Copy the *contents* of code/ so the layout is the same whether or not
  # $HLLM_CODE_DIR already exists (plain `cp -r code DEST` nests as DEST/code).
  run mkdir -p "$HLLM_CODE_DIR"
  run cp -r "$UPSTREAM_TMP/code/." "$HLLM_CODE_DIR/"
  run rm -rf "$UPSTREAM_TMP"
else
  echo "HLLM code already present at $HLLM_CODE_DIR/"
fi
# Apply LoRA patches (overwrites upstream files with patched versions)
PATCHES_DIR="$REPO_DIR/assets/patches/HLLM"
if [ -d "$PATCHES_DIR" ]; then
  echo " Applying LoRA patches from $PATCHES_DIR ..."
  run cp "$PATCHES_DIR/hllm.py" "$HLLM_CODE_DIR/REC/model/HLLM/hllm.py"
  run cp "$PATCHES_DIR/modeling_bert.py" "$HLLM_CODE_DIR/REC/model/HLLM/modeling_bert.py"
  run cp "$PATCHES_DIR/trainer.py" "$HLLM_CODE_DIR/REC/trainer/trainer.py"
  run cp "$PATCHES_DIR/wandblogger.py" "$HLLM_CODE_DIR/REC/utils/wandblogger.py"
  run cp "$PATCHES_DIR/argument_list.py" "$HLLM_CODE_DIR/REC/utils/argument_list.py"
  run cp "$PATCHES_DIR/utils.py" "$HLLM_CODE_DIR/REC/data/utils.py"
  run cp "$PATCHES_DIR/dataload.py" "$HLLM_CODE_DIR/REC/data/dataload.py"
  echo " LoRA patches applied."
fi
# Dataset + information dirs live alongside the HLLM code (HLLM config reads from them)
run mkdir -p "$HLLM_CODE_DIR/dataset" "$HLLM_CODE_DIR/information" "$CHECKPOINTS_DIR"
# Copy training/extraction scripts from the repo into hllm-code/ for HLLM to find
if [ -f "$REPO_DIR/assets/train_retriever.sh" ]; then
  run cp "$REPO_DIR/assets/train_retriever.sh" "$HLLM_CODE_DIR/train_lora.sh"
  run chmod +x "$HLLM_CODE_DIR/train_lora.sh"
fi
if [ -f "$REPO_DIR/assets/extract_embeddings.py" ]; then
  run cp "$REPO_DIR/assets/extract_embeddings.py" "$HLLM_CODE_DIR/extract_embeddings.py"
fi
# Create extract_embeddings.sh wrapper inside hllm-code/
# The heredoc delimiter is unquoted: $HLLM_CODE_DIR, $CHECKPOINTS_DIR, $DATA_DIR
# and $REPO_DIR are baked in now, while \$-escaped names are resolved when the
# wrapper runs. In a dry run nothing is written (the target directory was never
# created), so only the action is reported.
if $DRY_RUN; then
  echo " -> write $HLLM_CODE_DIR/extract_embeddings.sh"
  echo " -> chmod +x $HLLM_CODE_DIR/extract_embeddings.sh"
else
  cat > "$HLLM_CODE_DIR/extract_embeddings.sh" << SCRIPT
#!/bin/bash
set -e
cd "$HLLM_CODE_DIR"
unset MPLBACKEND
CKPT_PATH="\${1:-$CHECKPOINTS_DIR/dresses_lora_r16/HLLM-0.pth/checkpoint/mp_rank_00_model_states.pt}"
OUTPUT_DIR="\${2:-$DATA_DIR/processed}"
MASTER_PORT="\${PLAYBOOK_EXTRACT_MASTER_PORT:-12399}"
[ "\$#" -ge 1 ] && shift
[ "\$#" -ge 1 ] && shift
exec uv run --project "$REPO_DIR" torchrun --master_port="\$MASTER_PORT" --nproc_per_node=1 --nnodes=1 extract_embeddings.py \\
--ckpt_path "\$CKPT_PATH" \\
--output_dir "\$OUTPUT_DIR" \\
"\$@"
SCRIPT
  chmod +x "$HLLM_CODE_DIR/extract_embeddings.sh"
fi
# ---------------------------------------------------------------
section "Step 5: Download models"
# ---------------------------------------------------------------
# TinyLlama-1.1B (backbone for HLLM retriever training)
# If the download fails with 401/403, log in first: uv run hf login
# NOTE(review): completeness is judged by config.json alone — confirm that an
# interrupted download cannot leave config.json in place without the weights.
if [ -f "$MODELS_DIR/TinyLlama-1.1B/config.json" ]; then
  echo "TinyLlama-1.1B already present at $MODELS_DIR/TinyLlama-1.1B/"
else
  echo "Downloading TinyLlama-1.1B from HuggingFace (~2 GB)..."
  echo " If this fails with a 401/403 error, run: uv run hf login"
  run mkdir -p "$MODELS_DIR"
  run uv run hf download \
    TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
    --local-dir "$MODELS_DIR/TinyLlama-1.1B"
fi
# Nemotron Mini via Ollama
# `ollama list` and `ollama pull` talk to the Ollama server, which this script
# otherwise starts only in Step 7. Bring it up here if it is not reachable so
# the pull does not abort the setup.
ollama_tags_url="http://localhost:11434/api/tags"
if ! $DRY_RUN && ! curl -s "$ollama_tags_url" &>/dev/null; then
  echo "Ollama server is not reachable; starting it so the model can be pulled..."
  run ollama serve &
  # Wait up to ~15 s for the API to answer.
  for ((attempt = 0; attempt < 15; attempt++)); do
    if curl -s "$ollama_tags_url" &>/dev/null; then
      break
    fi
    sleep 1
  done
fi
if ollama list 2>/dev/null | grep -q "nemotron-mini"; then
  echo "nemotron-mini already pulled in Ollama."
else
  echo "Pulling nemotron-mini (2.7 GB)..."
  run ollama pull nemotron-mini
fi
# ---------------------------------------------------------------
section "Step 6: Download and process Amazon data"
# ---------------------------------------------------------------
REVIEWS_FILE="$DATA_DIR/raw/raw/review_categories/Clothing_Shoes_and_Jewelry.jsonl"
META_FILE="$DATA_DIR/raw/raw/meta_categories/meta_Clothing_Shoes_and_Jewelry.jsonl"
run mkdir -p "$DATA_DIR/raw/raw/review_categories" "$DATA_DIR/raw/raw/meta_categories" "$DATA_DIR/processed"
# Dataset moved from datarepo.eng.ucsd.edu to HuggingFace (plain JSONL, not gzipped)
HF_DATASET_BASE="https://huggingface.co/datasets/McAuley-Lab/Amazon-Reviews-2023/resolve/main"

# fetch_dataset_file URL DEST — resumable download that creates DEST only once
# the transfer has finished. Data is written to DEST.part (resumed by `wget -c`
# on a re-run) and renamed afterwards, so an interrupted download is never
# mistaken for a complete file by the "already present" checks below.
fetch_dataset_file() {
  local url="$1"
  local dest="$2"
  local partial="${dest}.part"
  run wget -c "$url" -O "$partial"
  run mv -- "$partial" "$dest"
}

if [ -f "$REVIEWS_FILE" ]; then
  echo "Reviews data already present ($(du -h "$REVIEWS_FILE" | cut -f1))"
else
  echo "Downloading Amazon Reviews — Clothing, Shoes & Jewelry (~27.8 GB)..."
  fetch_dataset_file \
    "$HF_DATASET_BASE/raw/review_categories/Clothing_Shoes_and_Jewelry.jsonl" \
    "$REVIEWS_FILE"
fi
if [ -f "$META_FILE" ]; then
  echo "Metadata already present ($(du -h "$META_FILE" | cut -f1))"
else
  echo "Downloading Amazon Metadata — Clothing, Shoes & Jewelry (~18 GB)..."
  fetch_dataset_file \
    "$HF_DATASET_BASE/raw/meta_categories/meta_Clothing_Shoes_and_Jewelry.jsonl" \
    "$META_FILE"
fi
# Process into HLLM format
# NOTE(review): prepare_data.py is invoked without arguments and WORKSPACE is not
# exported by this script — confirm how it locates the data directory when
# --workspace is given on the command line rather than via PLAYBOOK_WORKSPACE.
if [ -f "$DATA_DIR/processed/dress_metadata.parquet" ] && [ -f "$HLLM_CODE_DIR/dataset/amazon_dresses.csv" ]; then
  echo "Processed data already exists. Skipping processing."
else
  echo "Processing Amazon data into HLLM format..."
  cd "$REPO_DIR"
  run uv run python assets/prepare_data.py
fi
# ---------------------------------------------------------------
section "Step 7: Start Ollama"
# ---------------------------------------------------------------
# The server counts as "up" when its local HTTP API answers. `ollama list` is
# informational only, so its failure must not abort an otherwise finished setup.
if curl -s http://localhost:11434/api/tags &>/dev/null; then
  echo "Ollama is already running."
  ollama list || true
elif $DRY_RUN; then
  echo "Starting Ollama..."
  run ollama serve
  echo "Dry run: not waiting for Ollama to start."
else
  echo "Starting Ollama..."
  run ollama serve &
  # Poll for up to ~15 s instead of a single fixed sleep, so a slow start does
  # not produce a spurious warning and a fast start does not wait needlessly.
  ollama_up=false
  for ((attempt = 0; attempt < 15; attempt++)); do
    if curl -s http://localhost:11434/api/tags &>/dev/null; then
      ollama_up=true
      break
    fi
    sleep 1
  done
  if $ollama_up; then
    echo "Ollama started successfully."
    ollama list || true
  else
    echo "WARNING: Ollama may not have started. Run 'ollama serve' manually."
  fi
fi
# ---------------------------------------------------------------
section "Setup complete!"
# ---------------------------------------------------------------
# Closing summary: where things were installed and what to run next.
# One argument per output line; empty arguments produce the blank lines.
printf '%s\n' \
  "" \
  "Workspace: $WORKSPACE" \
  "Repo: $REPO_DIR" \
  "" \
  "Next steps:" \
  " cd $REPO_DIR" \
  " bash assets/train_retriever.sh" \
  " uv run python assets/extract_embeddings.py" \
  "" \
  "Expected runtime: ~20 min (with training at bs=512) or ~5 min (with pre-computed embeddings)" \
  ""