mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-04-26 11:53:53 +00:00
chore: Regenerate all playbooks
This commit is contained in:
parent
e51dae47ec
commit
ba13d4dcc2
@ -134,8 +134,8 @@ python Llama3_3B_full_finetuning.py
|
|||||||
## LoRA fine-tuning on Llama 3.1 8B
|
## LoRA fine-tuning on Llama 3.1 8B
|
||||||
python Llama3_8B_LoRA_finetuning.py
|
python Llama3_8B_LoRA_finetuning.py
|
||||||
|
|
||||||
## LoRA fine-tuning on Llama 3.1 70B
|
## qLoRA fine-tuning on Llama 3.1 70B
|
||||||
python Llama3_70B_LoRA_finetuning.py
|
python Llama3_70B_qLoRA_finetuning.py
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Common Command-Line Arguments
|
#### Common Command-Line Arguments
|
||||||
@ -158,7 +158,7 @@ All scripts support the following command-line arguments for customization:
|
|||||||
- `--lora_rank`: LoRA rank - higher values = more trainable parameters (default: `8`)
|
- `--lora_rank`: LoRA rank - higher values = more trainable parameters (default: `8`)
|
||||||
|
|
||||||
##### Dataset Configuration
|
##### Dataset Configuration
|
||||||
- `--dataset_size`: Number of samples to use from the Alpaca dataset (default: `500`)
|
- `--dataset_size`: Number of samples to use from the Alpaca dataset (default: `512`)
|
||||||
|
|
||||||
##### Logging Configuration
|
##### Logging Configuration
|
||||||
- `--logging_steps`: Log metrics every N steps (default: `1`)
|
- `--logging_steps`: Log metrics every N steps (default: `1`)
|
||||||
@ -167,13 +167,6 @@ All scripts support the following command-line arguments for customization:
|
|||||||
##### Model Saving
|
##### Model Saving
|
||||||
- `--output_dir`: Directory to save the fine-tuned model (default: `None` - model not saved)
|
- `--output_dir`: Directory to save the fine-tuned model (default: `None` - model not saved)
|
||||||
|
|
||||||
##### Performance Optimization
|
|
||||||
- `--use_torch_compile`: Enable `torch.compile()` for faster training (flag)
|
|
||||||
|
|
||||||
> [!WARNING]
|
|
||||||
> **Important:** The `--use_torch_compile` flag is **not compatible with QLoRA** (`Llama3_70B_qLoRA_finetuning.py`).
|
|
||||||
> Only use this flag with full fine-tuning and standard LoRA scripts.
|
|
||||||
|
|
||||||
#### Usage Examples
|
#### Usage Examples
|
||||||
```bash
|
```bash
|
||||||
python Llama3_8B_LoRA_finetuning.py \
|
python Llama3_8B_LoRA_finetuning.py \
|
||||||
|
|||||||
@ -31,7 +31,7 @@ ALPACA_PROMPT_TEMPLATE = """Below is an instruction that describes a task, paire
|
|||||||
|
|
||||||
### Response: {}"""
|
### Response: {}"""
|
||||||
|
|
||||||
def get_alpaca_dataset(eos_token, dataset_size=500):
|
def get_alpaca_dataset(eos_token, dataset_size=512):
|
||||||
# Preprocess the dataset
|
# Preprocess the dataset
|
||||||
def preprocess(x):
|
def preprocess(x):
|
||||||
texts = [
|
texts = [
|
||||||
@ -69,7 +69,7 @@ def main(args):
|
|||||||
# Configure the SFT config
|
# Configure the SFT config
|
||||||
config = {
|
config = {
|
||||||
"per_device_train_batch_size": args.batch_size,
|
"per_device_train_batch_size": args.batch_size,
|
||||||
"num_train_epochs": 0.01, # Warmup epoch
|
"num_train_epochs": 0.05, # Warmup epoch
|
||||||
"gradient_accumulation_steps": args.gradient_accumulation_steps,
|
"gradient_accumulation_steps": args.gradient_accumulation_steps,
|
||||||
"learning_rate": args.learning_rate,
|
"learning_rate": args.learning_rate,
|
||||||
"optim": "adamw_torch",
|
"optim": "adamw_torch",
|
||||||
@ -79,15 +79,13 @@ def main(args):
|
|||||||
"dataset_text_field": "text",
|
"dataset_text_field": "text",
|
||||||
"packing": False,
|
"packing": False,
|
||||||
"max_length": args.seq_length,
|
"max_length": args.seq_length,
|
||||||
"torch_compile": False,
|
|
||||||
"report_to": "none",
|
"report_to": "none",
|
||||||
"logging_dir": args.log_dir,
|
"logging_dir": args.log_dir,
|
||||||
"logging_steps": args.logging_steps,
|
"logging_steps": args.logging_steps,
|
||||||
"gradient_checkpointing": args.gradient_checkpointing, # Save memory
|
"gradient_checkpointing": args.gradient_checkpointing, # Save memory
|
||||||
}
|
}
|
||||||
|
|
||||||
# Compile model if requested
|
# Compile model for faster training
|
||||||
if args.use_torch_compile:
|
|
||||||
print("Compiling model with torch.compile()...")
|
print("Compiling model with torch.compile()...")
|
||||||
model = torch.compile(model)
|
model = torch.compile(model)
|
||||||
|
|
||||||
@ -124,13 +122,6 @@ def main(args):
|
|||||||
print(f"Train loss: {trainer_stats.metrics['train_loss']:.4f}")
|
print(f"Train loss: {trainer_stats.metrics['train_loss']:.4f}")
|
||||||
print(f"{'='*60}\n")
|
print(f"{'='*60}\n")
|
||||||
|
|
||||||
# Save model if requested
|
|
||||||
if args.output_dir:
|
|
||||||
print(f"Saving model to {args.output_dir}...")
|
|
||||||
trainer.save_model(args.output_dir)
|
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
|
||||||
print("Model saved successfully!")
|
|
||||||
|
|
||||||
|
|
||||||
def parse_arguments():
|
def parse_arguments():
|
||||||
parser = argparse.ArgumentParser(description="Llama 3.2 3B Full Fine-tuning (SFT)")
|
parser = argparse.ArgumentParser(description="Llama 3.2 3B Full Fine-tuning (SFT)")
|
||||||
@ -157,7 +148,7 @@ def parse_arguments():
|
|||||||
help="Enable gradient checkpointing to save memory")
|
help="Enable gradient checkpointing to save memory")
|
||||||
|
|
||||||
# Dataset configuration
|
# Dataset configuration
|
||||||
parser.add_argument("--dataset_size", type=int, default=500,
|
parser.add_argument("--dataset_size", type=int, default=512,
|
||||||
help="Number of samples to use from dataset")
|
help="Number of samples to use from dataset")
|
||||||
|
|
||||||
# Logging configuration
|
# Logging configuration
|
||||||
@ -166,12 +157,6 @@ def parse_arguments():
|
|||||||
parser.add_argument("--log_dir", type=str, default="logs",
|
parser.add_argument("--log_dir", type=str, default="logs",
|
||||||
help="Directory for logs")
|
help="Directory for logs")
|
||||||
|
|
||||||
# Compilation and saving
|
|
||||||
parser.add_argument("--use_torch_compile", action="store_true",
|
|
||||||
help="Use torch.compile() for faster training")
|
|
||||||
parser.add_argument("--output_dir", type=str, default=None,
|
|
||||||
help="Directory to save the fine-tuned model")
|
|
||||||
|
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
@ -190,7 +175,6 @@ if __name__ == "__main__":
|
|||||||
print(f"Learning rate: {args.learning_rate}")
|
print(f"Learning rate: {args.learning_rate}")
|
||||||
print(f"Dataset size: {args.dataset_size}")
|
print(f"Dataset size: {args.dataset_size}")
|
||||||
print(f"Gradient checkpointing: {args.gradient_checkpointing}")
|
print(f"Gradient checkpointing: {args.gradient_checkpointing}")
|
||||||
print(f"Torch compile: {args.use_torch_compile}")
|
|
||||||
print(f"{'='*60}\n")
|
print(f"{'='*60}\n")
|
||||||
|
|
||||||
main(args)
|
main(args)
|
||||||
|
|||||||
@ -32,7 +32,7 @@ ALPACA_PROMPT_TEMPLATE = """Below is an instruction that describes a task, paire
|
|||||||
|
|
||||||
### Response: {}"""
|
### Response: {}"""
|
||||||
|
|
||||||
def get_alpaca_dataset(eos_token, dataset_size=500):
|
def get_alpaca_dataset(eos_token, dataset_size=512):
|
||||||
# Preprocess the dataset
|
# Preprocess the dataset
|
||||||
def preprocess(x):
|
def preprocess(x):
|
||||||
texts = [
|
texts = [
|
||||||
@ -67,15 +67,14 @@ def main(args):
|
|||||||
args.model_name,
|
args.model_name,
|
||||||
quantization_config=quantization_config,
|
quantization_config=quantization_config,
|
||||||
dtype=args.dtype,
|
dtype=args.dtype,
|
||||||
device_map=device_map_config,
|
device_map="cuda",
|
||||||
trust_remote_code=True
|
|
||||||
)
|
)
|
||||||
tokenizer = AutoTokenizer.from_pretrained(args.model_name, trust_remote_code=True)
|
tokenizer = AutoTokenizer.from_pretrained(args.model_name)
|
||||||
tokenizer.pad_token = tokenizer.eos_token
|
tokenizer.pad_token = tokenizer.eos_token
|
||||||
|
|
||||||
# Prepare model for QLoRA training
|
# Prepare model for QLoRA training
|
||||||
print(f"Preparing model for QLoRA (4-bit) with rank {args.lora_rank}...")
|
print(f"Preparing model for QLoRA (4-bit) with rank {args.lora_rank}...")
|
||||||
# model = prepare_model_for_kbit_training(model)
|
model = prepare_model_for_kbit_training(model)
|
||||||
|
|
||||||
peft_config = LoraConfig(
|
peft_config = LoraConfig(
|
||||||
r=args.lora_rank,
|
r=args.lora_rank,
|
||||||
@ -96,7 +95,7 @@ def main(args):
|
|||||||
# Configure the SFT config
|
# Configure the SFT config
|
||||||
config = {
|
config = {
|
||||||
"per_device_train_batch_size": args.batch_size,
|
"per_device_train_batch_size": args.batch_size,
|
||||||
"num_train_epochs": 0.01, # Warmup epoch
|
"num_train_epochs": args.num_epochs,
|
||||||
"gradient_accumulation_steps": args.gradient_accumulation_steps,
|
"gradient_accumulation_steps": args.gradient_accumulation_steps,
|
||||||
"learning_rate": args.learning_rate,
|
"learning_rate": args.learning_rate,
|
||||||
"optim": "adamw_torch",
|
"optim": "adamw_torch",
|
||||||
@ -106,30 +105,14 @@ def main(args):
|
|||||||
"dataset_text_field": "text",
|
"dataset_text_field": "text",
|
||||||
"packing": False,
|
"packing": False,
|
||||||
"max_length": args.seq_length,
|
"max_length": args.seq_length,
|
||||||
"torch_compile": False,
|
|
||||||
"report_to": "none",
|
"report_to": "none",
|
||||||
"logging_dir": args.log_dir,
|
"logging_dir": args.log_dir,
|
||||||
"logging_steps": args.logging_steps,
|
"logging_steps": args.logging_steps,
|
||||||
"gradient_checkpointing": args.gradient_checkpointing
|
"gradient_checkpointing": args.gradient_checkpointing
|
||||||
}
|
}
|
||||||
|
|
||||||
# Compile model if requested
|
|
||||||
if args.use_torch_compile:
|
|
||||||
print("Compiling model with torch.compile()...")
|
|
||||||
model = torch.compile(model)
|
|
||||||
|
|
||||||
# Warmup for torch compile
|
|
||||||
print("Running warmup for torch.compile()...")
|
|
||||||
SFTTrainer(
|
|
||||||
model=model,
|
|
||||||
processing_class=tokenizer,
|
|
||||||
train_dataset=dataset,
|
|
||||||
args=SFTConfig(**config),
|
|
||||||
).train()
|
|
||||||
|
|
||||||
# Train the model
|
# Train the model
|
||||||
print(f"\nStarting QLoRA fine-tuning for {args.num_epochs} epoch(s)...")
|
print(f"\nStarting QLoRA fine-tuning for {args.num_epochs} epoch(s)...")
|
||||||
config["num_train_epochs"] = args.num_epochs
|
|
||||||
config["report_to"] = "tensorboard"
|
config["report_to"] = "tensorboard"
|
||||||
|
|
||||||
trainer = SFTTrainer(
|
trainer = SFTTrainer(
|
||||||
@ -164,7 +147,7 @@ def parse_arguments():
|
|||||||
parser = argparse.ArgumentParser(description="Llama 3.1 70B Fine-tuning with QLoRA")
|
parser = argparse.ArgumentParser(description="Llama 3.1 70B Fine-tuning with QLoRA")
|
||||||
|
|
||||||
# Model configuration
|
# Model configuration
|
||||||
parser.add_argument("--model_name", type=str, default="meta-llama/Llama-3.1-70B-Instruct",
|
parser.add_argument("--model_name", type=str, default="unsloth/Meta-Llama-3.1-70B-bnb-4bit",
|
||||||
help="Model name or path")
|
help="Model name or path")
|
||||||
parser.add_argument("--dtype", type=str, default="bfloat16",
|
parser.add_argument("--dtype", type=str, default="bfloat16",
|
||||||
help="Model dtype (e.g., float32, float16, bfloat16)")
|
help="Model dtype (e.g., float32, float16, bfloat16)")
|
||||||
@ -190,7 +173,7 @@ def parse_arguments():
|
|||||||
help="LoRA rank")
|
help="LoRA rank")
|
||||||
|
|
||||||
# Dataset configuration
|
# Dataset configuration
|
||||||
parser.add_argument("--dataset_size", type=int, default=500,
|
parser.add_argument("--dataset_size", type=int, default=512,
|
||||||
help="Number of samples to use from dataset")
|
help="Number of samples to use from dataset")
|
||||||
|
|
||||||
# Logging configuration
|
# Logging configuration
|
||||||
@ -199,12 +182,6 @@ def parse_arguments():
|
|||||||
parser.add_argument("--log_dir", type=str, default="logs",
|
parser.add_argument("--log_dir", type=str, default="logs",
|
||||||
help="Directory for logs")
|
help="Directory for logs")
|
||||||
|
|
||||||
# Compilation and saving
|
|
||||||
parser.add_argument("--use_torch_compile", action="store_true",
|
|
||||||
help="Use torch.compile() for faster training")
|
|
||||||
parser.add_argument("--output_dir", type=str, default=None,
|
|
||||||
help="Directory to save the fine-tuned model")
|
|
||||||
|
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
@ -224,7 +201,6 @@ if __name__ == "__main__":
|
|||||||
print(f"LoRA rank: {args.lora_rank}")
|
print(f"LoRA rank: {args.lora_rank}")
|
||||||
print(f"Dataset size: {args.dataset_size}")
|
print(f"Dataset size: {args.dataset_size}")
|
||||||
print(f"Gradient checkpointing: {args.gradient_checkpointing}")
|
print(f"Gradient checkpointing: {args.gradient_checkpointing}")
|
||||||
print(f"Torch compile: {args.use_torch_compile}")
|
|
||||||
print(f"{'='*60}\n")
|
print(f"{'='*60}\n")
|
||||||
|
|
||||||
main(args)
|
main(args)
|
||||||
|
|||||||
@ -31,7 +31,7 @@ ALPACA_PROMPT_TEMPLATE = """Below is an instruction that describes a task, paire
|
|||||||
|
|
||||||
### Response: {}"""
|
### Response: {}"""
|
||||||
|
|
||||||
def get_alpaca_dataset(eos_token, dataset_size=500):
|
def get_alpaca_dataset(eos_token, dataset_size=512):
|
||||||
# Preprocess the dataset
|
# Preprocess the dataset
|
||||||
def preprocess(x):
|
def preprocess(x):
|
||||||
texts = [
|
texts = [
|
||||||
@ -83,14 +83,12 @@ def main(args):
|
|||||||
"dataset_text_field": "text",
|
"dataset_text_field": "text",
|
||||||
"packing": False,
|
"packing": False,
|
||||||
"max_length": args.seq_length,
|
"max_length": args.seq_length,
|
||||||
"torch_compile": False,
|
|
||||||
"report_to": "none",
|
"report_to": "none",
|
||||||
"logging_dir": args.log_dir,
|
"logging_dir": args.log_dir,
|
||||||
"logging_steps": args.logging_steps
|
"logging_steps": args.logging_steps
|
||||||
}
|
}
|
||||||
|
|
||||||
# Compile model if requested
|
# Compile model for faster training
|
||||||
if args.use_torch_compile:
|
|
||||||
print("Compiling model with torch.compile()...")
|
print("Compiling model with torch.compile()...")
|
||||||
model = torch.compile(model)
|
model = torch.compile(model)
|
||||||
|
|
||||||
@ -138,7 +136,7 @@ def parse_arguments():
|
|||||||
help="Model dtype")
|
help="Model dtype")
|
||||||
|
|
||||||
# Training configuration
|
# Training configuration
|
||||||
parser.add_argument("--batch_size", type=int, default=4,
|
parser.add_argument("--batch_size", type=int, default=8,
|
||||||
help="Per device training batch size")
|
help="Per device training batch size")
|
||||||
parser.add_argument("--seq_length", type=int, default=2048,
|
parser.add_argument("--seq_length", type=int, default=2048,
|
||||||
help="Maximum sequence length")
|
help="Maximum sequence length")
|
||||||
@ -154,7 +152,7 @@ def parse_arguments():
|
|||||||
help="LoRA rank")
|
help="LoRA rank")
|
||||||
|
|
||||||
# Dataset configuration
|
# Dataset configuration
|
||||||
parser.add_argument("--dataset_size", type=int, default=500,
|
parser.add_argument("--dataset_size", type=int, default=512,
|
||||||
help="Number of samples to use from dataset")
|
help="Number of samples to use from dataset")
|
||||||
|
|
||||||
# Logging configuration
|
# Logging configuration
|
||||||
@ -162,9 +160,6 @@ def parse_arguments():
|
|||||||
help="Log every N steps")
|
help="Log every N steps")
|
||||||
parser.add_argument("--log_dir", type=str, default="logs",
|
parser.add_argument("--log_dir", type=str, default="logs",
|
||||||
help="Directory for logs")
|
help="Directory for logs")
|
||||||
# Compilation
|
|
||||||
parser.add_argument("--use_torch_compile", action="store_true",
|
|
||||||
help="Use torch.compile() for faster training")
|
|
||||||
|
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
@ -181,7 +176,6 @@ if __name__ == "__main__":
|
|||||||
print(f"Learning rate: {args.learning_rate}")
|
print(f"Learning rate: {args.learning_rate}")
|
||||||
print(f"LoRA rank: {args.lora_rank}")
|
print(f"LoRA rank: {args.lora_rank}")
|
||||||
print(f"Dataset size: {args.dataset_size}")
|
print(f"Dataset size: {args.dataset_size}")
|
||||||
print(f"Torch compile: {args.use_torch_compile}")
|
|
||||||
print(f"{'='*60}\n")
|
print(f"{'='*60}\n")
|
||||||
|
|
||||||
main(args)
|
main(args)
|
||||||
Loading…
Reference in New Issue
Block a user