---
# Fine-tuning run configuration: dataset, training hyperparameters, and
# model / LoRA adapter settings.
#
# NOTE(review): this document was reconstructed from an extraction in which
# per-line revision timestamps and stray `|` separator lines were interleaved
# with the keys and all indentation was lost. The nesting below is inferred
# from the section-header keys and the alphabetical key order inside each
# section; confirm it against the schema of the loader that consumes it.
# NOTE(review): the removed timestamps suggest that batch_size,
# num_generations, steps and lora_config.rank were last changed on
# 2025-10-06 and everything else on 2025-10-04; verify in version control.

data:
  dataset_id: data

hyperparameters:
  batch_size: 4
  correctness_reward: 5.0
  enable_grpo: true
  format_reward: 2.0
  # Dotted mantissa with a signed exponent, so YAML 1.1 loaders also read
  # this as a float rather than a string.
  learning_rate: 1.0e-05
  num_generations: 2
  optimizer: adamw_torch
  # NOTE(review): relative path; presumably resolved against the working
  # directory of the training process. Confirm with the consumer.
  output_dir: saved_model
  steps: 5

model:
  finetune_attention_modules: true
  finetune_language_layers: true
  finetune_mlp_modules: true
  finetune_vision_layers: true
  lora_config:
    alpha: 64
    dropout: 0.05
    rank: 32
  max_seq_length: 16384
  model_id: unsloth/Qwen2.5-VL-7B-Instruct
  use_lora: true