---
# Fine-tuning configuration (GRPO + LoRA) for a Qwen2.5-VL vision-language model.
# Reconstructed into block style: the original file was collapsed onto one line,
# which does not parse as the intended nested mapping.

data:
  dataset_id: data

# Training hyperparameters. correctness_reward / format_reward are the GRPO
# reward weights; num_generations is the number of sampled completions per
# prompt used for the group-relative advantage.
hyperparameters:
  batch_size: 1
  correctness_reward: 5.0
  enable_grpo: true
  epochs: 2
  format_reward: 2.0
  learning_rate: 1.0e-05
  num_generations: 4
  optimizer: adamw_torch
  output_dir: saved_model

# Model and adapter settings. All four module groups (attention, language,
# MLP, vision) are trainable; LoRA is enabled, QLoRA (4-bit) is not.
model:
  finetune_attention_modules: true
  finetune_language_layers: true
  finetune_mlp_modules: true
  finetune_vision_layers: true
  lora_config:
    alpha: 64
    dropout: 0.05
    rank: 16
  max_seq_length: 16384
  model_id: unsloth/Qwen2.5-VL-7B-Instruct
  use_lora: true
  use_qlora: false