#
# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
---
# Configuration with two top-level stanzas: `inference` and `train`.
#
# NOTE(review): the nesting below was reconstructed from a
# whitespace-collapsed copy of this file. Confirm the key hierarchy against
# the code that loads it before relying on it.

# Settings for the inference side.
inference:
  # Base model identifier.
  model_id: unsloth/Qwen2.5-VL-7B-Instruct
  # Same value as train.hyperparameters.output_dir; presumably the
  # fine-tuned checkpoint is read from where training wrote it. Keep the
  # two in sync (TODO confirm against the consumer).
  finetuned_model_id: saved_model
  max_seq_length: 8192

# Settings for the training side.
train:
  model:
    model_id: unsloth/Qwen2.5-VL-7B-Instruct
    # Note: larger than inference.max_seq_length (8192).
    max_seq_length: 16384
    use_lora: true
    lora_config:
      rank: 32
      alpha: 64
      dropout: 0.05
    # NOTE(review): these four flags are placed as siblings of
    # `lora_config`; they may instead belong inside it. Verify against the
    # loader.
    finetune_vision_layers: true
    finetune_language_layers: true
    finetune_attention_modules: true
    finetune_mlp_modules: true

  data:
    dataset_id: data

  hyperparameters:
    steps: 100
    batch_size: 4
    enable_grpo: true
    num_generations: 2
    format_reward: 2.0
    # Written with an explicit decimal point in the mantissa: YAML 1.1
    # resolvers (e.g. PyYAML) load a bare `1e-5` as a string, not a float.
    learning_rate: 1.0e-5
    correctness_reward: 5.0
    optimizer: adamw_torch
    # Same value as inference.finetuned_model_id (see note there).
    output_dir: saved_model