# MODEL_VERSION="llama-2-7b-chat"
################## LLaMA-2 ##################

+PROMPT_VERSION="llava_llama_2"
+MODEL_VERSION=mistralai/Mistral-7B-v0.1
+
+
+
deepspeed llava/train/train_mem.py \
    --deepspeed ./scripts/zero2.json \
    --model_name_or_path ./checkpoints/$MODEL_VERSION \
    --version $PROMPT_VERSION \
-    --data_path ./playground/data/llava_instruct_80k.json \
-    --image_folder /path/to/coco/train2017 \
+    --data_path finetune_data/llava_instruct_150k.json \
+    --image_folder finetune_data/images \
    --vision_tower openai/clip-vit-large-patch14 \
    --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --bf16 True \
-    --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune \
-    --num_train_epochs 1 \
+    --output_dir ../n6/checkpoints/llava-$MODEL_VERSION-finetune \
+    --num_train_epochs 3 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 1 \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
-    --save_steps 50000 \
+    --save_steps 5000 \
    --save_total_limit 1 \
    --learning_rate 2e-5 \
-    --weight_decay 0. \
+    --weight_decay 0.01 \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
-    --model_max_length 2048 \
+    --model_max_length 8000 \
    --gradient_checkpointing True \
-    --dataloader_num_workers 4 \
+    --dataloader_num_workers 8 \
    --lazy_preprocess True \
    --report_to wandb
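Note that MODEL_VERSION now contains a slash, so --model_name_or_path resolves to the nested directory ./checkpoints/mistralai/Mistral-7B-v0.1 (and the projector is expected under ./checkpoints/llava-$MODEL_VERSION-pretrain/). A minimal sketch of pre-staging the base weights into that layout, assuming they are fetched from the Hugging Face Hub with huggingface-cli (not something this commit sets up):

# Sketch only: stage the base model so ./checkpoints/$MODEL_VERSION exists before training.
# The download tool and local layout are assumptions; the paths mirror the script above.
MODEL_VERSION=mistralai/Mistral-7B-v0.1
mkdir -p ./checkpoints/$MODEL_VERSION
huggingface-cli download $MODEL_VERSION --local-dir ./checkpoints/$MODEL_VERSION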