base_model: anon8231489123/vicuna-13b-GPTQ-4bit-128g
base_model_config: anon8231489123/vicuna-13b-GPTQ-4bit-128g
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
load_in_8bit: false
load_4bit: true
gptq_groupsize: 128
gptq_model_v1: false
datasets:
  # https://github.com/vaguenebula/AlpacaDataReflect/blob/main/alpaca_reflect_pruned.json
  - path: data/alpaca_reflect_pruned.jsonl
    type: reflection
dataset_prepared_path: data/last_run_prepared
val_set_size: 0.04
adapter: lora
lora_model_dir:
sequence_len: 2048
max_packed_sequence_len: 2048
lora_r: 8
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules:
  - q_proj
  - v_proj
  # - k_proj
  # - o_proj
lora_fan_in_fan_out: false
wandb_project:
wandb_watch:
wandb_run_id:
wandb_log_model: checkpoint
output_dir: ./lora-reflect
batch_size: 8
micro_batch_size: 2
num_epochs: 3
learning_rate: 0.00003
train_on_inputs: false
group_by_length: false
bf16: true
tf32: true
gradient_checkpointing: false
early_stopping_patience: 3
resume_from_checkpoint:
local_rank:
flash_attention: true
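
# Usage sketch (an assumption, not part of the original config): with the
# axolotl repo checked out and its dependencies installed, a config file
# like this one is typically launched via
#   accelerate launch scripts/finetune.py path/to/this/config.yml
# Note that with batch_size: 8 and micro_batch_size: 2, axolotl derives
# gradient accumulation (batch_size // micro_batch_size = 4 steps per
# optimizer update).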