import os

from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset
max_seq_length = 2048
dtype = None          # None lets Unsloth pick bf16 on supported GPUs, fp16 otherwise
load_in_4bit = True   # load the base model in 4-bit to reduce VRAM usage

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "/home/zk/ai/base_model/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. ### Instruction: {} ### Input: {} ### Response: {}"""
EOS_TOKEN = tokenizer.eos_token  # must be appended so the model learns when to stop generating

def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction, input_text, output in zip(instructions, inputs, outputs):
        text = alpaca_prompt.format(instruction, input_text, output) + EOS_TOKEN
        texts.append(text)
    return {"text": texts}
dataset = load_dataset("json", data_files = "/home/zk/ai/dataset/caishui_2011_100hao.json", split="train") dataset = dataset.map(formatting_prompts_func, batched = True)
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,                       # LoRA rank; higher captures more detail but uses more memory
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = True,
    random_state = 3407,
    max_seq_length = max_seq_length,
    use_rslora = False,
    loftq_config = None,
)
trainer = SFTTrainer(
    model = model,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    tokenizer = tokenizer,
    args = TrainingArguments(
        per_device_train_batch_size = 1,
        gradient_accumulation_steps = 4,
        warmup_steps = 2,
        max_steps = 20,            # small step count for a quick test run; increase for real training
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        output_dir = "outputs",
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        learning_rate = 2e-5,
    ),
)
trainer.train()
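# Optional sanity check (a minimal sketch, not part of the original script): run a single
# prompt through the freshly trained model before saving/exporting it. The instruction text
# below is a placeholder; replace it with a question from your own dataset.
FastLanguageModel.for_inference(model)  # switch the Unsloth model into inference mode
test_inputs = tokenizer(
    [alpaca_prompt.format("Write a test instruction here", "", "")],
    return_tensors = "pt",
).to("cuda")
test_outputs = model.generate(**test_inputs, max_new_tokens = 128, use_cache = True)
print(tokenizer.batch_decode(test_outputs, skip_special_tokens = True)[0])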
model.save_pretrained("lora_model")
save_16bit = input("Save the merged model as a 16-bit HF model? (y/n): ")
if save_16bit.lower() == "y":
    # merge the LoRA weights into the base model and write full 16-bit weights to ./outputs
    model.save_pretrained_merged("outputs", tokenizer, save_method = "merged_16bit")
save_gguf = input("Convert to a GGUF model? (y/n): ")
if save_gguf.lower() == "y":
    # assumes the script is run from /home/zk/ai, so the "outputs" directory written above
    # is the same /home/zk/ai/outputs passed to llama.cpp's converter
    os.system(
        "python /home/zk/ai/llama.cpp/convert_hf_to_gguf.py "
        "--outfile /home/zk/ai/gguf_model/lm38b_tax_jzjt.gguf /home/zk/ai/outputs"
    )
quantize_4bit = input("Quantize to a 4-bit GGUF model? (y/n): ")
if quantize_4bit.lower() == "y":
    # quantize the GGUF produced in the previous step; the original pointed at
    # QWQ_tax_jzjt.gguf, which this script never creates
    os.system(
        "/home/zk/ai/llama.cpp/build/bin/llama-quantize "
        "/home/zk/ai/gguf_model/lm38b_tax_jzjt.gguf "
        "/home/zk/ai/gguf_model/lm38b_tax_jzjt-Q4_K_M.gguf Q4_K_M"
    )
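# Optional follow-up (a sketch, assuming llama.cpp was built with the llama-cli binary):
# load the quantized model once to confirm it runs. The prompt string is a placeholder.
run_test = input("Test the quantized model with llama-cli? (y/n): ")
if run_test.lower() == "y":
    os.system(
        "/home/zk/ai/llama.cpp/build/bin/llama-cli "
        "-m /home/zk/ai/gguf_model/lm38b_tax_jzjt-Q4_K_M.gguf "
        "-p \"Hello\" -n 64"
    )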