指令微调:让模型学会听从指令
---
title: "指令微调:让模型学会听从指令"
description: "掌握指令微调的数据构建、训练策略和评估方法,提升模型的指令遵循能力"
tags: ["指令微调", "Instruction Tuning", "监督学习", "任务泛化"]
category: "llm"
icon: "🧠"
---
指令微调:让模型学会听从指令
指令微调简介
指令微调(Instruction Tuning)是一种监督微调方法,通过在(指令, 响应)对上训练模型,使其学会遵循各种自然语言指令。指令微调是将预训练模型转变为有用助手的关键步骤。
指令微调的核心价值:
- 泛化能力:学会处理未见过的指令类型
- 格式遵循:输出符合预期格式的响应
- 安全对齐:拒绝有害请求,提供安全回答
- 任务适应:在保持通用能力的同时适应特定任务
数据格式
Alpaca格式
[
{
"instruction": "将以下文本翻译成英文",
"input": "今天天气很好",
"output": "The weather is nice today."
},
{
"instruction": "总结以下文章的主要观点",
"input": "",
"output": "文章主要讨论了..."
}
]
ShareGPT格式
[
{
"conversations": [
{"from": "human", "value": "什么是机器学习?"},
{"from": "gpt", "value": "机器学习是人工智能的一个分支..."},
{"from": "human", "value": "它有哪些应用?"},
{"from": "gpt", "value": "机器学习广泛应用于..."}
]
}
]
多轮对话格式
{
"messages": [
{"role": "system", "content": "你是一个有帮助的助手"},
{"role": "user", "content": "帮我写一首诗"},
{"role": "assistant", "content": "春风拂面柳絮飞..."},
{"role": "user", "content": "再写一首关于夏天的"},
{"role": "assistant", "content": "夏日炎炎蝉鸣声..."}
]
}
数据构建
使用GPT-4生成指令数据
from openai import OpenAI
# Shared OpenAI client; the data-generation helpers in this file call it
# through the module-level name `client`.
client = OpenAI()
def generate_instruction_data(topic, num_samples=10):
    """Generate instruction/response pairs about ``topic`` with GPT-4.

    Args:
        topic: Subject the generated instructions should cover.
        num_samples: Number of instruction-response pairs requested in the
            prompt.

    Returns:
        The model's reply parsed as JSON. The prompt asks for entries with
        ``instruction``, ``input`` and ``output`` fields, but the exact
        top-level shape is whatever the model returns.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON (this is a
            ``ValueError`` subclass).
    """
    # Local import keeps this snippet self-contained.
    import json

    prompt = f"""为以下主题生成{num_samples}个指令-响应对:
主题:{topic}
输出JSON格式,每个包含instruction, input, output字段。
确保指令多样化,包括:
- 解释概念
- 分析问题
- 提供示例
- 解决方案
"""
    # NOTE(review): JSON mode (`response_format`) is only accepted by some
    # chat models -- confirm that the configured "gpt-4" model supports it.
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        response_format={"type": "json_object"},
    )
    content = response.choices[0].message.content
    # Parse with json.loads instead of eval(): eval would execute arbitrary
    # code returned by the model and also rejects JSON literals such as
    # true/false/null.
    return json.loads(content)
# Generate data: request 50 instruction-response pairs on the topic "Python编程".
data = generate_instruction_data("Python编程", 50)
自我指令生成
def self_instruct(seed_tasks, num_instructions=100):
    """Bootstrap new instruction-response pairs from a pool of seed tasks.

    For every requested instruction, three seed tasks are sampled at random,
    shown to the chat model as examples, and the model's reply is parsed into
    a new pair.

    Args:
        seed_tasks: Sequence of example tasks; at least three are required
            whenever ``num_instructions`` is positive.
        num_instructions: Number of new pairs to generate.

    Returns:
        A list with one parsed result per generated instruction.

    Raises:
        ValueError: If fewer than three seed tasks are supplied while at
            least one instruction is requested.
    """
    # Local import: the module never imports `random` at the top level, so
    # the original code failed with NameError on the first iteration.
    import random

    examples_per_prompt = 3
    if num_instructions > 0 and len(seed_tasks) < examples_per_prompt:
        raise ValueError(
            f"self_instruct needs at least {examples_per_prompt} seed tasks, "
            f"got {len(seed_tasks)}"
        )

    generated = []
    for _ in range(num_instructions):
        # Sample few-shot examples from the seed tasks.
        sampled = random.sample(seed_tasks, examples_per_prompt)
        prompt = f"""基于以下示例,生成一个新的指令-响应对:
示例1:{sampled[0]}
示例2:{sampled[1]}
示例3:{sampled[2]}
生成新的指令和响应:"""
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
        )
        # NOTE(review): `parse_response` is not defined in the visible
        # source; it must be provided elsewhere before this function is used.
        generated.append(parse_response(response.choices[0].message.content))
    return generated
训练实现
数据预处理
from transformers import AutoTokenizer
# Tokenizer of the base model that is being instruction-tuned.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
# preprocess_function pads every sample with padding="max_length", which
# needs a pad token. Fall back to the EOS token when the tokenizer does not
# define one (padding positions are told apart through the attention mask).
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
def format_instruction(sample):
    """Render one sample as an Alpaca-style training prompt.

    The text always contains an ``### Instruction:`` section and ends with a
    ``### Response:`` section holding the expected output. An ``### Input:``
    section is placed between them only when the sample has a non-empty
    ``input`` value.
    """
    has_input = bool(sample.get("input"))
    sections = [f"### Instruction:\n{sample['instruction']}"]
    if has_input:
        sections.append(f"### Input:\n{sample['input']}")
    sections.append(f"### Response:\n{sample['output']}")
    return "\n".join(sections)
def preprocess_function(examples, max_length=2048):
    """Tokenize instruction samples and build response-only labels.

    Args:
        examples: Either an iterable of sample dicts, or a mapping from
            column name to a list of values (column-oriented batch).
        max_length: Length every sequence is truncated and padded to.

    Returns:
        The tokenizer output with an extra ``"labels"`` entry. Labels equal
        ``input_ids`` on response tokens and ``-100`` on prompt and padding
        tokens, so only the response contributes to the loss.
    """
    from collections.abc import Mapping

    ignore_index = -100  # label value skipped by the cross-entropy loss

    # Accept column-oriented batches as well as a plain list of samples.
    if isinstance(examples, Mapping):
        columns = list(examples.keys())
        rows = [
            dict(zip(columns, values))
            for values in zip(*(examples[column] for column in columns))
        ]
    else:
        rows = list(examples)

    texts = [format_instruction(row) for row in rows]
    tokenized = tokenizer(
        texts,
        truncation=True,
        max_length=max_length,
        padding="max_length",
    )

    # format_instruction ends each text with the sample's output, so the
    # prompt is the text with that suffix removed. If that does not hold,
    # fall back to an empty prompt.
    prompts = []
    for row, text in zip(rows, texts):
        answer = f"{row['output']}"
        if text.endswith(answer):
            prompts.append(text[: len(text) - len(answer)])
        else:
            prompts.append("")
    prompt_token_ids = tokenizer(
        prompts,
        truncation=True,
        max_length=max_length,
    )["input_ids"]

    # Compute loss on the response only: mask padding (attention mask 0) and
    # the leading prompt tokens. Walking the attention mask keeps this
    # correct for both left- and right-padded sequences.
    labels = []
    for input_ids, attention_mask, prompt_ids in zip(
        tokenized["input_ids"], tokenized["attention_mask"], prompt_token_ids
    ):
        prompt_tokens_left = len(prompt_ids)
        sequence_labels = []
        for token_id, is_real in zip(input_ids, attention_mask):
            if not is_real:
                sequence_labels.append(ignore_index)
            elif prompt_tokens_left > 0:
                sequence_labels.append(ignore_index)
                prompt_tokens_left -= 1
            else:
                sequence_labels.append(token_id)
        labels.append(sequence_labels)

    # NOTE(review): no EOS token is appended to the response text here;
    # confirm whether the model is expected to learn when to stop.
    tokenized["labels"] = labels
    return tokenized
训练配置
from transformers import TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
# LoRA configuration: adapters are attached to the four attention projection
# modules listed in `target_modules`.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
# NOTE(review): `model` is not defined anywhere in the visible source; the
# base model must be loaded before this line.
model = get_peft_model(model, lora_config)
# Training arguments
training_args = TrainingArguments(
    output_dir="./instruction_tuning",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    weight_decay=0.01,
    warmup_steps=100,
    logging_steps=10,
    save_steps=500,
    fp16=True,
    # NOTE(review): presumably this optimizer needs the bitsandbytes
    # package to be installed -- confirm.
    optim="paged_adamw_8bit"
)
# NOTE(review): `dataset` is not defined in the visible source either; it is
# presumably the training data after preprocess_function -- confirm.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer
)
trainer.train()
数据质量策略
多样性保证
def ensure_diversity(data, min_categories=10):
    """Return a category-balanced random subset of ``data``.

    Items are grouped by their ``"category"`` key (``"general"`` when the key
    is missing). Each category contributes at most
    ``len(data) // number_of_categories`` randomly chosen items.

    Args:
        data: Sized collection of dict samples.
        min_categories: Minimum number of distinct categories expected. A
            ``UserWarning`` is emitted when fewer are found; the data is
            still balanced and returned.

    Returns:
        A new list with the sampled items; empty when ``data`` is empty.
    """
    # Local imports: the module never imports `random` at the top level.
    import random
    import warnings
    from collections import defaultdict

    # Empty input would otherwise divide by zero below.
    if not data:
        return []

    categories = defaultdict(list)
    for item in data:
        categories[item.get("category", "general")].append(item)

    # Honour `min_categories` without breaking callers: warn, do not raise.
    if len(categories) < min_categories:
        warnings.warn(
            f"only {len(categories)} instruction categories found, "
            f"expected at least {min_categories}",
            stacklevel=2,
        )

    # Balanced sampling: the same quota for every category, capped by the
    # number of items the category actually has.
    samples_per_category = len(data) // len(categories)
    balanced_data = []
    for items in categories.values():
        quota = min(samples_per_category, len(items))
        balanced_data.extend(random.sample(items, quota))
    return balanced_data
质量评估
def evaluate_instruction_quality(data):
    """Score instruction samples with simple heuristics and return the mean.

    Each sample earns one point per satisfied check (0-4 in total):
    instruction length strictly between 10 and 200 characters, output length
    strictly between 50 and 1000 characters, output containing one of the
    "concrete detail" keywords, and instruction phrased as a question or a
    polite request.

    Args:
        data: Sized collection of dicts with ``"instruction"`` and
            ``"output"`` strings.

    Returns:
        The average score over all samples, or ``0.0`` for empty ``data``.
    """
    # Avoid dividing by zero on an empty dataset.
    if not data:
        return 0.0

    detail_keywords = ("例如", "比如", "具体")
    # Chinese text normally uses the full-width question mark, so accept it
    # alongside the ASCII one.
    request_markers = ("?", "?", "请")

    scores = []
    for item in data:
        instruction = item["instruction"]
        output = item["output"]
        score = 0
        # Instruction length
        if 10 < len(instruction) < 200:
            score += 1
        # Response length
        if 50 < len(output) < 1000:
            score += 1
        # Response contains concrete information
        if any(keyword in output for keyword in detail_keywords):
            score += 1
        # Instruction is explicit (a question or a request)
        if any(marker in instruction for marker in request_markers):
            score += 1
        scores.append(score)
    return sum(scores) / len(scores)
评估方法
def evaluate_model(model, tokenizer, test_data):
    """Generate a response for every test sample and collect the results.

    Args:
        model: Causal language model exposing ``generate``.
        tokenizer: Tokenizer matching ``model``.
        test_data: Iterable of dicts with ``"instruction"`` and ``"output"``
            keys and an optional ``"input"`` key.

    Returns:
        A list of dicts with the keys ``"instruction"``, ``"expected"`` and
        ``"generated"``, one per test sample.
    """
    results = []
    for item in test_data:
        # Build the prompt in the same layout as the training text, including
        # the optional Input section, and stop right after the Response
        # header so the model writes the answer.
        prompt_sections = [f"### Instruction:\n{item['instruction']}"]
        if item.get("input"):
            prompt_sections.append(f"### Input:\n{item['input']}")
        prompt_sections.append("### Response:\n")
        prompt = "\n".join(prompt_sections)

        inputs = tokenizer(prompt, return_tensors="pt")
        # Keep the inputs on the same device as the model when it exposes one.
        device = getattr(model, "device", None)
        if device is not None:
            inputs = inputs.to(device)

        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            # temperature only takes effect when sampling is enabled.
            do_sample=True,
            temperature=0.7,
        )
        # A causal LM returns prompt + continuation; drop the prompt tokens
        # so "generated" holds only the model's answer.
        prompt_length = inputs["input_ids"].shape[1]
        response = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )
        results.append({
            "instruction": item["instruction"],
            "expected": item["output"],
            "generated": response,
        })
    return results
最佳实践
- 数据质量优先:高质量的10K数据优于低质量的100K数据
- 指令多样性:覆盖不同任务类型和难度级别
- 格式一致性:统一指令格式,避免模型混淆
- 渐进式训练:可以先用简单指令预热,再用复杂指令训练
- 持续评估:定期评估模型在各类指令上的表现
指令微调是将通用预训练模型转变为有用助手的关键步骤。