← 返回首页
🧠

指令微调:让模型学会听从指令

📂 llm ⏱ 3 min 446 words

---
title: "指令微调:让模型学会听从指令"
description: "掌握指令微调的数据构建、训练策略和评估方法,提升模型的指令遵循能力"
tags: ["指令微调", "Instruction Tuning", "监督学习", "任务泛化"]
category: "llm"
icon: "🧠"
---

指令微调:让模型学会听从指令

指令微调简介

指令微调(Instruction Tuning)是一种监督微调方法,通过在(指令, 响应)对上训练模型,使其学会遵循各种自然语言指令。指令微调是将预训练模型转变为有用助手的关键步骤。

指令微调的核心价值在于:提升模型的指令遵循能力,增强模型在未见过任务上的泛化能力,并将通用的预训练模型转变为有用的助手。

数据格式

Alpaca格式

[
    {
        "instruction": "将以下文本翻译成英文",
        "input": "今天天气很好",
        "output": "The weather is nice today."
    },
    {
        "instruction": "总结以下文章的主要观点",
        "input": "",
        "output": "文章主要讨论了..."
    }
]

ShareGPT格式

[
    {
        "conversations": [
            {"from": "human", "value": "什么是机器学习?"},
            {"from": "gpt", "value": "机器学习是人工智能的一个分支..."},
            {"from": "human", "value": "它有哪些应用?"},
            {"from": "gpt", "value": "机器学习广泛应用于..."}
        ]
    }
]

多轮对话格式

{
    "messages": [
        {"role": "system", "content": "你是一个有帮助的助手"},
        {"role": "user", "content": "帮我写一首诗"},
        {"role": "assistant", "content": "春风拂面柳絮飞..."},
        {"role": "user", "content": "再写一首关于夏天的"},
        {"role": "assistant", "content": "夏日炎炎蝉鸣声..."}
    ]
}

数据构建

使用GPT-4生成指令数据

from openai import OpenAI

# Shared OpenAI client used by the data-generation helpers in this file.
# NOTE(review): OpenAI() is called without arguments, so credentials are
# presumably read from the environment (e.g. OPENAI_API_KEY) — confirm.
client = OpenAI()

def generate_instruction_data(topic, num_samples=10):
    """Generate instruction-response pairs for a topic with GPT-4.

    Args:
        topic: Subject the pairs should cover; interpolated into the prompt.
        num_samples: Number of pairs requested in the prompt. The model is not
            guaranteed to return exactly this many.

    Returns:
        The decoded JSON payload of the model reply. Because JSON mode is
        requested, this is normally a dict (a JSON object), not a bare list.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    # Function-scope import keeps this snippet self-contained.
    import json

    prompt = f"""为以下主题生成{num_samples}个指令-响应对:
    
主题:{topic}

输出JSON格式,每个包含instruction, input, output字段。
确保指令多样化,包括:
- 解释概念
- 分析问题
- 提供示例
- 解决方案
"""

    # NOTE(review): JSON mode (response_format) is presumably only supported by
    # newer chat models; confirm that "gpt-4" accepts it in your account.
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        response_format={"type": "json_object"}
    )

    # SECURITY: the reply is untrusted text. It was previously passed to
    # eval(), which executes arbitrary code and also breaks on JSON literals
    # such as true/false/null. Parse it strictly as JSON instead.
    return json.loads(response.choices[0].message.content)

# Generate data: request 50 instruction-response pairs on the topic "Python编程".
data = generate_instruction_data("Python编程", 50)

自我指令生成

def self_instruct(seed_tasks, num_instructions=100):
    """Bootstrap new instruction-response pairs from a pool of seed tasks.

    Each round samples three seed tasks as in-context examples, asks the chat
    model for one new pair, and parses the reply with ``parse_response``.

    Args:
        seed_tasks: Sequence of seed examples; at least three are required
            whenever ``num_instructions`` is positive.
        num_instructions: Number of generation rounds (one API call each).

    Returns:
        List with one parsed result per round, in generation order.

    Raises:
        ValueError: If fewer than three seed tasks are supplied while at least
            one round is requested.
    """
    # Function-scope import: the snippet uses `random` without a visible import.
    import random

    examples_per_prompt = 3  # the prompt template below has exactly three slots
    if num_instructions > 0 and len(seed_tasks) < examples_per_prompt:
        raise ValueError(
            f"self_instruct needs at least {examples_per_prompt} seed tasks, "
            f"got {len(seed_tasks)}"
        )

    generated = []

    for _ in range(num_instructions):
        # Sample distinct seed tasks to serve as few-shot examples.
        sampled = random.sample(seed_tasks, examples_per_prompt)

        prompt = f"""基于以下示例,生成一个新的指令-响应对:

示例1:{sampled[0]}
示例2:{sampled[1]}
示例3:{sampled[2]}

生成新的指令和响应:"""

        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}]
        )

        # NOTE(review): parse_response is not defined in this snippet; it must
        # be provided elsewhere to turn the raw reply into a structured sample.
        generated.append(parse_response(response.choices[0].message.content))

    return generated

训练实现

数据预处理

from transformers import AutoTokenizer

# Module-level tokenizer loaded from the "meta-llama/Llama-2-7b-hf" checkpoint;
# preprocess_function below uses it to tokenize the formatted samples.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

def format_instruction(sample):
    """Render one Alpaca-style sample as a single training string.

    The result always contains an ``### Instruction:`` and a ``### Response:``
    section; an ``### Input:`` section is inserted between them only when the
    sample carries a non-empty ``"input"`` value. Sections are separated by a
    blank line.
    """
    extra_input = sample.get("input")

    sections = [f"### Instruction:\n{sample['instruction']}"]
    if extra_input:
        sections.append(f"### Input:\n{extra_input}")
    sections.append(f"### Response:\n{sample['output']}")

    return "\n\n".join(sections)

def preprocess_function(examples):
    """Tokenize formatted samples and attach causal-LM training labels.

    Args:
        examples: Iterable of sample dicts accepted by ``format_instruction``.

    Returns:
        The tokenizer output (padded/truncated to 2048 tokens) with an added
        ``"labels"`` entry: a copy of ``input_ids`` in which padding positions
        are replaced by -100.
    """
    ignore_index = -100  # label value conventionally skipped by the LM loss

    texts = [format_instruction(x) for x in examples]
    tokenized = tokenizer(
        texts,
        truncation=True,
        max_length=2048,
        padding="max_length"
    )

    # Labels mirror input_ids, but positions with attention_mask == 0 (padding)
    # are masked so the loss is not spent on pad tokens. Previously labels were
    # a plain copy of input_ids, so every padded position was trained on.
    # NOTE(review): prompt tokens still contribute to the loss; restricting the
    # loss to the response part would additionally require masking the prompt
    # span, which this function does not do.
    tokenized["labels"] = [
        [token if keep else ignore_index for token, keep in zip(ids, mask)]
        for ids, mask in zip(tokenized["input_ids"], tokenized["attention_mask"])
    ]
    return tokenized

训练配置

from transformers import TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model

# LoRA configuration
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    # Modules that receive adapters, selected by name (presumably the attention
    # query/value/key/output projections of the base model — confirm).
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the base model with the LoRA configuration above.
# NOTE(review): `model` is not defined anywhere in this snippet; it has to be
# loaded beforehand (presumably a causal LM matching the tokenizer — confirm).
model = get_peft_model(model, lora_config)

# Training arguments
training_args = TrainingArguments(
    output_dir="./instruction_tuning",
    num_train_epochs=3,
    # 4 samples per device x 4 accumulation steps = 16 samples per device
    # per optimizer update.
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    weight_decay=0.01,
    warmup_steps=100,
    logging_steps=10,
    save_steps=500,
    fp16=True,
    # NOTE(review): this optimizer name presumably requires the bitsandbytes
    # package to be installed — confirm.
    optim="paged_adamw_8bit"
)

# NOTE(review): `dataset` is not defined in this snippet; it is presumably the
# training set after applying preprocess_function — confirm.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer
)

# Start fine-tuning with the configuration above.
trainer.train()

数据质量策略

多样性保证

def ensure_diversity(data, min_categories=10):
    """Return a category-balanced random subset of ``data``.

    Items are grouped by their ``"category"`` key (``"general"`` when absent).
    Each category then contributes at most ``len(data) // number_of_categories``
    randomly chosen items, so over-represented categories are down-sampled.

    Args:
        data: Sequence of sample dicts.
        min_categories: Minimum number of distinct categories expected. When
            fewer are present a ``UserWarning`` is emitted; the balanced subset
            is still returned.

    Returns:
        List of sampled items grouped by category in first-seen order; an
        empty list when ``data`` is empty.
    """
    # Function-scope imports: the snippet uses `random` without a visible import.
    import random
    import warnings
    from collections import defaultdict

    # Without this guard an empty input divides by zero categories below.
    if not data:
        return []

    by_category = defaultdict(list)
    for item in data:
        by_category[item.get("category", "general")].append(item)

    # min_categories was previously accepted but never checked.
    if len(by_category) < min_categories:
        warnings.warn(
            f"only {len(by_category)} categories found, "
            f"expected at least {min_categories}",
            stacklevel=2,
        )

    # Balanced sampling: every category gets the same quota, capped by its size.
    samples_per_category = len(data) // len(by_category)

    balanced_data = []
    for items in by_category.values():
        quota = min(samples_per_category, len(items))
        balanced_data.extend(random.sample(items, quota))

    return balanced_data

质量评估

def evaluate_instruction_quality(data):
    """Compute the mean heuristic quality score of instruction samples.

    Each sample earns one point per satisfied check (0-4 points in total):
      * instruction length strictly between 10 and 200 characters,
      * output length strictly between 50 and 1000 characters,
      * output contains a concreteness keyword ("例如", "比如", "具体"),
      * instruction contains a question mark (ASCII or full-width) or "请".

    Args:
        data: Sequence of dicts with ``"instruction"`` and ``"output"`` strings.

    Returns:
        Average score over all samples as a float; 0.0 for empty input.
    """
    # Avoid dividing by zero when there is nothing to score.
    if not data:
        return 0.0

    detail_keywords = ("例如", "比如", "具体")
    # Chinese text normally uses the full-width "？", which the ASCII-only
    # check missed.
    request_markers = ("?", "？", "请")

    total = 0
    for item in data:
        instruction = item["instruction"]
        output = item["output"]
        # Booleans sum as 0/1, so this adds one point per passed check.
        total += sum((
            10 < len(instruction) < 200,
            50 < len(output) < 1000,
            any(keyword in output for keyword in detail_keywords),
            any(marker in instruction for marker in request_markers),
        ))

    return total / len(data)

评估方法

def evaluate_model(model, tokenizer, test_data):
    """Generate a response for every test sample and pair it with the reference.

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``.
        test_data: Iterable of dicts with ``"instruction"`` and ``"output"``
            keys and an optional ``"input"`` key.

    Returns:
        List of dicts with keys ``"instruction"``, ``"expected"`` (reference
        output) and ``"generated"`` (model continuation only, without the
        prompt), one per test sample in input order.
    """
    results = []

    for item in test_data:
        # Build the prompt with the same template used for training, including
        # the optional input section, so evaluation matches the training format.
        if item.get("input"):
            prompt = f"""### Instruction:
{item['instruction']}

### Input:
{item['input']}

### Response:
"""
        else:
            prompt = f"""### Instruction:
{item['instruction']}

### Response:
"""

        # Keep the input tensors on the same device as the model.
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        prompt_length = inputs["input_ids"].shape[1]

        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            # temperature only takes effect when sampling is enabled.
            do_sample=True,
            temperature=0.7
        )

        # generate() returns prompt + continuation for decoder-only models;
        # drop the prompt tokens so "generated" holds only the model's answer.
        response = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )
        results.append({
            "instruction": item["instruction"],
            "expected": item["output"],
            "generated": response
        })

    return results

最佳实践

  1. 数据质量优先:高质量的10K数据优于低质量的100K数据
  2. 指令多样性:覆盖不同任务类型和难度级别
  3. 格式一致性:统一指令格式,避免模型混淆
  4. 渐进式训练:可以先用简单指令预热,再用复杂指令训练
  5. 持续评估:定期评估模型在各类指令上的表现

指令微调是将通用预训练模型转变为有用助手的关键步骤。