思维链数据
---
title: "思维链数据"
description: "详细介绍思维链(Chain-of-Thought)推理数据的构建方法、标注技术和应用策略"
tags: ["思维链", "CoT数据", "链式推理", "推理数据"]
category: "llm"
icon: "🧠"
---
# 思维链数据
## 思维链概述
思维链(Chain-of-Thought, CoT)是一种让大语言模型展示推理过程的技术。通过在训练数据中包含详细的推理步骤,模型可以学会分步解决问题,而不是直接给出答案。CoT数据对于提升模型的推理能力、可解释性和准确性至关重要。
CoT的核心思想是:
- 分步推理:将复杂问题分解为简单步骤
- 过程可见:展示中间推理过程
- 错误可追溯:便于发现和纠正推理错误
## CoT数据格式
### 基本格式结构
# Standard layout of a single chain-of-thought (CoT) data record.
cot_sample = dict(
    instruction="计算 23 + 45 的结果",
    input="",
    # Free-text answer; the adjacent literals concatenate into one string.
    output=(
        "让我一步步计算:\n\n"
        "1. 首先计算个位数:3 + 5 = 8\n"
        "2. 然后计算十位数:2 + 4 = 6\n"
        "3. 组合结果:68\n\n"
        "所以,23 + 45 = 68"
    ),
    # Structured reasoning trace; steps are numbered from 1 in list order.
    chain_of_thought=[
        {"step": number, "reasoning": reasoning, "result": result}
        for number, (reasoning, result) in enumerate(
            [
                ("分解数字为个位和十位", "23=20+3, 45=40+5"),
                ("计算个位数之和", "3+5=8"),
                ("计算十位数之和", "20+40=60"),
                ("组合结果", "60+8=68"),
            ],
            start=1,
        )
    ],
    final_answer="68",
    difficulty="easy",
    domain="math",
)
### 多种CoT变体
# Prompt templates for the CoT variants, keyed by variant name. Each template
# is a plain string with ``{placeholder}`` fields (not an f-string).
cot_variants = {
    variant: {"description": summary, "template": layout}
    for variant, summary, layout in (
        (
            "standard",
            "标准CoT,展示完整推理过程",
            "问题:{question}\n\n让我一步步思考:\n{reasoning_steps}\n\n答案:{answer}",
        ),
        (
            "self_consistency",
            "多路径CoT,展示多种推理方式",
            "问题:{question}\n\n方法一:{path1}\n方法二:{path2}\n综合分析:{synthesis}\n答案:{answer}",
        ),
        (
            "tree_of_thought",
            "树状CoT,展示分支推理",
            "问题:{question}\n\n思考分支1:{branch1}\n思考分支2:{branch2}\n最佳路径:{best_path}\n答案:{answer}",
        ),
        (
            "zero_shot_cot",
            "零样本CoT,直接要求推理",
            "问题:{question}\n\n让我们一步步思考。\n\n答案:{answer}",
        ),
    )
}
## CoT数据生成
### 自动生成CoT数据
class CoTDataGenerator:
    """Generate chain-of-thought (CoT) reasoning for question/answer pairs.

    An LLM client is prompted for numbered reasoning steps, and its reply is
    parsed into a structured list of steps plus a final answer.
    """

    def __init__(self, llm_client):
        """Store the LLM client.

        Args:
            llm_client: Object whose ``generate(prompt)`` method returns the
                model reply as text.
        """
        self.llm = llm_client

    def generate_cot(self, question, answer):
        """Ask the LLM for step-by-step reasoning and parse the reply.

        Args:
            question: Question text inserted into the prompt.
            answer: Known answer inserted into the prompt.

        Returns:
            The dict produced by :meth:`parse_cot_response`.
        """
        prompt = f"""请为以下问题生成详细的思维链推理过程:
问题:{question}
答案:{answer}
要求:
1. 将推理过程分解为清晰的步骤
2. 每个步骤都要有明确的推理依据
3. 展示完整的思考过程
请按照以下格式输出:
第1步:...
第2步:...
...
最终答案:{answer}
"""
        response = self.llm.generate(prompt)
        return self.parse_cot_response(response)

    def parse_cot_response(self, response):
        """Parse a reply made of ``第N步`` lines and a ``最终答案`` line.

        Both the fullwidth colon requested by the prompt and the ASCII colon
        are accepted as the separator. Surrounding whitespace is stripped from
        lines and from the extracted text.

        Args:
            response: Raw reply text; ``None`` is treated as an empty reply.

        Returns:
            A dict with ``steps`` (a list of ``{"step": int, "content": str}``),
            ``final_answer`` (``""`` when no answer line is found) and
            ``raw_response`` (the unmodified input).
        """
        import re

        step_pattern = re.compile(r"^第([^步]*)步(.*)$")
        colon_pattern = re.compile(r"[::]")

        steps = []
        final_answer = ""
        for raw_line in (response or "").strip().split("\n"):
            line = raw_line.strip()
            step_match = step_pattern.match(line)
            if step_match:
                label, rest = step_match.groups()
                try:
                    step_num = int(label)
                except ValueError:
                    # Non-numeric labels such as "第一步": fall back to the
                    # step's position in the reply instead of crashing.
                    step_num = len(steps) + 1
                parts = colon_pattern.split(rest, maxsplit=1)
                # Without a separator, keep whatever follows "步" rather
                # than dropping the step text.
                content = parts[1] if len(parts) == 2 else rest
                steps.append({"step": step_num, "content": content.strip()})
            elif line.startswith("最终答案"):
                parts = colon_pattern.split(line, maxsplit=1)
                final_answer = parts[1].strip() if len(parts) == 2 else ""
        return {
            "steps": steps,
            "final_answer": final_answer,
            "raw_response": response,
        }
### 多样性生成
class DiverseCoTGenerator:
    """Produce several CoT variants of one question using different prompts.

    Each strategy sends its own instruction to the LLM client and returns the
    client's reply unchanged.
    """

    def __init__(self, llm_client):
        """Store the client and register the strategies in a fixed order.

        Args:
            llm_client: Object whose ``generate(prompt)`` method returns the
                model reply.
        """
        self.llm = llm_client
        self.generation_strategies = [
            self.generate_standard_cot,
            self.generate_analogy_cot,
            self.generate_step_by_step_cot,
            self.generate_contradiction_cot,
        ]

    def _ask(self, instruction, closing_label, question, answer):
        """Build the shared prompt layout and return the LLM reply."""
        prompt = f"{instruction}\n\n问题:{question}\n答案:{answer}\n\n{closing_label}"
        return self.llm.generate(prompt)

    def generate_standard_cot(self, question, answer):
        """Request a standard chain-of-thought explanation."""
        return self._ask(
            "请为以下问题生成标准的思维链推理:", "推理过程:", question, answer
        )

    def generate_analogy_cot(self, question, answer):
        """Request reasoning by analogy."""
        return self._ask(
            "请使用类比的方法为以下问题生成推理过程:", "类比推理:", question, answer
        )

    def generate_step_by_step_cot(self, question, answer):
        """Request a decomposition into minimal reasoning steps."""
        return self._ask(
            "请将以下问题分解为最小的推理步骤:", "最小步骤推理:", question, answer
        )

    def generate_contradiction_cot(self, question, answer):
        """Request a proof-by-contradiction style explanation."""
        return self._ask(
            "请使用反证法为以下问题生成推理过程:", "反证推理:", question, answer
        )

    def generate_diverse_cot(self, question, answer, num_variations=3):
        """Run the first ``num_variations`` strategies and collect the replies.

        Args:
            question: Question text passed to every strategy.
            answer: Known answer passed to every strategy.
            num_variations: Number of strategies to run. Values above the
                number of registered strategies are capped; zero or negative
                values yield an empty list.

        Returns:
            A list of LLM replies, one per strategy used, in strategy order.
        """
        # Clamp explicitly: a negative count used as a slice bound would
        # count from the end of the list and still run some strategies.
        count = max(0, min(num_variations, len(self.generation_strategies)))
        return [
            strategy(question, answer)
            for strategy in self.generation_strategies[:count]
        ]
## CoT数据标注
### 标注指南
# Annotation rubric: each criterion has a requirement and a 5-to-1 point
# scale. The level descriptions are listed from the highest score (5) down
# to the lowest (1) and paired with the score labels via ``zip``.
cot_annotation_guide = {
    criterion: {
        "要求": requirement,
        "评分标准": dict(zip(("5分", "4分", "3分", "2分", "1分"), levels)),
    }
    for criterion, requirement, levels in (
        (
            "步骤完整性",
            "推理过程必须包含所有必要步骤",
            (
                "所有步骤完整,无遗漏",
                "基本完整,有轻微遗漏",
                "部分步骤缺失",
                "多个步骤缺失",
                "推理过程严重不完整",
            ),
        ),
        (
            "逻辑正确性",
            "每个推理步骤都必须逻辑正确",
            (
                "所有步骤逻辑正确",
                "基本正确,有轻微错误",
                "有明显逻辑错误",
                "多个步骤有错误",
                "推理逻辑严重错误",
            ),
        ),
        (
            "可读性",
            "推理过程必须清晰易懂",
            (
                "表达清晰,易于理解",
                "基本清晰",
                "有些地方不够清晰",
                "多处表达不清",
                "难以理解",
            ),
        ),
    )
}
### 质量评估
class CoTQualityEvaluator:
    """Heuristic quality scores for a parsed chain-of-thought (CoT).

    Every ``evaluate_*`` method takes a dict with a ``steps`` list (each step
    a dict with a ``content`` string) and returns a float. All scores are
    simple keyword, length and duplicate heuristics over the step text.
    """

    # Connectives taken as a sign that a step explains its reasoning.
    _CONNECTIVES = ("因为", "所以", "因此")
    # Words taken as a sign that the reasoning flags a mistake.
    _ERROR_WORDS = ("错误", "不对")
    # Both colon forms count as a structural marker: the Chinese step text
    # normally uses the fullwidth colon, not the ASCII one.
    _MARKERS = (":", ":")

    def __init__(self):
        """Register the available criteria.

        NOTE: ``evaluate_correctness`` needs a reference answer in addition
        to the CoT, unlike the other three criteria in this list.
        """
        self.evaluation_criteria = [
            self.evaluate_completeness,
            self.evaluate_correctness,
            self.evaluate_readability,
            self.evaluate_efficiency,
        ]

    def evaluate_completeness(self, cot):
        """Score step completeness.

        Half of the score is awarded when any step contains a connective
        (因为 / 所以 / 因此); the other half grows with the number of steps
        and saturates at five steps.

        Returns:
            ``0.0`` when there are no steps, otherwise a value up to ``1.0``.
        """
        steps = cot.get("steps", [])
        if not steps:
            return 0.0
        has_intermediate = any(
            word in step.get("content", "")
            for step in steps
            for word in self._CONNECTIVES
        )
        step_count_score = min(1.0, len(steps) / 5)
        return 0.5 * has_intermediate + 0.5 * step_count_score

    def evaluate_correctness(self, cot, reference_answer):
        """Score correctness against a reference answer.

        Returns:
            ``0.3`` when any step contains an error word (错误 / 不对),
            otherwise ``1.0`` if the final answer matches the reference and
            ``0.5`` if it does not.
        """
        final_answer = cot.get("final_answer", "")
        answer_correct = self.check_answer_correctness(final_answer, reference_answer)
        steps = cot.get("steps", [])
        has_errors = any(
            word in step.get("content", "")
            for step in steps
            for word in self._ERROR_WORDS
        )
        if has_errors:
            return 0.3
        return 1.0 if answer_correct else 0.5

    def evaluate_readability(self, cot):
        """Score readability as the mean of a length score and a marker score.

        The length score is ``1.0`` when the average step length is strictly
        between 10 and 100 characters, else ``0.5``. The marker score is
        ``0.8`` when any step contains a colon (fullwidth or ASCII), else
        ``0.5``.

        Returns:
            ``0.0`` when there are no steps, otherwise the mean of both scores.
        """
        steps = cot.get("steps", [])
        if not steps:
            return 0.0
        avg_length = sum(len(step.get("content", "")) for step in steps) / len(steps)
        length_score = 1.0 if 10 < avg_length < 100 else 0.5
        has_markers = any(
            marker in step.get("content", "")
            for step in steps
            for marker in self._MARKERS
        )
        marker_score = 0.8 if has_markers else 0.5
        return (length_score + marker_score) / 2

    def evaluate_efficiency(self, cot):
        """Score efficiency as the share of steps whose text is unique.

        Returns:
            ``0.0`` when there are no steps, otherwise
            ``distinct step texts / total steps``.
        """
        steps = cot.get("steps", [])
        if not steps:
            return 0.0
        unique_steps = {step.get("content", "") for step in steps}
        return len(unique_steps) / len(steps)

    def check_answer_correctness(self, predicted, reference):
        """Return True when both answers match after trimming whitespace.

        Non-string values (for example a numeric answer) are compared by
        their ``str()`` form; a ``None`` on either side never matches.
        """
        if predicted is None or reference is None:
            return False
        return str(predicted).strip() == str(reference).strip()
## CoT数据应用
### 训练配置
# Training configuration for CoT data.
cot_training_config = dict(
    # Names of the record fields the training setup refers to.
    data_format=dict(
        input_field="instruction",
        output_field="chain_of_thought",
        answer_field="final_answer",
    ),
    training_strategy=dict(
        method="sft",  # supervised fine-tuning
        loss_function="cross_entropy",
        learning_rate=2e-5,
        epochs=3,
    ),
    evaluation=dict(
        metrics=["accuracy", "reasoning_quality", "step_completeness"],
        test_set_size=0.1,
    ),
)
### CoT推理模板
class CoTInferenceTemplate:
    """Prompt builders for zero-shot, few-shot and self-consistency CoT."""

    def __init__(self, model_client):
        """Store the model client.

        Args:
            model_client: Object whose ``generate(prompt, ...)`` method
                returns the model reply as text.
        """
        self.model = model_client

    def zero_shot_cot(self, question):
        """Append the "think step by step" cue to the question and query the model."""
        prompt = f"{question}\n让我们一步步思考。"
        return self.model.generate(prompt)

    def few_shot_cot(self, question, examples):
        """Query the model with worked examples placed before the question.

        Args:
            question: Question to answer.
            examples: Iterable of dicts with ``question``, ``reasoning`` and
                ``answer`` keys.

        Returns:
            The model reply.
        """
        example_text = "\n\n".join(
            f"问题:{ex['question']}\n思考过程:{ex['reasoning']}\n答案:{ex['answer']}"
            for ex in examples
        )
        prompt = (
            "以下是几个示例:\n"
            f"{example_text}\n"
            "现在请回答以下问题:\n"
            f"问题:{question}\n"
            "思考过程:"
        )
        return self.model.generate(prompt)

    def self_consistency_cot(self, question, num_samples=5):
        """Sample several replies at temperature 0.7 and pick the most common answer.

        Args:
            question: Question to answer.
            num_samples: Number of replies to sample.

        Returns:
            The result of :meth:`select_most_consistent` over the samples.
        """
        # The prompt is the same for every sample, so build it once.
        prompt = f"{question}\n让我们用一种新的方式思考。"
        responses = [
            self.model.generate(prompt, temperature=0.7)
            for _ in range(num_samples)
        ]
        return self.select_most_consistent(responses)

    def select_most_consistent(self, responses):
        """Return the most frequent extracted answer among ``responses``.

        An answer is the stripped text after the last ``答案`` marker followed
        by a colon (fullwidth or ASCII). Ties go to the answer seen first.

        Args:
            responses: List of reply strings.

        Returns:
            The most common extracted answer. If no reply contains an answer
            marker, the first reply is returned unchanged; if ``responses`` is
            empty, ``""`` is returned.
        """
        from collections import Counter
        import re

        if not responses:
            # Nothing was sampled (e.g. num_samples=0); indexing the first
            # reply below would raise IndexError.
            return ""

        marker = re.compile(r"答案\s*[::]")
        answers = [
            marker.split(response)[-1].strip()
            for response in responses
            if marker.search(response)
        ]
        if answers:
            return Counter(answers).most_common(1)[0][0]
        return responses[0]
## CoT数据质量保证
### 质量检查流程
class CoTQualityPipeline:
    """Run format, completeness, consistency and safety checks on a CoT sample.

    A sample is a dict with ``instruction``, ``chain_of_thought`` (a list of
    steps) and ``final_answer``. Step text is read from the ``content``,
    ``reasoning`` and ``result`` keys, so both step layouts are covered:
    ``{"step", "content"}`` and ``{"step", "reasoning", "result"}``.
    """

    # Step keys that may carry text, in the order they are concatenated.
    _STEP_TEXT_FIELDS = ("content", "reasoning", "result")
    # Substrings that mark a step as unsafe.
    _UNSAFE_PATTERNS = ("暴力", "歧视", "色情", "违法")

    def __init__(self):
        """Register the checks in the order they are run."""
        self.checks = [
            self.check_format,
            self.check_completeness,
            self.check_consistency,
            self.check_safety,
        ]

    def _step_text(self, step):
        """Return all text carried by one step as a single string."""
        if isinstance(step, str):
            return step
        return " ".join(
            str(step[field])
            for field in self._STEP_TEXT_FIELDS
            if step.get(field)
        )

    def validate_cot(self, cot_sample):
        """Run every check and summarise the outcome.

        Returns:
            A dict with ``results`` (check name -> bool), ``validation_score``
            (the fraction of checks that passed) and ``passed``.
        """
        results = {check.__name__: check(cot_sample) for check in self.checks}
        validation_score = sum(results.values()) / len(results)
        return {
            "results": results,
            "validation_score": validation_score,
            # With four equally weighted boolean checks the score moves in
            # steps of 0.25, so only an all-pass result exceeds 0.8.
            "passed": validation_score > 0.8,
        }

    def check_format(self, sample):
        """Return True when every required field is present and non-empty."""
        required_fields = ["instruction", "chain_of_thought", "final_answer"]
        return all(bool(sample.get(field)) for field in required_fields)

    def check_completeness(self, sample):
        """Return True when the chain of thought has at least two steps."""
        cot = sample.get("chain_of_thought") or []
        return len(cot) >= 2

    def check_consistency(self, sample):
        """Return True when the final answer appears in the reasoning text.

        The answer matches if it occurs verbatim in the combined step text or
        if any of its whitespace-separated parts does. A missing or empty
        answer is treated as inconsistent. Non-string answers are compared by
        their ``str()`` form.
        """
        cot = sample.get("chain_of_thought") or []
        final_answer = sample.get("final_answer", "")
        answer = "" if final_answer is None else str(final_answer).strip()
        if not answer:
            return False
        cot_text = " ".join(self._step_text(step) for step in cot)
        return answer in cot_text or any(
            part in cot_text for part in answer.split()
        )

    def check_safety(self, sample):
        """Return False when any step text contains an unsafe pattern."""
        cot = sample.get("chain_of_thought") or []
        return not any(
            pattern in self._step_text(step)
            for step in cot
            for pattern in self._UNSAFE_PATTERNS
        )
## 实践案例
# CoT data generation configuration.
cot_config = dict(
    generation=dict(
        method="auto",  # "auto" or "manual"
        diversity_strategies=["standard", "analogy", "step_by_step"],
        quality_threshold=0.8,
    ),
    annotation=dict(
        annotators=3,
        agreement_threshold=0.7,
        quality_criteria=["completeness", "correctness", "readability"],
    ),
    training=dict(
        format="instruction_cot",
        loss_weighting="equal",
        evaluation_metrics=["accuracy", "reasoning_quality"],
    ),
)
## 总结
CoT数据是提升LLM推理能力的关键。通过自动生成、人工标注和质量控制,可以构建高质量的CoT数据集。CoT数据的应用需要配合合适的训练策略和推理模板,才能充分发挥其价值。