LoRA适配器合并
---
title: "LoRA适配器合并"
description: "掌握LoRA适配器的合并技术,包括单适配器合并、多适配器融合和权重调优"
tags: ["LoRA合并", "适配器融合", "权重合并", "模型部署"]
category: "llm"
icon: "🧠"
---
LoRA适配器合并
LoRA合并简介
LoRA合并是将训练好的LoRA适配器权重与基础模型合并的过程。通过合并,可以将多个专业能力集成到一个模型中,同时消除推理时的额外计算开销。LoRA合并是模型部署和定制化的重要步骤。
LoRA合并的优势:
- 推理加速:消除适配器的额外前向传播开销
- 多能力集成:将多个LoRA适配器的能力融合
- 简化部署:只需部署一个合并后的模型
- 灵活调优:通过权重调整控制各适配器的贡献
基础合并
单适配器合并
from peft import PeftModel
from transformers import AutoModelForCausalLM
def merge_single_lora(base_model_path, lora_path, output_path):
    """Merge a single LoRA adapter into its base model and save the result.

    Args:
        base_model_path: Hub id or local path of the base causal LM.
        lora_path: Path of the trained LoRA adapter.
        output_path: Directory the merged model is written to.

    Returns:
        The model returned by ``merge_and_unload()``.
    """
    # Imported locally: the snippet uses ``torch.float16`` but the file never
    # imports ``torch`` before this point.
    import torch

    # Load the base model in half precision.
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_path,
        torch_dtype=torch.float16,
    )

    # Attach the LoRA adapter to the base model.
    model = PeftModel.from_pretrained(base_model, lora_path)

    # Fold the adapter weights into the base weights and drop the wrapper.
    merged_model = model.merge_and_unload()

    # Persist the merged model.
    merged_model.save_pretrained(output_path)
    return merged_model
# Example: merge the math adapter into Llama-2-7B.
merged_model = merge_single_lora(
    "meta-llama/Llama-2-7b-hf",
    "./math_lora",
    "./merged_math_model",
)
使用PEFT库合并
import torch
from peft import AutoPeftModelForCausalLM

# Load the adapter; the base model is resolved from the adapter's config.
model = AutoPeftModelForCausalLM.from_pretrained(
    "./lora_adapter",
    torch_dtype=torch.float16,
)

# from_pretrained() has no ``merge_weights`` option, so the LoRA weights are
# folded into the base model explicitly.
model = model.merge_and_unload()

# Save the merged model.
model.save_pretrained("./merged_model")
多适配器合并
顺序合并
def sequential_merge(base_model_path, lora_paths, output_path):
    """Merge several LoRA adapters into the base model one after another.

    Each adapter is loaded and merged before the next one is loaded, so every
    adapter is applied on top of the result of the previous merge.

    Args:
        base_model_path: Hub id or local path of the base causal LM.
        lora_paths: Adapter paths, in the order they should be merged.
        output_path: Directory the merged model is written to.

    Returns:
        The merged model (the plain base model if ``lora_paths`` is empty).
    """
    model = AutoModelForCausalLM.from_pretrained(base_model_path)

    for lora_path in lora_paths:
        # Merge right away instead of wrapping an existing PeftModel again:
        # re-wrapping loads every adapter under the same default adapter
        # name, so earlier adapters are not reliably preserved.
        model = PeftModel.from_pretrained(model, lora_path)
        model = model.merge_and_unload()

    model.save_pretrained(output_path)
    return model
# Example: merge three task adapters in sequence.
lora_paths = ["./math_lora", "./code_lora", "./writing_lora"]
merged_model = sequential_merge(
    base_model_path="meta-llama/Llama-2-7b-hf",
    lora_paths=lora_paths,
    output_path="./merged_all_model",
)
加权合并
def weighted_merge(base_model_path, lora_paths, weights, output_path):
    """Combine several LoRA adapters with per-adapter weights and merge them.

    Args:
        base_model_path: Hub id or local path of the base causal LM.
        lora_paths: Paths of the adapters to combine.
        weights: One weight per adapter, in the same order as ``lora_paths``.
        output_path: Directory the merged model is written to.

    Returns:
        The model returned by ``merge_and_unload()``.

    Raises:
        ValueError: If ``lora_paths`` is empty or its length differs from
            ``weights``.
    """
    import torch
    from peft import PeftModel
    from transformers import AutoModelForCausalLM

    if not lora_paths:
        raise ValueError("lora_paths must contain at least one adapter")
    if len(weights) != len(lora_paths):
        raise ValueError(
            f"expected {len(lora_paths)} weights, got {len(weights)}"
        )

    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_path,
        torch_dtype=torch.float16,
    )

    adapter_names = [f"adapter_{i}" for i in range(len(lora_paths))]

    # Wrap the base model once with the first adapter, then register the rest
    # on the same PeftModel instead of nesting one wrapper per adapter.
    model = PeftModel.from_pretrained(
        base_model, lora_paths[0], adapter_name=adapter_names[0]
    )
    for name, path in zip(adapter_names[1:], lora_paths[1:]):
        model.load_adapter(path, adapter_name=name)

    # Build the weighted combination as a new adapter named "merged".
    model.add_weighted_adapter(
        adapters=adapter_names,
        weights=weights,
        adapter_name="merged",
    )

    # Activate the combined adapter so it is the one that gets merged.
    model.set_adapter("merged")

    merged_model = model.merge_and_unload()
    merged_model.save_pretrained(output_path)
    return merged_model
# Example: weighted merge of three adapters.
lora_paths = ["./math_lora", "./code_lora", "./writing_lora"]
weights = [0.4, 0.3, 0.3]  # the math adapter matters most
merged_model = weighted_merge(
    base_model_path="meta-llama/Llama-2-7b-hf",
    lora_paths=lora_paths,
    weights=weights,
    output_path="./weighted_merged_model",
)
使用mergekit合并
# mergekit配置文件 (lora_merge.yaml)
"""
models:
  - model: base_model
    parameters:
      weight: 1.0
  - model: lora_math
    parameters:
      weight: 0.5
  - model: lora_code
    parameters:
      weight: 0.3
merge_method: linear
dtype: float16
"""
# 命令行使用
# mergekit-yaml lora_merge.yaml output_model
权重调优
def find_optimal_weights(base_model_path, lora_paths, eval_dataset,
                         weight_range=(0.1, 1.0), step=0.1):
    """Grid-search the adapter weight combination with the best score.

    Every combination of grid values is normalized to sum to 1, merged with
    ``weighted_merge`` and scored with ``evaluate_model``.

    Args:
        base_model_path: Hub id or local path of the base causal LM.
        lora_paths: Paths of the adapters to combine.
        eval_dataset: Dataset passed through to ``evaluate_model``.
        weight_range: ``(low, high)`` bounds of the per-adapter grid, inclusive.
        step: Spacing between grid values; must be positive.

    Returns:
        ``(best_weights, best_score)``. ``best_weights`` is the normalized
        weight list with the highest score.

    Raises:
        ValueError: If ``step`` is not positive or ``weight_range`` is reversed.
    """
    import tempfile
    from itertools import product

    low, high = weight_range
    if step <= 0:
        raise ValueError("step must be positive")
    if high < low:
        raise ValueError("weight_range must be (low, high) with low <= high")

    # Build the grid from a step count. The previous integer arithmetic
    # (int(step * 10)) broke for any step below 0.1.
    n_steps = int(round((high - low) / step))
    weight_values = [round(low + k * step, 10) for k in range(n_steps + 1)]

    # -inf so that a best candidate is reported even when no score is positive.
    best_score = float("-inf")
    best_weights = None
    seen = set()

    for combo in product(weight_values, repeat=len(lora_paths)):
        total = sum(combo)
        if total == 0:
            # An all-zero combination cannot be normalized.
            continue

        # Normalize the weights so they sum to 1.
        weights = [w / total for w in combo]

        # Proportional combinations such as (0.1, 0.1) and (0.2, 0.2)
        # normalize to the same weights; evaluate each only once.
        key = tuple(round(w, 6) for w in weights)
        if key in seen:
            continue
        seen.add(key)

        # weighted_merge always writes the model to disk; use a scratch
        # directory that is removed afterwards so the search does not leave
        # one full model copy per candidate behind.
        with tempfile.TemporaryDirectory(prefix="temp_model_", dir=".") as tmp_dir:
            merged_model = weighted_merge(
                base_model_path, lora_paths, weights, tmp_dir
            )

        score = evaluate_model(merged_model, eval_dataset)
        # Drop the reference before the next candidate is loaded.
        del merged_model

        if score > best_score:
            best_score = score
            best_weights = weights

        print(f"权重: {weights}, 分数: {score:.4f}")

    return best_weights, best_score
部署优化
量化合并
from transformers import BitsAndBytesConfig
import torch
def quantized_merge(base_model_path, lora_paths, weights, output_path):
    """Combine weighted LoRA adapters on a 4-bit quantized base model.

    Args:
        base_model_path: Hub id or local path of the base causal LM.
        lora_paths: Paths of the adapters to combine.
        weights: One weight per adapter, in the same order as ``lora_paths``.
        output_path: Directory the merged model is written to.

    Returns:
        The model returned by ``merge_and_unload()``.

    Raises:
        ValueError: If ``lora_paths`` is empty or its length differs from
            ``weights``.
    """
    if not lora_paths:
        raise ValueError("lora_paths must contain at least one adapter")
    if len(weights) != len(lora_paths):
        raise ValueError(
            f"expected {len(lora_paths)} weights, got {len(weights)}"
        )

    # 4-bit NF4 quantization with float16 compute.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )

    # Load the base model quantized.
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_path,
        quantization_config=bnb_config,
    )

    adapter_names = [f"adapter_{i}" for i in range(len(lora_paths))]

    # Wrap the base model once, then register the remaining adapters on the
    # same PeftModel instead of nesting one wrapper per adapter.
    model = PeftModel.from_pretrained(
        base_model, lora_paths[0], adapter_name=adapter_names[0]
    )
    for name, path in zip(adapter_names[1:], lora_paths[1:]):
        model.load_adapter(path, adapter_name=name)

    model.add_weighted_adapter(
        adapters=adapter_names,
        weights=weights,
        adapter_name="merged",
    )

    # Activate the combined adapter; without this the weighted adapter is
    # created but is not the one that gets merged.
    model.set_adapter("merged")

    # Merge and export the quantized model.
    merged_model = model.merge_and_unload()
    merged_model.save_pretrained(output_path)
    return merged_model
GGUF格式导出
# 使用llama.cpp转换
# python convert_hf_to_gguf.py ./merged_model --outfile merged_model.gguf
# 或使用ollama
# ollama create my_model -f Modelfile
评估合并效果
def evaluate_merged_model(base_model_path, lora_paths, weights, test_data):
    """Merge the adapters with the given weights and score the result per task.

    Args:
        base_model_path: Hub id or local path of the base causal LM; also used
            to load the tokenizer.
        lora_paths: Paths of the adapters to combine.
        weights: One weight per adapter.
        test_data: Mapping of task name to an iterable of samples; each sample
            is indexed with ``"prompt"`` and ``"expected"``.

    Returns:
        Dict mapping each task name to the mean sample score (NaN for a task
        with no samples).
    """
    # Imported locally: the file never imports these names.
    import numpy as np
    from transformers import AutoTokenizer

    # Build the merged model.
    merged_model = weighted_merge(
        base_model_path, lora_paths, weights, "./temp_merged"
    )

    tokenizer = AutoTokenizer.from_pretrained(base_model_path)

    results = {}
    for task_name, task_data in test_data.items():
        scores = []
        for sample in task_data:
            # Tokenize on the same device as the model so generation also
            # works when the model is not on the CPU.
            inputs = tokenizer(sample["prompt"], return_tensors="pt")
            inputs = inputs.to(merged_model.device)

            outputs = merged_model.generate(**inputs, max_new_tokens=256)
            response = tokenizer.decode(outputs[0], skip_special_tokens=True)

            scores.append(evaluate_response(response, sample["expected"]))

        # Avoid numpy's empty-mean warning for tasks without samples.
        results[task_name] = np.mean(scores) if scores else float("nan")

    return results
常见问题
合并后性能下降
# Cause: the adapter weights are poorly chosen
# Fixes:
# 1. Lower each adapter's weight
weights = [0.3, 0.2, 0.2]  # lower the weights
# 2. Use a smaller alpha
# Set a smaller lora_alpha in the LoRA config
# 3. Add more regularization
# Use a higher dropout during training
显存不足
# Fixes:
# 1. Load the model quantized
bnb_config = BitsAndBytesConfig(load_in_4bit=True)
# 2. Merge in batches
def batch_merge(base_model_path, lora_paths, batch_size=2):
    """Merge adapters in batches, saving a checkpoint after each batch.

    Args:
        base_model_path: Hub id or local path of the base causal LM.
        lora_paths: Adapter paths, in the order they should be merged.
        batch_size: Number of adapters merged between two checkpoints.

    Returns:
        The merged model (the plain base model if ``lora_paths`` is empty).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    model = AutoModelForCausalLM.from_pretrained(base_model_path)

    for start in range(0, len(lora_paths), batch_size):
        for lora_path in lora_paths[start:start + batch_size]:
            # Merge each adapter as soon as it is loaded so only one adapter
            # is held at a time and PeftModel wrappers are never nested.
            model = PeftModel.from_pretrained(model, lora_path)
            model = model.merge_and_unload()

        # Intermediate checkpoint of the model merged so far.
        model.save_pretrained(f"./temp_merged_{start}")

    return model
LoRA合并为模型定制和部署提供了灵活高效的解决方案,是LLM应用的重要技术。