百川大模型:Baichuan系列
---
title: "百川大模型:Baichuan系列"
description: "深入了解Baichuan系列模型的特点、架构和中文能力"
tags: ["Baichuan", "百川", "中文LLM", "开源模型"]
category: "llm"
icon: "🧠"
---
百川大模型:Baichuan系列
Baichuan简介
Baichuan(百川大模型)是百川智能开发的开源大语言模型系列。百川模型以其强大的中文能力、高效的架构设计和优秀的开源生态著称,是国内领先的开源LLM之一。
Baichuan的核心优势:
- 中文优化:深度优化的中文处理能力
- 高效架构:ALiBi位置编码等创新技术
- 多规格选择:提供7B、13B、53B等多种规格
- 开源开放:代码采用Apache 2.0许可证,模型权重遵循百川模型社区许可协议(商用需申请授权)
Baichuan架构
ALiBi位置编码
# Baichuan使用ALiBi(Attention with Linear Biases)
# 相比RoPE,ALiBi在长序列泛化上表现更好
def get_alibi_slopes(num_heads):
    """Return the per-head ALiBi slopes as a 1-D tensor of length ``num_heads``.

    The first ``p`` slopes (``p`` = largest power of two not exceeding
    ``num_heads``) form a geometric sequence. When ``num_heads`` is not a
    power of two, the remaining slopes are taken from the odd powers of the
    sequence that would be used for ``2 * p`` heads.
    """

    def _geometric_base(n):
        # Base of the geometric sequence used for a power-of-two head count n.
        return 2 ** (-(2 ** -(math.log2(n) - 3)))

    pow2 = 2 ** math.floor(math.log2(num_heads))
    slopes = torch.pow(_geometric_base(pow2), torch.arange(1, pow2 + 1))
    if pow2 == num_heads:
        return slopes

    # Fill the leftover heads with every other slope of the 2 * pow2 sequence.
    odd_exponents = torch.arange(1, 2 * (num_heads - pow2) + 1, 2)
    leftover = torch.pow(_geometric_base(2 * pow2), odd_exponents)
    return torch.cat([slopes, leftover])
def alibi_attention(q, k, v, slopes):
    """Causal self-attention with ALiBi linear distance biases.

    Query position ``i`` attends only to key positions ``j <= i``. The score
    of an allowed pair is ``q_i . k_j - slope_h * (i - j)``; future positions
    are masked with ``-inf`` so they receive exactly zero attention weight.

    No ``1 / sqrt(head_dim)`` scaling is applied here; pre-scale ``q`` if the
    caller needs scaled dot-product attention.

    Args:
        q, k, v: Tensors laid out as ``(batch, num_heads, seq_len, head_dim)``
            (dimension 2 is read as the sequence length). ``k`` and ``v``
            must have the same sequence length as ``q``.
        slopes: One slope per head, e.g. the output of ``get_alibi_slopes``.

    Returns:
        The attention output, ``(batch, num_heads, seq_len, head_dim)``, in
        ``v``'s dtype.
    """
    seq_len = q.size(2)

    # Build positions on q's device so the bias can be added to the scores.
    positions = torch.arange(seq_len, device=q.device)
    # distance[i, j] = i - j: positive for past keys, negative for future keys.
    distance = positions[:, None] - positions[None, :]
    is_future = distance < 0

    head_slopes = slopes.to(device=q.device, dtype=torch.float32).view(-1, 1, 1)
    bias = -distance.clamp(min=0).to(torch.float32) * head_slopes
    # A zero bias alone would make future tokens the most attractive ones;
    # they must be excluded outright for causal attention.
    bias = bias.masked_fill(is_future, float("-inf"))

    # Softmax in float32 for stability, then return to v's dtype so the final
    # matmul does not hit a dtype mismatch with half-precision inputs.
    scores = torch.matmul(q, k.transpose(-2, -1)).to(torch.float32) + bias
    weights = torch.softmax(scores, dim=-1).to(v.dtype)
    return torch.matmul(weights, v)
架构配置
# Illustrative Baichuan-13B hyperparameters, keyed by config field name.
baichuan_13b_config = dict(
    vocab_size=64000,
    hidden_size=5120,
    intermediate_size=13696,
    num_hidden_layers=40,
    num_attention_heads=40,
    num_key_value_heads=40,
    hidden_act="silu",
    max_position_embeddings=4096,
    rms_norm_eps=1e-6,
    bos_token_id=1,
    eos_token_id=2,
    apply_residual_connection_post_layernorm=False,
    tie_word_embeddings=False,
)
Baichuan版本
# Baichuan release history: model name -> parameter count, context length
# and headline feature.
versions = {
    "Baichuan-7B": {
        "参数": "7B",
        "上下文": "4K",
        "特点": "首个开源版本",
    },
    "Baichuan-13B": {
        "参数": "13B",
        "上下文": "4K",
        "特点": "更大模型",
    },
    "Baichuan2-7B": {
        "参数": "7B",
        "上下文": "4K",
        # The 7B model uses rotary position embeddings with standard
        # multi-head attention; ALiBi is used by the 13B variant, and GQA
        # is not part of Baichuan2.
        "特点": "RoPE位置编码",
    },
    "Baichuan2-13B": {
        "参数": "13B",
        "上下文": "4K",
        "特点": "旗舰版本",
    },
    "Baichuan3": {
        "参数": "多规格",
        "上下文": "32K",
        "特点": "新一代架构",
    },
    "Baichuan4": {
        "参数": "多规格",
        "上下文": "32K",
        "特点": "最新版本",
    },
}
使用Baichuan
基本推理
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# Load the Baichuan2 chat model; its modelling code ships with the checkpoint,
# hence trust_remote_code=True.
model_name = "baichuan-inc/Baichuan2-13B-Chat"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

# Baichuan2's chat() takes no generation keyword arguments such as
# max_new_tokens; generation limits are read from the generation config.
model.generation_config = GenerationConfig.from_pretrained(model_name)
model.generation_config.max_new_tokens = 512

# Single-turn inference.
messages = [
    {"role": "user", "content": "什么是机器学习?"},
]
response = model.chat(tokenizer, messages)
print(response)
流式输出
# Streaming generation. Baichuan2 exposes streaming through
# chat(..., stream=True) rather than a separate chat_stream() method.
# Each item yielded is the full text generated so far, so only the newly
# added suffix is printed on each step.
messages = [{"role": "user", "content": "写一个Python函数"}]
printed = 0
for response in model.chat(tokenizer, messages, stream=True):
    print(response[printed:], end="", flush=True)
    printed = len(response)
print()
vLLM部署
from vllm import LLM, SamplingParams

# Start an offline vLLM engine serving Baichuan2.
llm = LLM(
    model="baichuan-inc/Baichuan2-13B-Chat",
    trust_remote_code=True,
    max_model_len=4096,
    gpu_memory_utilization=0.9,
)

# Generate one completion for a single prompt and print its text.
prompts = ["你好,请介绍一下自己"]
sampling_params = SamplingParams(max_tokens=512, temperature=0.7)
outputs = llm.generate(prompts, sampling_params)
first_completion = outputs[0].outputs[0]
print(first_completion.text)
微调Baichuan
LoRA微调
import torch
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

# LoRA configuration. "W_pack" is the module name targeted for adaptation.
lora_config = LoraConfig(
    r=64,
    lora_alpha=16,
    target_modules=["W_pack"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Load the base model. trust_remote_code=True is required because the
# Baichuan modelling code is shipped with the checkpoint rather than with
# transformers.
model = AutoModelForCausalLM.from_pretrained(
    "baichuan-inc/Baichuan2-13B-Chat",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

# Wrap the model with LoRA adapters and report the trainable parameter count.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
使用SWIFT微调
# SWIFT is ModelScope's fine-tuning framework.
from swift.llm import (
    sft_main, TrainArguments, get_model_tokenizer, get_template
)

# Training options, collected first and then expanded into TrainArguments.
train_options = dict(
    model='baichuan-inc/Baichuan2-13B-Chat',
    dataset='sft_data.jsonl',
    output_dir='output/baichuan2-13b',
    num_train_epochs=3,
    per_device_train_batch_size=4,
    learning_rate=1e-4,
    fp16=True,
)
args = TrainArguments(**train_options)

# Launch supervised fine-tuning.
sft_main(args)
性能评估
# Baichuan2 benchmark scores (percent, rounded to one decimal place), as
# published for Baichuan2-13B-Base. Verify against the Baichuan 2 technical
# report before quoting.
performance = {
    "Baichuan2-13B": {
        "C-Eval": "58.1",
        "CMMLU": "62.0",
        "MMLU": "59.2",
        "HumanEval": "17.1",
        "优势": "中文对话流畅",
    }
}
最佳实践
- 选择版本:13B版本性能更好
- 使用Chat版本:经过对话优化
- 量化部署:INT4/INT8量化降低成本
- 利用ALiBi:长序列处理更好
- 使用SWIFT:ModelScope(魔搭)官方微调框架,支持Baichuan系列
Baichuan凭借其优秀的中文能力和开源生态,成为国内LLM的重要选择。