← 返回首页
🧠

百川大模型:Baichuan系列

📂 llm ⏱ 2 min 381 words

---
title: "百川大模型:Baichuan系列"
description: "深入了解Baichuan系列模型的特点、架构和中文能力"
tags: ["Baichuan", "百川", "中文LLM", "开源模型"]
category: "llm"
icon: "🧠"
---

百川大模型:Baichuan系列

Baichuan简介

Baichuan(百川大模型)是百川智能开发的开源大语言模型系列。百川模型以其强大的中文能力、高效的架构设计和优秀的开源生态著称,是国内领先的开源LLM之一。

Baichuan的核心优势:强大的中文能力、高效的架构设计、优秀的开源生态。

Baichuan架构

ALiBi位置编码

# Baichuan-13B、Baichuan2-13B使用ALiBi(Attention with Linear Biases);7B版本使用RoPE
# 相比RoPE,ALiBi在长序列外推(训练短、推理长)上表现更好

def get_alibi_slopes(num_heads):
    """Return the ALiBi slopes as a 1-D tensor with ``num_heads`` entries.

    The first ``p`` slopes, where ``p`` is the largest power of two not
    exceeding ``num_heads``, form a geometric sequence. When ``num_heads``
    is not a power of two, the remaining slopes are taken from the odd
    powers of the sequence that ``2 * p`` heads would use.
    """

    def _start(n):
        # First term (and common ratio) of the geometric sequence for n heads.
        return 2 ** (-(2 ** -(math.log2(n) - 3)))

    pow2_heads = 2 ** math.floor(math.log2(num_heads))
    head_slopes = torch.pow(_start(pow2_heads), torch.arange(1, pow2_heads + 1))
    if pow2_heads == num_heads:
        return head_slopes

    # Fill the leftover heads with every other slope of the doubled sequence.
    leftover = num_heads - pow2_heads
    odd_exponents = torch.arange(1, 2 * leftover + 1, 2)
    extra_slopes = torch.pow(_start(2 * pow2_heads), odd_exponents)
    return torch.cat([head_slopes, extra_slopes])

def alibi_attention(q, k, v, slopes):
    """Causal scaled dot-product attention with ALiBi linear biases.

    Each head adds ``-slope * distance`` to its attention scores, where
    ``distance`` is how far a key lies behind the query. Keys that lie
    ahead of the query are masked out entirely.

    Args:
        q: Query tensor laid out as (..., heads, query_len, head_dim).
        k: Key tensor laid out as (..., heads, key_len, head_dim).
        v: Value tensor laid out as (..., heads, key_len, value_dim).
        slopes: 1-D tensor holding one ALiBi slope per head.

    Returns:
        Attention output laid out as (..., heads, query_len, value_dim).

    Raises:
        ValueError: If there are fewer keys than queries.
    """
    q_len, k_len = q.size(-2), k.size(-2)
    if k_len < q_len:
        raise ValueError(
            f"key length ({k_len}) must be >= query length ({q_len})"
        )

    # With cached keys the queries are the LAST q_len positions of the
    # sequence, so shift query indices to absolute positions.
    query_pos = torch.arange(q_len, device=q.device)[:, None] + (k_len - q_len)
    key_pos = torch.arange(k_len, device=q.device)[None, :]
    distance = query_pos - key_pos  # >= 0 for visible keys, < 0 for future keys

    # Linear penalty on visible keys; future keys get -inf so softmax gives
    # them zero weight (a zero bias there would favour them instead).
    head_slopes = slopes.to(device=q.device, dtype=q.dtype).view(-1, 1, 1)
    bias = -distance.clamp(min=0).to(q.dtype) * head_slopes
    bias = bias.masked_fill(distance < 0, float("-inf"))

    # Standard 1/sqrt(head_dim) scaling keeps the logits in a stable range.
    scale = q.size(-1) ** -0.5
    scores = torch.matmul(q, k.transpose(-2, -1)) * scale + bias
    weights = torch.softmax(scores, dim=-1)

    return torch.matmul(weights, v)

架构配置

# Baichuan-13B configuration
baichuan_13b_config = dict(
    # Vocabulary and layer sizes
    vocab_size=64000,
    hidden_size=5120,
    intermediate_size=13696,
    num_hidden_layers=40,
    # Attention heads (key/value head count equals the query head count)
    num_attention_heads=40,
    num_key_value_heads=40,
    # Activation, context length and normalisation
    hidden_act="silu",
    max_position_embeddings=4096,
    rms_norm_eps=1e-6,
    # Special token ids
    bos_token_id=1,
    eos_token_id=2,
    # Structural switches
    apply_residual_connection_post_layernorm=False,
    tie_word_embeddings=False,
)

Baichuan版本

# Baichuan release history: parameter count ("参数"), context window
# ("上下文") and headline feature ("特点") for each release.
versions = {
    "Baichuan-7B": {
        "参数": "7B",
        "上下文": "4K",
        "特点": "首个开源版本"
    },
    "Baichuan-13B": {
        "参数": "13B",
        "上下文": "4K",
        "特点": "更大模型"
    },
    # Baichuan2-7B uses RoPE with standard multi-head attention; ALiBi is
    # only used by the 13B models, and neither Baichuan2 size uses GQA.
    "Baichuan2-7B": {
        "参数": "7B",
        "上下文": "4K",
        "特点": "RoPE,2.6T tokens训练"
    },
    "Baichuan2-13B": {
        "参数": "13B",
        "上下文": "4K",
        "特点": "旗舰版本"
    },
    "Baichuan3": {
        "参数": "多规格",
        "上下文": "32K",
        "特点": "新一代架构"
    },
    "Baichuan4": {
        "参数": "多规格",
        "上下文": "32K",
        "特点": "最新版本"
    }
}

使用Baichuan

基本推理

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# Load Baichuan2. The model ships its own modelling code on the Hub, so
# both the tokenizer and the model need trust_remote_code=True.
model_name = "baichuan-inc/Baichuan2-13B-Chat"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

# Baichuan2's chat() accepts no max_new_tokens keyword; it reads generation
# settings from model.generation_config, so the output limit is set there.
model.generation_config = GenerationConfig.from_pretrained(model_name)
model.generation_config.max_new_tokens = 512

# Inference: messages is a list of {"role", "content"} chat turns.
messages = [
    {"role": "user", "content": "什么是机器学习?"},
]

response = model.chat(tokenizer, messages)
print(response)

流式输出

# Streaming generation.
# Baichuan2 exposes no chat_stream() method; streaming is requested with
# chat(..., stream=True), which returns an iterator over the text decoded
# so far. Each item repeats the earlier text, so print only the new suffix.
stream_messages = [{"role": "user", "content": "写一个Python函数"}]
printed_chars = 0
for response in model.chat(tokenizer, stream_messages, stream=True):
    print(response[printed_chars:], end="", flush=True)
    printed_chars = len(response)
print()

vLLM部署

from vllm import LLM, SamplingParams

# Deploy Baichuan2 with vLLM: collect the engine options, then build the engine.
engine_options = {
    "model": "baichuan-inc/Baichuan2-13B-Chat",
    "max_model_len": 4096,
    "trust_remote_code": True,
    "gpu_memory_utilization": 0.9,
}
llm = LLM(**engine_options)

# NOTE(review): the prompt is passed as raw text; confirm whether the chat
# model expects its chat prompt format to be applied first.
prompts = ["你好,请介绍一下自己"]
sampling_params = SamplingParams(temperature=0.7, max_tokens=512)
outputs = llm.generate(prompts, sampling_params)

# One prompt was submitted, so print the first completion of the first result.
first_result = outputs[0]
print(first_result.outputs[0].text)

微调Baichuan

LoRA微调

from peft import LoraConfig, get_peft_model

# LoRA configuration. "W_pack" is the fused Q/K/V projection in Baichuan's
# attention layers, so adapters are attached to the attention projections.
lora_config = LoraConfig(
    r=64,
    lora_alpha=16,
    target_modules=["W_pack"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Load the base model. Baichuan2 uses custom modelling code from the Hub,
# so trust_remote_code=True is required here just as it is for inference.
model = AutoModelForCausalLM.from_pretrained(
    "baichuan-inc/Baichuan2-13B-Chat",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

# Wrap the model with LoRA adapters and report how many parameters train.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

使用SWIFT微调

# SWIFT是ModelScope的微调框架
from swift.llm import (
    sft_main, TrainArguments, get_model_tokenizer, get_template
)

# Training configuration, gathered in one mapping before building the arguments.
# NOTE(review): these argument names depend on the installed ms-swift
# version; confirm them (in particular the type expected for `dataset`).
train_options = dict(
    model='baichuan-inc/Baichuan2-13B-Chat',
    dataset='sft_data.jsonl',
    output_dir='output/baichuan2-13b',
    num_train_epochs=3,
    per_device_train_batch_size=4,
    learning_rate=1e-4,
    fp16=True,
)
args = TrainArguments(**train_options)

# Launch supervised fine-tuning
sft_main(args)

性能评估

# Baichuan2 benchmark scores, rounded to one decimal place.
# C-Eval and CMMLU are corrected to the published Baichuan2-13B-Base results
# (58.10 and 61.97); the earlier values duplicated the MMLU score.
# NOTE(review): re-check against the Baichuan 2 technical report if the
# evaluation setup differs.
performance = {
    "Baichuan2-13B": {
        "C-Eval": "58.1",
        "CMMLU": "62.0",
        "MMLU": "59.2",
        "HumanEval": "17.1",
        "优势": "中文对话流畅"
    }
}

最佳实践

  1. 选择版本:13B版本性能更好
  2. 使用Chat版本:经过对话优化
  3. 量化部署:INT4/INT8量化降低成本
  4. 利用ALiBi:长序列处理更好
  5. 使用SWIFT:ModelScope提供的微调框架,支持Baichuan系列

Baichuan凭借其优秀的中文能力和开源生态,成为国内LLM的重要选择。