← 返回首页
🧠

Phi系列:微软小模型大能力

📂 llm ⏱ 2 min 363 words

---
title: "Phi系列:微软小模型大能力"
description: "深入了解Phi系列模型的创新训练方法和小模型性能突破"
tags: ["Phi", "微软", "小模型", "高质量数据"]
category: "llm"
icon: "🧠"
---

Phi系列:微软小模型大能力

Phi简介

Phi是微软开发的小参数量大语言模型系列。Phi以其"小模型大能力"的理念著称,通过高质量数据和创新训练方法,在3.8B参数量下达到了接近更大模型的性能。

Phi的核心创新:教科书级高质量训练数据、合成数据、多阶段训练(预训练+微调+对齐),以及最高128K的长上下文支持。

Phi架构

核心设计

# Phi-3-mini configuration (3.8B, 128K-context variant).
# Values mirror the published Hugging Face config for
# microsoft/Phi-3-mini-128k-instruct.
phi3_config = {
    "hidden_size": 3072,
    "intermediate_size": 8192,
    "num_hidden_layers": 32,
    "num_attention_heads": 32,
    # Same as num_attention_heads, i.e. plain multi-head attention for mini.
    "num_key_value_heads": 32,
    "max_position_embeddings": 131072,  # 128K long context
    # The RoPE base stays at the standard 10000; the 128K window comes from
    # LongRoPE scaling factors (rope_scaling), not from a larger base.
    "rope_theta": 10000.0,
    "vocab_size": 32064,
    "rms_norm_eps": 1e-5,
    "bos_token_id": 1,
    "eos_token_id": 32000,
    # The published config pads with token 32000 (<|endoftext|>), not 0.
    "pad_token_id": 32000,
}

# Key features, as (name, description) pairs in display order.
features = dict(
    [
        ("长上下文", "128K tokens"),
        ("Sliding Window", "滑动窗口注意力"),
        ("GQA", "分组查询注意力"),
        ("高质量数据", "教科书级训练数据"),
    ]
)

Phi版本

# Evolution of the Phi family.
# Each row is (model name, parameter count, context length, highlight).
_VERSION_ROWS = (
    ("Phi-1", "1.3B", "2K", "代码生成专用"),
    ("Phi-1.5", "1.3B", "2K", "通用语言模型"),
    ("Phi-2", "2.7B", "2K", "接近7B性能"),
    ("Phi-3-mini", "3.8B", "4K/128K", "长上下文,高性能"),
    ("Phi-3-small", "7B", "8K/128K", "更大模型"),
    ("Phi-3-medium", "14B", "4K/128K", "旗舰版本"),
)

# Expand the rows into the nested mapping keyed by model name.
versions = {
    name: {"参数": params, "上下文": context, "特点": highlight}
    for name, params, context, highlight in _VERSION_ROWS
}

使用Phi

基本推理

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load Phi-3 (instruct variant with the 128K context window)
model_name = "microsoft/Phi-3-mini-128k-instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

# Inference
messages = [
    {"role": "system", "content": "你是一个有帮助的助手"},
    {"role": "user", "content": "什么是大语言模型?"}
]

# return_dict=True makes the tokenizer return both input_ids and
# attention_mask. Passing the mask to generate() avoids the "attention mask
# is not set" warning and keeps the code independent of whether
# apply_chat_template returns a bare tensor or a dict by default.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
).to(model.device)

# Number of prompt tokens, used below to strip the prompt from the output.
prompt_length = inputs["input_ids"].shape[1]

outputs = model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.7)
# generate() returns prompt + completion; decode only the newly generated part.
response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
print(response)

vLLM部署

from vllm import LLM, SamplingParams

# Serve Phi-3 with vLLM.
# Engine options are collected first, then unpacked into the constructor.
engine_options = {
    "model": "microsoft/Phi-3-mini-128k-instruct",
    "max_model_len": 131072,
    "gpu_memory_utilization": 0.9,
}
llm = LLM(**engine_options)

sampling_params = SamplingParams(max_tokens=256, temperature=0.7)
prompts = ["什么是深度学习?"]
outputs = llm.generate(prompts, sampling_params)

# Print the text of the first completion for the first (only) prompt.
first_completion = outputs[0].outputs[0]
print(first_completion.text)

边缘设备运行

# CPU inference (suited to laptops)
import torch  # needed for the dtype arguments below; this snippet did not import it
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    torch_dtype=torch.float32,
    device_map="cpu"
)

# Quantized deployment
# NOTE: this rebinds `model`; the CPU load above and the 4-bit load below are
# alternatives, not steps of one pipeline.
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    # The compute dtype defaults to float32, which is slow; weights stay 4-bit
    # while matmuls run in half precision.
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    quantization_config=bnb_config,
    device_map="auto"
)

微调Phi

LoRA微调

from peft import LoraConfig, get_peft_model

# LoRA hyperparameters, gathered in one mapping before building the config.
lora_settings = dict(
    r=16,
    lora_alpha=32,
    target_modules=["qkv_proj", "o_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)
lora_config = LoraConfig(**lora_settings)

# Wrap the already-loaded model with the adapters and report how many
# parameters remain trainable.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

性能评估

# Phi-3 benchmark summary.
# Each row is (model name, MMLU, HumanEval, parameter count, strength).
# NOTE(review): scores are reproduced as quoted by the article; verify them
# against the Phi-3 technical report before relying on them.
_PERFORMANCE_ROWS = (
    ("Phi-3-mini", "69.0", "58.5", "3.8B", "小模型高性能"),
    ("Phi-3-small", "75.7", "69.5", "7B", "平衡性能"),
    ("Phi-3-medium", "78.0", "72.6", "14B", "旗舰模型"),
)

# Expand the rows into the nested mapping keyed by model name.
performance = {
    name: {"MMLU": mmlu, "HumanEval": human_eval, "参数": params, "优势": strength}
    for name, mmlu, human_eval, params, strength in _PERFORMANCE_ROWS
}

训练方法

# Phi training strategy, filled in one aspect at a time.
training_strategy = {}
training_strategy["数据质量"] = "使用教科书级高质量数据"
training_strategy["合成数据"] = "使用GPT-4生成训练数据"
training_strategy["知识注入"] = "在训练中注入领域知识"
training_strategy["多阶段训练"] = "预训练+微调+对齐"

# Data construction example
def create_phi_training_data(topic, num_samples=1000):
    """Ask GPT-4 to write Phi-style teaching Q&A pairs for a topic.

    A single chat-completion request is sent with a prompt that names the
    topic and the requested number of Q&A pairs.

    Args:
        topic: Subject the Q&A pairs should cover; must be a non-empty string.
        num_samples: Number of Q&A pairs requested in the prompt; must be
            at least 1.

    Returns:
        The message content of the first completion choice, unparsed.

    Raises:
        ValueError: If ``topic`` is empty/blank or ``num_samples`` is below 1.
    """
    # Validate before creating the client so that a bad call never reaches
    # the (paid) API.
    if not isinstance(topic, str) or not topic.strip():
        raise ValueError("topic must be a non-empty string")
    if num_samples < 1:
        raise ValueError("num_samples must be at least 1")

    # Imported lazily so the module can be loaded without the openai package.
    from openai import OpenAI

    client = OpenAI()

    prompt = f"""请为"{topic}"主题生成{num_samples}个高质量的教学问答对。
    要求:
    1. 内容准确、详细
    2. 适合学习者理解
    3. 包含具体例子
    """

    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7
    )

    return response.choices[0].message.content

最佳实践

  1. 选择规格:3.8B适合边缘设备,7B/14B适合服务器
  2. 利用长上下文:Phi-3支持128K上下文
  3. 量化部署:INT4量化可在CPU上运行
  4. 微调适配:针对特定任务微调
  5. 边缘部署:适合笔记本和移动设备

Phi系列通过高质量数据和创新的训练方法,证明了小模型也能接近更大模型的性能水平。