← 返回首页
🧠

InternLM:书生系列大模型

📂 llm ⏱ 3 min 404 words

---
title: "InternLM:书生系列大模型"
description: "深入了解InternLM系列模型的设计特点和多模态能力"
tags: ["InternLM", "书生", "上海AI实验室", "开源模型"]
category: "llm"
icon: "🧠"
---

InternLM:书生系列大模型

InternLM简介

InternLM(书生大模型)是上海人工智能实验室开发的开源大语言模型系列。InternLM以其全面的能力、多模态支持和丰富的工具生态著称,是国内最具影响力的开源LLM之一。

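InternLM的核心优势:

  - 全面的能力:在通用知识(MMLU)、中文(CMMLU)和代码(HumanEval)等任务上均有表现
  - 多模态支持:通过InternVL处理图像理解任务
  - 丰富的工具生态:LMDeploy用于推理部署,XTuner用于微调
  - 超长上下文:InternLM2.5支持1M上下文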

InternLM架构

核心设计

# InternLM architecture configuration.
# Model hyper-parameters come first, followed by the special-token ids and
# their string forms. num_key_value_heads equals num_attention_heads in this
# configuration, i.e. there is one key/value head per query head.
internlm_config = dict(
    hidden_size=4096,
    num_hidden_layers=32,
    num_attention_heads=32,
    num_key_value_heads=32,
    intermediate_size=11008,
    max_position_embeddings=4096,
    rope_theta=10000.0,
    vocab_size=103168,
    rms_norm_eps=1e-6,
    bos_token_id=1,
    eos_token_id=2,
    pad_token_id=0,
    tie_word_embeddings=False,
    bos_token="<s>",
    eos_token="</s>",
    pad_token="<unk>",
    sep_token="<sep>",
)

# Key architectural features: technique name -> short description.
# The descriptions are runtime strings and stay in the article's language.
features = dict(
    [
        ("GQA", "分组查询注意力"),
        ("Flash Attention", "高效注意力计算"),
        ("RoPE", "旋转位置编码"),
        ("SwiGLU", "激活函数"),
    ]
)

InternLM版本

# Release history of the InternLM family.
# Each row is (model name, parameter count, context length, highlight); the
# comprehension expands it into the per-model dict keyed by the article's
# original field names.
versions = {
    name: {"参数": params, "上下文": context, "特点": highlight}
    for name, params, context, highlight in (
        ("InternLM-7B", "7B", "4K", "基础版本"),
        ("InternLM-20B", "20B", "4K", "更大模型"),
        ("InternLM2-7B", "7B", "32K", "新一代架构"),
        ("InternLM2-20B", "20B", "32K", "旗舰版本"),
        ("InternLM2.5-7B", "7B", "1M", "超长上下文"),
        ("InternLM3-8B", "8B", "32K", "最新版本"),
    )
}

使用InternLM

基本推理

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the InternLM2.5 chat checkpoint. trust_remote_code=True lets
# transformers run the modelling code published alongside the weights.
model_name = "internlm/internlm2_5-7b-chat"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

# Conversation in the familiar role/content layout.
messages = [
    {"role": "system", "content": "你是一个有帮助的助手"},
    {"role": "user", "content": "什么是大语言模型?"}
]

# The checkpoint's chat() helper does not take an OpenAI-style message list:
# it expects the user query as a plain string, the system prompt through
# meta_instruction, and it returns a (response, history) pair. Unpack the
# pair so that only the reply text is printed.
system_prompt = messages[0]["content"]
user_query = messages[-1]["content"]

response, history = model.chat(
    tokenizer,
    user_query,
    history=[],
    meta_instruction=system_prompt,
    max_new_tokens=512,
)
print(response)

使用LMDeploy

# LMDeploy是InternLM官方推荐的推理框架
from lmdeploy import pipeline, GenerationConfig

# Create the inference pipeline. Only model/engine options belong here;
# pipeline() does not take sampling settings through a model_config argument.
pipe = pipeline("internlm/internlm2_5-7b-chat")

# Sampling parameters are supplied per request via gen_config.
gen_config = GenerationConfig(
    max_new_tokens=512,
    temperature=0.7,
    top_p=0.9
)

# Run inference on a batch of prompts; the result holds one response object
# per prompt, whose generated text is exposed as .text.
response = pipe(["什么是机器学习?"], gen_config=gen_config)
print(response[0].text)

多模态InternVL

# InternVL支持图像理解
from transformers import AutoModel, AutoTokenizer

# The InternVL checkpoints are published under the OpenGVLab organisation on
# the Hugging Face Hub, so the repo id must use that prefix.
internvl_repo = "OpenGVLab/InternVL-Chat-V1-5"

model = AutoModel.from_pretrained(
    internvl_repo,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

tokenizer = AutoTokenizer.from_pretrained(
    internvl_repo,
    trust_remote_code=True
)

# Image understanding.
# NOTE(review): the chat() helper is defined by the checkpoint's remote code.
# Its model card appears to call it as
# chat(tokenizer, pixel_values, question, generation_config) with a
# pre-processed image tensor rather than an image URL and a message list --
# confirm against the model card and adapt this call before relying on it.
response = model.chat(
    tokenizer=tokenizer,
    image="https://example.com/image.jpg",
    messages=[{"role": "user", "content": "描述这张图片"}],
    max_new_tokens=512
)
print(response)

微调InternLM

LoRA微调

from peft import LoraConfig, get_peft_model
from transformers import TrainingArguments, Trainer

# LoRA configuration: adapters are attached to the fused QKV projection
# ("wqkv") and the attention output projection ("wo").
lora_config = LoraConfig(
    r=64,
    lora_alpha=16,
    target_modules=["wqkv", "wo"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Load the base model. trust_remote_code=True is passed here as in the other
# InternLM loads in this file, because the checkpoint relies on modelling
# code shipped with the weights; without it the load is refused.
model = AutoModelForCausalLM.from_pretrained(
    "internlm/internlm2_5-7b",
    torch_dtype=torch.float16,
    trust_remote_code=True
)

# Wrap the base model with the LoRA adapters and report how many parameters
# remain trainable.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# Training hyper-parameters.
training_args = TrainingArguments(
    output_dir="./internlm-finetuned",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    learning_rate=1e-4,
    fp16=True,
    optim="adamw_torch"
)

# NOTE: `dataset` is not defined in this snippet; it must be a tokenized
# training dataset prepared by the reader before this point.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset
)

trainer.train()

全量微调

# 使用xtuner进行全量微调
from xtuner.dataset import process_hf_dataset
from xtuner.engine.runner import Runner

# Configuration: model, dataset and runner sections, each tagged with a
# 'type' string naming the component to build.
# NOTE(review): confirm that this dict layout and Runner.from_config match the
# installed xtuner release -- XTuner training is commonly launched through its
# `xtuner train <config>` command line instead; verify against the XTuner docs.
config = dict(
    model=dict(
        type='InternLM2ForCausalLM',
        pretrained_model_name_or_path='internlm/internlm2_5-7b',
        torch_dtype='auto'
    ),
    dataset=dict(
        type='process_hf_dataset',
        datasets='sft_data.json'
    ),
    runner=dict(
        type='Runner',
        max_epochs=3,
        val_interval=100
    )
)

# Start training
runner = Runner.from_config(config)
runner.train()

LMDeploy部署

服务部署

# 使用LMDeploy部署服务
# lmdeploy serve api_server internlm/internlm2_5-7b-chat

# 客户端调用
import requests

base_url = "http://localhost:23333"

# Ask the server which model name it registered instead of hard-coding one:
# the name depends on how api_server was launched, and a request naming an
# unknown model is expected to be rejected.
models = requests.get(f"{base_url}/v1/models", timeout=10)
models.raise_for_status()
model_id = models.json()["data"][0]["id"]

# OpenAI-compatible chat completion request. The timeout keeps the client
# from hanging forever if the server stalls.
response = requests.post(
    f"{base_url}/v1/chat/completions",
    json={
        "model": model_id,
        "messages": [{"role": "user", "content": "你好"}],
        "max_tokens": 512,
        "temperature": 0.7
    },
    timeout=60,
)
# Surface HTTP errors directly rather than as a KeyError on the error payload.
response.raise_for_status()

print(response.json()["choices"][0]["message"]["content"])

TensorRT-LLM

# 注:LMDeploy使用自带的TurboMind推理引擎进行加速(并非TensorRT-LLM),
# 下面的命令先将模型转换为TurboMind格式,再以2路张量并行(--tp 2)启动服务
# lmdeploy convert internlm internlm/internlm2_5-7b-chat --tp 2
# lmdeploy serve api_server workspace --tp 2

性能评估

# Benchmark scores quoted for the InternLM2 models.
# Each row pairs a model name with its values for the four fields listed in
# the inline header tuple; scores are kept as strings, as in the source table.
performance = {
    model: dict(zip(("MMLU", "CMMLU", "HumanEval", "优势"), row))
    for model, row in (
        ("InternLM2-7B", ("65.8", "78.2", "43.3", "中文能力强")),
        ("InternLM2-20B", ("78.6", "85.6", "52.4", "综合能力强")),
    )
}

最佳实践

  1. 使用LMDeploy:官方推荐的推理框架
  2. 选择合适版本:根据任务选择7B/20B
  3. 利用长上下文:InternLM2.5支持1M上下文
  4. 量化部署:使用AWQ/GPTQ量化
  5. 多模态应用:使用InternVL处理图像任务

InternLM凭借其全面的能力和丰富的工具生态,成为国内开源LLM的重要选择。