InternLM:书生系列大模型
---
title: "InternLM:书生系列大模型"
description: "深入了解InternLM系列模型的设计特点和多模态能力"
tags: ["InternLM", "书生", "上海AI实验室", "开源模型"]
category: "llm"
icon: "🧠"
---
InternLM:书生系列大模型
InternLM简介
InternLM(书生大模型)是上海人工智能实验室开发的开源大语言模型系列。InternLM以其全面的能力、多模态支持和丰富的工具生态著称,是国内最具影响力的开源LLM之一。
InternLM的核心优势:
- 全面能力:在多项基准测试中表现优异
- 多模态:通过InternVL等配套模型支持文本、图像等多种模态
- 工具生态:提供LMDeploy等部署工具
- 长上下文:支持超长上下文处理
InternLM架构
核心设计
# InternLM2-7B architecture configuration.
# Values follow the published Hugging Face config for internlm/internlm2-7b so
# that the numbers agree with the feature list below (in particular GQA).
internlm_config = {
    "hidden_size": 4096,
    "num_hidden_layers": 32,
    "num_attention_heads": 32,
    # GQA: fewer key/value heads than query heads (here 4 query heads share
    # each KV head). Setting this equal to num_attention_heads would be plain
    # multi-head attention.
    "num_key_value_heads": 8,
    "intermediate_size": 14336,
    "max_position_embeddings": 32768,
    "rope_theta": 1000000.0,
    "vocab_size": 92544,
    "rms_norm_eps": 1e-5,
    "bos_token_id": 1,
    "eos_token_id": 2,
    # Padding reuses the end-of-sequence token, so id and string must match.
    "pad_token_id": 2,
    "tie_word_embeddings": False,
    "bos_token": "<s>",
    "eos_token": "</s>",
    "pad_token": "</s>",
    "sep_token": "<sep>",
}

# Key architectural features (feature name -> short description)
features = {
    "GQA": "分组查询注意力",
    "Flash Attention": "高效注意力计算",
    "RoPE": "旋转位置编码",
    "SwiGLU": "激活函数",
}
InternLM版本
# Evolution of the InternLM releases.
# Each row is (model name, parameter count, context length, highlight); the
# rows are expanded into the nested mapping exposed as `versions`.
_release_rows = (
    ("InternLM-7B", "7B", "4K", "基础版本"),
    ("InternLM-20B", "20B", "4K", "更大模型"),
    ("InternLM2-7B", "7B", "32K", "新一代架构"),
    ("InternLM2-20B", "20B", "32K", "旗舰版本"),
    ("InternLM2.5-7B", "7B", "1M", "超长上下文"),
    ("InternLM3-8B", "8B", "32K", "最新版本"),
)
versions = {
    release: {"参数": size, "上下文": context, "特点": highlight}
    for release, size, context, highlight in _release_rows
}
使用InternLM
基本推理
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Load InternLM2.5. The repository ships its own modeling/tokenizer code, so
# trust_remote_code=True is required for both the tokenizer and the model.
model_name = "internlm/internlm2_5-7b-chat"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

# Inference
messages = [
    {"role": "system", "content": "你是一个有帮助的助手"},
    {"role": "user", "content": "什么是大语言模型?"},
]
# InternLM2's chat() helper does not accept an OpenAI-style message list: it
# takes the user query as a plain string, prior turns as a list of
# (query, response) tuples in `history`, and the system prompt through
# `meta_instruction`. It returns a (response, history) tuple.
system_prompt = messages[0]["content"]
query = messages[-1]["content"]
response, history = model.chat(
    tokenizer,
    query,
    history=[],
    meta_instruction=system_prompt,
    max_new_tokens=512,
)
print(response)
使用LMDeploy
# LMDeploy是InternLM官方推荐的推理框架
from lmdeploy import pipeline, GenerationConfig
# Create the inference pipeline. pipeline() only takes engine-level options
# (e.g. backend_config, chat_template_config); it has no `model_config`
# parameter, so sampling settings must not be passed here.
pipe = pipeline("internlm/internlm2_5-7b-chat")

# Sampling parameters are supplied per call through `gen_config`.
gen_config = GenerationConfig(
    max_new_tokens=512,
    temperature=0.7,
    top_p=0.9,
)

# Inference: a list of prompts yields a list of responses in the same order.
response = pipe(["什么是机器学习?"], gen_config=gen_config)
print(response[0].text)
多模态InternVL
# InternVL: image understanding
import numpy as np
import torch
from transformers import AutoModel, AutoTokenizer
from transformers.image_utils import load_image

# The InternVL checkpoints are published under the OpenGVLab organisation.
model_path = "OpenGVLab/InternVL-Chat-V1-5"
model = AutoModel.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    trust_remote_code=True,
)

# chat() consumes a preprocessed pixel tensor, not a URL or a message list.
# This is the simplest valid input: one 448x448 tile normalised with the
# ImageNet statistics. The official model card additionally splits large
# images into several tiles; use that routine for best quality.
image = load_image("https://example.com/image.jpg").convert("RGB").resize((448, 448))
pixels = torch.from_numpy(np.array(image)).permute(2, 0, 1).float() / 255.0
mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
# Shape (1, 3, 448, 448), moved to the model's device and dtype.
pixel_values = ((pixels - mean) / std).unsqueeze(0).to(model.device, dtype=torch.float16)

# Image understanding. Signature: chat(tokenizer, pixel_values, question,
# generation_config); "<image>" marks where the picture is inserted in the
# prompt and generation options travel in a plain dict.
question = "<image>\n" + "描述这张图片"
generation_config = dict(max_new_tokens=512, do_sample=False)
response = model.chat(tokenizer, pixel_values, question, generation_config)
print(response)
微调InternLM
LoRA微调
from peft import LoraConfig, get_peft_model
from transformers import TrainingArguments, Trainer
# LoRA configuration
lora_config = LoraConfig(
    r=64,
    lora_alpha=16,
    # Names of the sub-modules that receive LoRA adapters.
    target_modules=["wqkv", "wo"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Load the base model. InternLM checkpoints rely on custom modeling code, so
# trust_remote_code=True is needed here just like in the inference examples;
# without it from_pretrained refuses to load the checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    "internlm/internlm2_5-7b",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# Training
training_args = TrainingArguments(
    output_dir="./internlm-finetuned",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    learning_rate=1e-4,
    fp16=True,
    optim="adamw_torch",
)
# NOTE: `dataset` is not defined in this snippet. It must be a tokenized
# training dataset prepared beforehand, otherwise this line raises NameError.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
)
trainer.train()
全量微调
# Full-parameter fine-tuning with xtuner.
# NOTE(review): this snippet assumes `xtuner.engine.runner` exports a `Runner`
# class with a `from_config` constructor that accepts the plain dict below.
# That could not be confirmed from this file -- verify against the installed
# XTuner version (XTuner is presumably driven through its `xtuner train
# <config>` CLI and mmengine-style config files -- TODO confirm).
from xtuner.dataset import process_hf_dataset
from xtuner.engine.runner import Runner
# Configuration: three nested dicts describing the model, dataset and runner.
config = dict(
# Model section: which class to build and which pretrained weights to load.
model=dict(
type='InternLM2ForCausalLM',
pretrained_model_name_or_path='internlm/internlm2_5-7b',
torch_dtype='auto'
),
# Dataset section: `type` names the processing function as a string;
# `datasets` points at a local JSON file (schema not shown here).
dataset=dict(
type='process_hf_dataset',
datasets='sft_data.json'
),
# Runner section: train for 3 epochs; `val_interval=100` presumably sets how
# often validation runs -- unit (steps vs. epochs) not visible here.
runner=dict(
type='Runner',
max_epochs=3,
val_interval=100
)
)
# Start training
runner = Runner.from_config(config)
runner.train()
LMDeploy部署
服务部署
# Deploy an OpenAI-compatible server with LMDeploy (run in a shell):
# lmdeploy serve api_server internlm/internlm2_5-7b-chat

# Client call
import requests

base_url = "http://localhost:23333"

# Ask the server which model name it registered instead of hard-coding one:
# unless --model-name is given at launch, the served name is the model path,
# and a request naming an unknown model is rejected.
models_response = requests.get(f"{base_url}/v1/models", timeout=10)
models_response.raise_for_status()
model_name = models_response.json()["data"][0]["id"]

response = requests.post(
    f"{base_url}/v1/chat/completions",
    json={
        "model": model_name,
        "messages": [{"role": "user", "content": "你好"}],
        "max_tokens": 512,
        "temperature": 0.7,
    },
    # Generation can be slow; still bound the wait so the client cannot hang.
    timeout=60,
)
# Surface HTTP errors here rather than as a confusing KeyError below.
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"])
TurboMind加速
# LMDeploy使用自研的TurboMind推理引擎进行加速(并非TensorRT-LLM)
# lmdeploy convert internlm2 internlm/internlm2_5-7b-chat --tp 2
# lmdeploy serve api_server workspace --tp 2
性能评估
# InternLM2 benchmark summary: model name -> {metric: score, "优势": strength}.
# NOTE(review): the scores are reproduced as given in this document and have
# not been checked against the official InternLM2 results -- confirm before
# citing them.
_metric_names = ("MMLU", "CMMLU", "HumanEval", "优势")
_score_rows = {
    "InternLM2-7B": ("65.8", "78.2", "43.3", "中文能力强"),
    "InternLM2-20B": ("78.6", "85.6", "52.4", "综合能力强"),
}
performance = {
    model_label: dict(zip(_metric_names, row))
    for model_label, row in _score_rows.items()
}
最佳实践
- 使用LMDeploy:官方推荐的推理框架
- 选择合适版本:根据任务选择7B/20B
- 利用长上下文:InternLM2.5支持1M上下文
- 量化部署:使用AWQ/GPTQ量化
- 多模态应用:使用InternVL处理图像任务
InternLM凭借其全面的能力和丰富的工具生态,成为国内开源LLM的重要选择。