← 返回首页
🧠

通义千问Qwen:阿里云大模型

📂 llm ⏱ 3 min 450 words

---
title: "通义千问Qwen:阿里云大模型"
description: "深入了解Qwen系列模型的架构特点、多模态能力和中文优化"
tags: ["Qwen", "通义千问", "阿里云", "中文LLM"]
category: "llm"
icon: "🧠"
---

通义千问Qwen:阿里云大模型

Qwen简介

Qwen(通义千问)是阿里云开发的大语言模型系列。Qwen以其出色的中文能力、多模态支持和开源生态著称,是国内最具影响力的开源LLM之一。

Qwen的核心优势:出色的中文能力、多模态支持(Qwen-VL、Qwen-Audio)和开源生态。

Qwen架构

核心设计

from transformers import AutoModelForCausalLM, AutoTokenizer

# Qwen configuration: illustrative architecture hyperparameters, written as
# keyword arguments so every key is checked as a valid identifier.
qwen_config = dict(
    hidden_size=4096,
    intermediate_size=11008,
    num_hidden_layers=32,
    num_attention_heads=32,
    num_key_value_heads=32,
    max_position_embeddings=8192,
    vocab_size=151936,  # large vocabulary to support multiple languages
    rope_theta=1000000.0,  # relatively large RoPE base
    use_sliding_window=False,
    use_flash_attn=True,
)

# Load the Qwen chat model and its tokenizer.
# The first-generation Qwen checkpoints (Qwen/Qwen-7B-Chat) ship their own
# modeling and tokenization code on the Hub, so BOTH loaders must opt in with
# trust_remote_code=True; without it the model load is refused.
# Only enable this for repositories you trust: it executes code from the Hub.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen-7B-Chat",
    torch_dtype="auto",
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True)

关键技术

# 1. 大词汇表(151K tokens)
# 支持中英文和代码,提高多语言能力

# 2. Dynamic NTK-aware RoPE
import torch


class DynamicNTKRoPE:
    """Rotary position-embedding angle table with dynamic NTK-aware scaling.

    While the requested sequence length stays within
    ``max_position_embeddings`` the configured ``base`` is used unchanged.
    For longer sequences the base is enlarged by
    ``(seq_len / max_position_embeddings) ** (dim / (dim - 2))`` so that the
    rotary frequencies are stretched to cover the extra positions.
    """

    def __init__(self, dim, max_position_embeddings=8192, base=1000000):
        """Store the rotary dimension, the native context length and the base."""
        self.dim = dim
        self.max_position_embeddings = max_position_embeddings
        self.base = base

    def forward(self, x, seq_len):
        """Return the rotation angles for positions ``0 .. seq_len - 1``.

        Args:
            x: Not used by the computation; kept so the call signature stays
                unchanged.
            seq_len: Number of positions to generate angles for.

        Returns:
            A float tensor of shape ``(seq_len, dim)`` (for even ``dim``)
            holding ``position * inv_freq`` duplicated along the last axis.
        """
        # Dynamically enlarge the base once the sequence outgrows the native
        # context window. With dim <= 2 the only frequency has exponent 0 and
        # is independent of the base, so scaling is skipped there (the scaling
        # exponent dim / (dim - 2) would otherwise divide by zero).
        if seq_len > self.max_position_embeddings and self.dim > 2:
            ratio = seq_len / self.max_position_embeddings
            base = self.base * ratio ** (self.dim / (self.dim - 2))
        else:
            base = self.base

        exponents = torch.arange(0, self.dim, 2).float() / self.dim
        inv_freq = 1.0 / (base ** exponents)

        # Build positions in the same floating dtype as inv_freq; an integer
        # arange multiplied against a float tensor is a dtype mismatch.
        positions = torch.arange(seq_len, dtype=inv_freq.dtype)

        # Outer product positions x inv_freq via broadcasting.
        freqs = positions[:, None] * inv_freq[None, :]
        return torch.cat((freqs, freqs), dim=-1)

Qwen系列版本

# Evolution of the Qwen releases. Each row is (model name, (parameter count,
# context window, short note)); the rows are expanded into one dict per model.
versions = {
    model_name: dict(zip(("参数", "上下文", "特点"), details))
    for model_name, details in (
        ("Qwen-7B", ("7B", "8K", "基础版本")),
        ("Qwen-14B", ("14B", "8K", "更大模型")),
        ("Qwen-72B", ("72B", "32K", "旗舰模型")),
        ("Qwen-1.8B", ("1.8B", "8K", "轻量级")),
        ("Qwen2", ("0.5B-72B", "32K-128K", "新一代架构")),
    )
}

使用Qwen

基本推理

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Checkpoint used for this example: Qwen2, instruction-tuned 7B.
model_name = "Qwen/Qwen2-7B-Instruct"

# Conversation to answer: one system turn followed by one user turn.
messages = [
    {"role": "system", "content": "你是一个有帮助的助手"},
    {"role": "user", "content": "解释什么是大语言模型"}
]

# Load the tokenizer and the model (half precision, automatic device placement).
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True
)

# Inference, step 1: render the conversation into a single prompt string
# (tokenize=False) that ends with the generation prompt.
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Step 2: tokenize the prompt as a batch of one and move it to the model's device.
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Step 3: generate, then drop the first len(prompt) ids of every output row so
# that only the ids following the prompt are kept.
generated_ids = model.generate(**model_inputs, max_new_tokens=512)
generated_ids = [
    full_ids[len(prompt_ids):]
    for prompt_ids, full_ids in zip(model_inputs.input_ids, generated_ids)
]

# Step 4: decode the single remaining sequence and show it.
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)

vLLM部署

from vllm import LLM, SamplingParams

# Deploy Qwen2 with vLLM's offline inference engine.
llm = LLM(
    model="Qwen/Qwen2-7B-Instruct",
    max_model_len=32768,
    gpu_memory_utilization=0.9,
    trust_remote_code=True
)

sampling_params = SamplingParams(temperature=0.7, max_tokens=512)
prompts = ["什么是深度学习?", "解释神经网络的工作原理"]

# Qwen2-7B-Instruct is a chat model: each question has to be wrapped in the
# model's chat template (ending with the generation prompt) before generation.
# Passing the raw strings would make the model continue them as plain text
# instead of answering them as a user turn.
vllm_tokenizer = llm.get_tokenizer()
chat_prompts = [
    vllm_tokenizer.apply_chat_template(
        [{"role": "user", "content": question}],
        tokenize=False,
        add_generation_prompt=True,
    )
    for question in prompts
]
outputs = llm.generate(chat_prompts, sampling_params)

# Each request output carries its candidate completions; print the first one.
for output in outputs:
    print(output.outputs[0].text)

多模态Qwen-VL

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load Qwen-VL. The Qwen-VL-Chat repository provides its own modeling and
# tokenization code, hence trust_remote_code=True on both loaders.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen-VL-Chat",
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

# Qwen-VL-Chat is driven through its tokenizer (not an AutoProcessor): the
# tokenizer turns a list of image/text items into a single query string.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat", trust_remote_code=True)

# Image understanding: one image followed by a question about it.
messages = [
    {"image": "https://example.com/image.jpg"},
    {"text": "这张图片里有什么?"},
]
query = tokenizer.from_list_format(messages)

# model.chat takes the tokenizer and the query and returns the reply together
# with the updated history; history=None starts a new conversation.
response, history = model.chat(tokenizer, query=query, history=None)
print(response)

微调Qwen

LoRA微调

from peft import LoraConfig, get_peft_model
from transformers import TrainingArguments, Trainer

# Projection layers that receive LoRA adapters, grouped by sub-module.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

# LoRA configuration for causal language modelling.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=attention_projections + mlp_projections,
)

# Load the base model in half precision and wrap it with the adapters.
model = get_peft_model(
    AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-7B", torch_dtype=torch.float16),
    lora_config,
)
model.print_trainable_parameters()

# Training hyperparameters.
training_args = TrainingArguments(
    output_dir="./qwen-finetuned",
    optim="adamw_torch",
    learning_rate=1e-4,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    fp16=True,
)

# NOTE(review): `dataset` is not defined anywhere in the visible source; it
# has to be provided before this point for the snippet to run.
trainer = Trainer(model=model, args=training_args, train_dataset=dataset)

trainer.train()

性能评估

# Qwen2 benchmark summary. Each row is (model name, (MMLU, HumanEval, CMMLU,
# strength)); the rows are expanded into one dict per model.
# NOTE(review): the scores are reproduced as given and have not been checked
# against an official report - confirm before citing.
performance = {
    model_name: dict(zip(("MMLU", "HumanEval", "CMMLU", "优势"), scores))
    for model_name, scores in (
        ("Qwen2-7B", ("79.2", "64.6", "83.2", "中文能力出色")),
        ("Qwen2-72B", ("86.1", "86.4", "91.3", "综合能力强")),
    )
}

Qwen生态

# Qwen ecosystem overview, filled in one category at a time:
# models, tooling, applications and community hubs.
ecosystem = {}
ecosystem["模型"] = ["Qwen-7B", "Qwen-14B", "Qwen-72B", "Qwen-VL", "Qwen-Audio"]
ecosystem["工具"] = ["ModelScope", "DashScope", "Qwen-Agent"]
ecosystem["应用"] = ["通义千问", "钉钉AI", "阿里云AI"]
ecosystem["社区"] = ["GitHub", "ModelScope", "HuggingFace"]

最佳实践

  1. 选择合适版本:根据任务和硬件选择模型大小
  2. 使用中文提示:充分利用中文优化
  3. 应用多模态:使用Qwen-VL处理图像任务
  4. 优化部署:使用vLLM和量化技术
  5. 参与社区:关注Qwen的更新和生态发展

Qwen作为国内领先的开源LLM,为中文AI应用提供了强大的基础模型支持。