LLM集成测试
---
title: "LLM集成测试"
description: "详细介绍LLM系统的集成测试方法,包括RAG管道测试、Agent工具链测试、多组件协作测试、测试替身使用以及集成测试环境搭建"
tags: ["集成测试", "管道测试", "组件协作", "LLM测试"]
category: "llm"
icon: "🧠"
---
LLM集成测试
集成测试的目标
集成测试验证LLM系统中各个组件协同工作的正确性。与单元测试关注单个函数不同,集成测试关注模块间的数据流转和交互逻辑,确保系统作为一个整体能够正常运行。
RAG管道集成测试
RAG(检索增强生成)是LLM应用中最常见的架构模式,其集成测试需要验证检索、增强、生成三个阶段的协作。
import pytest
class RAGPipeline:
    """Retrieval-augmented generation pipeline: retrieve, optionally rerank, generate.

    Collaborators are duck-typed:

    * ``retriever.search(question, top_k=...)`` returns an iterable of
      documents, each exposing a ``.text`` attribute.
    * ``llm_client.generate(prompt)`` returns the answer.
    * ``reranker.rerank(question, documents)`` (optional) returns the
      documents to use in place of the retrieved ones.
    """

    def __init__(self, retriever, llm_client, reranker=None):
        """Store the collaborators; ``reranker`` may be omitted."""
        self.retriever = retriever
        self.llm_client = llm_client
        self.reranker = reranker

    def query(self, question, top_k=5):
        """Answer ``question`` using retrieved documents as context.

        Args:
            question: The user question, forwarded to every stage.
            top_k: Number of documents requested from the retriever.

        Returns:
            A dict with ``"answer"`` (the LLM client's output) and
            ``"sources"`` (the documents that were fed into the prompt).
        """
        # Retrieval stage.
        hits = self.retriever.search(question, top_k=top_k)

        # Optional rerank stage: skipped when no (truthy) reranker is set.
        if self.reranker:
            hits = self.reranker.rerank(question, hits)

        # Augmentation stage: one document text per line.
        context = "\n".join(hit.text for hit in hits)
        prompt = f"根据以下信息回答问题:\n{context}\n\n问题:{question}"

        # Generation stage.
        return {
            "answer": self.llm_client.generate(prompt),
            "sources": hits,
        }
class TestRAGPipeline:
    """Integration tests wiring RAGPipeline to a mock retriever and a mock LLM client."""

    def setup_method(self):
        """Build a fresh pipeline over two canned documents."""
        canned_docs = [
            {"text": "Python是一种编程语言", "score": 0.95},
            {"text": "Python广泛用于机器学习", "score": 0.88},
        ]
        self.retriever = MockRetriever(canned_docs)
        self.llm_client = MockLLMClient("Python是一种广泛使用的编程语言。")
        self.pipeline = RAGPipeline(self.retriever, self.llm_client)

    def test_basic_query(self):
        """A normal query yields an answer plus both canned sources."""
        response = self.pipeline.query("什么是Python?")

        assert "answer" in response
        assert len(response["sources"]) == 2

    def test_empty_retrieval(self):
        """The pipeline still produces an answer when nothing is retrieved."""
        self.retriever.set_results([])

        response = self.pipeline.query("量子计算")

        assert "answer" in response

    def test_retriever_called_correctly(self):
        """The question and ``top_k`` are forwarded to the retriever unchanged."""
        self.pipeline.query("测试问题", top_k=3)

        # NOTE(review): relies on MockRetriever.search exposing the
        # unittest.mock assertion API - confirm against the mock's definition.
        self.retriever.search.assert_called_once_with("测试问题", top_k=3)
Agent工具链测试
Agent通过调用外部工具完成复杂任务,集成测试需要验证工具调用的正确性和工具间的数据传递。
class Agent:
    """Run an LLM-produced plan by invoking named tools in order.

    Collaborators are duck-typed:

    * each tool exposes ``.name`` and ``.execute(params)``;
    * ``llm_client.generate(prompt)`` returns a mapping whose ``"steps"``
      entry is a sequence of ``{"tool": <name>, "params": <params>}``.
    """

    def __init__(self, tools, llm_client):
        """Index ``tools`` by name and start with an empty call history."""
        self.tools = {tool.name: tool for tool in tools}
        self.llm_client = llm_client
        # Cumulative log of every tool call made through this agent,
        # across all ``execute`` invocations.
        self.tool_calls = []

    def execute(self, task):
        """Plan ``task`` with the LLM and run each planned step.

        Args:
            task: Task description embedded in the planning prompt.

        Returns:
            A list with one ``{"tool", "params", "result"}`` dict per step
            executed for *this* task, in plan order. Calls from earlier
            tasks are not included; they remain in ``self.tool_calls``.

        Raises:
            KeyError: If the plan names a tool that was not registered.
        """
        prompt = f"任务:{task}\n可用工具:{list(self.tools.keys())}"
        plan = self.llm_client.generate(prompt)

        # Collect this run's calls separately so a second ``execute`` does
        # not hand back the previous task's calls mixed in with its own.
        run_calls = []
        for step in plan["steps"]:
            tool = self.tools[step["tool"]]
            record = {
                "tool": step["tool"],
                "params": step["params"],
                "result": tool.execute(step["params"]),
            }
            run_calls.append(record)
            self.tool_calls.append(record)
        return run_calls
class TestAgentIntegration:
    """Integration tests for Agent driven by mock tools and a canned plan."""

    def setup_method(self):
        """Register three mock tools and an LLM client that plans two steps."""
        search_tool = MockTool("search", lambda q: f"搜索结果:{q}")
        # NOTE(review): eval() is tolerable only because this is a test
        # double fed fixed inputs; never expose it to model-generated text.
        calculate_tool = MockTool("calculate", lambda expr: str(eval(expr)))
        translate_tool = MockTool("translate", lambda text: f"翻译:{text}")
        self.tools = [search_tool, calculate_tool, translate_tool]

        canned_plan = {
            "steps": [
                {"tool": "search", "params": {"q": "天气"}},
                {"tool": "translate", "params": {"text": "晴天"}},
            ]
        }
        self.llm_client = MockLLMClient(canned_plan)
        self.agent = Agent(self.tools, self.llm_client)

    def test_tool_chain_execution(self):
        """Both planned steps run, in plan order."""
        calls = self.agent.execute("查询天气并翻译")

        assert len(calls) == 2
        assert calls[0]["tool"] == "search"
        assert calls[1]["tool"] == "translate"
多组件协作测试
测试向量数据库、LLM、缓存等多个组件的协作:
class LLMApplication:
    """Answer queries through a cache, a vector database and an LLM client.

    Collaborators are duck-typed:

    * ``cache.get(query)`` / ``cache.set(query, answer, ttl=...)``;
    * ``vectordb.search(query, top_k=...)`` returns an iterable of mappings
      with a ``"text"`` key;
    * ``llm_client.generate(prompt)`` returns the answer.
    """

    def __init__(self, cache, vectordb, llm_client):
        """Store the three collaborators."""
        self.cache = cache
        self.vectordb = vectordb
        self.llm_client = llm_client

    def process(self, query):
        """Return the answer for ``query``, serving from cache when possible.

        Args:
            query: The user query; also used verbatim as the cache key.

        Returns:
            ``{"answer", "source": "cache"}`` on a cache hit, otherwise
            ``{"answer", "source": "llm", "docs"}`` after retrieval and
            generation, with the answer written to the cache for 3600
            (the ``ttl`` argument).
        """
        # Check the cache first. Compare against None rather than relying on
        # truthiness so that a cached falsy answer (e.g. an empty string) is
        # still a hit instead of triggering a second retrieval + LLM call.
        # NOTE(review): assumes cache.get returns None on a miss, as
        # dict.get does - confirm for the cache backend in use.
        cached = self.cache.get(query)
        if cached is not None:
            return {"answer": cached, "source": "cache"}

        # Retrieve supporting documents.
        docs = self.vectordb.search(query, top_k=3)

        # Generate the answer from the joined document texts.
        context = "\n".join([d["text"] for d in docs])
        answer = self.llm_client.generate(f"基于{context}回答:{query}")

        # Populate the cache for subsequent identical queries.
        self.cache.set(query, answer, ttl=3600)
        return {"answer": answer, "source": "llm", "docs": docs}
class TestLLMApplicationIntegration:
    """Integration tests for the cache / vector DB / LLM collaboration."""

    def setup_method(self):
        """Assemble the application from an in-memory cache and mock backends."""
        self.cache = InMemoryCache()
        self.vectordb = MockVectorDB({"测试文档": "这是测试内容"})
        self.llm_client = MockLLMClient("测试回答")
        self.app = LLMApplication(self.cache, self.vectordb, self.llm_client)

    def test_first_query_populates_cache(self):
        """A first-time query is answered by the LLM and written to the cache."""
        outcome = self.app.process("测试查询")

        assert outcome["source"] == "llm"
        assert self.cache.get("测试查询") == "测试回答"

    def test_second_query_uses_cache(self):
        """Repeating a query is served from the cache."""
        # First call: populates the cache.
        self.app.process("测试查询")
        # Second call: expected to be a cache hit.
        outcome = self.app.process("测试查询")

        assert outcome["source"] == "cache"
集成测试环境
使用 Docker Compose 搭建隔离的测试环境:
# docker-compose.test.yml
# Integration-test stack: a Chroma vector DB, Redis, and the pytest runner.
version: "3.8"

services:
  vectordb:
    image: chromadb/chroma:latest
    ports: ["8000:8000"]

  redis:
    image: redis:7-alpine
    ports: ["6379:6379"]

  test-runner:
    build: .
    command: pytest tests/integration/ -v
    # The host variables point at the sibling services by service name.
    environment:
      - CHROMA_HOST=vectordb
      - REDIS_HOST=redis
    depends_on: [vectordb, redis]
测试替身策略
在集成测试中直接使用真实的外部服务(如OpenAI API)成本高昂。建议使用测试替身(Test Double)来模拟外部依赖,同时保持组件间交互的真实性;在关键路径上,可以使用真实服务的沙箱环境进行验证。