高级RAG技术:从基础检索到智能增强生成
高级RAG技术:从基础检索到智能增强生成
RAG回顾
检索增强生成(RAG)通过检索相关文档来增强LLM的生成能力:
用户问题 → 检索相关文档 → LLM基于文档生成答案
高级RAG技术
1. 查询增强
用户的原始查询可能不够精确,需要在检索之前先进行增强。
查询重写
class QueryRewriter:
    """Rewrite a raw user query into a more precise search query via an LLM."""

    def __init__(self, llm):
        """Store the LLM client.

        Args:
            llm: Object exposing ``generate(prompt)``; it is expected to
                return the completion as a string.
        """
        self.llm = llm

    def rewrite(self, query):
        """Ask the LLM for a sharper version of ``query``.

        Args:
            query: The user's original query text.

        Returns:
            The LLM's rewritten query with surrounding whitespace removed.
        """
        prompt = (
            "\n"
            "请将以下用户查询重写为更精确的搜索查询:\n"
            f"原始查询:{query}\n"
            "重写后的查询:\n"
        )
        # Completions commonly carry leading/trailing newlines; strip them so
        # the result can be handed straight to a retriever.
        return self.llm.generate(prompt).strip()
# Example: rewrite a colloquial query ("怎么减肥" means "how to lose weight").
# NOTE: `llm` is assumed to be defined elsewhere; it is not created in this snippet.
rewriter = QueryRewriter(llm)
original = "怎么减肥"
rewritten = rewriter.rewrite(original)
# Sample output: 有效的减肥方法 科学减重 饮食运动建议
查询分解
class QueryDecomposer:
    """Split a complex question into simpler sub-questions via an LLM."""

    def __init__(self, llm):
        """Store the LLM client.

        Args:
            llm: Object exposing ``generate(prompt)``; it is expected to
                return the completion as a string.
        """
        self.llm = llm

    def decompose(self, query):
        """Return the sub-questions the LLM proposes for ``query``.

        Args:
            query: The complex question to break down.

        Returns:
            A list of sub-question strings (possibly empty if the completion
            contains no usable lines).
        """
        prompt = (
            "\n"
            "将以下复杂问题分解为多个简单子问题:\n"
            f"问题:{query}\n"
            "子问题:\n"
            "1.\n"
            "2.\n"
            "3.\n"
        )
        response = self.llm.generate(prompt)
        return self.parse_subquestions(response)

    def parse_subquestions(self, response):
        """Extract one sub-question per non-empty line of ``response``.

        Leading list markers such as ``1.``, ``2)``, ``3、`` or ``-`` are
        removed; lines that contain nothing but a marker are skipped.

        Args:
            response: Raw completion text from the LLM.

        Returns:
            A list of cleaned sub-question strings in their original order.
        """
        import re

        marker = re.compile(r"^\s*(?:[0-9]+\s*[.、)．)]|[-*•])\s*")
        cleaned = (marker.sub("", line).strip() for line in response.splitlines())
        return [text for text in cleaned if text]
# Example: decompose a comparison question about React and Vue.
# NOTE: `llm` is assumed to be defined elsewhere; it is not created in this snippet.
decomposer = QueryDecomposer(llm)
subquestions = decomposer.decompose(
"比较React和Vue的优缺点,并说明各自适用场景"
)
# Sample output: ["React的优点是什么", "Vue的优点是什么", "React适用什么场景", "Vue适用什么场景"]
HyDE(假设性文档嵌入)
class HyDE:
    """Hypothetical Document Embeddings: search with an LLM-drafted answer.

    Instead of embedding the query itself, the LLM first writes a plausible
    answer and that answer's embedding is used as the search vector.
    """

    def __init__(self, llm, embedder, vector_db):
        """Store the collaborators.

        Args:
            llm: Object exposing ``generate(prompt)``.
            embedder: Object exposing ``encode(text)``.
            vector_db: Object exposing ``search(embedding, top_k)``.
        """
        self.llm = llm
        self.embedder = embedder
        self.vector_db = vector_db

    def search(self, query, top_k=5):
        """Retrieve documents close to a hypothetical answer for ``query``.

        Args:
            query: The user's question.
            top_k: Number of results requested from the vector store.

        Returns:
            Whatever ``vector_db.search`` returns for the answer embedding.
        """
        prompt = (
            "\n"
            "请回答以下问题(即使你不确定,也请提供一个合理的答案):\n"
            f"问题:{query}\n"
            "答案:\n"
        )
        hypothetical_answer = self.llm.generate(prompt)
        # The draft answer, not the query, is what gets embedded and searched.
        answer_embedding = self.embedder.encode(hypothetical_answer)
        return self.vector_db.search(answer_embedding, top_k)
2. 检索优化
混合检索
结合稠密检索和稀疏检索的优势。
class HybridRetriever:
    """Blend dense and sparse retrieval scores into a single ranking."""

    def __init__(self, dense_retriever, sparse_retriever, alpha=0.5):
        """Store both retrievers and the blending weight.

        Args:
            dense_retriever: Object exposing ``search(query, top_k)`` that
                yields ``(doc, score)`` pairs where ``doc`` has an ``id``.
            sparse_retriever: Same contract as ``dense_retriever``.
            alpha: Weight of the dense score; the sparse score is weighted
                by ``1 - alpha``.
        """
        self.dense_retriever = dense_retriever
        self.sparse_retriever = sparse_retriever
        self.alpha = alpha

    @staticmethod
    def _normalized_scores(results):
        """Map ``doc.id`` to its score divided by the best score in ``results``.

        Scores are left untouched when the best score is not positive:
        dividing by zero would raise, and dividing by a negative maximum
        would invert the ranking.
        """
        scores = {doc.id: score for doc, score in results}
        peak = max(scores.values(), default=0)
        if peak <= 0:
            return scores
        return {doc_id: score / peak for doc_id, score in scores.items()}

    def search(self, query, top_k=10):
        """Return the ids of the ``top_k`` documents by blended score.

        Args:
            query: Query passed unchanged to both retrievers.
            top_k: Number of candidates requested from each retriever and
                the maximum number of ids returned.

        Returns:
            A list of document ids, best first. A document missing from one
            retriever's results contributes 0 for that retriever.
        """
        dense = self._normalized_scores(self.dense_retriever.search(query, top_k))
        sparse = self._normalized_scores(self.sparse_retriever.search(query, top_k))

        # dict.fromkeys keeps first-seen order, so ties are broken
        # deterministically (dense results first) rather than by set order.
        candidates = dict.fromkeys([*dense, *sparse])
        hybrid_scores = {
            doc_id: self.alpha * dense.get(doc_id, 0)
            + (1 - self.alpha) * sparse.get(doc_id, 0)
            for doc_id in candidates
        }

        ranked = sorted(hybrid_scores, key=hybrid_scores.get, reverse=True)
        return ranked[:top_k]
多向量检索
为文档生成多个表示,提高检索召回率。
class MultiVectorRetriever:
    """Index each document under several vectors to improve recall.

    A document is stored under the embedding of its full content, of an
    LLM-written summary, and of each LLM-generated question about it.
    """

    def __init__(self, llm, embedder, vector_db):
        """Store the collaborators.

        Args:
            llm: Object exposing ``generate(prompt)``; it is expected to
                return the completion as a string.
            embedder: Object exposing ``encode(text)``.
            vector_db: Object exposing ``insert(embedding, document, metadata)``.
        """
        self.llm = llm
        self.embedder = embedder
        self.vector_db = vector_db

    def index_document(self, document):
        """Insert ``document`` into the vector store under all its vectors.

        Args:
            document: Object with a ``content`` attribute holding the text.
        """
        doc_embedding = self.embedder.encode(document.content)

        summary = self.llm.generate(f"请为以下文档生成摘要:\n{document.content}")
        summary_embedding = self.embedder.encode(summary)

        questions = self.generate_questions(document.content)
        question_embeddings = [self.embedder.encode(q) for q in questions]

        # Every vector is computed before the first insert so that a failure
        # in the LLM or the embedder does not leave a half-indexed document.
        self.vector_db.insert(doc_embedding, document, {"type": "document"})
        self.vector_db.insert(summary_embedding, document, {"type": "summary"})
        for q_emb, question in zip(question_embeddings, questions):
            self.vector_db.insert(
                q_emb, document, {"type": "question", "question": question}
            )

    def generate_questions(self, content):
        """Ask the LLM for questions that ``content`` could answer.

        Args:
            content: The document text.

        Returns:
            A list of question strings parsed from the completion.
        """
        prompt = (
            "\n"
            "请为以下文档生成3个可能被问到的问题:\n"
            f"文档:{content}\n"
            "问题:\n"
            "1.\n"
            "2.\n"
            "3.\n"
        )
        response = self.llm.generate(prompt)
        return self.parse_questions(response)

    def parse_questions(self, response):
        """Extract one question per non-empty line of ``response``.

        Leading list markers such as ``1.``, ``2)``, ``3、`` or ``-`` are
        removed; lines that contain nothing but a marker are skipped.

        Args:
            response: Raw completion text from the LLM.

        Returns:
            A list of cleaned question strings in their original order.
        """
        import re

        marker = re.compile(r"^\s*(?:[0-9]+\s*[.、)．)]|[-*•])\s*")
        cleaned = (marker.sub("", line).strip() for line in response.splitlines())
        return [text for text in cleaned if text]
3. 重排序
检索后对结果进行重排序,提高相关性。
class Reranker:
    """Re-order retrieved documents by a relevance model's score."""

    def __init__(self, rerank_model):
        """Store the scoring model.

        Args:
            rerank_model: Object exposing ``score(query, text)`` that returns
                a comparable relevance score (higher means more relevant).
        """
        self.model = rerank_model

    def rerank(self, query, documents, top_k=5):
        """Return the ``top_k`` most relevant documents, best first.

        Args:
            query: The query each document is scored against.
            documents: Iterable of objects with a ``content`` attribute.
            top_k: Maximum number of documents to return.

        Returns:
            A list of documents sorted by descending score; documents with
            equal scores keep their input order.
        """
        scored = [(doc, self.model.score(query, doc.content)) for doc in documents]
        ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
        return [doc for doc, _score in ranked[:top_k]]
# 使用Cohere重排序
from cohere import Client
class CohereReranker:
    """Re-order retrieved documents with Cohere's hosted rerank endpoint."""

    def __init__(self, api_key, model="rerank-english-v2.0"):
        """Create the Cohere client.

        Args:
            api_key: Cohere API key passed to ``Client``.
            model: Name of the rerank model to call. The default is an
                English model; for other languages pass a model suited to
                the corpus (see Cohere's model list).
        """
        self.client = Client(api_key)
        self.model = model

    def rerank(self, query, documents, top_k=5):
        """Return the ``top_k`` documents Cohere ranks highest for ``query``.

        Args:
            query: The query to rank against.
            documents: Sequence of objects with a ``content`` attribute.
            top_k: Maximum number of documents to return.

        Returns:
            A list of the original document objects in the order returned
            by the API. Empty when ``documents`` is empty.
        """
        if not documents:
            # Nothing to rank; avoid a pointless (and billable) API call.
            return []

        response = self.client.rerank(
            query=query,
            documents=[doc.content for doc in documents],
            top_n=top_k,
            model=self.model,
        )
        # Each result carries the index of the document in the request list.
        return [documents[result.index] for result in response.results]
4. 自适应RAG
根据问题复杂度动态选择策略。
class AdaptiveRAG:
    """Pick a retrieval strategy based on how complex the question is."""

    _COMPLEXITY_LEVELS = ("simple", "medium", "complex")

    def __init__(self, llm, simple_retriever, complex_retriever, reranker=None):
        """Store the collaborators.

        Args:
            llm: Object exposing ``generate(prompt)`` returning a string.
            simple_retriever: Object exposing ``search(query, top_k=...)``.
            complex_retriever: Object exposing ``search(query, top_k=...)``.
            reranker: Optional object exposing
                ``rerank(query, docs, top_k=...)``. When omitted, the
                medium path keeps the first five retrieved documents.
        """
        self.llm = llm
        self.simple_retriever = simple_retriever
        self.complex_retriever = complex_retriever
        self.reranker = reranker

    def answer(self, query):
        """Answer ``query`` using the strategy matching its complexity.

        * simple: retrieve 3 documents and generate.
        * medium: retrieve 10 documents, keep the best 5, generate.
        * complex: decompose into sub-questions, retrieve 3 documents for
          each, and generate from the combined set.

        Args:
            query: The user's question.

        Returns:
            The answer produced by ``generate_answer``.
        """
        complexity = self.analyze_complexity(query)

        if complexity == "simple":
            docs = self.simple_retriever.search(query, top_k=3)
        elif complexity == "medium":
            docs = self.simple_retriever.search(query, top_k=10)
            if self.reranker is not None:
                docs = self.reranker.rerank(query, docs, top_k=5)
            else:
                # No reranker configured: trust the retriever's own ordering.
                docs = list(docs)[:5]
        else:
            docs = []
            for subquestion in self.decompose_query(query):
                docs.extend(self.complex_retriever.search(subquestion, top_k=3))

        return self.generate_answer(query, docs)

    def analyze_complexity(self, query):
        """Classify ``query`` as ``"simple"``, ``"medium"`` or ``"complex"``.

        The label that appears first in the LLM's reply wins, so decorated
        replies such as ``"Simple."`` are still recognised. A reply that
        contains none of the labels is treated as ``"complex"``.

        Args:
            query: The user's question.

        Returns:
            One of ``"simple"``, ``"medium"``, ``"complex"``.
        """
        prompt = (
            "\n"
            "分析以下问题的复杂度(simple/medium/complex):\n"
            f"问题:{query}\n"
            "复杂度:\n"
        )
        response = self.llm.generate(prompt).strip().lower()
        found = [
            (response.find(level), level)
            for level in self._COMPLEXITY_LEVELS
            if level in response
        ]
        return min(found)[1] if found else "complex"

    def decompose_query(self, query):
        """Ask the LLM to split ``query`` into simpler sub-questions.

        Args:
            query: The complex question.

        Returns:
            A list of sub-question strings; ``[query]`` when the reply
            yields none, so retrieval still runs on the original question.
        """
        import re

        prompt = (
            "\n"
            "将以下复杂问题分解为多个简单子问题:\n"
            f"问题:{query}\n"
            "子问题:\n"
            "1.\n"
            "2.\n"
            "3.\n"
        )
        response = self.llm.generate(prompt)
        marker = re.compile(r"^\s*(?:[0-9]+\s*[.、)．)]|[-*•])\s*")
        cleaned = (marker.sub("", line).strip() for line in response.splitlines())
        subquestions = [text for text in cleaned if text]
        return subquestions or [query]

    def generate_answer(self, query, docs):
        """Generate an answer to ``query`` grounded in ``docs``.

        Args:
            query: The user's question.
            docs: Retrieved documents. Each item's ``content`` attribute is
                used when present; otherwise the item itself is rendered
                with ``str`` (the retrievers' return type is not fixed here).

        Returns:
            The LLM's completion.
        """
        context = "\n\n".join(str(getattr(doc, "content", doc)) for doc in docs)
        prompt = (
            "请根据以下参考文档回答问题。\n"
            f"参考文档:\n{context}\n"
            f"问题:{query}\n"
            "答案:\n"
        )
        return self.llm.generate(prompt)
生产级RAG系统
1. 文档处理管道
class DocumentProcessor:
    """Chunk documents and embed every chunk for indexing."""

    def __init__(self, chunker=None, embedder=None):
        """Set up the chunker and embedder.

        Args:
            chunker: Object exposing ``chunk(text)``. Defaults to a new
                ``SemanticChunker()``.
            embedder: Object exposing ``encode(text)``. Defaults to a new
                ``EmbeddingModel()``.
        """
        self.chunker = chunker if chunker is not None else SemanticChunker()
        self.embedder = embedder if embedder is not None else EmbeddingModel()

    def process(self, documents):
        """Turn documents into a flat list of embedded chunk records.

        Args:
            documents: Iterable of objects with ``content``, ``source`` and
                ``title`` attributes.

        Returns:
            A list of dicts with keys ``content``, ``embedding`` and
            ``metadata``. ``metadata["chunk_id"]`` is the record's position
            in the returned list, so it keeps counting across documents
            rather than restarting for each one.
        """
        records = []
        for document in documents:
            for chunk in self.chunker.chunk(document.content):
                records.append(
                    {
                        "content": chunk,
                        "embedding": self.embedder.encode(chunk),
                        "metadata": {
                            "source": document.source,
                            "title": document.title,
                            "chunk_id": len(records),
                        },
                    }
                )
        return records
2. 评估指标
class RAGEvaluator:
    """Score a RAG answer with an LLM acting as the judge."""

    def __init__(self, llm):
        """Store the LLM client.

        Args:
            llm: Object exposing ``generate(prompt)``.
        """
        self.llm = llm

    def evaluate(self, query, answer, contexts):
        """Score retrieval quality, generation quality and faithfulness.

        Args:
            query: The user's question.
            answer: The generated answer.
            contexts: The retrieved contexts shown to the generator.

        Returns:
            A dict with float values under ``"retrieval"``,
            ``"generation"`` and ``"faithfulness"``.

        Raises:
            ValueError: If any judge reply contains no numeric score.
        """
        retrieval_score = self.evaluate_retrieval(query, contexts)
        generation_score = self.evaluate_generation(query, answer, contexts)
        faithfulness_score = self.evaluate_faithfulness(answer, contexts)
        return {
            "retrieval": retrieval_score,
            "generation": generation_score,
            "faithfulness": faithfulness_score,
        }

    def evaluate_retrieval(self, query, contexts):
        """Score how relevant ``contexts`` are to ``query``."""
        prompt = (
            "\n"
            "评估以下上下文与问题的相关性:\n"
            f"问题:{query}\n"
            f"上下文:{contexts}\n"
            "评分(0-1):\n"
        )
        return self._parse_score(self.llm.generate(prompt))

    def evaluate_generation(self, query, answer, contexts):
        """Score how well ``answer`` addresses ``query`` given ``contexts``."""
        prompt = (
            "\n"
            "评估以下答案对问题的回答质量:\n"
            f"问题:{query}\n"
            f"答案:{answer}\n"
            f"上下文:{contexts}\n"
            "评分(0-1):\n"
        )
        return self._parse_score(self.llm.generate(prompt))

    def evaluate_faithfulness(self, answer, contexts):
        """Score whether ``answer`` is supported by ``contexts``."""
        prompt = (
            "\n"
            "评估以下答案是否忠实于提供的上下文:\n"
            f"答案:{answer}\n"
            f"上下文:{contexts}\n"
            "评分(0-1):\n"
        )
        return self._parse_score(self.llm.generate(prompt))

    @staticmethod
    def _parse_score(response):
        """Convert a judge reply to a float.

        A reply that is a bare number is converted directly. Otherwise the
        first decimal number in the text is used, because judge models
        often wrap the score in extra words.

        Raises:
            ValueError: If the reply contains no number at all.
        """
        import re

        text = str(response).strip()
        try:
            return float(text)
        except ValueError:
            pass

        match = re.search(r"[0-9]+(?:\.[0-9]+)?", text)
        if match is None:
            raise ValueError(f"LLM response contains no numeric score: {response!r}")
        return float(match.group())
3. 监控和可观测性
class RAGMonitor:
    """Track running query metrics and append a per-query log record."""

    def __init__(self, log_path="rag_queries.jsonl"):
        """Initialise the counters.

        Args:
            log_path: File that receives one JSON line per query. Pass
                ``None`` to keep metrics in memory only.
        """
        self.log_path = log_path
        self.metrics = {
            "queries": 0,
            "avg_latency": 0,
            "avg_retrieval_score": 0,
            "errors": 0,
        }

    def log_query(self, query, response, latency, retrieval_score):
        """Record one served query.

        Updates the query count and the running averages, then writes the
        detailed record via ``log_to_file``.

        Args:
            query: The user's question.
            response: The response returned for it.
            latency: Time taken to serve the query.
            retrieval_score: Retrieval quality score for the query.
        """
        self.metrics["queries"] += 1
        n = self.metrics["queries"]

        # Running mean: fold the new sample into the mean of the previous n-1.
        self.metrics["avg_latency"] = (
            self.metrics["avg_latency"] * (n - 1) + latency
        ) / n
        self.metrics["avg_retrieval_score"] = (
            self.metrics["avg_retrieval_score"] * (n - 1) + retrieval_score
        ) / n

        self.log_to_file(query, response, latency, retrieval_score)

    def log_to_file(self, query, response, latency, retrieval_score):
        """Append the query record to ``log_path`` as one JSON line.

        Does nothing when ``log_path`` is ``None``.
        """
        if self.log_path is None:
            return

        import json

        record = {
            "query": query,
            "response": response,
            "latency": latency,
            "retrieval_score": retrieval_score,
        }
        # default=str is deliberate: a response object that is not JSON
        # serialisable is logged by its string form instead of aborting.
        line = json.dumps(record, ensure_ascii=False, default=str)
        with open(self.log_path, "a", encoding="utf-8") as log_file:
            log_file.write(line + "\n")
总结
高级RAG技术通过查询增强、检索优化、重排序和自适应策略,显著提升了RAG系统的性能。构建生产级RAG系统还需要考虑文档处理、评估指标以及监控和可观测性。