Google Cloud LLM服务
---
title: "Google Cloud LLM服务"
description: "全面介绍Google Cloud平台上的大语言模型服务,包括Vertex AI、PaLM和Gemini集成方案"
tags: ["Google Cloud", "LLM服务", "Vertex AI", "PaLM", "Gemini"]
category: "llm"
icon: "🧠"
---
Google Cloud LLM服务
Google Cloud Platform(GCP)提供了强大的大语言模型(LLM)服务生态。Vertex AI作为统一的AI平台,集成了PaLM和Gemini等前沿模型,支持模型训练、微调和部署的全流程管理。本文将深入介绍Vertex AI平台、PaLM模型和Gemini模型的集成方案。
Vertex AI平台基础
Vertex AI是Google Cloud的统一AI/ML平台,提供了从数据准备到模型部署的完整工作流。通过Vertex AI SDK可以方便地调用各种LLM服务。
from google.cloud import aiplatform
from vertexai.preview import generative_models
import vertexai
# Configure the SDK with the target project and region before any model call.
# NOTE(review): aiplatform.init and vertexai.init appear to set the same global
# SDK configuration, so one of the two calls may be redundant — confirm.
aiplatform.init(project="your-project-id", location="us-central1")
vertexai.init(project="your-project-id", location="us-central1")
def call_gemini(prompt, model_name="gemini-pro"):
    """Send one prompt to a Gemini model and return the reply text.

    Args:
        prompt: Content passed straight to ``generate_content``.
        model_name: Model identifier handed to ``GenerativeModel``.

    Returns:
        The ``text`` attribute of the generation response.
    """
    gemini = generative_models.GenerativeModel(model_name)
    sampling = generative_models.GenerationConfig(
        max_output_tokens=2048,
        temperature=0.7,
        top_p=0.95,
    )
    reply = gemini.generate_content(prompt, generation_config=sampling)
    return reply.text
def call_gemini_with_context(prompt, context_documents):
    """Ask "gemini-pro" a question with extra documents appended to the prompt.

    Args:
        prompt: The question or instruction; sent as the first content item.
        context_documents: List of additional content items that is
            concatenated after ``prompt``.

    Returns:
        The ``text`` attribute of the generation response.
    """
    gemini = generative_models.GenerativeModel("gemini-pro")
    # Lower temperature and a larger output budget than call_gemini uses.
    sampling = generative_models.GenerationConfig(
        max_output_tokens=4096,
        temperature=0.3,
    )
    request_contents = [prompt] + context_documents
    reply = gemini.generate_content(request_contents, generation_config=sampling)
    return reply.text
Vertex AI的SDK提供了简洁的API接口。上面的示例演示的是单轮调用:通过GenerativeModel类加载指定版本的Gemini模型,再用GenerationConfig控制最大输出长度、temperature和top_p等生成参数;多轮对话可以通过GenerativeModel.start_chat()创建的会话来实现。
PaLM 2模型集成
PaLM 2是Google的大语言模型系列,通过Vertex AI API提供访问。PaLM 2在多语言理解和代码生成方面表现出色。
from google.cloud import aiplatform
import json
def call_palm2_chat(message, context=""):
    """Answer a user question with PaLM 2 Text Bison, grounded on a context.

    Despite the name, this issues a single text-generation request (no chat
    session is kept): the context and the question are merged into one prompt.

    Args:
        message: The user's question.
        context: Background text the answer should be based on.

    Returns:
        The generated answer as a string.
    """
    # PaLM 2 text models are exposed through vertexai.language_models; the
    # aiplatform module has no GenerativeModel / GenerationConfig classes.
    from vertexai.language_models import TextGenerationModel

    aiplatform.init(project="your-project-id", location="us-central1")
    model = TextGenerationModel.from_pretrained("text-bison@002")
    prompt = f"""上下文:{context}
用户问题:{message}
请基于上下文回答问题。"""
    # TextGenerationModel takes sampling parameters directly as keyword
    # arguments to predict() rather than through a config object.
    response = model.predict(
        prompt,
        max_output_tokens=1024,
        temperature=0.4,
        top_p=0.8,
    )
    return response.text
def palm2_code_generation(description, language="python"):
    """Generate source code from a natural-language description with Code Bison.

    This function does not initialise the SDK itself; it relies on the
    project/location having been configured before it is called.

    Args:
        description: What the generated code should do.
        language: Target programming language named in the prompt.

    Returns:
        The generated code as a string.
    """
    # Code Bison is served by CodeGenerationModel in vertexai.language_models;
    # the aiplatform module has no GenerativeModel / GenerationConfig classes.
    from vertexai.language_models import CodeGenerationModel

    model = CodeGenerationModel.from_pretrained("code-bison@001")
    prompt = f"""请根据以下描述生成{language}代码:
{description}
要求:
1. 代码完整可运行
2. 包含必要的注释
3. 遵循最佳实践"""
    # Low temperature keeps the generated code close to deterministic.
    response = model.predict(
        prefix=prompt,
        max_output_tokens=2048,
        temperature=0.2,
    )
    return response.text
def palm2_embedding(text):
    """Return the embedding vector of ``text`` from textembedding-gecko@003.

    Args:
        text: The string to embed.

    Returns:
        The ``values`` of the first (and only) embedding returned.
    """
    # TextEmbeddingModel lives in vertexai.language_models, not in aiplatform.
    from vertexai.language_models import TextEmbeddingModel

    model = TextEmbeddingModel.from_pretrained("textembedding-gecko@003")
    # The API is batch-oriented: wrap the single text and unwrap the result.
    embeddings = model.get_embeddings([text])
    return embeddings[0].values
本节示例用到了PaLM 2系列中的三类模型:Text Bison用于文本生成,Code Bison专门用于代码生成,Gecko(textembedding-gecko)用于文本向量化。这种细分让开发者可以根据任务选择最合适的模型。
Gemini多模态模型
Gemini是Google最新一代多模态模型,支持文本、图像和代码的统一理解。在Vertex AI中可以通过API直接调用。
from vertexai.preview.generative_models import GenerativeModel, Part, Image
import vertexai
def analyze_image_with_gemini(image_path, question, mime_type=None):
    """Ask "gemini-pro-vision" a question about a local image file.

    Args:
        image_path: Path of the image file to read.
        question: Text question sent together with the image.
        mime_type: MIME type to declare for the image. When omitted it is
            guessed from the file extension, falling back to "image/png"
            if the extension is unknown or not an image type.

    Returns:
        The ``text`` attribute of the generation response.
    """
    import mimetypes
    import os

    vertexai.init(project="your-project-id", location="us-central1")
    model = GenerativeModel("gemini-pro-vision")
    if mime_type is None:
        # Previously every file was labelled image/png, which mislabels
        # JPEG/WebP/GIF inputs; derive the type from the extension instead.
        guessed, _ = mimetypes.guess_type(os.fsdecode(image_path))
        if guessed and guessed.startswith("image/"):
            mime_type = guessed
        else:
            mime_type = "image/png"
    with open(image_path, "rb") as f:
        image_data = f.read()
    image_part = Part.from_data(data=image_data, mime_type=mime_type)
    response = model.generate_content(
        [image_part, question],
        generation_config={"max_output_tokens": 1024},
    )
    return response.text
def multimodal_analysis(contents):
    """Send a mixed sequence of text and images to "gemini-pro-vision".

    Args:
        contents: Iterable of dicts. Items with ``"type" == "text"`` supply
            ``"content"``; items with ``"type" == "image"`` supply ``"path"``
            and ``"mime"``. Items of any other type are skipped.

    Returns:
        The ``text`` attribute of the generation response.
    """
    vertexai.init(project="your-project-id", location="us-central1")
    gemini = GenerativeModel("gemini-pro-vision")

    request_parts = []
    for entry in contents:
        kind = entry["type"]
        if kind == "text":
            request_parts.append(entry["content"])
            continue
        if kind != "image":
            # Unknown item types are ignored, not rejected.
            continue
        with open(entry["path"], "rb") as handle:
            payload = handle.read()
        request_parts.append(Part.from_data(data=payload, mime_type=entry["mime"]))

    reply = gemini.generate_content(request_parts)
    return reply.text
Gemini的多模态能力使其能够同时处理图像和文本输入,适用于图像描述、文档分析和视觉问答等场景。Part.from_data方法接收原始字节数据及其MIME类型,因此可以传入PNG、JPEG等不同格式的内容。
Vertex AI Search:智能搜索
Vertex AI Search结合了LLM和搜索引擎,可以构建企业级的智能搜索和问答系统。
# The beta client package is named discoveryengine_v1beta (there is no
# discoveryengine_v1beta1 module).
from google.cloud import discoveryengine_v1beta as discoveryengine


def create_search_app(project_id, location, collection_id):
    """Build the resource path of a data store's default serving config.

    Despite the name, nothing is created remotely: the function only formats
    the serving-config resource name used by search requests.

    Args:
        project_id: Google Cloud project ID.
        location: Location of the data store.
        collection_id: Passed as the ``data_store`` component of the path.

    Returns:
        The serving-config resource path as a string.
    """
    # serving_config_path is a pure string formatter, so call it on the class
    # instead of instantiating a client (which would need credentials).
    return discoveryengine.SearchServiceClient.serving_config_path(
        project=project_id,
        location=location,
        data_store=collection_id,
        serving_config="default_config",
    )
def _search_result_fields(document):
    """Extract the title and first snippet from one search result document."""
    # Title and snippets are read from derived_struct_data, with struct_data
    # as a fallback; the Document message has no "metadata" field.
    data = document.derived_struct_data or document.struct_data or {}
    snippets = data.get("snippets") or []
    if snippets:
        snippet = snippets[0].get("snippet", "")
    else:
        snippet = data.get("snippet", "")
    return {"title": data.get("title", ""), "snippet": snippet}


def search_with_llm(query, project_id, location, data_store_id):
    """Run a Vertex AI Search query and return an LLM summary plus results.

    Args:
        query: The search query text.
        project_id: Google Cloud project ID.
        location: Location of the data store.
        data_store_id: ID of the data store to search.

    Returns:
        A dict with ``"summary"`` (the summary text, or "" when no summary was
        produced) and ``"results"`` (a list of ``{"title", "snippet"}`` dicts
        for the first page of results).
    """
    client = discoveryengine.SearchServiceClient()
    serving_config = client.serving_config_path(
        project=project_id,
        location=location,
        data_store=data_store_id,
        serving_config="default_config",
    )
    # Summary and snippet options belong to ContentSearchSpec; SearchRequest
    # has no top-level summary_spec and SummarySpec has no summary_model.
    content_search_spec = discoveryengine.SearchRequest.ContentSearchSpec(
        snippet_spec=discoveryengine.SearchRequest.ContentSearchSpec.SnippetSpec(
            return_snippet=True,
        ),
        summary_spec=discoveryengine.SearchRequest.ContentSearchSpec.SummarySpec(
            # Summarise over the same number of results that is requested.
            summary_result_count=5,
            include_citations=True,
        ),
    )
    request = discoveryengine.SearchRequest(
        serving_config=serving_config,
        query=query,
        page_size=5,
        content_search_spec=content_search_spec,
    )
    response = client.search(request)
    summary = response.summary
    # response.results holds only the first page, which keeps this to one call.
    results = [_search_result_fields(r.document) for r in response.results]
    return {"summary": summary.summary_text if summary else "", "results": results}
Vertex AI Search不仅返回搜索结果,还能利用LLM生成摘要和回答。通过配置SummarySpec,可以控制摘要的生成方式和引用标注。
部署与成本优化
GCP的LLM服务需要合理规划项目结构和成本控制。通过IAM权限管理和配额设置,可以确保服务的安全和经济运行。
import os
from google.cloud import monitoring_v3
from google.protobuf import timestamp_pb2
import time
def monitor_vertex_usage(project_id, window_seconds=86400, cost_per_second=0.00000167):
    """Sum a Vertex AI usage metric over a recent window and estimate its cost.

    Args:
        project_id: Google Cloud project ID whose metrics are queried.
        window_seconds: Length of the look-back window ending now, in seconds.
            Defaults to 24 hours.
        cost_per_second: Price applied to each unit of usage when estimating
            cost. NOTE(review): the default is an illustrative rate — replace
            it with the actual price for your deployment.

    Returns:
        A dict with ``"total_usage_seconds"`` (sum of all point values in the
        window) and ``"estimated_cost"`` (that sum times ``cost_per_second``).
    """
    client = monitoring_v3.MetricServiceClient()
    project_name = f"projects/{project_id}"

    # Split the current time into whole seconds and nanoseconds, as the
    # timestamp fields of TimeInterval expect.
    now = time.time()
    seconds = int(now)
    nanos = int((now - seconds) * 10**9)
    interval = monitoring_v3.TimeInterval(
        {
            "end_time": {"seconds": seconds, "nanos": nanos},
            "start_time": {"seconds": seconds - int(window_seconds), "nanos": nanos},
        }
    )
    # NOTE(review): confirm this metric type exists for your deployment.
    results = client.list_time_series(
        request={
            "name": project_name,
            "filter": 'metric.type="aiplatform.googleapis.com/prediction/online/acc_usage_time"',
            "interval": interval,
            "view": monitoring_v3.ListTimeSeriesRequest.TimeSeriesView.FULL,
        }
    )
    # A point carries either an int64 or a double value; the unset member
    # reads as zero, so adding both counts the point whichever type is used.
    total_usage = sum(
        float(point.value.int64_value) + point.value.double_value
        for ts in results
        for point in ts.points
    )
    return {
        "total_usage_seconds": total_usage,
        "estimated_cost": total_usage * cost_per_second,
    }
def set_budget_alert(project_id, budget_amount):
    """Create a USD billing budget for the project with 80% and 100% alerts.

    The billing account is read from the ``BILLING_ACCOUNT`` environment
    variable.

    Args:
        project_id: Google Cloud project ID the budget is scoped to.
        budget_amount: Budget size in USD; truncated to whole units.

    Returns:
        The budget object returned by ``create_budget``.

    Raises:
        KeyError: If ``BILLING_ACCOUNT`` is not set in the environment.
    """
    # The surrounding snippet never imports the budgets client; import it here
    # under the name the code uses.
    from google.cloud.billing import budgets_v1 as billing_budgets_v1

    billing_client = billing_budgets_v1.BudgetServiceClient()
    budget = billing_budgets_v1.Budget(
        display_name="LLM服务预算",
        # The filter message is named Filter (there is no BudgetFilter class).
        budget_filter=billing_budgets_v1.Filter(
            projects=[f"projects/{project_id}"],
            # NOTE(review): the Budgets API may expect a Cloud Billing service
            # ID here rather than an API hostname — confirm the correct value.
            services=["services/aiplatform.googleapis.com"],
        ),
        amount=billing_budgets_v1.BudgetAmount(
            # specified_amount is a Money value whose units field is an
            # integer, so pass a mapping with an int rather than a string.
            specified_amount={
                "currency_code": "USD",
                "units": int(budget_amount),
            },
        ),
        threshold_rules=[
            billing_budgets_v1.ThresholdRule(threshold_percent=0.8),
            billing_budgets_v1.ThresholdRule(threshold_percent=1.0),
        ],
    )
    return billing_client.create_budget(
        parent=f"billingAccounts/{os.environ['BILLING_ACCOUNT']}",
        budget=budget,
    )
通过Cloud Monitoring监控Vertex AI的使用情况,结合Billing Budget设置预算告警,可以有效控制LLM服务的成本。建议在项目初期就配置好预算和告警阈值。
总结
Google Cloud提供了完整的LLM服务生态,从Vertex AI的统一平台到Gemini的多模态能力,再到Vertex AI Search的智能搜索,覆盖了各种AI应用场景。开发者可以利用GCP的基础设施优势,构建高效、可扩展的LLM应用。