← 返回首页
🧠

Google Cloud LLM服务

📂 llm ⏱ 3 min 437 words

---
title: "Google Cloud LLM服务"
description: "全面介绍Google Cloud平台上的大语言模型服务,包括Vertex AI、PaLM和Gemini集成方案"
tags: ["Google Cloud", "LLM服务", "Vertex AI", "PaLM", "Gemini"]
category: "llm"
icon: "🧠"
---

Google Cloud LLM服务

Google Cloud Platform(GCP)提供了强大的大语言模型(LLM)服务生态。Vertex AI作为统一的AI平台,集成了PaLM和Gemini等前沿模型,支持模型训练、微调和部署的全流程管理。本文将深入介绍Vertex AI平台、PaLM模型和Gemini模型的集成方案。

Vertex AI平台基础

Vertex AI是Google Cloud的统一AI/ML平台,提供了从数据准备到模型部署的完整工作流。通过Vertex AI SDK可以方便地调用各种LLM服务。

from google.cloud import aiplatform
from vertexai.preview import generative_models
import vertexai

# Configure both SDK entry points with the same project and region before
# any model is instantiated below.
aiplatform.init(
    project="your-project-id",
    location="us-central1",
)
vertexai.init(
    project="your-project-id",
    location="us-central1",
)

def call_gemini(prompt, model_name="gemini-pro"):
    """Generate text for a single prompt with a Gemini model.

    Args:
        prompt: Content handed to ``GenerativeModel.generate_content``.
        model_name: Name of the model to load; defaults to ``"gemini-pro"``.

    Returns:
        The ``text`` attribute of the model response.
    """
    # Fixed sampling settings: up to 2048 output tokens, temperature 0.7,
    # nucleus sampling at 0.95.
    sampling = generative_models.GenerationConfig(
        max_output_tokens=2048,
        temperature=0.7,
        top_p=0.95,
    )
    gemini = generative_models.GenerativeModel(model_name)
    reply = gemini.generate_content(prompt, generation_config=sampling)
    return reply.text

def call_gemini_with_context(prompt, context_documents):
    """Ask Gemini a question together with supporting context documents.

    Args:
        prompt: The question or instruction; sent as the first content item.
        context_documents: Iterable of additional content items appended
            after the prompt. ``None`` or an empty iterable sends the prompt
            alone; a single string is treated as one document.

    Returns:
        The ``text`` attribute of the model response.
    """
    model = generative_models.GenerativeModel("gemini-pro")

    # Generation settings: up to 4096 output tokens at temperature 0.3.
    generation_config = generative_models.GenerationConfig(
        max_output_tokens=4096,
        temperature=0.3,
    )

    # A bare string would otherwise be unpacked character by character.
    if isinstance(context_documents, str):
        context_documents = [context_documents]

    # Unpack rather than concatenate lists so that tuples, generators and
    # None are accepted in addition to lists.
    contents = [prompt, *(context_documents or ())]

    response = model.generate_content(
        contents,
        generation_config=generation_config,
    )

    return response.text

Vertex AI的SDK提供了简洁的API接口,支持单轮和多轮对话:上面的示例演示的是单轮调用,多轮对话可以通过model.start_chat()创建会话来实现。通过GenerativeModel类可以加载不同版本的Gemini模型,GenerationConfig则用于控制最大输出长度、temperature、top_p等生成参数。

PaLM 2模型集成

PaLM 2是Google的大语言模型系列,通过Vertex AI API提供访问。PaLM 2在多语言理解和代码生成方面表现出色。

from google.cloud import aiplatform
import json

def call_palm2_chat(message, context=""):
    """Answer a user message with PaLM 2 (``text-bison@002``) given a context.

    Args:
        message: The user's question, embedded in the prompt template.
        context: Background text embedded in the prompt; defaults to an
            empty string.

    Returns:
        The ``text`` attribute of the prediction response.
    """
    # PaLM 2 text models are served through the language-model classes of the
    # Vertex AI SDK (``predict``), not through the Gemini ``GenerativeModel``
    # interface, and ``google.cloud.aiplatform`` exposes neither class.
    from vertexai.language_models import TextGenerationModel

    aiplatform.init(project="your-project-id", location="us-central1")

    model = TextGenerationModel.from_pretrained("text-bison@002")

    prompt = f"""上下文:{context}

用户问题:{message}

请基于上下文回答问题。"""

    response = model.predict(
        prompt,
        max_output_tokens=1024,
        temperature=0.4,
        top_p=0.8,
    )

    return response.text

def palm2_code_generation(description, language="python"):
    """Generate source code from a natural-language description with Code Bison.

    This function does not initialise the SDK itself; it relies on
    initialisation having been done before it is called.

    Args:
        description: What the generated code should do.
        language: Target programming language named in the prompt; defaults
            to ``"python"``.

    Returns:
        The ``text`` attribute of the prediction response.
    """
    # Code Bison is a PaLM 2 code model and is served through
    # ``CodeGenerationModel.predict``; ``google.cloud.aiplatform`` has no
    # ``GenerativeModel`` class.
    from vertexai.language_models import CodeGenerationModel

    model = CodeGenerationModel.from_pretrained("code-bison@001")

    prompt = f"""请根据以下描述生成{language}代码:

{description}

要求:
1. 代码完整可运行
2. 包含必要的注释
3. 遵循最佳实践"""

    response = model.predict(
        prefix=prompt,
        max_output_tokens=2048,
        temperature=0.2,
    )

    return response.text

def palm2_embedding(text):
    """Return the embedding vector of ``text`` from ``textembedding-gecko@003``.

    Args:
        text: The string to embed; it is sent as a one-element batch.

    Returns:
        The ``values`` attribute of the first (and only) embedding returned.
    """
    # TextEmbeddingModel is provided by the Vertex AI SDK's language-model
    # module rather than by ``google.cloud.aiplatform``.
    from vertexai.language_models import TextEmbeddingModel

    model = TextEmbeddingModel.from_pretrained("textembedding-gecko@003")
    embeddings = model.get_embeddings([text])
    return embeddings[0].values

PaLM 2提供了三个变体模型:Text Bison用于文本生成,Code Bison专门用于代码生成,Gecko用于文本向量化。这种细分让开发者可以根据任务选择最合适的模型。

Gemini多模态模型

Gemini是Google最新一代多模态模型,支持文本、图像和代码的统一理解。在Vertex AI中可以通过API直接调用。

from vertexai.preview.generative_models import GenerativeModel, Part, Image
import vertexai

def analyze_image_with_gemini(image_path, question):
    """Ask ``gemini-pro-vision`` a question about a local image file.

    Args:
        image_path: Path of the image file; it is read in binary mode.
        question: Text question sent after the image part.

    Returns:
        The ``text`` attribute of the model response.
    """
    import mimetypes

    vertexai.init(project="your-project-id", location="us-central1")

    model = GenerativeModel("gemini-pro-vision")

    with open(image_path, "rb") as f:
        image_data = f.read()

    # Derive the MIME type from the file name instead of labelling every
    # file as PNG; keep PNG as the fallback when the type cannot be guessed
    # or is not an image type.
    mime_type, _ = mimetypes.guess_type(str(image_path))
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/png"

    image_part = Part.from_data(data=image_data, mime_type=mime_type)

    response = model.generate_content(
        [image_part, question],
        generation_config={"max_output_tokens": 1024},
    )

    return response.text

def multimodal_analysis(contents):
    """Send a mixed sequence of text and image items to ``gemini-pro-vision``.

    Args:
        contents: Iterable of dicts. Items with ``"type" == "text"``
            contribute their ``"content"`` value; items with
            ``"type" == "image"`` are read from ``"path"`` in binary mode and
            tagged with their ``"mime"`` value. Items of any other type are
            skipped.

    Returns:
        The ``text`` attribute of the model response.
    """
    vertexai.init(project="your-project-id", location="us-central1")

    vision_model = GenerativeModel("gemini-pro-vision")

    request_parts = []
    for entry in contents:
        kind = entry["type"]
        if kind == "text":
            request_parts.append(entry["content"])
            continue
        if kind != "image":
            # Unknown item types contribute nothing to the request.
            continue
        with open(entry["path"], "rb") as handle:
            raw_bytes = handle.read()
            request_parts.append(
                Part.from_data(data=raw_bytes, mime_type=entry["mime"])
            )

    return vision_model.generate_content(request_parts).text

Gemini的多模态能力使其能够同时处理图像和文本输入,适用于图像描述、文档分析和视觉问答等场景。Part.from_data方法接收原始字节数据及其MIME类型,因此可以传入PNG、JPEG等不同格式的图像。

Vertex AI Search:智能搜索

Vertex AI Search结合了LLM和搜索引擎,可以构建企业级的智能搜索和问答系统。

# The beta surface of the Discovery Engine client library is published as
# ``discoveryengine_v1beta`` (there is no ``discoveryengine_v1beta1`` module).
from google.cloud import discoveryengine_v1beta as discoveryengine

def create_search_app(project_id, location, collection_id):
    """Return the serving-config path for a data store's ``default_config``.

    The visible code only instantiates a search client and asks it to format
    a path; it issues no other call on the client.

    Args:
        project_id: Value passed as ``project``.
        location: Value passed as ``location``.
        collection_id: Value passed as ``data_store``.

    Returns:
        Whatever ``SearchServiceClient.serving_config_path`` returns for the
        given components.
    """
    path_components = {
        "project": project_id,
        "location": location,
        "data_store": collection_id,
        "serving_config": "default_config",
    }
    search_client = discoveryengine.SearchServiceClient()
    return search_client.serving_config_path(**path_components)

def search_with_llm(query, project_id, location, data_store_id):
    """Run a Vertex AI Search query and return an LLM summary plus top results.

    Args:
        query: The search query string.
        project_id: Google Cloud project that owns the data store.
        location: Location of the data store.
        data_store_id: Identifier of the data store to search.

    Returns:
        A dict with two keys: ``"summary"`` (the generated summary text, or
        an empty string when none was produced) and ``"results"`` (a list of
        ``{"title": ..., "snippet": ...}`` dicts for the first page of
        results).
    """
    client = discoveryengine.SearchServiceClient()

    serving_config = client.serving_config_path(
        project=project_id,
        location=location,
        data_store=data_store_id,
        serving_config="default_config",
    )

    # Summary and snippet options are nested under ContentSearchSpec;
    # SearchRequest has no top-level ``summary_spec`` field.
    content_spec = discoveryengine.SearchRequest.ContentSearchSpec
    request = discoveryengine.SearchRequest(
        serving_config=serving_config,
        query=query,
        page_size=5,
        content_search_spec=content_spec(
            snippet_spec=content_spec.SnippetSpec(return_snippet=True),
            summary_spec=content_spec.SummarySpec(
                summary_result_count=5,
                include_citations=True,
            ),
        ),
    )

    response = client.search(request=request)

    results = []
    for r in response.results:
        # Title and snippets are returned in the document's derived struct
        # data; the field may be unset, hence the empty-dict fallback.
        data = r.document.derived_struct_data or {}
        snippets = data.get("snippets") or []
        first_snippet = snippets[0].get("snippet", "") if snippets else ""
        results.append({"title": data.get("title", ""), "snippet": first_snippet})

    summary = response.summary
    return {"summary": summary.summary_text if summary else "", "results": results}

Vertex AI Search不仅返回搜索结果,还能利用LLM生成摘要和回答。通过配置SummarySpec,可以控制摘要的生成方式以及是否附带引用标注。

部署与成本优化

使用GCP的LLM服务时,需要合理规划项目结构并做好成本控制。通过IAM权限管理和配额设置,可以确保服务安全、经济地运行。

import os
from google.cloud import monitoring_v3
from google.protobuf import timestamp_pb2
import time

def monitor_vertex_usage(project_id, window_seconds=86400, cost_per_second=0.00000167):
    """Sum a Vertex AI usage metric over a recent window and estimate its cost.

    Args:
        project_id: Project whose time series are queried.
        window_seconds: Length of the look-back window ending now, in
            seconds. Defaults to 86400 (24 hours).
        cost_per_second: Price multiplied by the summed usage to produce the
            estimate. Defaults to 0.00000167.

    Returns:
        A dict with ``"total_usage_seconds"`` (sum of all point values) and
        ``"estimated_cost"`` (that sum times ``cost_per_second``).

    Raises:
        ValueError: If ``window_seconds`` is not positive.
    """
    if window_seconds <= 0:
        raise ValueError("window_seconds must be positive")

    client = monitoring_v3.MetricServiceClient()
    project_name = f"projects/{project_id}"

    # Split the current time into whole seconds and nanoseconds.
    now = time.time()
    seconds = int(now)
    nanos = int((now - seconds) * 10**9)

    interval = monitoring_v3.TimeInterval(
        {
            "end_time": {"seconds": seconds, "nanos": nanos},
            "start_time": {"seconds": seconds - int(window_seconds), "nanos": nanos},
        }
    )

    # NOTE(review): confirm this metric type exists for the project before
    # relying on the result.
    results = client.list_time_series(
        request={
            "name": project_name,
            "filter": 'metric.type="aiplatform.googleapis.com/prediction/online/acc_usage_time"',
            "interval": interval,
            "view": monitoring_v3.ListTimeSeriesRequest.TimeSeriesView.FULL,
        }
    )

    # A point carries either an integer or a double value; the unset member
    # reads as zero, so adding both picks up whichever one is populated.
    total_usage = sum(
        float(point.value.int64_value) + point.value.double_value
        for ts in results
        for point in ts.points
    )
    return {"total_usage_seconds": total_usage, "estimated_cost": total_usage * cost_per_second}

def set_budget_alert(project_id, budget_amount):
    """Create a USD billing budget for the project with 80% and 100% alerts.

    The billing account is read from the ``BILLING_ACCOUNT`` environment
    variable.

    Args:
        project_id: Project the budget is scoped to.
        budget_amount: Budget size in USD; truncated to whole units.

    Raises:
        KeyError: If ``BILLING_ACCOUNT`` is not set in the environment.
    """
    # Imported locally because the visible module-level imports do not bind
    # ``billing_budgets_v1``.
    from google.cloud.billing import budgets_v1 as billing_budgets_v1

    billing_client = billing_budgets_v1.BudgetServiceClient()

    budget = billing_budgets_v1.Budget(
        display_name="LLM服务预算",
        # The filter message in budgets_v1 is named ``Filter``.
        budget_filter=billing_budgets_v1.Filter(
            projects=[f"projects/{project_id}"],
            # NOTE(review): the Budgets API may expect Cloud Billing service
            # IDs here rather than an API host name; confirm the value.
            services=["services/aiplatform.googleapis.com"],
        ),
        amount=billing_budgets_v1.BudgetAmount(
            # ``specified_amount`` is a Money message whose ``units`` field
            # is an integer, so pass an int rather than a string.
            specified_amount={
                "currency_code": "USD",
                "units": int(budget_amount),
            },
        ),
        threshold_rules=[
            billing_budgets_v1.ThresholdRule(threshold_percent=0.8),
            billing_budgets_v1.ThresholdRule(threshold_percent=1.0),
        ],
    )

    billing_client.create_budget(
        parent=f"billingAccounts/{os.environ['BILLING_ACCOUNT']}",
        budget=budget,
    )

通过Cloud Monitoring监控Vertex AI的使用情况,结合Billing Budget设置预算告警,可以有效控制LLM服务的成本。建议在项目初期就配置好预算和告警阈值。

总结

Google Cloud提供了完整的LLM服务生态,从Vertex AI的统一平台到Gemini的多模态能力,再到Vertex AI Search的智能搜索,覆盖了各种AI应用场景。开发者可以利用GCP的基础设施优势,构建高效、可扩展的LLM应用。