GCP-AI-Dispatcher

Deploy a model and a job for batched inference in Vertex AI.

Features

Batch Job Lifecycle: launch async jobs, replace on every run or ignore old runs
Model Upload and Deployment: automatic upload of model artifacts to GCS and deployment to the model registry
Model inputs and outputs storage: model inputs and outputs are automatically stored in GCS
Service Account: dedicated service account with the necessary IAM permissions (not required for Model Garden models)
Bring your own Docker image: set ModelImageURL to serve the model with a custom image and Custom Prediction Routines

Deploy model from the model garden

pulumi.Run(func(ctx *pulumi.Context) error {
    // Launch a new async inference job with a Llama-based model
    batchJob, err := gcp.NewAIBatch(ctx, "llama-sentiment-batch", &gcp.AIBatchArgs{
        Project: "my-gcp-project",
        Region:  "us-central1",

        // Model configuration
        ModelName:     "publishers/meta/models/llama3-2@llama-3.2-3b-instruct",

        // Input data configuration
        InputDataPath: "./inputs",
        InputFormat:   "jsonl",

        // Output configuration
        OutputDataPath: pulumi.String("my-predictions/"),
        OutputFormat:   pulumi.String("jsonl"),

        // Resource allocation
        MachineType:          pulumi.String("g2-standard-8"),
        AcceleratorType:      pulumi.String("NVIDIA_L4"),
        AcceleratorCount:     pulumi.Int(1),

        // Metadata
        Labels: map[string]string{
            "environment": "production",
            "model-type":  "llama",
            "use-case":    "sentiment-analysis",
        },
    })
    if err != nil {
        return err
    }

    // Export useful outputs
    ctx.Export("batchJobName", batchJob.GetBatchPredictionJob().Name)

    return nil
})

Deploy custom model

pulumi.Run(func(ctx *pulumi.Context) error {
    // Launch a new async inference job with a BERT-based model downloaded from Hugging Face
    batchJob, err := gcp.NewAIBatch(ctx, "bert-sentiment-batch", &gcp.AIBatchArgs{
        Project: "my-gcp-project",
        Region:  "us-central1",

        // Model configuration
        ModelDir:                          "./models/nlptown-bert-base-multilingual-uncased-sentiment",
        ModelPredictionInputSchemaPath:    "bert-instance-schema.yaml",
        ModelPredictionOutputSchemaPath:   "bert-prediction-schema.yaml",
        ModelPredictionBehaviorSchemaPath: "bert-parameters-schema.yaml",
        ModelImageURL:                     pulumi.String("us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-15:latest"),

        // Input data configuration
        InputDataPath: "./inputs",
        InputFormat:   "jsonl",

        // Output configuration
        OutputDataPath: pulumi.String("predictions/"),
        OutputFormat:   pulumi.String("jsonl"),

        // Resource allocation
        MachineType:          pulumi.String("g2-standard-8"),
        StartingReplicaCount: pulumi.Int(2),
        MaxReplicaCount:      pulumi.Int(5),
        BatchSize:            pulumi.Int(64),

        // Optional: GPU acceleration
        AcceleratorType:  pulumi.String("NVIDIA_L4"),
        AcceleratorCount: pulumi.Int(1),

        // Metadata
        Labels: map[string]string{
            "environment": "production",
            "model-type":  "bert",
            "use-case":    "sentiment-analysis",
        },
    })
    if err != nil {
        return err
    }

    // Export useful outputs
    ctx.Export("batchJobName", batchJob.GetBatchPredictionJob().Name)
    ctx.Export("modelServiceAccount", batchJob.GetModelServiceAccount().Email)

    return nil
})

Model deployment examples

See working end-to-end model deployments:

Example	Model Source	README
Sentiment analysis with a fine-tuned BERT model and a custom prediction routine	Hugging Face Model	examples/bert-sentiment-analysis-with-cpr
Sentiment analysis with out-of-the-box Llama from the Model Garden	GCP Model Garden	examples/llama-sentiment-analysis
Code change review with Mistral model from the Model Garden	GCP Model Garden	examples/mistral-code-change-review

See:

Install

go get github.com/davidmontoyago/pulumi-gcp-ai-batch

Full Config

args := &gcp.AIBatchArgs{
    // Required: GCP project and region
    Project: "my-gcp-project",
    Region:  "us-central1",

    // Model configuration - use either ModelDir OR ModelName
    // Option 1: Custom model with artifacts
    ModelDir:                            "./models/my-model",
    ModelImageURL:                       pulumi.String("us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-15:latest"),
    ModelPredictionInputSchemaPath:      "input-schema.yaml",
    ModelPredictionOutputSchemaPath:     "output-schema.yaml",
    ModelPredictionBehaviorSchemaPath:   "behavior-schema.yaml", // Optional
    ModelBucketBasePath:                 "model", // Default: "model"

    // Option 2: Model garden model (alternative to ModelDir)
    // ModelName: "publishers/google/models/gemma2@gemma-2-2b-it",

    // Display names
    JobDisplayName:   pulumi.String("my-batch-job"),
    ModelDisplayName: pulumi.String("my-model"),

    // Input data configuration
    InputDataPath: "inputs",     // Default: "inputs"
    InputFormat:   "jsonl",      // Default: "jsonl"
    InputFileName: "data.jsonl", // Default: "*.jsonl"

    // Output data configuration
    OutputDataPath: pulumi.String("predictions"), // Default: "predictions"
    OutputFormat:   pulumi.String("jsonl"),       // Default: "jsonl"

    // Resource allocation
    MachineType:          pulumi.String("n1-standard-4"), // Default: "n1-standard-4"
    StartingReplicaCount: pulumi.Int(1),                  // Default: 1
    MaxReplicaCount:      pulumi.Int(3),                  // Default: 3
    BatchSize:            pulumi.Int(64),                 // Optional, auto-configured if not set

    // Accelerator configuration (optional)
    AcceleratorType:  pulumi.String("NVIDIA_TESLA_T4"), // Default: "ACCELERATOR_TYPE_UNSPECIFIED"
    AcceleratorCount: pulumi.Int(1),                     // Default: 1

    // Access control
    EnablePrivateRegistryAccess: true,  // Default: false
    RetainJobOnDelete:           false, // Default: false

    // Metadata
    Labels: map[string]string{
        "environment": "production",
        "team":        "ml-ops",
        "cost-center": "research",
    },
}

Development

Build: make build
Test: make test
Lint: make lint
Clean: make clean

Requirements

Go 1.24+
GCP project with Vertex AI API enabled
Pulumi CLI

Name		Name	Last commit message	Last commit date
Latest commit History 66 Commits
.github		.github
examples		examples
pkg/gcp		pkg/gcp
.gitignore		.gitignore
.golangci.yml		.golangci.yml
LICENSE		LICENSE
Makefile		Makefile
README.md		README.md
go.mod		go.mod
go.sum		go.sum
go.work		go.work
go.work.sum		go.work.sum

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Repository files navigation

GCP-AI-Dispatcher

Features

Deploy model from the model garden

Deploy custom model

Model deployment examples

Install

Full Config

Development

Requirements

About

Uh oh!

Releases

Packages

Languages

License

Kenxpx/GCP-AI-Dispacher

Folders and files

Latest commit

History

Repository files navigation

GCP-AI-Dispatcher

Features

Deploy model from the model garden

Deploy custom model

Model deployment examples

Install

Full Config

Development

Requirements

About

Resources

License

Uh oh!

Stars

Watchers

Forks

Releases

Packages 0

Languages

Packages