package dataflowlib

import ""

Package dataflowlib translates a Beam pipeline model to the Dataflow API job model, for submission to Google Cloud Dataflow.


Package Files

execute.go fixup.go job.go messages.go stage.go translate.go

func Execute(ctx context.Context, raw *pipepb.Pipeline, opts *JobOptions, workerURL, jarURL, modelURL, endpoint string, async bool) (string, error)

Execute submits a pipeline as a Dataflow job.

func Fixup(p *pipepb.Pipeline) (*pipepb.Pipeline, error)

Fixup adjusts the proto pipeline to account for Dataflow quirks.

func NewClient(ctx context.Context, endpoint string) (*df.Service, error)

NewClient creates a new Dataflow client with default application credentials and CloudPlatformScope. The Dataflow endpoint is optionally overridden.

func PrintJob(ctx context.Context, job *df.Job)

PrintJob logs the Dataflow job.

func StageFile(ctx context.Context, project, url, filename string) error

StageFile uploads a file to GCS.

func StageModel(ctx context.Context, project, modelURL string, model []byte) error

StageModel uploads the pipeline model to GCS as a unique object.

func Submit(ctx context.Context, client *df.Service, project, region string, job *df.Job) (*df.Job, error)

Submit submits a prepared job to Cloud Dataflow.

func Translate(p *pipepb.Pipeline, opts *JobOptions, workerURL, jarURL, modelURL string) (*df.Job, error)

Translate translates a pipeline to a Dataflow job.

func WaitForCompletion(ctx context.Context, client *df.Service, project, region, jobID string) error

WaitForCompletion monitors the given job until completion. It logs any messages and state changes received.

type JobOptions struct {
    // Name is the job name.
    Name string
    // Experiments are additional experiments.
    Experiments []string
    // Pipeline options
    Options runtime.RawOptions

    Project             string
    Region              string
    Zone                string
    Network             string
    Subnetwork          string
    NoUsePublicIPs      bool
    NumWorkers          int64
    MachineType         string
    Labels              map[string]string
    ServiceAccountEmail string

    // Autoscaling settings
    Algorithm     string
    MaxNumWorkers int64

    TempLocation string

    // Worker is the worker binary override.
    Worker string
    // WorkerJar is a custom worker jar.
    WorkerJar string

    TeardownPolicy string

JobOptions capture the various options for submitting jobs to Dataflow.

