types

package
v0.0.0-...-37f5ccb Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 11, 2023 License: Apache-2.0 Imports: 3 Imported by: 3

Documentation

Index

Constants

View Source
// Event name strings for deployment create, update, delete, scale-up and
// scale-down. They are untyped string constants.
const (
	DeploymentCreateEvent    = "deployment-create"
	DeploymentUpdateEvent    = "deployment-update"
	DeploymentDeleteEvent    = "deployment-delete"
	DeploymentScaleUpEvent   = "deployment-scale-up"
	DeploymentScaleDownEvent = "deployment-scale-down"
)
View Source
// Scaling type names, declared as untyped string constants.
// NOTE(review): these match the "capacity" and "rps" values documented on
// ScalingConfig.Type, but they are not declared with the ScalingType type —
// confirm whether that is intentional.
const (
	ScalingTypeCapacity = "capacity"
	ScalingTypeRPS      = "rps"
)
View Source
const (
	// LabelNamespace is a label key under the modelz.tensorchord.ai prefix.
	// NOTE(review): presumably it carries the namespace of the labeled
	// object — confirm against callers.
	LabelNamespace = "modelz.tensorchord.ai/namespace"
)
View Source
const (
	// RuntimeClassNvidia is the runtime class name "nvidia".
	// NOTE(review): presumably the name of a Kubernetes RuntimeClass used for
	// GPU workloads — confirm against callers.
	RuntimeClassNvidia string = "nvidia"
)

Variables

This section is empty.

Functions

This section is empty.

Types

type Build

// Build pairs a BuildSpec (serialized as "spec") with its BuildStatus
// (serialized as "status").
type Build struct {
	Spec   BuildSpec   `json:"spec"`
	Status BuildStatus `json:"status,omitempty"`
}

type BuildPhase

// BuildPhase is the phase of a build, expressed as a string.
type BuildPhase string

// Known BuildPhase values.
const (
	BuildPhasePending   BuildPhase = "Pending"
	BuildPhaseRunning   BuildPhase = "Running"
	BuildPhaseSucceeded BuildPhase = "Succeeded"
	BuildPhaseFailed    BuildPhase = "Failed"
)

type BuildSource

// BuildSource holds the build settings: the target directory, the builder
// type, the artifact image name (serialized as "image") and tag (serialized
// as "image_tag"), and a duration string.
// NOTE(review): the format and meaning of Duration are not visible here —
// confirm against callers.
type BuildSource struct {
	// Directory is the target directory name.
	// Must not contain or start with '..'.  If '.' is supplied, the volume directory will be the
	// git repository.  Otherwise, if specified, the volume will contain the git repository in
	// the subdirectory with the given name.
	// +optional
	Directory string `json:"directory,omitempty"`

	Builder          BuilderType `json:"builder,omitempty"`
	ArtifactImage    string      `json:"image,omitempty"`
	ArtifactImageTag string      `json:"image_tag,omitempty"`

	Duration string `json:"duration,omitempty"`
}

type BuildSpec

// BuildSpec describes a build: its name, project ID and namespace, plus the
// embedded GitRepositorySource and BuildSource.
//
// NOTE(review): encoding/json does not define an "inline" tag option; it
// flattens embedded structs that have no JSON name on its own, so the option
// only matters to other encoders — confirm which encoders consume these tags.
type BuildSpec struct {
	Name                string `json:"name,omitempty"`
	ProjectID           string `json:"project_id,omitempty"`
	Namespace           string `json:"namespace,omitempty"`
	GitRepositorySource `json:",inline,omitempty"`
	BuildSource         `json:",inline,omitempty"`
}

type BuildStatus

// BuildStatus holds the image and the BuildPhase of a build.
type BuildStatus struct {
	Image string     `json:"image,omitempty"`
	Phase BuildPhase `json:"phase,omitempty"`
}

type BuilderType

// BuilderType names the builder used for a build, expressed as a string.
type BuilderType string

// Known BuilderType values.
const (
	BuilderTypeDockerfile BuilderType = "Dockerfile"
	BuilderTypeENVD       BuilderType = "envd"
)

type DeleteFunctionRequest

// DeleteFunctionRequest is the request to delete a deployed function,
// identified by FunctionName (serialized as "functionName").
type DeleteFunctionRequest struct {
	FunctionName string `json:"functionName"`
}

DeleteFunctionRequest is the request to delete a deployed function.

type ErrorResponse

// ErrorResponse carries an error message, serialized under the "message" key.
type ErrorResponse struct {
	Message string `json:"message"`
}

type Framework

// Framework is the inference framework, expressed as a string.
type Framework string

Framework is the inference framework. It is only used to set the default port and command. For example, if the framework is "gradio", the default port is 7860 and the default command is "python app.py". You could override these defaults by setting the port and command fields and framework to `other`.

// Known Framework values.
const (
	FrameworkGradio    Framework = "gradio"
	FrameworkStreamlit Framework = "streamlit"
	FrameworkMosec     Framework = "mosec"
	FrameworkOther     Framework = "other"
)

type GitRepositorySource

// GitRepositorySource identifies a git source by repository URL, branch and
// revision.
type GitRepositorySource struct {
	// Repository is the URL of the repository.
	Repository string `json:"repository"`
	Branch     string `json:"branch,omitempty"`
	// Revision is the commit hash for the specified revision.
	// +optional
	Revision string `json:"revision,omitempty"`
}

type InferenceDeployment

// InferenceDeployment represents a request to create or update a Model.
// It pairs an InferenceDeploymentSpec with an InferenceDeploymentStatus.
type InferenceDeployment struct {
	Spec   InferenceDeploymentSpec   `json:"spec"`
	Status InferenceDeploymentStatus `json:"status,omitempty"`
}

InferenceDeployment represents a request to create or update a Model.

type InferenceDeploymentInstance

// InferenceDeploymentInstance pairs an InferenceDeploymentInstanceSpec with
// an InferenceDeploymentInstanceStatus.
type InferenceDeploymentInstance struct {
	Spec   InferenceDeploymentInstanceSpec   `json:"spec,omitempty"`
	Status InferenceDeploymentInstanceStatus `json:"status,omitempty"`
}

type InferenceDeploymentInstanceSpec

// InferenceDeploymentInstanceSpec identifies an instance by namespace and
// name, together with an owner reference string.
// NOTE(review): presumably OwnerReference names the owning deployment —
// confirm against callers.
type InferenceDeploymentInstanceSpec struct {
	Namespace      string `json:"namespace,omitempty"`
	Name           string `json:"name,omitempty"`
	OwnerReference string `json:"owner_reference,omitempty"`
}

type InferenceDeploymentInstanceStatus

// InferenceDeploymentInstanceStatus holds the phase, start time, reason and
// message of a deployment instance.
//
// StartTime is serialized under the JSON key "createdAt". Its omitempty option
// has no effect with encoding/json because time.Time is a struct value, so a
// zero StartTime is still emitted.
type InferenceDeploymentInstanceStatus struct {
	Phase     InstancePhase `json:"phase,omitempty"`
	StartTime time.Time     `json:"createdAt,omitempty"`
	Reason    string        `json:"reason,omitempty"`
	Message   string        `json:"message,omitempty"`
}

type InferenceDeploymentSpec

// InferenceDeploymentSpec describes an inference deployment. Name and Image
// are always serialized; every other field is tagged omitempty, and the
// optional scalar settings (Port, HTTPProbePath, Command) are pointers.
type InferenceDeploymentSpec struct {
	// Name is the name of the inference.
	Name string `json:"name"`

	// Namespace for the inference.
	Namespace string `json:"namespace,omitempty"`

	// Scaling is the scaling configuration for the inference.
	Scaling *ScalingConfig `json:"scaling,omitempty"`

	// Framework is the inference framework.
	Framework Framework `json:"framework,omitempty"`

	// Image is a fully-qualified container image
	Image string `json:"image"`

	// Port is the port exposed by the inference.
	Port *int32 `json:"port,omitempty"`

	// HTTPProbePath is the path of the http probe.
	HTTPProbePath *string `json:"http_probe_path,omitempty"`

	// Command is the command to run when starting the inference.
	Command *string `json:"command,omitempty"`

	// EnvVars can be provided to set environment variables for the inference runtime.
	EnvVars map[string]string `json:"envVars,omitempty"`

	// Constraints are the constraints for the inference.
	Constraints []string `json:"constraints,omitempty"`

	// Secrets list of secrets to be made available to inference.
	Secrets []string `json:"secrets,omitempty"`

	// Labels are key-value pairs that may be attached to the inference.
	Labels map[string]string `json:"labels,omitempty"`

	// Annotations are key-value pairs that may be attached to the inference.
	Annotations map[string]string `json:"annotations,omitempty"`

	// Resources are the compute resource requirements.
	Resources *ResourceRequirements `json:"resources,omitempty"`
}

type InferenceDeploymentStatus

// InferenceDeploymentStatus is the status of an inference deployment: its
// phase, invocation and replica counts, creation time, resource usage and
// the latest event message.
type InferenceDeploymentStatus struct {
	// Phase is the Phase of the deployment.
	Phase Phase `json:"phase,omitempty"`

	// InvocationCount count of invocations
	InvocationCount int32 `json:"invocationCount,omitempty"`

	// Replicas desired within the cluster
	Replicas int32 `json:"replicas,omitempty"`

	// AvailableReplicas is the count of replicas ready to receive
	// invocations as reported by the faas-provider
	AvailableReplicas int32 `json:"availableReplicas,omitempty"`

	// CreatedAt is the time read back from the faas backend's
	// data store for when the function or its container was created.
	CreatedAt *time.Time `json:"createdAt,omitempty"`

	// Usage represents CPU and RAM used by all of the
	// functions' replicas. Divide by AvailableReplicas for an
	// average value per replica.
	Usage *InferenceUsage `json:"usage,omitempty"`

	// EventMessage records a human-readable message indicating details about the event of deployment.
	EventMessage string `json:"eventMessage,omitempty"`
}

InferenceDeploymentStatus is exported for the system/inferences endpoint.

type InferenceUsage

// InferenceUsage holds CPU, memory and GPU usage figures as float64 values.
type InferenceUsage struct {
	// CPU is the increase in CPU usage since the last measurement
	// equivalent to Kubernetes' concept of millicores.
	CPU float64 `json:"cpu,omitempty"`

	// TotalMemoryBytes is the total memory usage in bytes.
	TotalMemoryBytes float64 `json:"totalMemoryBytes,omitempty"`

	// GPU is the GPU usage.
	// NOTE(review): the unit is not visible here — confirm against the producer.
	GPU float64 `json:"gpu,omitempty"`
}

InferenceUsage represents CPU and RAM used by all of the functions' replicas.

CPU is measured in seconds consumed since the last measurement. RAM is measured in total bytes consumed.

type InstancePhase

// InstancePhase is the phase of a deployment instance, expressed as a string.
type InstancePhase string

// Known InstancePhase values.
const (
	InstancePhaseScheduling   InstancePhase = "Scheduling"
	InstancePhasePending      InstancePhase = "Pending"
	InstancePhaseRunning      InstancePhase = "Running"
	InstancePhaseFailed       InstancePhase = "Failed"
	InstancePhaseSucceeded    InstancePhase = "Succeeded"
	InstancePhaseUnknown      InstancePhase = "Unknown"
	InstancePhaseCreating     InstancePhase = "Creating"
	InstancePhaseInitializing InstancePhase = "Initializing"
)

type LogRequest

// LogRequest describes a request for logs. Every field carries both a `form`
// and a `json` tag, so it can be bound from form/query parameters or JSON.
// NOTE(review): the expected format of Since and End is not visible here —
// confirm against the handler.
type LogRequest struct {
	Namespace string `form:"namespace" json:"namespace,omitempty"`
	Name      string `form:"name" json:"name,omitempty"`
	// Instance is the optional pod name, that allows you to request logs from a specific instance
	Instance string `form:"instance" json:"instance,omitempty"`
	// Follow allows the user to request a stream of logs until the timeout
	Follow bool `form:"follow" json:"follow,omitempty"`
	// Tail sets the maximum number of log messages to return, <=0 means unlimited
	Tail  int    `form:"tail" json:"tail,omitempty"`
	Since string `form:"since" json:"since,omitempty"`
	// End is the end time of the log stream
	End string `form:"end" json:"end,omitempty"`
}

type Message

// Message is a specific log message from a function container log stream.
type Message struct {
	// Name is the function name
	Name      string `json:"name"`
	Namespace string `json:"namespace"`
	// Instance is the name/id of the specific function instance
	Instance string `json:"instance"`
	// Timestamp is the timestamp of when the log message was recorded
	Timestamp time.Time `json:"timestamp"`
	// Text is the raw log message content
	Text string `json:"text"`
}

Message is a specific log message from a function container log stream

type NamespaceRequest

// NamespaceRequest is a request that carries a single namespace name.
type NamespaceRequest struct {
	Name string `json:"name,omitempty"`
}

type NodeSystemInfo

// NodeSystemInfo is a set of ids/uuids to uniquely identify the node.
// Each field carries both a JSON tag and a protobuf tag.
type NodeSystemInfo struct {
	// MachineID reported by the node. For unique machine identification
	// in the cluster this field is preferred. Learn more from man(5)
	// machine-id: http://man7.org/linux/man-pages/man5/machine-id.5.html
	MachineID string `json:"machineID" protobuf:"bytes,1,opt,name=machineID"`
	// Kernel Version reported by the node from 'uname -r' (e.g. 3.16.0-0.bpo.4-amd64).
	KernelVersion string `json:"kernelVersion" protobuf:"bytes,4,opt,name=kernelVersion"`
	// OS Image reported by the node from /etc/os-release (e.g. Debian GNU/Linux 7 (wheezy)).
	OSImage string `json:"osImage" protobuf:"bytes,5,opt,name=osImage"`
	// The Operating System reported by the node
	OperatingSystem string `json:"operatingSystem" protobuf:"bytes,9,opt,name=operatingSystem"`
	// The Architecture reported by the node
	Architecture string `json:"architecture" protobuf:"bytes,10,opt,name=architecture"`
}

NodeSystemInfo is a set of ids/uuids to uniquely identify the node.

type Phase

// Phase is the phase of an inference deployment, expressed as a string.
type Phase string

// Known Phase values.
const (
	// PhaseReady is the state of an inference when it is ready to
	// receive invocations.
	PhaseReady Phase = "Ready"

	// PhaseScaling is the state of an inference when it scales.
	PhaseScaling Phase = "Scaling"

	PhaseTerminating Phase = "Terminating"

	PhaseNoReplicas Phase = "NoReplicas"

	PhaseNotReady Phase = "NotReady"

	PhaseBuilding Phase = "Building"
)

type ProviderInfo

// ProviderInfo provides information about the configured provider.
// Name is serialized under the JSON key "provider".
type ProviderInfo struct {
	Name          string       `json:"provider"`
	Version       *VersionInfo `json:"version"`
	Orchestration string       `json:"orchestration"`
}

ProviderInfo provides information about the configured provider

type Quantity

// Quantity is a resource quantity expressed as a string; it is the value type
// of ResourceList.
// NOTE(review): the accepted quantity syntax is not visible here — confirm.
type Quantity string

type QueueRequest

// QueueRequest is a request for asynchronous processing. Only CallbackURL
// carries a JSON tag; the other fields serialize under their Go field names.
type QueueRequest struct {
	// Header from HTTP request
	Header http.Header

	// Host from HTTP request
	Host string

	// Body from HTTP request to use for invocation
	Body []byte

	// Method from HTTP request
	Method string

	// Path from HTTP request
	Path string

	// QueryString from HTTP request
	QueryString string

	// Function name to invoke
	Function string

	// QueueName to publish the request to, leave blank
	// for default.
	QueueName string

	// Used by queue worker to submit a result
	CallbackURL *url.URL `json:"CallbackUrl"`
}

QueueRequest is a request for asynchronous processing.

type RequestQueuer

// RequestQueuer can publish a request to be executed asynchronously.
type RequestQueuer interface {
	// Queue submits req and returns an error.
	// NOTE(review): presumably a nil error means the request was accepted —
	// confirm against implementations.
	Queue(req *QueueRequest) error
}

RequestQueuer can publish a request to be executed asynchronously.

type ResourceList

// ResourceList is a set of (resource name, quantity) pairs, keyed by
// ResourceName.
type ResourceList map[ResourceName]Quantity

ResourceList is a set of (resource name, quantity) pairs.

type ResourceName

// ResourceName is the name of a compute resource; it is the key type of
// ResourceList.
type ResourceName string

// Known ResourceName values.
const (
	ResourceCPU    ResourceName = "cpu"
	ResourceMemory ResourceName = "memory"
	ResourceGPU    ResourceName = "gpu"
)

type ResourceRequirements

// ResourceRequirements describes the compute resource requirements as a pair
// of ResourceList values: Limits and Requests.
type ResourceRequirements struct {
	// Limits describes the maximum amount of compute resources allowed.
	// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
	// +optional
	Limits ResourceList `json:"limits,omitempty" protobuf:"bytes,1,rep,name=limits,casttype=ResourceList,castkey=ResourceName"`
	// Requests describes the minimum amount of compute resources required.
	// If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
	// otherwise to an implementation-defined value.
	// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
	// +optional
	Requests ResourceList `json:"requests,omitempty" protobuf:"bytes,2,rep,name=requests,casttype=ResourceList,castkey=ResourceName"`
}

ResourceRequirements describes the compute resource requirements.

type ScaleServiceRequest

// ScaleServiceRequest scales the service to the requested replica count.
// It names the service, gives the replica count as a uint64, and carries an
// event message.
type ScaleServiceRequest struct {
	ServiceName  string `json:"serviceName"`
	Replicas     uint64 `json:"replicas"`
	EventMessage string `json:"eventMessage"`
}

ScaleServiceRequest scales the service to the requested replica count.

type ScalingConfig

// ScalingConfig is the scaling configuration for an inference. Every field is
// an optional pointer tagged omitempty, so an unset field is omitted from the
// JSON output.
type ScalingConfig struct {
	// MinReplicas is the lower limit for the number of replicas to which the
	// autoscaler can scale down. It defaults to 0.
	MinReplicas *int32 `json:"min_replicas,omitempty"`
	// MaxReplicas is the upper limit for the number of replicas to which the
	// autoscaler can scale up. It cannot be less than MinReplicas. It defaults
	// to 1.
	MaxReplicas *int32 `json:"max_replicas,omitempty"`
	// TargetLoad is the target load. In capacity mode, it is the expected number of the inflight requests per replica.
	TargetLoad *int32 `json:"target_load,omitempty"`
	// Type is the scaling type. It can be either "capacity" or "rps". Default is "capacity".
	Type *ScalingType `json:"type,omitempty"`
	// ZeroDuration is the duration (in seconds) of zero load before scaling down to zero. Default is 5 minutes.
	ZeroDuration *int32 `json:"zero_duration,omitempty"`
	// StartupDuration is the duration (in seconds) of startup time.
	StartupDuration *int32 `json:"startup_duration,omitempty"`
}

type ScalingType

// ScalingType is the scaling type, expressed as a string. ScalingConfig.Type
// documents "capacity" and "rps" as its values.
type ScalingType string

type Secret

// Secret is a secret for the underlying orchestrator. Its content is given
// either as Value (a string) or as RawValue (binary data).
type Secret struct {
	// Name of the secret
	Name string `json:"name"`

	// Namespace if applicable for the secret
	Namespace string `json:"namespace,omitempty"`

	// Value is a string representing the secret's value
	Value string `json:"value,omitempty"`

	// RawValue can be used to provide binary data when
	// Value is not set
	RawValue []byte `json:"rawValue,omitempty"`
}

Secret for underlying orchestrator

type Server

// Server pairs a ServerSpec with a ServerStatus.
type Server struct {
	Spec   ServerSpec   `json:"spec,omitempty"`
	Status ServerStatus `json:"status,omitempty"`
}

type ServerSpec

// ServerSpec holds the name and the labels of a server.
type ServerSpec struct {
	Name   string            `json:"name,omitempty"`
	Labels map[string]string `json:"labels,omitempty"`
}

type ServerStatus

// ServerStatus holds the allocatable and capacity resource lists of a
// server, its phase string, and its NodeSystemInfo.
// NOTE(review): the set of valid Phase strings is not visible here — confirm
// against the producer.
type ServerStatus struct {
	Allocatable ResourceList   `json:"allocatable,omitempty"`
	Capacity    ResourceList   `json:"capacity,omitempty"`
	Phase       string         `json:"phase,omitempty"`
	System      NodeSystemInfo `json:"system,omitempty"`
}

type VersionInfo

// VersionInfo holds version and build metadata as strings: the version,
// build date, git commit, tag and tree state, Go version, compiler and
// platform. Every field is tagged omitempty with a snake_case JSON key.
type VersionInfo struct {
	Version      string `json:"version,omitempty"`
	BuildDate    string `json:"build_date,omitempty"`
	GitCommit    string `json:"git_commit,omitempty"`
	GitTag       string `json:"git_tag,omitempty"`
	GitTreeState string `json:"git_tree_state,omitempty"`
	GoVersion    string `json:"go_version,omitempty"`
	Compiler     string `json:"compiler,omitempty"`
	Platform     string `json:"platform,omitempty"`
}

VersionInfo provides the release version, the git commit, tag and tree state, and the build date, Go version, compiler and platform.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL