model

package
v0.0.0-...-3511abf Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 2, 2023 License: Apache-2.0 Imports: 59 Imported by: 4

Documentation

Index

Constants

View Source
const (
	// NotebookIdleTypeKernelsOrTerminals indicates that a notebook should be considered active if any
	// kernels or terminals are open.
	NotebookIdleTypeKernelsOrTerminals = "kernels_or_terminals"
	// NotebookIdleTypeKernelConnections indicates that a notebook should be considered active if any
	// connections to kernels are open.
	NotebookIdleTypeKernelConnections = "kernel_connections"
	// NotebookIdleTypeActivity indicates that a notebook should be considered active if any kernel is
	// running a command or any terminal is inputting or outputting data.
	NotebookIdleTypeActivity = "activity"
)
View Source
const (
	// ActiveState constant.
	ActiveState State = "ACTIVE"
	// CanceledState constant.
	CanceledState State = "CANCELED"
	// CompletedState constant.
	CompletedState State = "COMPLETED"
	// ErrorState constant.
	ErrorState State = "ERROR"
	// PausedState constant.
	PausedState State = "PAUSED"
	// StoppingKilledState constant.
	StoppingKilledState State = "STOPPING_KILLED"
	// StoppingCanceledState constant.
	StoppingCanceledState State = "STOPPING_CANCELED"
	// StoppingCompletedState constant.
	StoppingCompletedState State = "STOPPING_COMPLETED"
	// StoppingErrorState constant.
	StoppingErrorState State = "STOPPING_ERROR"
	// DeletingState constant.
	DeletingState State = "DELETING"
	// DeleteFailedState constant.
	DeleteFailedState State = "DELETE_FAILED"
	// DeletedState constant.
	DeletedState State = "DELETED"
	// PartiallyDeletedState constant.
	PartiallyDeletedState State = "PARTIALLY_DELETED"
	// RunningState constant. Currently only used by unmanaged trials.
	RunningState State = "RUNNING"

	// TrialWorkloadSequencerType constant.
	TrialWorkloadSequencerType WorkloadSequencerType = "TRIAL_WORKLOAD_SEQUENCER"
)
View Source
const (
	// MinUserSchedulingPriority is the smallest priority users may specify.
	MinUserSchedulingPriority = 1
	// MaxUserSchedulingPriority is the largest priority users may specify.
	MaxUserSchedulingPriority = 99
)
View Source
const (

	// LogLevelTrace is the trace task log level.
	LogLevelTrace = tasklog.LogLevelTrace
	// LogLevelDebug is the debug task log level.
	LogLevelDebug = tasklog.LogLevelDebug
	// LogLevelInfo is the info task log level.
	LogLevelInfo = tasklog.LogLevelInfo
	// LogLevelWarning is the warn task log level.
	LogLevelWarning = tasklog.LogLevelWarning
	// LogLevelError is the error task log level.
	LogLevelError = tasklog.LogLevelError
	// LogLevelCritical is the critical task log level.
	LogLevelCritical = tasklog.LogLevelCritical
	// LogLevelUnspecified is the unspecified task log level.
	LogLevelUnspecified = tasklog.LogLevelUnspecified
)
View Source
const (
	// DefaultWorkspaceID is a special, always-existing, workspace titled "Uncategorized".
	DefaultWorkspaceID = 1
	// DefaultProjectID is the default project ID for the default workspace.
	DefaultProjectID = 1
)
View Source
// BCryptCost is a stopgap until we implement sane master-configuration.
// NOTE(review): presumably the bcrypt cost passed when hashing user passwords —
// confirm at the call site.
const BCryptCost = 15

BCryptCost is a stopgap until we implement sane master-configuration.

View Source
const (
	// DeterminedK8ContainerName is the name of the container that executes the task within Kubernetes
	// pods that are launched by Determined.
	DeterminedK8ContainerName = "determined-container"
)
View Source
const (
	// RFC3339MicroTrailingZeroes unlike time.RFC3339Nano is a time format specifier that preserves
	// trailing zeroes.
	RFC3339MicroTrailingZeroes = "2006-01-02T15:04:05.000000Z07:00"
)
View Source
const (
	// StepsCompletedMetadataKey is the key within metadata to find steps completed now, if it exists.
	StepsCompletedMetadataKey = "steps_completed"
)

Variables

View Source
var (
	// EmptyPassword is the empty password (i.e., the empty string).
	// NOTE(review): built with the second argument set to false; presumably that
	// flag is the null.String validity bit, making this a NULL value — confirm
	// against null.NewString.
	EmptyPassword = null.NewString("", false)

	// NoPasswordLogin is a password that prevents the user from logging in
	// directly. They can still login via external authentication methods like
	// OAuth.
	// It wraps the same empty string as EmptyPassword; the two values differ only
	// in the second argument (true here, false there).
	NoPasswordLogin = null.NewString("", true)
)
View Source
var CheckpointReverseTransitions = reverseTransitions(CheckpointTransitions)

CheckpointReverseTransitions lists possible ancestor states.

View Source
// CheckpointTransitions maps checkpoint states to their possible transitions.
// The outer key is the current state; the inner map is the set of states that
// may follow it.
var CheckpointTransitions = map[State]map[State]bool{
	// An active checkpoint may either complete or error out.
	ActiveState: {
		CompletedState: true,
		ErrorState:     true,
	},
	// A completed checkpoint may only move on to deleted.
	CompletedState: {
		DeletedState: true,
	},
	// Deleted and error have no outgoing transitions.
	DeletedState: {},
	ErrorState:   {},
}

CheckpointTransitions maps checkpoint states to their possible transitions.

DeletingStates are the valid deleting states.

View Source
var ExperimentReverseTransitions = reverseTransitions(ExperimentTransitions)

ExperimentReverseTransitions lists possible ancestor states.

ExperimentTransitions maps experiment states to their possible transitions.

ManualStates are the states the user can set an experiment to.

View Source
// NonTerminalStates holds every state keyed in ExperimentTransitions that is
// neither a terminal state nor a deleting state. It is computed once, when the
// package's variables are initialized.
// NOTE(review): the slice is filled while ranging over ExperimentTransitions; if
// that is a map, the element order is unspecified and callers must not rely on it.
var NonTerminalStates = func() []State {
	var remaining []State
	for candidate := range ExperimentTransitions {
		// Skip anything already finished or on its way to being deleted.
		if TerminalStates[candidate] || DeletingStates[candidate] {
			continue
		}
		remaining = append(remaining, candidate)
	}
	return remaining
}()

NonTerminalStates where an experiment can be canceled or killed.

View Source
// RunningStates are the valid running states: ActiveState and PausedState.
// It is a set keyed by State; states that are absent read as false.
var RunningStates = map[State]bool{
	ActiveState: true,
	PausedState: true,
}

RunningStates are the valid running states.

View Source
var StepReverseTransitions = reverseTransitions(StepTransitions)

StepReverseTransitions lists possible ancestor states.

View Source
// StepTransitions maps step and validation states to their possible transitions.
// The outer key is the current state; the inner map is the set of states that
// may follow it.
var StepTransitions = map[State]map[State]bool{
	// An active step may either complete or error out.
	ActiveState: {
		CompletedState: true,
		ErrorState:     true,
	},
	// Completed and error have no outgoing transitions.
	CompletedState: {},
	ErrorState:     {},
}

StepTransitions maps step and validation states to their possible transitions.

StoppingStates are the valid stopping states.

StoppingToTerminalStates maps from stopping states to the corresponding terminal states.

TerminalStates are the valid terminal states.

View Source
var TrialReverseTransitions = reverseTransitions(TrialTransitions)

TrialReverseTransitions lists possible ancestor states.

TrialTransitions maps trial states to their possible transitions. Trials are mostly the same as experiments, but immediate exits through ErrorState are allowed, since a trial can die immediately and let the RM clean it up.

Functions

func FmtInstances

func FmtInstances(instances []*Instance) string

FmtInstances formats instance ids and states to print.

func MostProgressedExperimentState

func MostProgressedExperimentState(
	state1 experimentv1.State, state2 experimentv1.State,
) experimentv1.State

MostProgressedExperimentState returns the more advanced active state based on experimentStateIndex (Queued -> Pulling -> Starting -> Running).

func ProjectsToProto

func ProjectsToProto(ps []*Project) []*projectv1.Project

ProjectsToProto converts a slice of projects to its protobuf representation.

func StateToProto

func StateToProto(state State) experimentv1.State

StateToProto maps State to experimentv1.State.

func StatesToStrings

func StatesToStrings(inStates map[State]bool) []string

StatesToStrings converts a State map to a list of strings for db queries.

func TaskLogLevelFromLogrus

func TaskLogLevelFromLogrus(l logrus.Level) string

TaskLogLevelFromLogrus returns an equivalent task log level from a logrus level.

func TaskLogLevelFromProto

func TaskLogLevelFromProto(l logv1.LogLevel) string

TaskLogLevelFromProto returns a task log level from its protobuf repr.

func TaskLogLevelToProto

func TaskLogLevelToProto(l string) logv1.LogLevel

TaskLogLevelToProto returns a protobuf task log level from its string repr.

func TrialMetricsJSONPath

func TrialMetricsJSONPath(isValidation bool) string

TrialMetricsJSONPath returns the legacy JSON path to the metrics field in the metrics table.

func TrialSummaryMetricsJSONPath

func TrialSummaryMetricsJSONPath(metricGroup MetricGroup) string

TrialSummaryMetricsJSONPath returns the JSON path to the trials metric summary.

func UsingCustomImage

func UsingCustomImage(req *apiv1.LaunchTensorboardRequest) bool

UsingCustomImage checks for an image argument in the request. It is currently only used for TensorBoard. Errors are ignored because an unexpected error while parsing is treated as not using a custom image.

func ValidatePrioritySetting

func ValidatePrioritySetting(priority *int) []error

ValidatePrioritySetting checks that priority if set is within a valid range.

Types

type AcceleratorData

type AcceleratorData struct {
	bun.BaseModel `bun:"table:allocation_accelerators"`

	ContainerID      string       `db:"container_id" bun:"container_id"`
	AllocationID     AllocationID `db:"allocation_id" bun:"allocation_id,notnull"`
	NodeName         string       `db:"node_name" bun:"node_name,notnull"`
	AcceleratorType  string       `db:"accelerator_type" bun:"accelerator_type,notnull"`
	AcceleratorUuids []string     `db:"accelerator_uuids" bun:"accelerator_uuids,array"`
	ID               *int         `db:"id" bun:"id,pk,autoincrement"`
}

AcceleratorData is the model for an allocation accelerator data in the database.

func (AcceleratorData) Proto

Proto returns the proto representation of the accelerator data.

type AccessScopeID

type AccessScopeID int

AccessScopeID is an identifier for an access scope.

type AccessScopeSet

type AccessScopeSet = map[AccessScopeID]bool

AccessScopeSet is a set of access scopes.

type ActivityType

type ActivityType string

ActivityType describes a user activity.

const (
	// ActivityTypeGet represents a get request.
	ActivityTypeGet ActivityType = "GET"
)

type AgentStats

type AgentStats struct {
	ResourcePool string `db:"resource_pool"`
	AgentID      string `db:"agent_id"`
	Slots        int    `db:"slots"`
}

AgentStats stores the start/end status of an agent.

type AgentSummary

type AgentSummary struct {
	ID             string       `json:"id"`
	RegisteredTime time.Time    `json:"registered_time"`
	Slots          SlotsSummary `json:"slots"`
	NumContainers  int          `json:"num_containers"`
	ResourcePool   []string     `json:"resource_pool"`
	Addresses      []string     `json:"addresses"`
	Enabled        bool         `json:"enabled"`
	Draining       bool         `json:"draining"`
	Version        string       `json:"version"`
}

AgentSummary summarizes the state of an agent.

func (AgentSummary) ToProto

func (a AgentSummary) ToProto() *agentv1.Agent

ToProto converts an agent summary to a proto struct.

type AgentUserGroup

type AgentUserGroup struct {
	bun.BaseModel `bun:"table:agent_user_groups"`

	ID int `db:"id" bun:"id,pk,autoincrement" json:"id"`

	UserID UserID `db:"user_id" json:"user_id"`

	// The User is the username on an agent host machine. This may be different
	// from the username of the user in the User database.
	User string `db:"user_" bun:"user_" json:"user"`
	UID  int    `db:"uid" json:"uid"`

	// The Group is the primary group of the user.
	Group string `db:"group_" bun:"group_" json:"group"`
	GID   int    `db:"gid" json:"gid"`
}

An AgentUserGroup represents a username and primary group for a user on an agent host machine. There is at most one AgentUserGroup for each User.

func AgentUserGroupFromProto

func AgentUserGroupFromProto(aug *userv1.AgentUserGroup) (*AgentUserGroup, error)

AgentUserGroupFromProto converts an agent user group from its proto representation to the model type.

func (*AgentUserGroup) OwnArchive

func (c *AgentUserGroup) OwnArchive(oldArchive archive.Archive) archive.Archive

OwnArchive will return an archive.Archive modified to be owned by the AgentUserGroup, or unmodified if c is nil.

func (*AgentUserGroup) OwnedArchiveItem

func (c *AgentUserGroup) OwnedArchiveItem(
	path string, content []byte, mode int, fileType byte,
) archive.Item

OwnedArchiveItem will create an archive.Item owned by the AgentUserGroup, or by root if c is nil.

func (AgentUserGroup) Validate

func (c AgentUserGroup) Validate() []error

Validate validates the fields of the AgentUserGroup.

type AgentsSummary

type AgentsSummary map[string]AgentSummary

AgentsSummary is a map of agent IDs to a summary of the agent.

type Allocation

// Allocation is the model for an allocation in the database. It is mapped by bun
// to the `allocations` table, with AllocationID as the primary key.
type Allocation struct {
	bun.BaseModel `bun:"table:allocations"`

	AllocationID AllocationID     `db:"allocation_id" bun:"allocation_id,pk"`
	TaskID       TaskID           `db:"task_id" bun:"task_id,notnull"`
	Slots        int              `db:"slots" bun:"slots,notnull"`
	ResourcePool string           `db:"resource_pool" bun:"resource_pool,notnull"`
	StartTime    *time.Time       `db:"start_time" bun:"start_time"`
	EndTime      *time.Time       `db:"end_time" bun:"end_time"`
	State        *AllocationState `db:"state" bun:"state"`
	IsReady      *bool            `db:"is_ready" bun:"is_ready"`
	Ports        map[string]int   `db:"ports" bun:"ports,notnull"`
	// ProxyAddress stores the explicitly provided task-provided proxy address for resource
	// managers that do not supply us with it. Comes from `determined.exec.prep_container --proxy`.
	ProxyAddress *string `db:"proxy_address" bun:"proxy_address"`
	ExitReason   *string `db:"exit_reason" bun:"exit_reason"`
	ExitErr      *string `db:"exit_error" bun:"exit_error"`
	StatusCode   *int32  `db:"status_code" bun:"status_code"`
}

Allocation is the model for an allocation in the database.

func (Allocation) Proto

func (a Allocation) Proto() *taskv1.Allocation

Proto returns the proto representation of the allocation state.

type AllocationID

type AllocationID string

AllocationID is the ID of an allocation of a task. It is usually of the form TaskID.allocation_number, maybe with some other metadata if different types of allocations run.

func NewAllocationID

func NewAllocationID(in *string) *AllocationID

NewAllocationID casts string ptr to AllocationID ptr.

func (AllocationID) String

func (a AllocationID) String() string

func (AllocationID) ToTaskID

func (a AllocationID) ToTaskID() TaskID

ToTaskID converts an AllocationID to its taskID.

type AllocationSession

type AllocationSession struct {
	bun.BaseModel `bun:"table:allocation_sessions"`
	ID            SessionID    `db:"id" json:"id"`
	AllocationID  AllocationID `db:"allocation_id" json:"allocation_id"`
	OwnerID       *UserID      `db:"owner_id" json:"owner_id"`
}

AllocationSession corresponds to a row in the "allocation_sessions" DB table.

type AllocationState

type AllocationState string

AllocationState represents the current state of the task. Value indicates a partial ordering.

const (
	// AllocationStatePending state denotes that the command is awaiting allocation.
	AllocationStatePending AllocationState = "PENDING"
	// AllocationStateWaiting state denotes that the command is waiting on data.
	AllocationStateWaiting AllocationState = "WAITING"
	// AllocationStateAssigned state denotes that the command has been assigned to an agent but has
	// not started yet.
	AllocationStateAssigned AllocationState = "ASSIGNED"
	// AllocationStatePulling state denotes that the command's base image is being pulled from the
	// Docker registry.
	AllocationStatePulling AllocationState = "PULLING"
	// AllocationStateStarting state denotes that the image has been pulled and the task is being
	// started, but the task is not ready yet.
	AllocationStateStarting AllocationState = "STARTING"
	// AllocationStateRunning state denotes that the service in the command is running.
	AllocationStateRunning AllocationState = "RUNNING"
	// AllocationStateTerminated state denotes that the command has exited or has been aborted.
	AllocationStateTerminated AllocationState = "TERMINATED"
	// AllocationStateTerminating state denotes that the command is terminating.
	AllocationStateTerminating AllocationState = "TERMINATING"
)

func MostProgressedAllocationState

func MostProgressedAllocationState(states ...AllocationState) AllocationState

MostProgressedAllocationState returns the furthest-progressed state. E.g., a call with PENDING, PULLING and STARTING returns STARTING.

func (AllocationState) Proto

func (s AllocationState) Proto() taskv1.State

Proto returns the proto representation of the task state.

type AuthTokenKeypair

type AuthTokenKeypair struct {
	bun.BaseModel `bun:"table:auth_token_keypair"`
	PublicKey     ed25519.PublicKey  `db:"public_key"`
	PrivateKey    ed25519.PrivateKey `db:"private_key"`
}

AuthTokenKeypair stores the public/private keypair used for asymmetric encryption of authentication tokens.

type BindMount

type BindMount struct {
	HostPath      string `json:"host_path"`
	ContainerPath string `json:"container_path"`
	ReadOnly      bool   `json:"read_only"`
	Propagation   string `json:"propagation"`
}

BindMount configures trial runner filesystem bind mounts.

func ToModelBindMount

func ToModelBindMount(b expconf.BindMount) BindMount

ToModelBindMount converts new expconf bind mounts into old model bind mounts.

func (BindMount) ToExpconf

func (b BindMount) ToExpconf() expconf.BindMount

ToExpconf translates old model objects into an expconf object.

func (*BindMount) UnmarshalJSON

func (b *BindMount) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

func (BindMount) Validate

func (b BindMount) Validate() []error

Validate implements the check.Validatable interface.

type BindMountsConfig

type BindMountsConfig []BindMount

BindMountsConfig is the configuration for bind mounts.

func (BindMountsConfig) ToExpconf

ToExpconf translates old model objects into an expconf object.

func (*BindMountsConfig) UnmarshalJSON

func (b *BindMountsConfig) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

type Checkpoint

// Checkpoint represents a row from the `checkpoints_view` view. The
// training-specific columns are carried by the embedded
// CheckpointTrainingMetadata struct.
type Checkpoint struct {
	bun.BaseModel `bun:"table:checkpoints_view"`
	ID            int `db:"id"`

	UUID         *uuid.UUID    `db:"uuid"`
	TaskID       *TaskID       `db:"task_id"`
	AllocationID *AllocationID `db:"allocation_id"`
	ReportTime   time.Time     `db:"report_time"`
	State        State         `db:"state"`
	Resources    JSONObj       `db:"resources"`
	Metadata     JSONObj       `db:"metadata"`
	Size         int64         `db:"size"`

	CheckpointTrainingMetadata
}

Checkpoint represents a row from the `checkpoints_view` view.

type CheckpointTrainingMetadata

type CheckpointTrainingMetadata struct {
	TrialID           int      `db:"trial_id"`
	ExperimentID      int      `db:"experiment_id"`
	ExperimentConfig  JSONObj  `db:"experiment_config"`
	HParams           JSONObj  `db:"hparams" bun:"hparams"`
	TrainingMetrics   JSONObj  `db:"training_metrics"`
	ValidationMetrics JSONObj  `db:"validation_metrics"`
	SearcherMetric    *float64 `db:"searcher_metric"`
	StepsCompleted    int      `db:"steps_completed"`
}

CheckpointTrainingMetadata is a substruct of checkpoints encapsulating training specific information.

type CheckpointV2

type CheckpointV2 struct {
	bun.BaseModel `bun:"table:checkpoints_v2"`
	ID            int                    `db:"id" bun:"id,pk,autoincrement"`
	UUID          uuid.UUID              `db:"uuid"`
	TaskID        TaskID                 `db:"task_id"`
	AllocationID  *AllocationID          `db:"allocation_id"`
	ReportTime    time.Time              `db:"report_time"`
	State         State                  `db:"state"`
	Resources     map[string]int64       `db:"resources"`
	Metadata      map[string]interface{} `db:"metadata"`
	Size          int64                  `db:"size"`
}

CheckpointV2 represents a row from the `checkpoints_v2` table.

type CommandConfig

type CommandConfig struct {
	Description      string              `json:"description"`
	BindMounts       BindMountsConfig    `json:"bind_mounts"`
	Environment      Environment         `json:"environment"`
	Resources        ResourcesConfig     `json:"resources"`
	Entrypoint       []string            `json:"entrypoint"`
	TensorBoardArgs  []string            `json:"tensorboard_args,omitempty"`
	IdleTimeout      *Duration           `json:"idle_timeout"`
	NotebookIdleType string              `json:"notebook_idle_type"`
	WorkDir          *string             `json:"work_dir"`
	Debug            bool                `json:"debug"`
	Pbs              expconf.PbsConfig   `json:"pbs,omitempty"`
	Slurm            expconf.SlurmConfig `json:"slurm,omitempty"`
}

CommandConfig holds the necessary configurations to launch a command task in the cluster.

func DefaultConfig

func DefaultConfig(taskContainerDefaults *TaskContainerDefaultsConfig) CommandConfig

DefaultConfig is the default configuration used by all commands (e.g., commands, notebooks, shells) if a request does not specify any configuration options.

func (*CommandConfig) Validate

func (c *CommandConfig) Validate() []error

Validate implements the check.Validatable interface.

type ConfigFile

type ConfigFile struct {
	ID      int    `db:"id" json:"id"`
	Content []byte `db:"content"`
}

ConfigFile represents a row from the `config_files` table.

type DefaultLoggingConfig

type DefaultLoggingConfig struct{}

DefaultLoggingConfig configures logging for tasks using HTTP to the master.

type DeviceConfig

type DeviceConfig struct {
	HostPath      string `json:"host_path"`
	ContainerPath string `json:"container_path"`
	Mode          string `json:"mode"`
}

DeviceConfig configures container device access.

func (DeviceConfig) ToExpconf

func (d DeviceConfig) ToExpconf() expconf.Device

ToExpconf translates old model objects into an expconf object.

func (*DeviceConfig) UnmarshalJSON

func (d *DeviceConfig) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

type DevicesConfig

type DevicesConfig []DeviceConfig

DevicesConfig is the configuration for devices. It is a named type because it needs custom merging behavior (via UnmarshalJSON).

func (DevicesConfig) ToExpconf

func (d DevicesConfig) ToExpconf() expconf.DevicesConfig

ToExpconf translates old model objects into an expconf object.

func (*DevicesConfig) UnmarshalJSON

func (d *DevicesConfig) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface so that DeviceConfigs are additive.

type Duration

type Duration time.Duration

Duration is a JSON (un)marshallable version of time.Duration.

func (Duration) MarshalJSON

func (d Duration) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface.

func (*Duration) UnmarshalJSON

func (d *Duration) UnmarshalJSON(b []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

type ElasticLoggingConfig

type ElasticLoggingConfig struct {
	Host     string                `json:"host"`
	Port     int                   `json:"port"`
	Security ElasticSecurityConfig `json:"security"`
}

ElasticLoggingConfig configures logging for tasks using Elastic.

func (*ElasticLoggingConfig) Resolve

func (o *ElasticLoggingConfig) Resolve() error

Resolve resolves the configuration.

type ElasticSecurityConfig

type ElasticSecurityConfig struct {
	Username *string         `json:"username"`
	Password *string         `json:"password"`
	TLS      TLSClientConfig `json:"tls"`
}

ElasticSecurityConfig configures security-related options for the elastic logging backend.

func (*ElasticSecurityConfig) Resolve

func (o *ElasticSecurityConfig) Resolve() error

Resolve resolves the configuration.

func (ElasticSecurityConfig) Validate

func (o ElasticSecurityConfig) Validate() []error

Validate implements the check.Validatable interface.

type EntityType

type EntityType string

EntityType represents an entity.

const (
	// EntityTypeProject represents a project.
	EntityTypeProject EntityType = "Project"
)

type Environment

type Environment struct {
	Image                RuntimeItem      `json:"image"`
	EnvironmentVariables RuntimeItems     `json:"environment_variables,omitempty"`
	ProxyPorts           ProxyPortsConfig `json:"proxy_ports"`

	Ports          map[string]int    `json:"ports"`
	RegistryAuth   *types.AuthConfig `json:"registry_auth,omitempty"`
	ForcePullImage bool              `json:"force_pull_image"`
	PodSpec        *k8sV1.Pod        `json:"pod_spec"`

	AddCapabilities  []string `json:"add_capabilities"`
	DropCapabilities []string `json:"drop_capabilities"`
}

Environment configures the environment of a Determined command or experiment.

func DefaultEnvConfig

func DefaultEnvConfig(taskContainerDefaults *TaskContainerDefaultsConfig) Environment

DefaultEnvConfig returns the default environment configuration.

func (Environment) ToExpconf

func (e Environment) ToExpconf() expconf.EnvironmentConfig

ToExpconf translates old model objects into an expconf object.

func (Environment) Validate

func (e Environment) Validate() []error

Validate implements the check.Validatable interface.

type ExitedReason

type ExitedReason string

ExitedReason defines why a workload exited early.

const (
	// Errored signals the searcher that the workload errored out.
	Errored ExitedReason = "ERRORED"
	// UserRequestedStop signals the searcher that the user requested a cancelation, from code.
	UserRequestedStop ExitedReason = "USER_REQUESTED_STOP"
	// UserCanceled signals the searcher that the user requested a cancelation, from the CLI or UI.
	UserCanceled ExitedReason = "USER_CANCELED"
	// InvalidHP signals the searcher that the user raised an InvalidHP exception.
	InvalidHP ExitedReason = "INVALID_HP"
	// InitInvalidHP signals the searcher that the user raised an InvalidHP exception
	// in the trial init.
	InitInvalidHP ExitedReason = "INIT_INVALID_HP"
)

func ExitedReasonFromProto

func ExitedReasonFromProto(r trialv1.TrialEarlyExit_ExitedReason) ExitedReason

ExitedReasonFromProto returns an ExitedReason from its protobuf representation.

func (ExitedReason) ToSearcherProto

ToSearcherProto converts an ExitedReason to its protobuf representation for searcher purposes.

type Experiment

type Experiment struct {
	ID    int    `db:"id" bun:"id,pk"`
	JobID JobID  `db:"job_id"`
	State State  `db:"state"`
	Notes string `db:"notes"`

	// Offer a LegacyConfig rather than ExperimentConfig since most of the system is about querying
	// experiments which ran some time in the past, which is exactly what LegacyConfig is for.
	Config         expconf.LegacyConfig `db:"config"`
	OriginalConfig string               `db:"original_config"`

	// The model definition is stored as a .tar.gz file (raw bytes).
	ModelDefinitionBytes []byte     `db:"model_definition" bun:"model_definition"`
	StartTime            time.Time  `db:"start_time"`
	EndTime              *time.Time `db:"end_time"`
	ParentID             *int       `db:"parent_id"`
	Archived             bool       `db:"archived"`
	GitRemote            *string    `db:"git_remote"`
	GitCommit            *string    `db:"git_commit"`
	GitCommitter         *string    `db:"git_committer"`
	GitCommitDate        *time.Time `db:"git_commit_date"`
	OwnerID              *UserID    `db:"owner_id"`
	Username             string     `db:"username"`
	ProjectID            int        `db:"project_id"`
	Unmanaged            bool       `db:"unmanaged"`
	ExternalExperimentID *string    `db:"external_experiment_id"`
	Progress             *float64
}

Experiment represents a row from the `experiments` table.

func ExperimentFromProto

func ExperimentFromProto(e *experimentv1.Experiment) (*Experiment, error)

ExperimentFromProto converts an experimentv1.Experiment to a model.Experiment.

func NewExperiment

func NewExperiment(
	config expconf.ExperimentConfig,
	originalConfig string,
	modelDefinitionBytes []byte,
	parentID *int,
	archived bool,
	gitRemote, gitCommit, gitCommitter *string,
	gitCommitDate *time.Time,
	projectID int,
	unmanaged bool,
) (*Experiment, error)

NewExperiment creates a new experiment struct in the paused state. Note that the experiment ID will not be set.

func (*Experiment) Transition

func (e *Experiment) Transition(state State) (bool, error)

Transition changes the state of the experiment to the new state. If the state was not modified the first return value returns false. If the state transition is illegal, an error is returned.

type ExtendedFloat64

type ExtendedFloat64 float64

ExtendedFloat64 handles serializing floats to JSON, including special cases for infinite values.

func (ExtendedFloat64) MarshalJSON

func (f ExtendedFloat64) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface.

func (*ExtendedFloat64) UnmarshalJSON

func (f *ExtendedFloat64) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

type ExternalSessions

type ExternalSessions struct {
	LoginURI  string `json:"login_uri"`
	LogoutURI string `json:"logout_uri"`
	JwtKey    string `json:"jwt_key"`
}

ExternalSessions provides an integration point for an external service to issue JWTs to control access to the cluster.

func (ExternalSessions) Enabled

func (e ExternalSessions) Enabled() bool

Enabled returns whether or not external sessions are enabled.

type FullUser

type FullUser struct {
	ID          UserID      `db:"id" json:"id"`
	DisplayName null.String `db:"display_name" json:"display_name"`
	Username    string      `db:"username" json:"username"`
	Name        string      `db:"name" json:"name"`
	Admin       bool        `db:"admin" json:"admin"`
	Active      bool        `db:"active" json:"active"`
	ModifiedAt  time.Time   `db:"modified_at" json:"modified_at"`
	Remote      bool        `db:"remote" json:"remote"`
	LastAuthAt  *time.Time  `db:"last_auth_at" json:"last_auth_at"`

	AgentUID   null.Int    `db:"agent_uid" json:"agent_uid"`
	AgentGID   null.Int    `db:"agent_gid" json:"agent_gid"`
	AgentUser  null.String `db:"agent_user" json:"agent_user"`
	AgentGroup null.String `db:"agent_group" json:"agent_group"`
}

A FullUser is a User joined with any other user relations.

func (FullUser) ToUser

func (u FullUser) ToUser() User

ToUser converts a FullUser model to just a User model.

type Group

type Group struct {
	bun.BaseModel `bun:"table:groups,alias:groups"`

	ID      int    `bun:"id,pk,autoincrement" json:"id"`
	Name    string `bun:"group_name,notnull"  json:"name"`
	OwnerID UserID `bun:"user_id,nullzero"    json:"userId,omitempty"`
}

Group represents a user group as it's stored in the database.

func (*Group) Proto

func (g *Group) Proto() *groupv1.Group

Proto converts a group to its protobuf representation.

type GroupMembership

type GroupMembership struct {
	bun.BaseModel `bun:"table:user_group_membership"`

	UserID  UserID `bun:"user_id,notnull"`
	GroupID int    `bun:"group_id,notnull"`
}

GroupMembership represents a user's membership to a group as it's stored in the database.

type Groups

type Groups []Group

Groups is a slice of Group objects—primarily useful for its methods.

func (Groups) Proto

func (gs Groups) Proto() []*groupv1.Group

Proto converts Groups into its protobuf representation.

type Instance

type Instance struct {
	ID                  string
	LaunchTime          time.Time
	LastStateChangeTime time.Time
	AgentName           string
	State               InstanceState
}

Instance connects a provider's name for a compute resource to the Determined agent name.

func (Instance) Equals

func (inst Instance) Equals(other Instance) bool

Equals checks if this instance is the same resource as instance `other`.

func (Instance) String

func (inst Instance) String() string

type InstanceState

type InstanceState string

InstanceState is an enum type that describes an instance state.

const (
	// Unknown describes the instance state cannot be recognized.
	Unknown InstanceState = "Unknown"
	// Starting describes the instance is starting up.
	Starting InstanceState = "Starting"
	// Running describes the instance is running.
	Running InstanceState = "Running"
	// Stopping describes the instance is stopping.
	Stopping InstanceState = "Stopping"
	// Stopped describes the instance is stopped.
	Stopped InstanceState = "Stopped"
	// Terminating is when the instance is in the process of being terminated.
	Terminating InstanceState = "Terminating"
	// SpotRequestPendingAWS indicates that the instance is actually a pending AWS spot request.
	SpotRequestPendingAWS InstanceState = "SpotRequestPendingAWS"
)

type InstanceStats

type InstanceStats struct {
	ResourcePool string `db:"resource_pool"`
	InstanceID   string `db:"instance_id"`
	Slots        int    `db:"slots"`
}

InstanceStats stores the start/end status of an instance.

type InstanceType

type InstanceType interface {
	Name() string
	Slots() int
}

InstanceType describes an instance type.

type JSONObj

type JSONObj map[string]interface{}

JSONObj is a JSON object that converts to a []byte in SQL queries.

func (*JSONObj) Scan

func (j *JSONObj) Scan(src interface{}) error

Scan unmarshals JSON in []byte to map[string]interface{}.

func (JSONObj) Value

func (j JSONObj) Value() (driver.Value, error)

Value marshals the JSONObj into a []byte for use in SQL queries.

type Job

type Job struct {
	bun.BaseModel `bun:"table:jobs"`

	JobID   JobID           `db:"job_id" bun:"job_id,pk"`
	JobType JobType         `db:"job_type" bun:"job_type"`
	OwnerID *UserID         `db:"owner_id" bun:"owner_id"`
	QPos    decimal.Decimal `db:"q_position" bun:"q_position"`
}

Job is the model for a job in the database.

type JobID

type JobID string

JobID is the unique ID of a job among all jobs.

func NewJobID

func NewJobID() JobID

NewJobID returns a random, globally unique job ID.

func (JobID) String

func (id JobID) String() string

String represents the job ID as a string.

type JobType

type JobType string

JobType is the type of a job.

const (
	// JobTypeNotebook is the "NOTEBOOK" job type for the enum public.job_type in Postgres.
	JobTypeNotebook JobType = "NOTEBOOK"
	// JobTypeShell is the "SHELL" job type for the enum public.job_type in Postgres.
	JobTypeShell JobType = "SHELL"
	// JobTypeCommand is the "COMMAND" job type for the enum public.job_type in Postgres.
	JobTypeCommand JobType = "COMMAND"
	// JobTypeTensorboard is the "TENSORBOARD" job type for the enum public.job_type in Postgres.
	JobTypeTensorboard JobType = "TENSORBOARD"
	// JobTypeExperiment is the "EXPERIMENT" job type for the enum public.job_type in Postgres.
	JobTypeExperiment JobType = "EXPERIMENT"
	// JobTypeCheckpointGC is the "CHECKPOINT_GC" job type for the enum public.job_type in Postgres.
	JobTypeCheckpointGC JobType = "CHECKPOINT_GC"
)

func JobTypeFromProto

func JobTypeFromProto(t jobv1.Type) JobType

JobTypeFromProto maps a jobv1.Type to JobType.

func (JobType) Proto

func (jt JobType) Proto() jobv1.Type

Proto returns the proto representation of the job type.

type KubernetesTaskContainerDefaults

type KubernetesTaskContainerDefaults struct {
	MaxSlotsPerPod *int `json:"max_slots_per_pod"`
}

KubernetesTaskContainerDefaults is task container defaults specific to Kubernetes.

type LoggingConfig

type LoggingConfig struct {
	DefaultLoggingConfig *DefaultLoggingConfig `union:"type,default" json:"-"`
	ElasticLoggingConfig *ElasticLoggingConfig `union:"type,elastic" json:"-"`
}

LoggingConfig configures logging for tasks (currently only trials) in Determined.

func (LoggingConfig) MarshalJSON

func (c LoggingConfig) MarshalJSON() ([]byte, error)

MarshalJSON serializes LoggingConfig.

func (LoggingConfig) Resolve

func (c LoggingConfig) Resolve() error

Resolve resolves the parts of the LoggingConfig that must be evaluated on the master machine.

func (*LoggingConfig) UnmarshalJSON

func (c *LoggingConfig) UnmarshalJSON(data []byte) error

UnmarshalJSON deserializes LoggingConfig.

type MetricGroup

type MetricGroup string

MetricGroup denotes what custom group the metric is.

const (
	// ValidationMetricGroup designates metrics from validation runs.
	ValidationMetricGroup MetricGroup = "validation"
	// TrainingMetricGroup designates metrics from training runs.
	TrainingMetricGroup MetricGroup = "training"
	// InferenceMetricGroup designates metrics from inference runs.
	InferenceMetricGroup MetricGroup = "inference"
)

func TrialSummaryMetricGroup

func TrialSummaryMetricGroup(jsonPath string) MetricGroup

TrialSummaryMetricGroup returns the metric group for the given summary JSON path.

func (MetricGroup) ToProto

func (t MetricGroup) ToProto() apiv1.MetricType

ToProto returns the proto representation of the metric group.

func (MetricGroup) ToString

func (t MetricGroup) ToString() string

ToString returns the string representation of the metric group.

func (MetricGroup) Validate

func (t MetricGroup) Validate() error

Validate validates the metric group.

type MetricIdentifier

type MetricIdentifier struct {
	Group MetricGroup
	Name  metricName
}

MetricIdentifier packages metric group and name together.

func DeserializeMetricIdentifier

func DeserializeMetricIdentifier(s string) (*MetricIdentifier, error)

DeserializeMetricIdentifier deserializes a metric identifier from a string.

func (MetricIdentifier) ToProto

ToProto returns the proto representation of the metric identifier.

type Model

type Model struct {
	ID              int       `db:"id" json:"id"`
	Name            string    `db:"name" json:"name"`
	Description     string    `db:"description" json:"description"`
	Metadata        JSONObj   `db:"metadata" json:"metadata"`
	CreationTime    time.Time `db:"creation_time" json:"creation_time"`
	LastUpdatedTime time.Time `db:"last_updated_time" json:"last_updated_time"`
	Labels          []string  `db:"labels" json:"labels"`
	Username        string    `db:"username" json:"username"`
	Archived        bool      `db:"archived" json:"archived"`
	NumVersions     int       `db:"num_versions" json:"num_versions"`
	WorkspaceID     int       `db:"workspace_id" json:"workspace_id"`
}

Model represents a row from the `models` table.

type ModelVersion

type ModelVersion struct {
	ID              int       `db:"id" json:"id"`
	Version         int       `db:"version" json:"version"`
	CheckpointID    int       `db:"checkpoint_id" json:"checkpoint_id"`
	CreationTime    time.Time `db:"creation_time" json:"creation_time"`
	ModelID         int       `db:"model_id" json:"model_id"`
	Metadata        JSONObj   `db:"metadata" json:"metadata"`
	Name            string    `db:"name" json:"name"`
	LastUpdatedTime time.Time `db:"last_updated_time" json:"last_updated_time"`
	Comment         string    `db:"comment" json:"comment"`
	Notes           string    `db:"readme" json:"notes"`
	Username        string    `db:"username" json:"username"`
}

ModelVersion represents a row from the `model_versions` table.

type Project

type Project struct {
	bun.BaseModel           `bun:"table:projects"`
	ID                      int               `bun:"id,pk,autoincrement"`
	Name                    string            `bun:"name"`
	CreatedAt               time.Time         `bun:"created_at,scanonly"`
	Archived                bool              `bun:"archived"`
	WorkspaceID             int               `bun:"workspace_id"`
	WorkspaceName           string            `bun:"workspace_name"`
	UserID                  int               `bun:"user_id"`
	Username                string            `bun:"username"`
	Immutable               bool              `bun:"immutable"`
	Description             string            `bun:"description"`
	Notes                   []*projectv1.Note `bun:"notes,type:jsonb"`
	NumActiveExperiments    int32             `bun:"num_active_experiments"`
	NumExperiments          int32             `bun:"num_experiments"`
	State                   WorkspaceState    `bun:"state"`
	ErrorMessage            string            `bun:"error_message"`
	LastExperimentStartedAt time.Time         `bun:"last_experiment_started_at"`
}

Project is the bun model of a project.

func (Project) Proto

func (p Project) Proto() *projectv1.Project

Proto converts a bun model of a project to a proto object.

type Projects

type Projects []*Project

Projects is a slice of pointers to Project instances.

type ProxyPort

type ProxyPort struct {
	ProxyPort        int  `json:"proxy_port"`
	ProxyTCP         bool `json:"proxy_tcp"`
	Unauthenticated  bool `json:"unauthenticated"`
	DefaultServiceID bool `json:"default_service_id"`
}

ProxyPort is a legacy-style clone of expconf.ProxyPort. TODO(ilia): migrate command config to expconf.

func (ProxyPort) ToExpconf

func (p ProxyPort) ToExpconf() expconf.ProxyPort

ToExpconf translates old model objects into an expconf object.

type ProxyPortsConfig

type ProxyPortsConfig []ProxyPort

ProxyPortsConfig is a legacy-style clone of expconf.ProxyPortsConfig.

func (ProxyPortsConfig) ToExpconf

ToExpconf translates old model objects into an expconf object.

type RequestID

type RequestID uuid.UUID

RequestID links all operations with the same ID to a single trial create request.

func MustParseRequestID

func MustParseRequestID(s string) RequestID

MustParseRequestID decodes s into a request id or panics.

func NewRequestID

func NewRequestID(r io.Reader) RequestID

NewRequestID returns a new request ID using the provided reader.

func ParseRequestID

func ParseRequestID(s string) (RequestID, error)

ParseRequestID decodes s into a request id or returns an error.

func (RequestID) Before

func (r RequestID) Before(s RequestID) bool

Before determines whether this UUID is strictly lexicographically less (comparing the sequences of bytes) than another one.

func (RequestID) MarshalText

func (r RequestID) MarshalText() ([]byte, error)

MarshalText returns the marshaled form of this ID, which is the string form of the underlying UUID.

func (*RequestID) Scan

func (r *RequestID) Scan(value interface{}) error

Scan implements the sql.Scanner interface.

func (RequestID) String

func (r RequestID) String() string

func (*RequestID) UnmarshalText

func (r *RequestID) UnmarshalText(data []byte) error

UnmarshalText unmarshals this ID from a text representation.

func (RequestID) Value

func (r RequestID) Value() (driver.Value, error)

Value implements the driver.Valuer interface.

type ResourceAggregates

type ResourceAggregates struct {
	Date            *time.Time
	AggregationType string
	AggregationKey  string
	Seconds         float32
}

ResourceAggregates is the model for resource_aggregates in the database.

type ResourcesConfig

type ResourcesConfig struct {
	Slots int `json:"slots"`

	MaxSlots       *int         `json:"max_slots,omitempty"`
	Weight         float64      `json:"weight"`
	NativeParallel bool         `json:"native_parallel,omitempty"`
	ShmSize        *StorageSize `json:"shm_size,omitempty"`
	ResourcePool   string       `json:"resource_pool"`
	Priority       *int         `json:"priority,omitempty"`

	Devices DevicesConfig `json:"devices"`

	// Deprecated: Use ResourcePool instead.
	AgentLabel string `json:"agent_label,omitempty"`
}

ResourcesConfig configures resource usage for an experiment, command, notebook, or tensorboard.

func DefaultResourcesConfig

func DefaultResourcesConfig(taskContainerDefaults *TaskContainerDefaultsConfig) ResourcesConfig

DefaultResourcesConfig returns the default resources configuration.

func ParseJustResources

func ParseJustResources(configBytes []byte) ResourcesConfig

ParseJustResources is a helper function for breaking the circular dependency where we need the TaskContainerDefaults to unmarshal an ExperimentConfig, but we need the Resources.ResourcePool setting to know which TaskContainerDefaults to use. It does not throw errors; if unmarshalling fails that can just get caught later.

func (ResourcesConfig) ToExpconf

func (r ResourcesConfig) ToExpconf() expconf.ResourcesConfig

ToExpconf translates old model objects into an expconf object.

func (ResourcesConfig) Validate

func (r ResourcesConfig) Validate() []error

Validate implements the check.Validatable interface.

type RuntimeItem

type RuntimeItem struct {
	CPU  string `json:"cpu,omitempty"`
	CUDA string `json:"cuda,omitempty"`
	ROCM string `json:"rocm,omitempty"`
}

RuntimeItem configures the runtime image.

func (RuntimeItem) For

func (r RuntimeItem) For(deviceType device.Type) string

For returns the value for the provided device type.

func (RuntimeItem) ToExpconf

func (r RuntimeItem) ToExpconf() expconf.EnvironmentImageMap

ToExpconf translates old model objects into an expconf object.

func (*RuntimeItem) UnmarshalJSON

func (r *RuntimeItem) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

type RuntimeItems

type RuntimeItems struct {
	CPU  []string `json:"cpu,omitempty"`
	CUDA []string `json:"cuda,omitempty"`
	ROCM []string `json:"rocm,omitempty"`
}

RuntimeItems configures the runtime environment variables.

func (*RuntimeItems) For

func (r *RuntimeItems) For(deviceType device.Type) []string

For returns the value for the provided device type.

func (RuntimeItems) ToExpconf

ToExpconf translates old model objects into an expconf object.

func (*RuntimeItems) UnmarshalJSON

func (r *RuntimeItems) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

type SessionID

type SessionID int

SessionID is the type for user session IDs.

type SlotSummary

type SlotSummary struct {
	ID        string            `json:"id"`
	Device    device.Device     `json:"device"`
	Enabled   bool              `json:"enabled"`
	Container *cproto.Container `json:"container"`
	Draining  bool              `json:"draining"`
}

SlotSummary summarizes the state of a slot.

func (SlotSummary) ToProto

func (s SlotSummary) ToProto() *agentv1.Slot

ToProto converts a SlotSummary to its protobuf representation.

type SlotsSummary

type SlotsSummary map[string]SlotSummary

SlotsSummary contains a summary for a number of slots.

type Snapshotter

type Snapshotter interface {
	Snapshot() (json.RawMessage, error)
	Restore(json.RawMessage) error
}

Snapshotter is any object that implements how to save and restore its state.

type State

type State string

State is the run state of an experiment / trial / step / etc.

func StateFromProto

func StateFromProto(state experimentv1.State) State

StateFromProto maps experimentv1.State to State.

type StateWithReason

type StateWithReason struct {
	State               State
	InformationalReason string
}

StateWithReason is the run state of an experiment with an informational reason used for logging purposes.

type StorageSize

type StorageSize int64

StorageSize is a named type for custom marshaling behavior for shm_size.

func (*StorageSize) UnmarshalJSON

func (d *StorageSize) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

type TLSClientConfig

type TLSClientConfig struct {
	Enabled         bool   `json:"enabled"`
	SkipVerify      bool   `json:"skip_verify"`
	CertificatePath string `json:"certificate"`
	CertificateName string `json:"certificate_name"`
	CertBytes       []byte
}

TLSClientConfig configures how to make a TLS connection.

func MakeTLSConfig

func MakeTLSConfig(cert *tls.Certificate) (TLSClientConfig, error)

MakeTLSConfig constructs a TLSClientConfig to use the provided tls.Certificate.

func (*TLSClientConfig) Resolve

func (t *TLSClientConfig) Resolve() error

Resolve resolves the configuration.

func (TLSClientConfig) Validate

func (t TLSClientConfig) Validate() []error

Validate implements the check.Validatable interface.

type Task

type Task struct {
	bun.BaseModel `bun:"table:tasks"`

	TaskID    TaskID     `db:"task_id" bun:"task_id,pk"`
	JobID     *JobID     `db:"job_id"`
	TaskType  TaskType   `db:"task_type"`
	StartTime time.Time  `db:"start_time"`
	EndTime   *time.Time `db:"end_time"`
	// LogVersion indicates how the logs were stored.
	LogVersion TaskLogVersion `db:"log_version"`

	// Relations.
	Job *Job `bun:"rel:belongs-to,join:job_id=job_id"`
}

Task is the model for a task in the database.

type TaskContainerDefaultsConfig

type TaskContainerDefaultsConfig struct {
	DtrainNetworkInterface string                `json:"dtrain_network_interface,omitempty"`
	NCCLPortRange          string                `json:"nccl_port_range,omitempty"`
	GLOOPortRange          string                `json:"gloo_port_range,omitempty"`
	ShmSizeBytes           int64                 `json:"shm_size_bytes,omitempty"`
	NetworkMode            container.NetworkMode `json:"network_mode,omitempty"`
	// TODO(DET-9855) we should move these over to KubernetesTaskContainerDefaults.
	CPUPodSpec           *k8sV1.Pod        `json:"cpu_pod_spec"`
	GPUPodSpec           *k8sV1.Pod        `json:"gpu_pod_spec"`
	Image                *RuntimeItem      `json:"image,omitempty"`
	RegistryAuth         *types.AuthConfig `json:"registry_auth,omitempty"`
	ForcePullImage       bool              `json:"force_pull_image,omitempty"`
	EnvironmentVariables *RuntimeItems     `json:"environment_variables,omitempty"`

	AddCapabilities  []string      `json:"add_capabilities"`
	DropCapabilities []string      `json:"drop_capabilities"`
	Devices          DevicesConfig `json:"devices"`

	BindMounts BindMountsConfig      `json:"bind_mounts"`
	WorkDir    *string               `json:"work_dir"`
	Slurm      expconf.SlurmConfigV0 `json:"slurm"`
	Pbs        expconf.PbsConfigV0   `json:"pbs"`

	LogPolicies expconf.LogPoliciesConfig

	// TODO(DET-9856) we should probably eventually move this to expconf and allow setting
	// on a per task level.
	Kubernetes *KubernetesTaskContainerDefaults `json:"kubernetes"`
}

TaskContainerDefaultsConfig configures docker defaults for all containers. If you add a field to this, you must update the merge impl.

func DefaultTaskContainerDefaults

func DefaultTaskContainerDefaults() *TaskContainerDefaultsConfig

DefaultTaskContainerDefaults returns the default for TaskContainerDefaultsConfig.

func (TaskContainerDefaultsConfig) Merge

Merge merges other into self, preferring other. The result is a deepcopy of self, with deep copies of values taken from other.

func (*TaskContainerDefaultsConfig) MergeIntoExpConfig

func (c *TaskContainerDefaultsConfig) MergeIntoExpConfig(config *expconf.ExperimentConfig)

MergeIntoExpConfig sets any unset ExperimentConfig values from TaskContainerDefaults.

func (*TaskContainerDefaultsConfig) UnmarshalJSON

func (c *TaskContainerDefaultsConfig) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface. Setting defaults here is necessary over our usual "Define a default struct and unmarshal into it" strategy because there are places (resource pool configs) where we need to know if the task container defaults were set at all or if they were not; if they were set then that resource pool's task container defaults are used instead of the toplevel master config's settings. To know if the user set them at the resource pool level, the resource pool has to have a nullable pointer, which is not compatible with our usual strategy for defaults.

func (*TaskContainerDefaultsConfig) Validate

func (c *TaskContainerDefaultsConfig) Validate() []error

Validate implements the check.Validatable interface.

type TaskContextDirectory

type TaskContextDirectory struct {
	bun.BaseModel `bun:"table:task_context_directory"`

	TaskID           TaskID `bun:"task_id"`
	ContextDirectory []byte `bun:"context_directory"`
}

TaskContextDirectory represents a row in the database for a task's context directory. This is currently only used for non-trial tasks (notebooks, tensorboards, and commands). Trials aren't in it because their context is stored on experiments.model_def. In addition, trials can have many tasks but currently can only have one model_def, so we would end up duplicating a lot of data by migrating each experiment's model_def over to this table. That migration would also be pretty painful.

type TaskID

type TaskID string

TaskID is the unique ID of a task among all tasks.

func NewTaskID

func NewTaskID() TaskID

NewTaskID returns a random, globally unique task ID.

func (TaskID) String

func (a TaskID) String() string

type TaskLog

type TaskLog struct {
	// A task log should have one of these IDs after being persisted. All should be unique.
	ID *int `db:"id" json:"id,omitempty"`
	// The body of an Elasticsearch log response will look something like
	// { _id: ..., _source: { ... }} where _source is the rest of this struct.
	// StringID doesn't have serialization tags because it is not part of
	// _source and populated from _id.
	StringID     *string `json:"-"`
	TaskID       string  `db:"task_id" json:"task_id"`
	AllocationID *string `db:"allocation_id" json:"allocation_id"`
	AgentID      *string `db:"agent_id" json:"agent_id,omitempty"`
	// In the case of k8s, container_id is a pod name instead.
	ContainerID *string    `db:"container_id" json:"container_id,omitempty"`
	RankID      *int       `db:"rank_id" json:"rank_id,omitempty"`
	Timestamp   *time.Time `db:"timestamp" json:"timestamp"`
	Level       *string    `db:"level" json:"level"`
	Log         string     `db:"log" json:"log"`
	Source      *string    `db:"source" json:"source,omitempty"`
	StdType     *string    `db:"stdtype" json:"stdtype,omitempty"`
}

TaskLog represents a structured log emitted by an allocation.

func TaskLogFromProto

func TaskLogFromProto(in *taskv1.TaskLog) *TaskLog

TaskLogFromProto converts a proto task log to a model task log.

func (*TaskLog) Message

func (t *TaskLog) Message() string

Message resolves the flat version of the log that UIs have shown historically. TODO(task-unif): Should we just.. stop doing this? And send the log as is and let the UIs handle display (yes, IMO).

func (TaskLog) Proto

func (t TaskLog) Proto() (*apiv1.TaskLogsResponse, error)

Proto converts a task log to its protobuf representation.

type TaskLogBatch

type TaskLogBatch []*TaskLog

TaskLogBatch represents a batch of model.TaskLog.

func (TaskLogBatch) ForEach

func (t TaskLogBatch) ForEach(f func(interface{}) error) error

ForEach implements logs.Batch.

func (TaskLogBatch) Size

func (t TaskLogBatch) Size() int

Size implements logs.Batch.

type TaskLogVersion

type TaskLogVersion int32

TaskLogVersion is the version for our log-storing scheme. Useful because changing designs would involve either a really costly migration or versioning schemes and we pick the latter.

const (
	TaskLogVersion0       TaskLogVersion = 0
	TaskLogVersion1       TaskLogVersion = 1
	CurrentTaskLogVersion                = TaskLogVersion1
)

CurrentTaskLogVersion describes the current scheme in which we store task logs. To avoid a migration that in some cases would be extremely costly, we record the log version so that we can just read old logs the old way and do the new however we please.

type TaskStats

type TaskStats struct {
	AllocationID AllocationID
	EventType    string
	// ContainerID is sent by the agent. This won't always be present in the database.
	// This is a weird table since sometimes it is one row per allocation
	// (like in record queued stats) and sometimes it is many per allocation like in
	// pulled time.
	ContainerID *cproto.ID
	StartTime   *time.Time
	EndTime     *time.Time
}

TaskStats is the model for task stats in the database.

type TaskType

type TaskType string

TaskType is the type of a task.

const (
	// TaskTypeTrial is the "TRIAL" task type for the enum public.task_type in Postgres.
	TaskTypeTrial TaskType = "TRIAL"
	// TaskTypeNotebook is the "NOTEBOOK" task type for the enum public.task_type in Postgres.
	TaskTypeNotebook TaskType = "NOTEBOOK"
	// TaskTypeShell is the "SHELL" task type for the enum public.task_type in Postgres.
	TaskTypeShell TaskType = "SHELL"
	// TaskTypeCommand is the "COMMAND" task type for the enum public.task_type in Postgres.
	TaskTypeCommand TaskType = "COMMAND"
	// TaskTypeTensorboard is the "TENSORBOARD" task type for the enum public.task_type in Postgres.
	TaskTypeTensorboard TaskType = "TENSORBOARD"
	// TaskTypeCheckpointGC is the "CHECKPOINT_GC" task type for the enum public.task_type in Postgres.
	TaskTypeCheckpointGC TaskType = "CHECKPOINT_GC"
)

type Template

type Template struct {
	Name        string `db:"name" json:"name"`
	Config      []byte `db:"config" json:"config" bun:"config"`
	WorkspaceID int    `db:"workspace_id" json:"workspace_id"`
}

Template represents a row from the `templates` table.

type Trial

type Trial struct {
	bun.BaseModel `bun:"table:trials"`

	ID                    int            `db:"id" bun:",pk,autoincrement"`
	RequestID             *RequestID     `db:"request_id"`
	ExperimentID          int            `db:"experiment_id"`
	State                 State          `db:"state"`
	StartTime             time.Time      `db:"start_time"`
	EndTime               *time.Time     `db:"end_time"`
	HParams               map[string]any `db:"hparams" bun:"hparams"`
	WarmStartCheckpointID *int           `db:"warm_start_checkpoint_id"`
	Seed                  int64          `db:"seed"`
	TotalBatches          int            `db:"total_batches"`
	ExternalTrialID       *string        `db:"external_trial_id"`
}

Trial represents a row from the `trials` table.

func NewTrial

func NewTrial(
	state State,
	requestID RequestID,
	experimentID int,
	hparams JSONObj,
	warmStartCheckpoint *Checkpoint,
	trialSeed int64,
) *Trial

NewTrial creates a new trial in the specified state. Note that the trial ID will not be set.

type TrialLog

type TrialLog struct {
	// A trial log should have one of these IDs. All should be unique.
	// TODO(Brad): This must be int64.
	ID *int `db:"id" json:"id,omitempty"`
	// The body of an Elasticsearch log response will look something like
	// { _id: ..., _source: { ... }} where _source is the rest of this struct.
	// StringID doesn't have serialization tags because it is not part of
	// _source and populated from _id.
	StringID *string `json:"-"`

	TrialID int    `db:"trial_id" json:"trial_id"`
	Message string `db:"message" json:"message,omitempty"`

	AgentID *string `db:"agent_id" json:"agent_id,omitempty"`
	// In the case of k8s, container_id is a pod name instead.
	ContainerID *string    `db:"container_id" json:"container_id,omitempty"`
	RankID      *int       `db:"rank_id" json:"rank_id,omitempty"`
	Timestamp   *time.Time `db:"timestamp" json:"timestamp"`
	Level       *string    `db:"level" json:"level"`
	Log         *string    `db:"log" json:"log"`
	Source      *string    `db:"source" json:"source,omitempty"`
	StdType     *string    `db:"stdtype" json:"stdtype,omitempty"`
}

TrialLog represents a row from the `trial_logs` table.

func (TrialLog) Proto

func (t TrialLog) Proto() (*apiv1.TrialLogsResponse, error)

Proto converts a trial log to its protobuf representation.

func (*TrialLog) Resolve

func (t *TrialLog) Resolve()

Resolve resolves the legacy Message field from the others provided.

type TrialLogBatch

type TrialLogBatch []*TrialLog

TrialLogBatch represents a batch of model.TrialLog.

func (TrialLogBatch) ForEach

func (t TrialLogBatch) ForEach(f func(interface{}) error) error

ForEach implements logs.Batch.

func (TrialLogBatch) Size

func (t TrialLogBatch) Size() int

Size implements logs.Batch.

type TrialMetrics

type TrialMetrics struct {
	ID           int        `db:"id" json:"id"`
	TrialID      int        `db:"trial_id" json:"trial_id"`
	TrialRunID   int        `db:"trial_run_id" json:"-"`
	TotalBatches int        `db:"total_batches" json:"total_batches"`
	EndTime      *time.Time `db:"end_time" json:"end_time"`
	Metrics      JSONObj    `db:"metrics" json:"metrics"`
}

TrialMetrics represents a row from the `steps` or `validations` table.

type TrialProfilerMetricsBatch

type TrialProfilerMetricsBatch struct {
	Values     pgtype.Float4Array      `db:"values"`
	Batches    pgtype.Int4Array        `db:"batches"`
	Timestamps pgtype.TimestamptzArray `db:"timestamps"`
	Labels     []byte                  `db:"labels"`
}

TrialProfilerMetricsBatch represents a row from the `trial_profiler_metrics` table.

func (*TrialProfilerMetricsBatch) ToProto

ToProto converts a TrialProfilerMetricsBatch to its protobuf representation.

type TrialProfilerMetricsBatchBatch

type TrialProfilerMetricsBatchBatch []*trialv1.TrialProfilerMetricsBatch

TrialProfilerMetricsBatchBatch represents a batch of trialv1.TrialProfilerMetricsBatch.

func (TrialProfilerMetricsBatchBatch) ForEach

func (t TrialProfilerMetricsBatchBatch) ForEach(f func(interface{}) error) error

ForEach implements logs.Batch.

func (TrialProfilerMetricsBatchBatch) Size

Size implements logs.Batch.

type TrialTaskID

type TrialTaskID struct {
	bun.BaseModel `bun:"table:trial_id_task_id"`

	TrialID int
	TaskID  TaskID
}

TrialTaskID represents a row from the `trial_id_task_id` table.

type User

type User struct {
	bun.BaseModel `bun:"table:users"`
	ID            UserID      `db:"id" bun:"id,pk,autoincrement" json:"id"`
	Username      string      `db:"username" json:"username"`
	PasswordHash  null.String `db:"password_hash" json:"-"`
	DisplayName   null.String `db:"display_name" json:"display_name"`
	Admin         bool        `db:"admin" json:"admin"`
	Active        bool        `db:"active" json:"active"`
	ModifiedAt    time.Time   `db:"modified_at" json:"modified_at"`
	Remote        bool        `db:"remote" json:"remote"`
	LastAuthAt    *time.Time  `db:"last_auth_at" json:"last_auth_at"`
}

User corresponds to a row in the "users" DB table.

func (*User) Proto

func (user *User) Proto() *userv1.User

Proto converts a user to its protobuf representation.

func (*User) UpdatePasswordHash

func (user *User) UpdatePasswordHash(password string) error

UpdatePasswordHash updates the model's password hash employing necessary cryptographic techniques.

func (User) ValidatePassword

func (user User) ValidatePassword(password string) bool

ValidatePassword checks that the supplied password is correct.

type UserActivity

type UserActivity struct {
	bun.BaseModel `bun:"table:activity"`
	UserID        UserID       `db:"user_id" json:"user_id"`
	ActivityType  ActivityType `db:"activity_type" json:"activity_type"`
	EntityType    EntityType   `db:"entity_type" json:"entity_type"`
	EntityID      int32        `db:"entity_id" json:"entity_id"`
	ActivityTime  time.Time    `db:"activity_time" json:"activity_time"`
}

UserActivity is a record of user activity.

func UserActivityFromProto

func UserActivityFromProto(
	a userv1.ActivityType,
	e userv1.EntityType,
	entityID int32,
	userID int32,
	timestamp time.Time,
) *UserActivity

UserActivityFromProto returns a model UserActivity from a proto definition.

type UserID

type UserID int

UserID is the type for user IDs.

type UserSession

type UserSession struct {
	bun.BaseModel `bun:"table:user_sessions"`
	ID            SessionID `db:"id" json:"id"`
	UserID        UserID    `db:"user_id" json:"user_id"`
	Expiry        time.Time `db:"expiry" json:"expiry"`
}

UserSession corresponds to a row in the "user_sessions" DB table.

type UserWebSetting

type UserWebSetting struct {
	UserID      UserID
	Key         string
	Value       string
	StoragePath string
}

UserWebSetting is a record of user web setting.

type Users

type Users []User

Users is a slice of User objects—primarily useful for its methods.

func (Users) Proto

func (users Users) Proto() []*userv1.User

Proto converts a slice of users to its protobuf representation.

type WorkloadManagerType

type WorkloadManagerType string

WorkloadManagerType indicates which type of workloads the harness should prepare to receive.

type WorkloadSequencerType

type WorkloadSequencerType string

WorkloadSequencerType is the type of sequencer that a trial actor should use.

type Workspace

type Workspace struct {
	bun.BaseModel           `bun:"table:workspaces"`
	ID                      int                              `bun:"id,pk,autoincrement"`
	Name                    string                           `bun:"name"`
	Archived                bool                             `bun:"archived"`
	CreatedAt               time.Time                        `bun:"created_at,scanonly"`
	UserID                  UserID                           `bun:"user_id"`
	Immutable               bool                             `bun:"immutable"`
	State                   *WorkspaceState                  `bun:"state"`
	AgentUID                *int32                           `bun:"uid"`
	AgentUser               *string                          `bun:"user_"`
	AgentGID                *int32                           `bun:"gid"`
	AgentGroup              *string                          `bun:"group_"`
	CheckpointStorageConfig *expconf.CheckpointStorageConfig `bun:"checkpoint_storage_config"`
	DefaultComputePool      string                           `bun:"default_compute_pool"`
	DefaultAuxPool          string                           `bun:"default_aux_pool"`
}

Workspace is the bun model of a workspace.

func (*Workspace) ToProto

func (w *Workspace) ToProto() (*workspacev1.Workspace, error)

ToProto converts a bun model of a workspace to a proto object. Some fields like username and pinned are not included since they are not on the bun model.

type WorkspacePin

type WorkspacePin struct {
	bun.BaseModel `bun:"table:workspace_pins"`
	WorkspaceID   int    `bun:"workspace_id"`
	UserID        UserID `bun:"user_id"`
}

WorkspacePin is the bun model of a workspace pin.

type WorkspaceState

type WorkspaceState string

WorkspaceState is the state of a workspace with regard to being deleted.

const (
	// WorkspaceStateDeleting constant.
	WorkspaceStateDeleting WorkspaceState = "DELETING"
	// WorkspaceStateDeleteFailed constant.
	WorkspaceStateDeleteFailed WorkspaceState = "DELETE_FAILED"
	// WorkspaceStateDeleted constant.
	WorkspaceStateDeleted WorkspaceState = "DELETED"
)

func (*WorkspaceState) ToProto

ToProto converts a WorkspaceState to a proto workspacev1.Workspace state.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL