server

package
v1.1.3 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 22, 2021 License: Apache-2.0 Imports: 83 Imported by: 1

Documentation

Index

Constants

View Source
// Numeric status codes exported by this package. The values are the same
// numbers as the HTTP status codes 404, 409, 421, 422 and 500 (presumably
// they are used as HTTP-style reply codes — confirm at the call sites).
// NOTE(review): MisdirectedRequest is the only name in the group without the
// "Status" prefix; it is left as-is because renaming an exported constant
// would break importers.
const (
	StatusNotFound            = 404
	StatusAlreadyExists       = 409
	MisdirectedRequest        = 421
	StatusUnprocessableEntity = 422
	StatusServerError         = 500
)
View Source
// Fixed time intervals named after the PodController's clean, control and
// full-sync activities (presumably the periods of the corresponding loops —
// confirm against the controller implementation). They are compile-time
// constants, so changing any of them requires a rebuild.
const (
	PodControllerCleanPeriod    = 20 * time.Second
	PodControllerControlPeriod  = 5 * time.Second
	PodControllerFullSyncPeriod = 31 * time.Second
)

TODO: make these pod controller periods configurable.

View Source
// ResourceLimitsGPU is the resource name "nvidia.com/gpu", typed as a
// v1.ResourceName so it can be used directly as a key wherever that type is
// expected.
const (
	ResourceLimitsGPU v1.ResourceName = "nvidia.com/gpu"
)

Variables

View Source
// GPUNodeSelectorPrefixes lists string prefixes; presumably a node selector
// key starting with one of them marks a GPU request — confirm at the use
// site. It is a package-level variable, so importers can modify the slice.
// NOTE(review): the first entry ends in "-" while the second does not, so
// the two prefixes match at different boundaries.
var (
	GPUNodeSelectorPrefixes = []string{
		"node.elotl.co/gpu-",
		"cloud.google.com/gke-accelerator",
	}
)
View Source
// MaxEventListSize defaults to 4000. It is declared as a variable rather
// than a constant so that tests can change it (see the trailing comment).
// Presumably it caps the length of an event list — confirm at the use site.
var (
	MaxEventListSize = 4000 // modified for testing
)

Functions

func ConfigureCloud

func ConfigureCloud(configFile *ServerConfigFile, controllerID, nametag string) (cloud.CloudClient, error)

func ConfigureK8sKipClient

func ConfigureK8sKipClient(kubeConfig *clientcmdapi.Config) (*kubeclient.Clientset, *rest.Config, error)

func GetNodeForRunningPod

func GetNodeForRunningPod(podName, unitName string, podRegistry *registry.PodRegistry, nodeRegistry *registry.NodeRegistry) (*api.Node, error)

func VersionAndKind

func VersionAndKind(m []byte) (string, string, error)

Types

type AWSConfig

// AWSConfig groups the AWS-related settings of the server configuration; it
// is referenced by MultiCloudConfig.AWS under the "aws" key.
//
// VPCID and SubnetID are tagged omitempty and are dropped from marshaled
// output when empty; every other field is always emitted.
// NOTE(review): SecretAccessKey is a plain string with a JSON tag, so
// marshaling or logging this struct exposes the secret.
type AWSConfig struct {
	Region                string `json:"region"`
	AccessKeyID           string `json:"accessKeyID"`
	SecretAccessKey       string `json:"secretAccessKey"`
	VPCID                 string `json:"vpcID,omitempty"`
	SubnetID              string `json:"subnetID,omitempty"`
	EcsClusterName        string `json:"ecsClusterName"`
	EndpointURL           string `json:"endpointURL"`
	InsecureTLSSkipVerify bool   `json:"insecureTLSSkipVerify"`
}

type ActivePods

type ActivePods []*api.Pod

ActivePods allows custom sorting of pods so that a controller can pick the best ones to delete. The approach is taken from Kubernetes.

func (ActivePods) Len

func (s ActivePods) Len() int

func (ActivePods) Less

func (s ActivePods) Less(i, j int) bool

func (ActivePods) Swap

func (s ActivePods) Swap(i, j int)

type AzureConfig

// AzureConfig groups the Azure-related settings of the server configuration;
// it is referenced by MultiCloudConfig.Azure under the "azure" key.
//
// Note that VNetName is read from the JSON key "virtualNetworkName", which
// differs from the Go field name. No field is tagged omitempty, so all keys
// are always emitted when marshaling.
// NOTE(review): ClientSecret is a plain string with a JSON tag, so
// marshaling or logging this struct exposes the secret.
type AzureConfig struct {
	SubscriptionID string `json:"subscriptionID"`
	Location       string `json:"location"`
	VNetName       string `json:"virtualNetworkName"`
	ResourceGroup  string `json:"resourceGroup"`
	SubnetName     string `json:"subnetName"`
	TenantID       string `json:"tenantID"`
	ClientID       string `json:"clientID"`
	ClientSecret   string `json:"clientSecret"`
}

See https://github.com/Azure/azure-sdk-for-go/blob/master/README.md for more info on SDK login credentials. We might want to support CertificatePath and CertificatePassword.

type CellController

// CellController is an opaque type: it has no exported fields and is
// obtained from NewCellController. Its Start and Dump methods have the same
// signatures as the Controller interface's methods; it additionally offers
// CreateCRDIfNotExists.
type CellController struct {
	// contains filtered or unexported fields
}

func NewCellController

func NewCellController(
	controllerID, nodeName string,
	restConfig *rest.Config,
	k8sKipClient kv1b1.CellInterface,
	eventsSystem *events.EventSystem,
	podLister registry.PodLister,
	nodeLister registry.NodeLister,
) (*CellController, error)

func (*CellController) CreateCRDIfNotExists

func (c *CellController) CreateCRDIfNotExists() error

func (*CellController) Dump

func (c *CellController) Dump() []byte

func (*CellController) Start

func (c *CellController) Start(quit <-chan struct{}, wg *sync.WaitGroup)

type CellOp

// CellOp is an opaque type: this listing shows no exported fields, methods
// or constructors for it. Presumably it is exported only because of how the
// cell controller uses it internally — confirm in the source.
type CellOp struct {
	// contains filtered or unexported fields
}

type CellsConfig

// CellsConfig is the "cells" section of ServerConfigFile.
//
// Points worth knowing when reading or writing the JSON form:
//   - PrivateIPOnly is a *bool, so an absent key (nil) can be told apart
//     from an explicit false.
//   - HealthCheck uses the all-lowercase key "healthcheck", unlike the
//     camelCase keys of the other fields.
//   - StatusInterval is a bare int; its unit is not visible here
//     (presumably seconds — TODO confirm).
//   - DefaultVolumeSize is a string, so any unit suffix is parsed elsewhere.
type CellsConfig struct {
	BootImageSpec          cloud.BootImageSpec           `json:"bootImageSpec"`
	DefaultInstanceType    string                        `json:"defaultInstanceType"`
	DefaultVolumeSize      string                        `json:"defaultVolumeSize"`
	StandbyCells           []nodemanager.StandbyNodeSpec `json:"standbyCells"`
	CloudInitFile          string                        `json:"cloudInitFile"`
	Itzo                   ItzoConfig                    `json:"itzo"`
	ExtraCIDRs             []string                      `json:"extraCIDRs"`
	ExtraSecurityGroups    []string                      `json:"extraSecurityGroups"`
	Nametag                string                        `json:"nametag"`
	StatusInterval         int                           `json:"statusInterval"`
	HealthCheck            HealthCheckConfig             `json:"healthcheck"`
	PrivateIPOnly          *bool                         `json:"privateIPOnly"`
	CellConfig             map[string]string             `json:"cellConfig"`
	DefaultIAMPermissions  string                        `json:"defaultIAMPermissions"`
	UseCloudParameterStore bool                          `json:"useCloudParameterStore"`
}

type CloudAPIHealthCheck added in v0.0.3

// CloudAPIHealthCheck is the "cloudAPI" entry of HealthCheckConfig. Both
// fields are bare ints; their unit is not visible here (presumably seconds
// — TODO confirm).
type CloudAPIHealthCheck struct {
	Interval       int `json:"interval"`
	HealthyTimeout int `json:"healthyTimeout"`
}

type Controller

// Controller is the method set shared by the controllers in this package:
// CellController, GarbageController, MetricsController and PodController all
// declare Start and Dump with exactly these signatures. ControllerManager
// and InstanceProvider hold controllers as map[string]Controller.
type Controller interface {
	// Start receives a quit channel and a WaitGroup; presumably the
	// controller stops when quit is closed and uses wg to signal that it has
	// finished — confirm against an implementation.
	Start(quit <-chan struct{}, wg *sync.WaitGroup)
	// Dump returns a byte slice; the encoding is not visible here
	// (presumably a serialized view of controller state for debugging).
	Dump() []byte
}

type ControllerManager

// ControllerManager is an opaque type with no exported fields. It is built
// by NewControllerManager from a map of controller names to Controller
// values.
type ControllerManager struct {
	// contains filtered or unexported fields
}

The ControllerManager was created to make the interaction between leader election and controllers easier. It takes care of starting and stopping controllers based on the leader elector.

func NewControllerManager

func NewControllerManager(controllers map[string]Controller) *ControllerManager

func (*ControllerManager) ControllersRunning

func (cm *ControllerManager) ControllersRunning() bool

func (*ControllerManager) GetAllControllers

func (cm *ControllerManager) GetAllControllers() map[string]Controller

func (*ControllerManager) GetController

func (cm *ControllerManager) GetController(name string) (Controller, bool)

func (*ControllerManager) Start

func (cm *ControllerManager) Start()

Start deliberately does not take a quit channel. Listening for quit as well would make it difficult to also handle starting and stopping controllers through a channel, so this goroutine is simply left running until the milpa process exits.

func (*ControllerManager) StartControllers

func (cm *ControllerManager) StartControllers()

func (*ControllerManager) StopControllers

func (cm *ControllerManager) StopControllers()

func (*ControllerManager) WaitForShutdown

func (cm *ControllerManager) WaitForShutdown(systemShutdown <-chan struct{}, systemWG *sync.WaitGroup)

Our leader election used to tell the controller manager to shut down; now WaitForShutdown takes care of that. We could simplify the controller manager a fair amount, but I'm concerned we'll need a clustered system again eventually.

type EtcdClientConfig

// EtcdClientConfig is the "client" section of EtcdConfig: a list of
// endpoints plus three file settings (certFile, keyFile, caFile), presumably
// paths to TLS material for the etcd client — confirm where it is consumed.
type EtcdClientConfig struct {
	Endpoints []string `json:"endpoints"`
	CertFile  string   `json:"certFile"`
	KeyFile   string   `json:"keyFile"`
	CAFile    string   `json:"caFile"`
}

type EtcdConfig

// EtcdConfig is the "etcd" section of ServerConfigFile. It carries two
// sub-sections: "client" (EtcdClientConfig) and "internal"
// (InternalEtcdConfig). How the two are chosen between is not visible here.
type EtcdConfig struct {
	Client   EtcdClientConfig   `json:"client"`
	Internal InternalEtcdConfig `json:"internal"`
}

type FullPodStatus

// FullPodStatus bundles, for one pod identified by Name, its IP, the
// statuses of its units and init units, its resource metrics, and an error
// slot. The fields carry no JSON tags.
type FullPodStatus struct {
	Name             string
	PodIP            string
	UnitStatuses     []api.UnitStatus
	InitUnitStatuses []api.UnitStatus
	ResourceUsage    api.ResourceMetrics
	// If an error occurred, Status will be nil, and Error will contain the
	// error instance.
	// NOTE(review): this struct has no field named Status; the sentence above
	// presumably refers to UnitStatuses/InitUnitStatuses (or to a field that
	// was since removed) — confirm and reword.
	Error error
}

type GCEConfig

// GCEConfig groups the GCE-related settings of the server configuration; it
// is referenced by MultiCloudConfig.GCE under the "gce" key.
//
// Only ProjectID is always emitted when marshaling; every other field is
// tagged omitempty. Credentials can be supplied either as a file setting
// (CredentialsFile) or inline (Credentials); which one takes precedence when
// both are set is not visible here — TODO confirm.
type GCEConfig struct {
	ProjectID       string          `json:"projectID"`
	CredentialsFile string          `json:"credentialsFile,omitempty"`
	Credentials     *GCECredentials `json:"credentials,omitempty"`
	Zone            string          `json:"zone,omitempty"`
	VPCName         string          `json:"vpcName,omitempty"`
	SubnetName      string          `json:"subnetName,omitempty"`
}

type GCECredentials added in v0.0.3

// GCECredentials holds inline GCE credentials, referenced by
// GCEConfig.Credentials.
// NOTE(review): PrivateKey is a plain string with a JSON tag, so marshaling
// or logging this struct exposes the key.
type GCECredentials struct {
	ClientEmail string `json:"clientEmail"`
	PrivateKey  string `json:"privateKey"`
}

type GarbageController

// GarbageController is an opaque type with no exported fields; no exported
// constructor for it appears in this listing. Its Start and Dump methods
// have the same signatures as the Controller interface's methods, and it
// exposes several Clean* methods plus GCLoop.
type GarbageController struct {
	// contains filtered or unexported fields
}

func (*GarbageController) CleanAzureResourceGroups

func (c *GarbageController) CleanAzureResourceGroups()

func (*GarbageController) CleanAzureResourceGroupsHelper

func (c *GarbageController) CleanAzureResourceGroupsHelper(client ResourceGrouper) error

func (*GarbageController) CleanDanglingRoutes

func (c *GarbageController) CleanDanglingRoutes()

func (*GarbageController) CleanDedicatedHosts added in v1.1.0

func (c *GarbageController) CleanDedicatedHosts()

func (*GarbageController) CleanInstances

func (c *GarbageController) CleanInstances()

func (*GarbageController) CleanTerminatedNodes

func (c *GarbageController) CleanTerminatedNodes()

func (*GarbageController) Dump

func (c *GarbageController) Dump() []byte

func (*GarbageController) GCLoop

func (c *GarbageController) GCLoop(quit <-chan struct{}, wg *sync.WaitGroup)

func (*GarbageController) Start

func (c *GarbageController) Start(quit <-chan struct{}, wg *sync.WaitGroup)

type GarbageControllerConfig

// GarbageControllerConfig holds four time.Duration intervals whose names
// parallel the GarbageController methods CleanTerminatedNodes,
// CleanInstances, CleanAzureResourceGroups and CleanDedicatedHosts
// (presumably how often each runs — confirm in GCLoop). The fields carry no
// JSON tags.
type GarbageControllerConfig struct {
	CleanTerminatedInterval     time.Duration
	CleanInstancesInterval      time.Duration
	CleanResourceGroupsInterval time.Duration
	CleanDedicatedHostsInterval time.Duration
}

type HealthCheckConfig added in v0.0.3

// HealthCheckConfig is the "healthcheck" entry of CellsConfig. Both members
// are pointers, so a key that is absent from the input stays nil; how a nil
// or doubly-set configuration is interpreted is not visible here.
type HealthCheckConfig struct {
	Status   *StatusHealthCheck   `json:"status"`
	CloudAPI *CloudAPIHealthCheck `json:"cloudAPI"`
}

type InstanceProvider

// InstanceProvider is the provider object returned by NewInstanceProvider.
// Besides the exported fields below it has unexported state, so values
// should be obtained from the constructor rather than built as literals.
type InstanceProvider struct {
	// Registries maps a name to a registry.Registryer.
	Registries        map[string]registry.Registryer
	Encoder           api.MilpaCodec
	// SystemQuit is receive-only; NewInstanceProvider takes a systemQuit
	// channel of the same type.
	SystemQuit        <-chan struct{}
	SystemWaitGroup   *sync.WaitGroup
	// Controllers maps a name to a Controller.
	Controllers       map[string]Controller
	ItzoClientFactory nodeclient.ItzoClientFactoryer
	// contains filtered or unexported fields
}

func NewInstanceProvider

func NewInstanceProvider(configFilePath, nodeName, internalIP, clusterDNS, clusterDomain string, daemonEndpointPort int32, debugServer bool, rm *manager.ResourceManager, kubeConfig, networkAgentKubeConfig *clientcmdapi.Config, instanceDataPath string, systemQuit <-chan struct{}) (*InstanceProvider, error)

InstanceProvider should implement node.PodLifecycleHandler

func (InstanceProvider) Attach

func (*InstanceProvider) ConfigureNode

func (p *InstanceProvider) ConfigureNode(ctx context.Context, n *v1.Node)

func (InstanceProvider) Create

func (*InstanceProvider) CreatePod

func (p *InstanceProvider) CreatePod(ctx context.Context, pod *v1.Pod) error

func (InstanceProvider) Delete

func (*InstanceProvider) DeletePod

func (p *InstanceProvider) DeletePod(ctx context.Context, pod *v1.Pod) (err error)

func (InstanceProvider) Deploy

func (InstanceProvider) Dump

func (InstanceProvider) Exec

func (InstanceProvider) Get

func (*InstanceProvider) GetContainerLogs

func (p *InstanceProvider) GetContainerLogs(ctx context.Context, namespace, podName, containerName string, opts vkapi.ContainerLogOpts) (io.ReadCloser, error)

func (InstanceProvider) GetLogs

func (s InstanceProvider) GetLogs(context context.Context, request *clientapi.LogsRequest) (*clientapi.APIReply, error)

func (InstanceProvider) GetNodeForRunningPod

func (s InstanceProvider) GetNodeForRunningPod(podName, unitName string) (*api.Node, error)

func (*InstanceProvider) GetPod

func (p *InstanceProvider) GetPod(ctx context.Context, namespace, name string) (*v1.Pod, error)

func (*InstanceProvider) GetPodStatus

func (p *InstanceProvider) GetPodStatus(ctx context.Context, namespace, name string) (*v1.PodStatus, error)

func (*InstanceProvider) GetPods

func (p *InstanceProvider) GetPods(ctx context.Context) ([]*v1.Pod, error)

func (*InstanceProvider) GetStatsSummary

func (p *InstanceProvider) GetStatsSummary(ctx context.Context) (*stats.Summary, error)

func (InstanceProvider) GetVersion

func (*InstanceProvider) Handle

func (p *InstanceProvider) Handle(ev events.Event) error

func (InstanceProvider) IsLeader

func (*InstanceProvider) NotifyNodeStatus

func (p *InstanceProvider) NotifyNodeStatus(ctx context.Context, notifier func(*v1.Node))

func (*InstanceProvider) NotifyPods

func (p *InstanceProvider) NotifyPods(ctx context.Context, notifier func(*v1.Pod))

NotifyPods is called to set a pod notifier callback function. This should be called before any operations are done within the provider.

func (*InstanceProvider) Ping

func (p *InstanceProvider) Ping(ctx context.Context) error

func (*InstanceProvider) RunInContainer

func (p *InstanceProvider) RunInContainer(ctx context.Context, namespace, podName, containerName string, cmd []string, attach vkapi.AttachIO) error

func (*InstanceProvider) Stop

func (p *InstanceProvider) Stop()

func (InstanceProvider) StreamLogs

func (InstanceProvider) Update

func (*InstanceProvider) UpdatePod

func (p *InstanceProvider) UpdatePod(ctx context.Context, pod *v1.Pod) error

type InternalEtcdConfig

// InternalEtcdConfig is the "internal" section of EtcdConfig, carrying a
// data directory and a config file setting (presumably for an etcd instance
// run by this process — TODO confirm).
type InternalEtcdConfig struct {
	DataDir    string `json:"dataDir"`
	ConfigFile string `json:"configFile"`
}

type ItzoConfig

// ItzoConfig is the "itzo" entry of CellsConfig: a version string and a URL
// string. How the two combine is not visible here.
type ItzoConfig struct {
	Version string `json:"version"`
	URL     string `json:"url"`
}

type KubeletConfig

// KubeletConfig is the "kubelet" section of ServerConfigFile. New
// configuration should set Capacity; the three quantity pointers exist only
// for older configuration files (see the deprecation note below).
type KubeletConfig struct {
	// Deprecated: CPU, Memory and Pods are copied into Capacity, and are only
	// present for backward compatibility.
	CPU      *resource.Quantity `json:"cpu"`
	Memory   *resource.Quantity `json:"memory"`
	Pods     *resource.Quantity `json:"pods"`
	Capacity v1.ResourceList    `json:"capacity"`
	Labels   map[string]string  `json:"labels"`
}

KubeletConfig stores kubelet-specific configuration such as capacity and labels.

type MetricsController

// MetricsController is an opaque type with no exported fields; no exported
// constructor for it appears in this listing. Its Start and Dump methods
// have the same signatures as the Controller interface's methods.
type MetricsController struct {
	// contains filtered or unexported fields
}

func (*MetricsController) Dump

func (c *MetricsController) Dump() []byte

func (*MetricsController) Start

func (c *MetricsController) Start(quit <-chan struct{}, wg *sync.WaitGroup)

type MultiCloudConfig

// MultiCloudConfig is the "cloud" section of ServerConfigFile. Each provider
// section is a pointer tagged omitempty, so an unconfigured provider is nil
// and is left out of marshaled output. Presumably exactly one of the three
// is expected to be set — confirm in ConfigureCloud.
type MultiCloudConfig struct {
	AWS   *AWSConfig   `json:"aws,omitempty"`
	GCE   *GCEConfig   `json:"gce,omitempty"`
	Azure *AzureConfig `json:"azure,omitempty"`
}

type PodController

// PodController is an opaque type with no exported fields; no exported
// constructor for it appears in this listing. Its Start and Dump methods
// have the same signatures as the Controller interface's methods, and it
// also exposes ControlLoop, ControlPods, SyncRunningPods and
// TagNodeWithPodLabels.
type PodController struct {
	// contains filtered or unexported fields
}

func (*PodController) ControlLoop

func (c *PodController) ControlLoop(quit <-chan struct{}, wg *sync.WaitGroup)

func (*PodController) ControlPods

func (c *PodController) ControlPods()

We do all pod controlling in one loop since there are windows for races otherwise.

func (*PodController) Dump

func (c *PodController) Dump() []byte

func (*PodController) Start

func (c *PodController) Start(quit <-chan struct{}, wg *sync.WaitGroup)

func (*PodController) SyncRunningPods

func (c *PodController) SyncRunningPods()

func (*PodController) TagNodeWithPodLabels

func (c *PodController) TagNodeWithPodLabels(pod *api.Pod, node *api.Node)

type ResourceGrouper

// ResourceGrouper is the client abstraction accepted by
// GarbageController.CleanAzureResourceGroupsHelper: it can list resource
// groups by name and delete one by name. Taking an interface there allows a
// substitute implementation to be passed in (for example in tests).
type ResourceGrouper interface {
	// ListNodeResourceGroups returns resource group names as strings.
	ListNodeResourceGroups() ([]string, error)
	// DeleteResourceGroup takes a single string, presumably a name as
	// returned by ListNodeResourceGroups — confirm against an implementation.
	DeleteResourceGroup(string) error
}

type SendRecver

// SendRecver describes a bidirectional stream of *clientapi.StreamMsg values
// with an associated context. The method set has the shape of a generated
// gRPC bidirectional-stream type (presumably that is what satisfies it —
// TODO confirm).
type SendRecver interface {
	Send(*clientapi.StreamMsg) error
	Recv() (*clientapi.StreamMsg, error)
	Context() context.Context
}

type ServerConfigFile

// ServerConfigFile is the top-level configuration document returned by
// ParseConfig. It embeds api.TypeMeta and has one field per section: cloud,
// etcd, cells, testing and kubelet.
// NOTE(review): the ",inline" tag option on the embedded TypeMeta is not an
// option defined by encoding/json, which flattens untagged embedded structs
// by default; presumably it is there for a YAML/JSON decoder that honours
// it — confirm in ParseConfig.
type ServerConfigFile struct {
	api.TypeMeta `json:",inline"`
	Cloud        MultiCloudConfig `json:"cloud"`
	Etcd         EtcdConfig       `json:"etcd"`
	Cells        CellsConfig      `json:"cells"`
	Testing      TestingConfig    `json:"testing"`
	Kubelet      KubeletConfig    `json:"kubelet"`
}

ServerConfigFile stores the parsed json from provider.yaml

func ParseConfig

func ParseConfig(path string) (*ServerConfigFile, error)

type StatusHealthCheck added in v0.0.3

// StatusHealthCheck is the "status" entry of HealthCheckConfig. Its single
// field is a bare int whose unit is not visible here (presumably seconds —
// TODO confirm).
type StatusHealthCheck struct {
	HealthyTimeout int `json:"healthyTimeout"`
}

type TestingConfig

// TestingConfig is the "testing" section of ServerConfigFile. Its only
// setting is a controller ID; ConfigureCloud and NewCellController both take
// a controllerID argument, so presumably this value overrides a generated ID
// during tests — confirm where it is read.
type TestingConfig struct {
	ControllerID string `json:"controllerID"`
}

type WinSize

// WinSize holds four uint16 values: Rows, Cols, X and Y. The field order and
// widths match the Unix struct winsize (rows, columns, x pixels, y pixels),
// so presumably it describes a terminal window size for interactive
// sessions — TODO confirm at the use site.
type WinSize struct {
	Rows uint16
	Cols uint16
	X    uint16
	Y    uint16
}

Directories

Path Synopsis
aws
gce
Not sure if this is a good pattern for decoupling the pod controller from the node controller.
Need to start DRYing up the registry code; this is terrible.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL