server

package
Version: v0.12.0
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 29, 2023 | License: Apache-2.0 | Imports: 21 | Imported by: 0

Documentation

Overview

Copyright 2021 IBM Corporation

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Index

Constants

View Source
// KServeServiceName is the typed string constant
// "inference.GRPCInferenceService".
//
// NOTE(review): presumably the fully qualified gRPC service name of the KServe
// inference API — confirm against the call sites.
const KServeServiceName string = "inference.GRPCInferenceService"

Variables

This section is empty.

Functions

This section is empty.

Types

type AdapterConfiguration

// AdapterConfiguration groups the settings of the TorchServe adapter.
// GetAdapterConfigurationFromEnv returns a pointer to one and
// NewTorchServeAdapterServer accepts one; how the individual fields are
// populated or consumed is not visible in this listing.
//
// NOTE(review): the units suggested by the field-name suffixes (Bytes, MS,
// Secs) and the defaults of every field other than LimitModelConcurrency are
// inferred from the names only — confirm against the implementation.
type AdapterConfiguration struct {
	Port                           int
	TorchServeManagementPort       int
	TorchServeInferenceEndpoint    string
	TorchServeContainerMemReqBytes int
	TorchServeMemBufferBytes       int
	CapacityInBytes                int
	MaxLoadingConcurrency          int
	ModelLoadingTimeoutMS          int
	DefaultModelSizeInBytes        int
	ModelSizeMultiplier            float64
	RuntimeVersion                 string
	LimitModelConcurrency          int // 0 means no limit (default)
	ModelStoreDir                  string
	UseEmbeddedPuller              bool
	RequestBatchSize               int32
	MaxBatchDelaySecs              int32
}

func GetAdapterConfigurationFromEnv

func GetAdapterConfigurationFromEnv(log logr.Logger) (*AdapterConfiguration, error)

type TorchServeAdapterServer

// TorchServeAdapterServer holds the state of the adapter server: client stubs
// and gRPC connections for the TorchServe management and inference APIs, a
// puller, the adapter configuration, and a logger.
//
// It embeds mmesh.UnimplementedModelRuntimeServer; this listing shows the
// methods LoadModel, ModelSize, RuntimeStatus and UnloadModel declared on the
// pointer receiver.
//
// NOTE(review): InferenceEndpoint presumably mirrors
// AdapterConfiguration.TorchServeInferenceEndpoint — confirm in
// NewTorchServeAdapterServer.
type TorchServeAdapterServer struct {
	ManagementClient  torchserve.ManagementAPIsServiceClient
	ManagementConn    *grpc.ClientConn
	Puller            *puller.Puller
	AdapterConfig     *AdapterConfiguration
	Log               logr.Logger
	InferenceEndpoint string
	InferenceClient   torchserve.InferenceAPIsServiceClient
	InferenceConn     *grpc.ClientConn

	// embed generated Unimplemented type for forward-compatibility for gRPC
	mmesh.UnimplementedModelRuntimeServer
}

func NewTorchServeAdapterServer

func NewTorchServeAdapterServer(config *AdapterConfiguration, log logr.Logger) *TorchServeAdapterServer

func (*TorchServeAdapterServer) LoadModel

func (*TorchServeAdapterServer) ModelSize

func (*TorchServeAdapterServer) RuntimeStatus

func (*TorchServeAdapterServer) UnloadModel

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL