Documentation ¶
Overview ¶
ElasticThought is a REST wrapper around the Caffe Deep Learning framework.
Index ¶
- Constants
- Variables
- func Asset(name string) ([]byte, error)
- func AssetDir(name string) ([]string, error)
- func AssetInfo(name string) (os.FileInfo, error)
- func AssetNames() []string
- func CbfsReadWriteFile(config Configuration, destPath, content string) error
- func CbfsSanityCheck(config Configuration) error
- func CopyFileContents(src, dst string) (err error)
- func DbAuthRequired() gin.HandlerFunc
- func DbConnector(dbUrl string) gin.HandlerFunc
- func EnableAllLogKeys()
- func EnvironmentSanityCheck(config Configuration) error
- func Mkdir(directory string) error
- func NewUuid() string
- func RestoreAsset(dir, name string) error
- func RestoreAssets(dir, name string) error
- func TempDir() string
- type BlobHandle
- type BlobPutOptions
- type BlobStore
- type CbfsBlobStore
- type ChangesListener
- type Classifier
- type ClassifyJob
- func (c ClassifyJob) Failed(db couch.Database, processingErr error) error
- func (c *ClassifyJob) Find(id string) error
- func (c *ClassifyJob) Insert() error
- func (c *ClassifyJob) RefreshFromDB(db couch.Database) error
- func (c *ClassifyJob) Run(wg *sync.WaitGroup)
- func (c *ClassifyJob) SetResults(results map[string]string) (bool, error)
- func (c *ClassifyJob) UpdateProcessingLog(val string) (bool, error)
- func (c *ClassifyJob) UpdateProcessingState(newState ProcessingState) (bool, error)
- type Configuration
- type Datafile
- func (d Datafile) CopyToBlobStore(db couch.Database, blobStore BlobStore) (string, error)
- func (d Datafile) Failed(db couch.Database, processingErr error) error
- func (d Datafile) FinishedSuccessfully(db couch.Database) error
- func (d *Datafile) GetProcessingState() ProcessingState
- func (d Datafile) HasValidId() bool
- func (d *Datafile) RefreshFromDB(db couch.Database) error
- func (d Datafile) Save(db couch.Database) (*Datafile, error)
- func (d *Datafile) SetProcessingState(newState ProcessingState)
- func (d *Datafile) UpdateProcessingState(newState ProcessingState) (bool, error)
- type DatafileDownloader
- type Dataset
- func (d *Dataset) AddArtifactUrls() error
- func (d Dataset) Failed(db couch.Database, processingErr error) error
- func (d Dataset) FinishedSuccessfully(db couch.Database) error
- func (d *Dataset) GetProcessingState() ProcessingState
- func (d Dataset) GetSplittableDatafile(db couch.Database) (*Datafile, error)
- func (d Dataset) GetTestingDatafile(db couch.Database) (*Datafile, error)
- func (d Dataset) GetTestingDatafileUrl(db couch.Database) string
- func (d Dataset) GetTrainingDatafile(db couch.Database) (*Datafile, error)
- func (d Dataset) GetTrainingDatafileUrl(db couch.Database) string
- func (d *Dataset) Insert() error
- func (d *Dataset) RefreshFromDB(db couch.Database) error
- func (d *Dataset) SetProcessingState(newState ProcessingState)
- func (d Dataset) TestingArtifactPath() string
- func (d Dataset) TrainingArtifactPath() string
- func (d *Dataset) UpdateArtifactUrls(trainingDatasetUrl, testingDatasetUrl string) (bool, error)
- func (d *Dataset) UpdateProcessingLog(val string) (bool, error)
- func (d *Dataset) UpdateProcessingState(newState ProcessingState) (bool, error)
- type DatasetSplitter
- type ElasticThoughtDoc
- type EndpointContext
- func (e EndpointContext) CreateClassificationJobEndpoint(c *gin.Context)
- func (e EndpointContext) CreateClassifierEndpoint(c *gin.Context)
- func (e EndpointContext) CreateDataFileEndpoint(c *gin.Context)
- func (e EndpointContext) CreateDataSetsEndpoint(c *gin.Context)
- func (e EndpointContext) CreateSolverEndpoint(c *gin.Context)
- func (e EndpointContext) CreateTrainingJob(c *gin.Context)
- func (e EndpointContext) CreateUserEndpoint(c *gin.Context)
- type FileSystemBlobHandle
- type FileSystemBlobStore
- type InProcessJobScheduler
- type JobDescriptor
- type JobScheduler
- type LayerType
- type MockBlobStore
- func (m *MockBlobStore) Get(path string) (io.ReadCloser, error)
- func (m *MockBlobStore) OpenFile(path string) (BlobHandle, error)
- func (m *MockBlobStore) Put(srcname, dest string, r io.Reader, opts BlobPutOptions) error
- func (m *MockBlobStore) QueueGetResponse(pathRegex string, response io.Reader)
- func (m *MockBlobStore) Rm(fn string) error
- type NsqJobScheduler
- type NsqWorker
- type Processable
- type ProcessingState
- type QueueType
- type ResponseQueue
- type Runnable
- type Solver
- func (s Solver) DownloadSpecToBlobStore(db couch.Database, blobStore BlobStore) (*Solver, error)
- func (s Solver) Insert(db couch.Database) (*Solver, error)
- func (s Solver) Save(db couch.Database) (*Solver, error)
- func (s Solver) SaveTrainTestData(config Configuration, destDirectory string) (trainingLabelIndex []string, err error)
- func (s Solver) SpecificationNetUrlPath() (string, error)
- func (s Solver) SpecificationUrlPath() (string, error)
- type TestDataset
- type TrainingDataset
- type TrainingJob
- func (j TrainingJob) Failed(db couch.Database, processingErr error) error
- func (j *TrainingJob) Find(id string) error
- func (j TrainingJob) FinishedSuccessfully(db couch.Database, logPath string) error
- func (j *TrainingJob) GetProcessingState() ProcessingState
- func (j TrainingJob) Insert(db couch.Database) (*TrainingJob, error)
- func (j *TrainingJob) RefreshFromDB(db couch.Database) error
- func (j *TrainingJob) Run(wg *sync.WaitGroup)
- func (j *TrainingJob) SetProcessingState(newState ProcessingState)
- func (j *TrainingJob) UpdateLabels(labels []string) (bool, error)
- func (j *TrainingJob) UpdateProcessingLog(val string) (bool, error)
- func (j *TrainingJob) UpdateProcessingState(newState ProcessingState) (bool, error)
- type User
Constants ¶
const ( TRAINING_ARTIFACT = "training.tar.gz" TEST_ARTIFACT = "testing.tar.gz" TRAINING_INDEX = "training.txt" TESTING_INDEX = "testing.txt" TRAINING_DIR = "training-data" TESTING_DIR = "test-data" CBFS_URI_PREFIX = "cbfs://" )
const ( MIDDLEWARE_KEY_DB = "db" MIDDLEWARE_KEY_USER = "user" )
const ( DOC_TYPE_USER = "user" DOC_TYPE_DATAFILE = "datafile" DOC_TYPE_DATASET = "dataset" DOC_TYPE_SOLVER = "solver" DOC_TYPE_TRAINING_JOB = "training-job" DOC_TYPE_CLASSIFIER = "classifier" DOC_TYPE_CLASSIFY_JOB = "classify-job" )
const ( PROCESSING_STATE_PENDING = "pending" PROCESSING_STATE_PROCESSING = "processing" PROCESSING_STATE_FINISHED_SUCCESSFULLY = "finished_successfully" PROCESSING_STATE_FAILED = "failed" )
const ( IMAGE_DATA = LayerType(caffe.V1LayerParameter_IMAGE_DATA) DATA = LayerType(caffe.V1LayerParameter_DATA) )
Variables ¶
var LogKeys []string
The logging keys available in elastic thought
Functions ¶
func Asset ¶
Asset loads and returns the asset for the given name. It returns an error if the asset could not be found or could not be loaded.
func AssetDir ¶
AssetDir returns the file names below a certain directory embedded in the file by go-bindata. For example if you run go-bindata on data/... and data contains the following hierarchy:
data/ foo.txt img/ a.png b.png
then AssetDir("data") would return []string{"foo.txt", "img"} AssetDir("data/img") would return []string{"a.png", "b.png"} AssetDir("foo.txt") and AssetDir("notexist") would return an error AssetDir("") will return []string{"data"}.
func AssetInfo ¶
AssetInfo loads and returns the asset info for the given name. It returns an error if the asset could not be found or could not be loaded.
func CbfsReadWriteFile ¶
func CbfsReadWriteFile(config Configuration, destPath, content string) error
func CopyFileContents ¶
CopyFileContents copies the contents of the file named src to the file named by dst. The file will be created if it does not already exist. If the destination file exists, all its contents will be replaced by the contents of the source file.
Source: http://stackoverflow.com/questions/21060945/simple-way-to-copy-a-file-in-golang
func DbAuthRequired ¶
func DbAuthRequired() gin.HandlerFunc
Gin middleware to authenticate the user specified in the Basic Auth Authorization header. It will lookup the user in the database (this middleware requires the use of the DbConnector middleware to have run before it), and then add to the Gin Context.
func DbConnector ¶
func DbConnector(dbUrl string) gin.HandlerFunc
Gin middleware to connect to the Sync Gw database given in the dbUrl parameter, and set the connection object into the context. This creates a new connection for each request, which is ultra-conservative in case the connection object isn't safe to use among multiple goroutines (and I believe it is). If it becomes a bottleneck, it's easy to create another middleware that re-uses an existing connection.
func EnvironmentSanityCheck ¶
func EnvironmentSanityCheck(config Configuration) error
func RestoreAsset ¶
Restore an asset under the given directory
func RestoreAssets ¶
Restore assets under the given directory recursively
Types ¶
type BlobHandle ¶
type BlobHandle interface { // The nodes that contain the file corresponding to this blob // and the last time it was "scrubbed" (cbfs terminology) Nodes() map[string]time.Time }
Calling OpenFile with a path on a BlobStore returns a BlobHandle which allows for reading and metadata.
type BlobPutOptions ¶
type BlobPutOptions struct {
cbfsclient.PutOptions
}
type BlobStore ¶
type BlobStore interface { Get(path string) (io.ReadCloser, error) Put(srcname, dest string, r io.Reader, opts BlobPutOptions) error Rm(fn string) error OpenFile(path string) (BlobHandle, error) }
BlobStore provides a blob store interface.
func NewBlobStore ¶
type CbfsBlobStore ¶
type CbfsBlobStore struct { *cbfsclient.Client // contains filtered or unexported fields }
func NewCbfsBlobStore ¶
func NewCbfsBlobStore(uri string) (*CbfsBlobStore, error)
func (*CbfsBlobStore) OpenFile ¶
func (c *CbfsBlobStore) OpenFile(path string) (BlobHandle, error)
func (*CbfsBlobStore) Put ¶
func (c *CbfsBlobStore) Put(srcname, dest string, r io.Reader, opts BlobPutOptions) error
type ChangesListener ¶
type ChangesListener struct { Configuration Configuration Database couch.Database JobScheduler JobScheduler }
A changes listener listens for changes on the _changes feed and reacts to them. The changes listener currently runs as a goroutine in the httpd process, and so the system only currently supports having a single httpd process, because otherwise there would be multiple changes listeners on the same changes feed, which will cause duplicate jobs to get kicked off. If the system needs to support multiple http processes, then the changes listener needs to run in its own process.
func NewChangesListener ¶
func NewChangesListener(c Configuration, jobScheduler JobScheduler) (*ChangesListener, error)
Create a new ChangesListener
func (ChangesListener) FollowChangesFeed ¶
func (c ChangesListener) FollowChangesFeed()
Follow changes feed. This will typically be run in its own goroutine.
type Classifier ¶
type Classifier struct { ElasticThoughtDoc SpecificationUrl string `json:"specification-url" binding:"required"` TrainingJobID string `json:"training-job-id" binding:"required"` Scale string `json:"scale" binding:"required"` ImageWidth string `json:"image-width" binding:"required"` ImageHeight string `json:"image-height" binding:"required"` Color bool `json:"color"` Gpu bool `json:"gpu"` // had to make exported, due to https://github.com/gin-gonic/gin/pull/123 // waiting for this to get merged into master branch, since go get // pulls from master branch. Configuration Configuration }
A classifier uses a trained model to classify new incoming data points
func NewClassifier ¶
func NewClassifier(c Configuration) *Classifier
Create a new classifier. If you don't use this, you must set the embedded ElasticThoughtDoc Type field.
func (*Classifier) Find ¶
func (c *Classifier) Find(id string) error
Find a classifier in the db with the given id, or return an error if not found
func (*Classifier) Insert ¶
func (c *Classifier) Insert() error
Insert into database (only call this if you know it doesn't already exist, or else you'll end up w/ unwanted dupes)
func (*Classifier) RefreshFromDB ¶
func (c *Classifier) RefreshFromDB(db couch.Database) error
func (*Classifier) SetSpecificationUrl ¶
func (c *Classifier) SetSpecificationUrl(specUrlCbfs string) error
func (Classifier) Validate ¶
func (c Classifier) Validate() error
type ClassifyJob ¶
type ClassifyJob struct { ElasticThoughtDoc ProcessingState ProcessingState `json:"processing-state"` ProcessingLog string `json:"processing-log"` StdOutUrl string `json:"std-out-url"` StdErrUrl string `json:"std-err-url"` ClassifierID string `json:"classifier-id"` // Key: image url of image in cbfs // Value: the classification result for that image Results map[string]string `json:"results"` // had to make exported, due to https://github.com/gin-gonic/gin/pull/123 // waiting for this to get merged into master branch, since go get // pulls from master branch. Configuration Configuration }
A classify job tries to classify images given by user against the given trained model
func NewClassifyJob ¶
func NewClassifyJob(c Configuration) *ClassifyJob
Create a new classify job. If you don't use this, you must set the embedded ElasticThoughtDoc Type field.
func (ClassifyJob) Failed ¶
func (c ClassifyJob) Failed(db couch.Database, processingErr error) error
func (*ClassifyJob) Find ¶
func (c *ClassifyJob) Find(id string) error
Find a classify Job in the db with the given id, or error if not found CodeReview: duplication with Find in many places
func (*ClassifyJob) Insert ¶
func (c *ClassifyJob) Insert() error
Insert into database (only call this if you know it doesn't already exist, or else you'll end up w/ unwanted dupes)
func (*ClassifyJob) RefreshFromDB ¶
func (c *ClassifyJob) RefreshFromDB(db couch.Database) error
CodeReview: duplication with RefreshFromDB in many places
func (*ClassifyJob) SetResults ¶
func (c *ClassifyJob) SetResults(results map[string]string) (bool, error)
func (*ClassifyJob) UpdateProcessingLog ¶
func (c *ClassifyJob) UpdateProcessingLog(val string) (bool, error)
func (*ClassifyJob) UpdateProcessingState ¶
func (c *ClassifyJob) UpdateProcessingState(newState ProcessingState) (bool, error)
Update the processing state to new state.
type Configuration ¶
type Configuration struct { DbUrl string CbfsUrl string NsqLookupdUrl string NsqdUrl string NsqdTopic string WorkDirectory string QueueType QueueType NumCbfsClusterNodes int // needed to validate cbfs cluster health }
Holds configuration values that are used throughout the application
func NewDefaultConfiguration ¶
func NewDefaultConfiguration() *Configuration
func (Configuration) DbConnection ¶
func (c Configuration) DbConnection() couch.Database
Connect to db based on url stored in config, or panic if not able to connect
func (Configuration) Merge ¶
func (c Configuration) Merge(parsedDocOpts map[string]interface{}) (Configuration, error)
Add values from parsedDocOpts into Configuration and return a new instance Example map:
map[--help:false --blob-store-url:file:///tmp --sync-gw-url:http://blah.com:4985/et]
func (Configuration) NewBlobStoreClient ¶
func (c Configuration) NewBlobStoreClient() (BlobStore, error)
Create a new cbfs client based on url stored in config
type Datafile ¶
type Datafile struct { ElasticThoughtDoc ProcessingState ProcessingState `json:"processing-state"` ProcessingLog string `json:"processing-log"` UserID string `json:"user-id"` Url string `json:"url" binding:"required"` // had to make exported, due to https://github.com/gin-gonic/gin/pull/123 // waiting for this to get merged into master branch, since go get // pulls from master branch. Configuration Configuration }
A Datafile is a raw "bundle" of data, typically a zip or .tar.gz file. It cannot be used by a solver directly, instead it used to create dataset objects which can be used by the solver. A single datafile can be used to create any number of dataset objects.
func FindDatafile ¶
Find Datafile by Id from the db
func (Datafile) CopyToBlobStore ¶
Copy the contents of Datafile.Url to CBFS and return the cbfs dest path
func (Datafile) Failed ¶
Update the dataset state to record that it failed Codereview: datafile.go has same method
func (Datafile) FinishedSuccessfully ¶
Mark this datafile as having finished processing successfully
func (*Datafile) GetProcessingState ¶
func (d *Datafile) GetProcessingState() ProcessingState
func (Datafile) HasValidId ¶
Does this datafile have a valid Id?
func (*Datafile) RefreshFromDB ¶
func (*Datafile) SetProcessingState ¶
func (d *Datafile) SetProcessingState(newState ProcessingState)
func (*Datafile) UpdateProcessingState ¶
func (d *Datafile) UpdateProcessingState(newState ProcessingState) (bool, error)
Update the processing state to new state.
type DatafileDownloader ¶
type DatafileDownloader struct { Configuration Configuration Datafile Datafile }
Worker job that downloads a datafile url contents to cbfs
type Dataset ¶
type Dataset struct { ElasticThoughtDoc ProcessingState ProcessingState `json:"processing-state"` ProcessingLog string `json:"processing-log"` TrainingDataset TrainingDataset `json:"training" binding:"required"` TestDataset TestDataset `json:"test" binding:"required"` // had to make exported, due to https://github.com/gin-gonic/gin/pull/123 // waiting for this to get merged into master branch, since go get // pulls from master branch. Configuration Configuration }
A dataset is created from a datafile, and represents a partition of the datafile to be used for a particular purpose. The typical example would involve:
- Datafile with 100 examples
- Training dataset with 70 examples
- Test dataset with 30 examples
func NewDataset ¶
func NewDataset(c Configuration) *Dataset
Create a new dataset. If you don't use this, you must set the embedded ElasticThoughtDoc Type field.
func (*Dataset) AddArtifactUrls ¶
Update this dataset with the artifact urls (cbfs://<id>/training.tar.gz, ..) even though these artifacts might not exist yet.
func (Dataset) Failed ¶
Update the dataset state to record that it failed Codereview: datafile.go has same method
func (Dataset) FinishedSuccessfully ¶
Update the dataset state to record that it finished successfully Codereview: de-dupe with datafile FinishedSuccessfully
func (*Dataset) GetProcessingState ¶
func (d *Dataset) GetProcessingState() ProcessingState
func (Dataset) GetSplittableDatafile ¶
Find and return the datafile associated with this dataset
func (Dataset) GetTestingDatafile ¶
Get the testing datafile object
func (Dataset) GetTestingDatafileUrl ¶
Get the source url associated with the testing datafile
func (Dataset) GetTrainingDatafile ¶
Get the training datafile object
func (Dataset) GetTrainingDatafileUrl ¶
Get the source url associated with the training datafile
func (*Dataset) Insert ¶
Insert into database (only call this if you know it doesn't already exist, or else you'll end up w/ unwanted dupes)
func (*Dataset) RefreshFromDB ¶
func (*Dataset) SetProcessingState ¶
func (d *Dataset) SetProcessingState(newState ProcessingState)
func (Dataset) TestingArtifactPath ¶
Path to testing artifact file, eg <id>/testing.tar.gz
func (Dataset) TrainingArtifactPath ¶
Path to training artifact file, eg <id>/training.tar.gz
func (*Dataset) UpdateArtifactUrls ¶
func (*Dataset) UpdateProcessingLog ¶
func (*Dataset) UpdateProcessingState ¶
func (d *Dataset) UpdateProcessingState(newState ProcessingState) (bool, error)
Update the processing state to new state.
type DatasetSplitter ¶
type DatasetSplitter struct { Configuration Configuration Dataset Dataset }
Worker job that splits a dataset into training/test set
func (DatasetSplitter) DownloadDatafiles ¶
func (d DatasetSplitter) DownloadDatafiles()
func (DatasetSplitter) SplitDatafile ¶
func (d DatasetSplitter) SplitDatafile()
type ElasticThoughtDoc ¶
type ElasticThoughtDoc struct { Revision string `json:"_rev"` Id string `json:"_id"` Type string `json:"type"` }
All document structs should embed this struct to get access to the sync gateway metadata (_id, _rev) and the "type" field which differentiates the different doc types.
type EndpointContext ¶
type EndpointContext struct {
Configuration Configuration
}
func (EndpointContext) CreateClassificationJobEndpoint ¶
func (e EndpointContext) CreateClassificationJobEndpoint(c *gin.Context)
func (EndpointContext) CreateClassifierEndpoint ¶
func (e EndpointContext) CreateClassifierEndpoint(c *gin.Context)
Creates a classifier
func (EndpointContext) CreateDataFileEndpoint ¶
func (e EndpointContext) CreateDataFileEndpoint(c *gin.Context)
Creates a datafile
func (EndpointContext) CreateDataSetsEndpoint ¶
func (e EndpointContext) CreateDataSetsEndpoint(c *gin.Context)
Creates datasets from a datafile
func (EndpointContext) CreateSolverEndpoint ¶
func (e EndpointContext) CreateSolverEndpoint(c *gin.Context)
Creates a solver
func (EndpointContext) CreateTrainingJob ¶
func (e EndpointContext) CreateTrainingJob(c *gin.Context)
Create Training Job
func (EndpointContext) CreateUserEndpoint ¶
func (e EndpointContext) CreateUserEndpoint(c *gin.Context)
Creates a new user
type FileSystemBlobHandle ¶
type FileSystemBlobHandle struct{}
type FileSystemBlobStore ¶
type FileSystemBlobStore struct {
// contains filtered or unexported fields
}
func NewFileSystemBlobStore ¶
func NewFileSystemBlobStore(rootPath string) (*FileSystemBlobStore, error)
func (FileSystemBlobStore) Get ¶
func (f FileSystemBlobStore) Get(path string) (io.ReadCloser, error)
func (FileSystemBlobStore) OpenFile ¶
func (f FileSystemBlobStore) OpenFile(path string) (BlobHandle, error)
func (FileSystemBlobStore) Put ¶
func (f FileSystemBlobStore) Put(srcname, dest string, r io.Reader, opts BlobPutOptions) error
func (FileSystemBlobStore) Rm ¶
func (f FileSystemBlobStore) Rm(path string) error
type InProcessJobScheduler ¶
type InProcessJobScheduler struct { Configuration Configuration JobsOutstanding *sync.WaitGroup }
Run worker jobs in a goroutine in the rest server process (as opposed to using nsq) Makes certain testing easier
func NewInProcessJobScheduler ¶
func NewInProcessJobScheduler(c Configuration) *InProcessJobScheduler
func (InProcessJobScheduler) ScheduleJob ¶
func (j InProcessJobScheduler) ScheduleJob(jobDescriptor JobDescriptor) error
type JobDescriptor ¶
type JobDescriptor struct {
DocIdToProcess string `json:"doc-id-to-process"`
}
A job descriptor is meant to fully describe a worker job. It needs to be easily serializable into json so it can be passed on the wire.
func NewJobDescriptor ¶
func NewJobDescriptor(docId string) *JobDescriptor
Create a new JobDescriptor
type JobScheduler ¶
type JobScheduler interface {
ScheduleJob(job JobDescriptor) error
}
By swapping out the job scheduler, you can easily swap out the ability to have jobs placed on NSQ or to be run in a local goroutine inside the rest server process
type MockBlobStore ¶
type MockBlobStore struct { // Queued responses for blob Get requests. The key is a // regex that should match the path in the Get request. GetResponses map[string]ResponseQueue }
var (
DefaultMockBlobStore *MockBlobStore
)
func NewMockBlobStore ¶
func NewMockBlobStore() *MockBlobStore
func (*MockBlobStore) Get ¶
func (m *MockBlobStore) Get(path string) (io.ReadCloser, error)
func (*MockBlobStore) OpenFile ¶
func (m *MockBlobStore) OpenFile(path string) (BlobHandle, error)
func (*MockBlobStore) Put ¶
func (m *MockBlobStore) Put(srcname, dest string, r io.Reader, opts BlobPutOptions) error
func (*MockBlobStore) QueueGetResponse ¶
func (m *MockBlobStore) QueueGetResponse(pathRegex string, response io.Reader)
Queue up a response to a Get request
func (*MockBlobStore) Rm ¶
func (m *MockBlobStore) Rm(fn string) error
type NsqJobScheduler ¶
type NsqJobScheduler struct {
Configuration Configuration
}
func NewNsqJobScheduler ¶
func NewNsqJobScheduler(c Configuration) *NsqJobScheduler
func (NsqJobScheduler) ScheduleJob ¶
func (j NsqJobScheduler) ScheduleJob(jobDescriptor JobDescriptor) error
type NsqWorker ¶
type NsqWorker struct {
Configuration Configuration
}
A worker which pulls jobs off of NSQ and processes them
func NewNsqWorker ¶
func NewNsqWorker(c Configuration) *NsqWorker
func (NsqWorker) HandleEvents ¶
func (n NsqWorker) HandleEvents()
type Processable ¶
type Processable interface { GetProcessingState() ProcessingState SetProcessingState(newState ProcessingState) RefreshFromDB(db couch.Database) error }
type ProcessingState ¶
type ProcessingState int
For objects that require processing, like Dataset objects, the ProcessingState helps track the current state of processing.
const ( Pending ProcessingState = iota Processing FinishedSuccessfully Failed )
func (ProcessingState) MarshalJSON ¶
func (p ProcessingState) MarshalJSON() ([]byte, error)
func (*ProcessingState) UnmarshalJSON ¶
func (p *ProcessingState) UnmarshalJSON(bytes []byte) error
Custom Unmarshal so that "Pending" is mapped to the numeric ProcessingState
type ResponseQueue ¶
type Runnable ¶
func CreateJob ¶
func CreateJob(config Configuration, jobDescriptor JobDescriptor) (Runnable, error)
Create a new job based on the Job Descriptor
type Solver ¶
type Solver struct { ElasticThoughtDoc DatasetId string `json:"dataset-id"` SpecificationUrl string `json:"specification-url" binding:"required"` SpecificationNetUrl string `json:"specification-net-url" binding:"required"` // had to make exported, due to https://github.com/gin-gonic/gin/pull/123 // waiting for this to get merged into master branch, since go get // pulls from master branch. Configuration Configuration // distinguish between IMAGE_DATA, LEVELDB, LMDB, etc. // this assumes that test and training input layers are // of the same layer type. LayerType LayerType }
A solver can generate trained models, which can be used to make predictions
func NewSolver ¶
func NewSolver(config Configuration) *Solver
Create a new solver. If you don't use this, you must set the embedded ElasticThoughtDoc Type field.
func (Solver) DownloadSpecToBlobStore ¶
download contents of solver-spec-url into cbfs://<solver-id>/spec.prototxt and update solver object's solver-spec-url with cbfs url
func (Solver) Insert ¶
Insert into database (only call this if you know it doesn't already exist, or else you'll end up w/ unwanted dupes)
func (Solver) SaveTrainTestData ¶
func (s Solver) SaveTrainTestData(config Configuration, destDirectory string) (trainingLabelIndex []string, err error)
Download and untar the training and test .tar.gz files associated w/ solver, as well as index files.
Returns the label index (each label indexed by its numeric label id), and an error or nil
func (Solver) SpecificationNetUrlPath ¶
func (Solver) SpecificationUrlPath ¶
If specification url is "cbfs://foo/bar.txt", return "/foo/bar.txt"
type TestDataset ¶
type TrainingDataset ¶
type TrainingJob ¶
type TrainingJob struct { ElasticThoughtDoc ProcessingState ProcessingState `json:"processing-state"` ProcessingLog string `json:"processing-log"` UserID string `json:"user-id"` SolverId string `json:"solver-id" binding:"required"` StdOutUrl string `json:"std-out-url"` StdErrUrl string `json:"std-err-url"` TrainedModelUrl string `json:"trained-model-url"` Labels []string `json:"labels"` // had to make exported, due to https://github.com/gin-gonic/gin/pull/123 // waiting for this to get merged into master branch, since go get // pulls from master branch. Configuration Configuration }
A training job represents a "training session" of a solver against training/test data
func NewTrainingJob ¶
func NewTrainingJob(c Configuration) *TrainingJob
Create a new training job. If you don't use this, you must set the embedded ElasticThoughtDoc Type field.
func (TrainingJob) Failed ¶
func (j TrainingJob) Failed(db couch.Database, processingErr error) error
Update the state to record that it failed Codereview: de-dupe
func (*TrainingJob) Find ¶
func (j *TrainingJob) Find(id string) error
Find a training job in the db with the given id, or return an error if not found
func (TrainingJob) FinishedSuccessfully ¶
func (j TrainingJob) FinishedSuccessfully(db couch.Database, logPath string) error
Update the state to record that it succeeded Codereview: de-dupe
func (*TrainingJob) GetProcessingState ¶
func (j *TrainingJob) GetProcessingState() ProcessingState
func (TrainingJob) Insert ¶
func (j TrainingJob) Insert(db couch.Database) (*TrainingJob, error)
Insert into database (only call this if you know it doesn't already exist, or else you'll end up w/ unwanted dupes) TODO: use same approach as Classifier#Insert() Codereview: de-dupe
func (*TrainingJob) RefreshFromDB ¶
func (j *TrainingJob) RefreshFromDB(db couch.Database) error
func (*TrainingJob) SetProcessingState ¶
func (j *TrainingJob) SetProcessingState(newState ProcessingState)
func (*TrainingJob) UpdateLabels ¶
func (j *TrainingJob) UpdateLabels(labels []string) (bool, error)
func (*TrainingJob) UpdateProcessingLog ¶
func (j *TrainingJob) UpdateProcessingLog(val string) (bool, error)
func (*TrainingJob) UpdateProcessingState ¶
func (j *TrainingJob) UpdateProcessingState(newState ProcessingState) (bool, error)
Update the processing state to new state.
type User ¶
type User struct { ElasticThoughtDoc Username string `json:"username"` Email string `json:"email"` Password string `json:"password"` }
An ElasticThought user.
func AuthenticateUser ¶
Does this username/password combo exist in the database? If so, return the user. If not, return an error.
func NewUserFromUser ¶
Create a new User based on values in another user
Source Files ¶
- bindata.go
- blobstore.go
- cbfs_blobstore.go
- cbfs_util.go
- changes_listener.go
- classifier.go
- classify_job.go
- configuration.go
- constants.go
- datafile.go
- datafile_downloader.go
- dataset.go
- dataset_splitter.go
- elasticthought.go
- endpoint_context.go
- environment_check.go
- filemap.go
- filesystem_blobstore.go
- inprocess_jobscheduler.go
- job_descriptor.go
- job_factory.go
- jobscheduler.go
- log.go
- middleware.go
- mockblobstore.go
- models.go
- nsq_jobscheduler.go
- nsq_worker.go
- os_util.go
- package.go
- processable.go
- processingstate.go
- runnable.go
- solver.go
- tar_util.go
- training_job.go
- user.go
Directories ¶
Path | Synopsis |
---|---|
Package caffe is a generated protocol buffer package.
|
Package caffe is a generated protocol buffer package. |
cli
|
|
elastic-thought
Command line utility to launch the ElasticThought REST API server.
|
Command line utility to launch the ElasticThought REST API server. |
worker
Command line utility to launch an ElasticThought worker
|
Command line utility to launch an ElasticThought worker |
docker
|
|