Documentation ¶
Index ¶
- Constants
- Variables
- func CheckForAcceptRequest(urlQueue string, urlStat string, statusChanged bool) bool
- func GenerateLandingPage() string
- func InstrumentHttpStatusHandler(ocrHttpHandler *OcrHTTPStatusHandler) http.Handler
- func SetResManagerState(ampqAPIConfig RabbitConfig)
- type ConvertPdf
- type FlagFunction
- type FlagFunctionWorker
- type IdentityPreprocessor
- type MockEngine
- type OcrEngine
- type OcrEngineType
- type OcrHTTPStatusHandler
- type OcrHttpMultipartHandler
- type OcrHttpStatusHandler
- type OcrQueueManager
- type OcrRequest
- type OcrResult
- type OcrRpcClient
- type OcrRpcWorker
- type Preprocessor
- type PreprocessorRpcWorker
- type RabbitConfig
- type SandwichEngine
- type SandwichEngineArgs
- type StrokeWidthTransformer
- type TesseractEngine
- type TesseractEngineArgs
- type WorkerConfig
Constants ¶
const ( EngineTesseract = OcrEngineType(iota) EngineGoTesseract EngineSandwichTesseract EngineMock )
const MOCK_ENGINE_RESPONSE = "mock engine decoder response"
const PreprocessorConvertPdf = "convert-pdf"
const PreprocessorIdentity = "identity"
const PreprocessorStrokeWidthTransform = "stroke-width-transform"
Variables ¶
var ( // AppStop and ServiceCanAccept are global. Used to set the flag for logging and stopping the application AppStop bool ServiceCanAccept bool ServiceCanAcceptMu sync.Mutex )
var ( // Requests is for holding and monitoring queued requests Requests = make(map[string]chan OcrResult) )
var ( // StopChan is used to gracefully stop http daemon StopChan = make(chan bool, 1) )
Functions ¶
func CheckForAcceptRequest ¶
CheckForAcceptRequest checks whether the resources for an incoming request are available.
func GenerateLandingPage ¶
func GenerateLandingPage() string
GenerateLandingPage will generate a simple landing page
func InstrumentHttpStatusHandler ¶
func InstrumentHttpStatusHandler(ocrHttpHandler *OcrHTTPStatusHandler) http.Handler
InstrumentHttpStatusHandler wraps httpHandler to provide prometheus metrics
func SetResManagerState ¶
func SetResManagerState(ampqAPIConfig RabbitConfig)
SetResManagerState updates the state of the resource manager (the function itself returns no value), depending on whether RabbitMQ's memory usage and the number of messages stay within the limit.
Types ¶
type ConvertPdf ¶
type ConvertPdf struct { }
type FlagFunction ¶
type FlagFunction func()
func NoOpFlagFunction ¶
func NoOpFlagFunction() FlagFunction
type FlagFunctionWorker ¶
type FlagFunctionWorker func()
FlagFunctionWorker will be used as argument type for DefaultConfigFlagsWorkerOverride
func NoOpFlagFunctionWorker ¶
func NoOpFlagFunctionWorker() FlagFunctionWorker
NoOpFlagFunctionWorker will return an empty set of CLI parameters. In this case the default parameters will be used.
type IdentityPreprocessor ¶
type IdentityPreprocessor struct { }
type MockEngine ¶
type MockEngine struct { }
func (MockEngine) ProcessRequest ¶
func (m MockEngine) ProcessRequest(ocrRequest OcrRequest, workerConfig WorkerConfig) (OcrResult, error)
ProcessRequest will process incoming OCR request by routing it through the whole process chain
type OcrEngine ¶
type OcrEngine interface {
ProcessRequest(ocrRequest OcrRequest, workerConfig WorkerConfig) (OcrResult, error)
}
func NewOcrEngine ¶
func NewOcrEngine(engineType OcrEngineType) OcrEngine
type OcrEngineType ¶
type OcrEngineType int
func (OcrEngineType) String ¶
func (e OcrEngineType) String() string
func (*OcrEngineType) UnmarshalJSON ¶
func (e *OcrEngineType) UnmarshalJSON(b []byte) (err error)
type OcrHTTPStatusHandler ¶
type OcrHTTPStatusHandler struct {
RabbitConfig RabbitConfig
}
OcrHTTPStatusHandler is for initial handling of ocr request
func NewOcrHttpHandler ¶
func NewOcrHttpHandler(r RabbitConfig) *OcrHTTPStatusHandler
func (*OcrHTTPStatusHandler) ServeHTTP ¶
func (s *OcrHTTPStatusHandler) ServeHTTP(w http.ResponseWriter, req *http.Request)
type OcrHttpMultipartHandler ¶
type OcrHttpMultipartHandler struct {
RabbitConfig RabbitConfig
}
func NewOcrHttpMultipartHandler ¶
func NewOcrHttpMultipartHandler(r RabbitConfig) *OcrHttpMultipartHandler
func (*OcrHttpMultipartHandler) ServeHTTP ¶
func (s *OcrHttpMultipartHandler) ServeHTTP(w http.ResponseWriter, req *http.Request)
type OcrHttpStatusHandler ¶
type OcrHttpStatusHandler struct { }
func NewOcrHttpStatusHandler ¶
func NewOcrHttpStatusHandler() *OcrHttpStatusHandler
func (*OcrHttpStatusHandler) ServeHTTP ¶
func (s *OcrHttpStatusHandler) ServeHTTP(w http.ResponseWriter, req *http.Request)
type OcrQueueManager ¶
type OcrQueueManager struct { NumMessages uint `json:"messages"` NumConsumers uint `json:"consumers"` MessageBytes uint `json:"message_bytes"` }
OcrQueueManager is used as a main component of resource manager
type OcrRequest ¶
type OcrRequest struct { ImgUrl string `json:"img_url"` ImgBase64 string `json:"img_base64"` EngineType OcrEngineType `json:"engine"` ImgBytes []byte `json:"img_bytes"` PreprocessorChain []string `json:"preprocessors"` PreprocessorArgs map[string]interface{} `json:"preprocessor-args"` EngineArgs map[string]interface{} `json:"engine_args"` Deferred bool `json:"deferred"` ReplyTo string `json:"reply_to"` DocType string `json:"doc_type"` RequestID string `json:"req_id"` PageNumber uint16 `json:"page_number"` UserAgent string `json:"user_agent"` TimeOut uint `json:"time_out"` ReferenceID string `json:"reference_id"` // decode ocr in http handler rather than putting in queue InplaceDecode bool `json:"inplace_decode"` }
func (OcrRequest) String ¶
func (ocrRequest OcrRequest) String() string
type OcrResult ¶
type OcrResult struct { Text string `json:"text"` Status string `json:"status"` ID string `json:"id"` }
func CheckOcrStatusByID ¶
CheckOcrStatusByID checks status of an ocr request based on origin of request
func HandleOcrRequest ¶
func HandleOcrRequest(ocrRequest OcrRequest, workerConfig RabbitConfig) (OcrResult, error)
HandleOcrRequest will process incoming OCR request by routing it through the whole process chain
type OcrRpcClient ¶
type OcrRpcClient struct {
// contains filtered or unexported fields
}
func NewOcrRpcClient ¶
func NewOcrRpcClient(rc RabbitConfig) (*OcrRpcClient, error)
func (*OcrRpcClient) DecodeImage ¶
func (c *OcrRpcClient) DecodeImage(ocrRequest OcrRequest, requestID string) (OcrResult, error)
DecodeImage is the main function for performing OCR on an incoming request. It handles the parameters and the whole workflow.
type OcrRpcWorker ¶
type OcrRpcWorker struct { Done chan error // contains filtered or unexported fields }
func NewOcrRpcWorker ¶
func NewOcrRpcWorker(wc WorkerConfig) (*OcrRpcWorker, error)
NewOcrRpcWorker is needed to establish a connection to a message broker
func (OcrRpcWorker) Run ¶
func (w OcrRpcWorker) Run() error
func (*OcrRpcWorker) Shutdown ¶
func (w *OcrRpcWorker) Shutdown() error
type Preprocessor ¶
type Preprocessor interface {
// contains filtered or unexported methods
}
type PreprocessorRpcWorker ¶
type PreprocessorRpcWorker struct { Done chan error // contains filtered or unexported fields }
func NewPreprocessorRpcWorker ¶
func NewPreprocessorRpcWorker(rc RabbitConfig, preprocessor string) (*PreprocessorRpcWorker, error)
func (PreprocessorRpcWorker) Run ¶
func (w PreprocessorRpcWorker) Run() error
func (*PreprocessorRpcWorker) Shutdown ¶
func (w *PreprocessorRpcWorker) Shutdown() error
type RabbitConfig ¶
type RabbitConfig struct { AmqpURI string Exchange string ExchangeType string RoutingKey string Reliable bool AmqpAPIURI string APIPathQueue string APIQueueName string APIPathStats string QueuePrio map[string]uint8 QueuePrioArg string /* ResponseCacheTimeout sets the default(!!!) global timeout in seconds for a request; the engine will be killed after reaching the time limit and the user will get a timeout error */ ResponseCacheTimeout uint // MaximalResponseCacheTimeout: the client won't be able to set ResponseCacheTimeout higher than this value MaximalResponseCacheTimeout uint FactorForMessageAccept uint // contains filtered or unexported fields }
func DefaultConfigFlagsOverride ¶
func DefaultConfigFlagsOverride(flagFunction FlagFunction) RabbitConfig
func DefaultTestConfig ¶
func DefaultTestConfig() RabbitConfig
type SandwichEngine ¶
type SandwichEngine struct { }
This variant of the SandwichEngine calls pdfsandwich via exec. This implementation returns either the PDF with the OCR layer only or a merged variant of the PDF plus the OCR layer, with the ability to optimize the output PDF file by calling the "gs" tool.
func (SandwichEngine) ProcessRequest ¶
func (t SandwichEngine) ProcessRequest(ocrRequest OcrRequest, workerConfig WorkerConfig) (OcrResult, error)
ProcessRequest will process incoming OCR request by routing it through the whole process chain
type SandwichEngineArgs ¶
type SandwichEngineArgs struct {
// contains filtered or unexported fields
}
func NewSandwichEngineArgs ¶
func NewSandwichEngineArgs(ocrRequest OcrRequest, workerConfig WorkerConfig) (*SandwichEngineArgs, error)
NewSandwichEngineArgs generates arguments for SandwichEngine which will be used to start involved tools
func (SandwichEngineArgs) Export ¶
func (t SandwichEngineArgs) Export() []string
Export returns a slice that can be passed to the tesseract binary as command-line args, e.g. ["-c", "tessedit_char_whitelist=0123456789", "-c", "foo=bar"].
type StrokeWidthTransformer ¶
type StrokeWidthTransformer struct { }
type TesseractEngine ¶
type TesseractEngine struct { }
This variant of the TesseractEngine calls tesseract via exec
func (TesseractEngine) ProcessRequest ¶
func (t TesseractEngine) ProcessRequest(ocrRequest OcrRequest, workerConfig WorkerConfig) (OcrResult, error)
ProcessRequest will process incoming OCR request by routing it through the whole process chain
type TesseractEngineArgs ¶
type TesseractEngineArgs struct {
// contains filtered or unexported fields
}
func NewTesseractEngineArgs ¶
func NewTesseractEngineArgs(ocrRequest OcrRequest) (*TesseractEngineArgs, error)
func (TesseractEngineArgs) Export ¶
func (t TesseractEngineArgs) Export() []string
Export returns a slice that can be passed to the tesseract binary as command-line args, e.g. ["-c", "tessedit_char_whitelist=0123456789", "-c", "foo=bar"].
type WorkerConfig ¶
type WorkerConfig struct { AmqpURI string Exchange string ExchangeType string RoutingKey string Reliable bool AmqpAPIURI string APIPathQueue string APIQueueName string APIPathStats string SaveFiles bool Debug bool Tiff2pdfConverter string }
WorkerConfig will be passed to ocr engines and is used to establish connection to a message broker
func DefaultConfigFlagsWorkerOverride ¶
func DefaultConfigFlagsWorkerOverride(flagFunction FlagFunctionWorker) (WorkerConfig, error)
func DefaultWorkerConfig ¶
func DefaultWorkerConfig() WorkerConfig
DefaultWorkerConfig will set the default set of worker parameters which are needed for testing and connecting to a broker
Source Files ¶
- convert-pdf.go
- generate_landing_page.go
- mock_engine.go
- ocr_engine.go
- ocr_http_handler.go
- ocr_http_multipart_handler.go
- ocr_http_status_handler.go
- ocr_postback_client.go
- ocr_request.go
- ocr_res_manager.go
- ocr_rpc_client.go
- ocr_rpc_worker.go
- ocr_util.go
- preprocessor.go
- preprocessor_rpc_worker.go
- prometheus_metrics.go
- rabbit_config.go
- sandwich_engine.go
- stroke_width_transform.go
- tesseract_engine.go
- worker_config.go