o

package
v0.0.0-...-90d11f1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 6, 2021 License: Apache-2.0 Imports: 28 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func CopyMap

func CopyMap(from map[string]string) map[string]string

CopyMap returns a copy of the provided map.

func GetJpegDirPath

func GetJpegDirPath(sessionDirPath string) string

func GetMaskDirPath

func GetMaskDirPath(sessionDirPath string) string

func GetZipDirPath

func GetZipDirPath(sessionDirPath string) string

func IsSessionFile

func IsSessionFile(fileName string) bool

func MakeFlow

func MakeFlow(config Config) *flow.Flow

func RunFlow

func RunFlow(flow *flow.Flow, workers int,
	sessionDir, sourceFileName string,
	ocr OcrEngineType, ner NerEngineType) error

func RunFlowOnce

func RunFlowOnce(config Config, sessionDir, sourceFileName string, ocr OcrEngineType, ner NerEngineType) error

func ZipImagesFromDir

func ZipImagesFromDir(dirpath string) error

Types

type AnalyzeSpec

// AnalyzeSpec is the element type of YandexOCRRequest.AnalyzeSpecs. It pairs a
// content string with the list of features requested for that content.
//
// NOTE(review): Content is presumably the encoded image payload sent to the
// OCR service; the encoding is not visible here — confirm against the caller.
type AnalyzeSpec struct {
	Content  string    `json:"content"`
	Features []Feature `json:"features"`
}

type Config

// Config is the value returned by NewConfig and accepted by MakeFlow and
// RunFlowOnce. None of its fields are shown in the exported documentation, so
// it is built through NewConfig rather than a struct literal.
type Config struct {
	// contains filtered or unexported fields
}

func NewConfig

func NewConfig(
	workers int,

	unoconvServiceURL string,

	tesseractOcrServiceURL string,
	tesseractPyOcrServiceURL string,
	easyocrOcrServiceURL string,
	yandexOcrURL, yandexOcrToken, yandexOcrFolderId string,

	nerStanzaServiceURL string,
	nerMironServiceURL string,
) Config

type Feature

// Feature is the element type of AnalyzeSpec.Features. It carries a feature
// type name together with a TextDetectionConfig.
//
// NOTE(review): the set of valid Type values is not defined in this package's
// exported API — confirm against the OCR service documentation.
type Feature struct {
	Type                string              `json:"type"`
	TextDetectionConfig TextDetectionConfig `json:"text_detection_config"`
}

type NerEngineType

// NerEngineType is a string identifier for a NER engine. RunFlow and
// RunFlowOnce take one as their ner argument, and NerEngineFromString returns
// one for a given string.
type NerEngineType string

// Predefined NerEngineType values.
//
// NOTE(review): these are declared with var, so importing packages can
// reassign them; const may have been intended — confirm before changing.
var (
	StanzaNER           NerEngineType = "ner-stanza"
	// NOTE(review): the identifier says HFLabs while the value says "miron";
	// presumably both refer to the same engine — confirm.
	HFLabsNER           NerEngineType = "ner-miron"
	CombineNER          NerEngineType = "ner-combination"
	CombineNERWithDicts NerEngineType = "ner-combination-dicts"
)

func NerEngineFromString

func NerEngineFromString(in string) NerEngineType

type OcrEngineType

// OcrEngineType is a string identifier for an OCR engine. RunFlow and
// RunFlowOnce take one as their ocr argument, and OcrEngineFromString returns
// one for a given string.
type OcrEngineType string

// Predefined OcrEngineType values.
//
// NOTE(review): these are declared with var, so importing packages can
// reassign them; const may have been intended — confirm before changing.
var (
	YandexOCR      OcrEngineType = "ocr-yandex"
	TesseractOCR   OcrEngineType = "ocr-tesseract"
	TesseractPyOCR OcrEngineType = "ocr-tesseract-py"
	EasyocrOCR     OcrEngineType = "ocr-easyocr"
)

func OcrEngineFromString

func OcrEngineFromString(in string) OcrEngineType

type Text

// Text maps a JSON document with the nesting
//
//	results -> results -> textDetection -> pages -> blocks -> lines -> words
//
// Blocks, lines and words each carry a boundingBox made of vertices. Vertex
// coordinates (x, y), page width/height and a word's entityIndex are declared
// as strings, so callers must parse them to get numeric values. Lines and
// words also carry a float confidence; words additionally list languages.
//
// NOTE(review): this presumably mirrors the Yandex OCR response body (see
// YandexOCRRequest) — confirm against the service documentation.
type Text struct {
	Results []struct {
		Results []struct {
			TextDetection struct {
				Pages []struct {
					Blocks []struct {
						BoundingBox struct {
							Vertices []struct {
								X string `json:"x"`
								Y string `json:"y"`
							} `json:"vertices"`
						} `json:"boundingBox"`
						Lines []struct {
							BoundingBox struct {
								Vertices []struct {
									X string `json:"x"`
									Y string `json:"y"`
								} `json:"vertices"`
							} `json:"boundingBox"`
							Words []struct {
								BoundingBox struct {
									Vertices []struct {
										X string `json:"x"`
										Y string `json:"y"`
									} `json:"vertices"`
								} `json:"boundingBox"`
								Languages []struct {
									LanguageCode string  `json:"languageCode"`
									Confidence   float64 `json:"confidence"`
								} `json:"languages"`
								Text        string  `json:"text"`
								Confidence  float64 `json:"confidence"`
								EntityIndex string  `json:"entityIndex"`
							} `json:"words"`
							Confidence float64 `json:"confidence"`
						} `json:"lines"`
					} `json:"blocks"`
					Width  string `json:"width"`
					Height string `json:"height"`
				} `json:"pages"`
			} `json:"textDetection"`
		} `json:"results"`
	} `json:"results"`
}

type TextDetectionConfig

// TextDetectionConfig is the configuration embedded in a Feature. It holds the
// list of language codes, serialized under the "language_codes" key.
type TextDetectionConfig struct {
	LanguageCodes []string `json:"language_codes"`
}

type YandexOCRRequest

// YandexOCRRequest is a JSON request body made of a folder ID and a list of
// AnalyzeSpec entries.
//
// The JSON keys mix camelCase ("folderId") and snake_case ("analyze_specs").
// NOTE(review): presumably this matches what the Yandex OCR endpoint expects;
// do not normalize the tags without checking the service documentation.
type YandexOCRRequest struct {
	FolderID     string        `json:"folderId"`
	AnalyzeSpecs []AnalyzeSpec `json:"analyze_specs"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL