pdfutils

package
v0.0.0-...-7e443b9 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 13, 2023 License: AGPL-3.0 Imports: 19 Imported by: 0

Documentation

Index

Constants

View Source
// Annotation kind identifiers.
//
// All seven are untyped string constants. The original group gave only
// Highlight an explicit string type; a type in a const spec does not carry
// over to following specs that have their own expression, so the remaining
// names were already untyped. Declaring them uniformly keeps every existing
// use compiling (the default type is still string).
//
// NOTE(review): presumably these are the values returned by GetAnnotationType
// and stored in Annotation.Type — confirm against the implementation.
const (
	Highlight   = "highlight"
	Strike      = "strike"
	Underline   = "underline"
	Text        = "text"
	Rectangle   = "rectangle"
	Image       = "image"
	Unsupported = "unsupported"
)

Variables

This section is empty.

Functions

func ApplyPageRotation

func ApplyPageRotation(page *model.PdfPage, rect []float64) []float64

func CheckForTesseract

func CheckForTesseract(path string) bool

func CondenseSpaces

func CondenseSpaces(str string) string

func CropImage

func CropImage(img *image.Image, crop image.Rectangle) (image.Image, error)

func DeHyphen

func DeHyphen(str string) string

func ExpandLigatures

func ExpandLigatures(str string) string

func GetAnnotationColor

func GetAnnotationColor(annotation *model.PdfAnnotation) string

func GetAnnotationColorCategory

func GetAnnotationColorCategory(annotation *model.PdfAnnotation) string

func GetAnnotationDate

func GetAnnotationDate(annot *model.PdfAnnotation) *time.Time

func GetAnnotationID

func GetAnnotationID(ids map[string]bool, pageIndex int, x float64, y float64, annotType string) string

func GetAnnotationRects

func GetAnnotationRects(page *model.PdfPage, annotation *model.PdfAnnotation) []r2.Rect

func GetAnnotationSortKey

func GetAnnotationSortKey(page int, offset int, top int) string

func GetAnnotationType

func GetAnnotationType(t interface{}) string

func GetBoundsFromAnnotMarks

func GetBoundsFromAnnotMarks(annotRect r2.Rect, markRects []r2.Rect) (r2.Rect, int)

func GetClosestMark

func GetClosestMark(x float64, y float64, markRects []r2.Rect) int

func GetCoordinates

func GetCoordinates(annotation *model.PdfAnnotation) (float64, float64)

func GetFallbackText

func GetFallbackText(text string, annotRect r2.Rect, markRects []r2.Rect, marks []extractor.TextMark) string

func GetMarkRect

func GetMarkRect(mark extractor.TextMark) r2.Rect

func GetMediaBox

func GetMediaBox(page *model.PdfPage) *model.PdfRectangle

func GetQuadPoint

func GetQuadPoint(annotation *model.PdfAnnotation) *core.PdfObjectArray

func GetTextByAnnotBounds

func GetTextByAnnotBounds(fitzDoc *fitz.Document, pageIndex int, page *model.PdfPage, bounds r2.Rect) (string, error)

func HandleImageOCR

func HandleImageOCR(
	page *model.PdfPage,
	ocrImg *image.Image,
	annotRect []float64,
	tessPath string,
	lang string,
	dataDir string,
) string

func IsWithinOverlapThresh

func IsWithinOverlapThresh(annot r2.Rect, mark r2.Rect, thresh float64) bool

func OCRImage

func OCRImage(img image.Image, tessPath, lang, dataDir string) (string, error)

func PDFObjToColorCategory

func PDFObjToColorCategory(c core.PdfObject) string

func PDFObjToHex

func PDFObjToHex(c core.PdfObject) string

func RemoveNul

func RemoveNul(str string) string

func ShouldUseFallback

func ShouldUseFallback(str string, fallback string) bool

func ValidateLang

func ValidateLang(tessPath, code string) bool

func WriteImage

func WriteImage(img *image.Image, name string, format string, quality int) error

Types

type Annotation

// Annotation is a flat, JSON-taggable record describing one annotation.
// HandleImageAnnot returns a pointer to this type.
//
// JSON encoding, as determined by the struct tags:
//   - AnnotatedText, Color, ColorCategory, Comment, Date, ImagePath and
//     OCRText carry omitempty and are left out when they are empty strings.
//   - ID, Page, PageLabel, Type, X and Y are always emitted, even when zero.
//   - SortIndex is tagged "-" and is never serialized.
//
// NOTE(review): the numbering base of Page, the layout of the Date string and
// the coordinate space of X/Y are not visible from this declaration — confirm
// against the code that populates the struct.
type Annotation struct {
	AnnotatedText string  `json:"annotatedText,omitempty"`
	Color         string  `json:"color,omitempty"`
	ColorCategory string  `json:"colorCategory,omitempty"`
	Comment       string  `json:"comment,omitempty"`
	Date          string  `json:"date,omitempty"`
	ID            string  `json:"id"`
	ImagePath     string  `json:"imagePath,omitempty"`
	OCRText       string  `json:"ocrText,omitempty"`
	Page          int     `json:"page"`
	PageLabel     string  `json:"pageLabel"`
	Type          string  `json:"type"`
	X             float64 `json:"x"`
	Y             float64 `json:"y"`
	// SortIndex is excluded from JSON output; presumably the key BySortIndex
	// orders by — confirm.
	SortIndex     string  `json:"-"`
}

func HandleImageAnnot

func HandleImageAnnot(args ImageAnnotArgs) (*Annotation, error)

type BySortIndex

// BySortIndex is a slice of *Annotation that declares Len, Less and Swap
// methods, i.e. the method set required by sort.Interface.
// NOTE(review): the name suggests Less compares Annotation.SortIndex, but the
// method body is not shown here — confirm.
type BySortIndex []*Annotation

func (BySortIndex) Len

func (a BySortIndex) Len() int

func (BySortIndex) Less

func (a BySortIndex) Less(i, j int) bool

func (BySortIndex) Swap

func (a BySortIndex) Swap(i, j int)

type ImageAnnotArgs

// ImageAnnotArgs bundles the inputs of HandleImageAnnot, which takes it by
// value as its only parameter.
//
// NOTE(review): the field roles below are inferred from names and from the
// sibling signatures (WriteImage, HandleImageOCR, OCRImage); confirm against
// the implementation.
type ImageAnnotArgs struct {
	Page            *model.PdfPage
	// PageImg and OCRImg are pointers to the image.Image interface, matching
	// the *image.Image parameters of CropImage, WriteImage and HandleImageOCR.
	PageImg         *image.Image
	OCRImg          *image.Image
	PageIndex       int
	Annotation      *model.PdfAnnotation
	X               float64
	Y               float64
	ID              string
	// Write and AttemptOCR are presumably toggles for writing the image to
	// disk and for running OCR — confirm.
	Write           bool
	AttemptOCR      bool
	// Image* presumably describe where and how the image is written
	// (compare WriteImage's name, format and quality parameters) — confirm.
	ImageOutputPath string
	ImageBaseName   string
	ImageFormat     string
	ImageQuality    int
	// Tess* presumably map to the tessPath, lang and dataDir parameters of
	// HandleImageOCR / OCRImage — confirm.
	TessPath        string
	TessLang        string
	TessDataDir     string
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL