page

package
v0.6.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 16, 2018 License: MIT Imports: 7 Imported by: 1

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Coords added in v0.6.0

type Coords struct {
	Points []image.Point `xml:"points,attr"`
}

Coords is a polygon described by a list of points.

func (Coords) BoundingBox added in v0.6.0

func (p Coords) BoundingBox() image.Rectangle

BoundingBox returns the bounding box of the polygon.

func (*Coords) UnmarshalXML added in v0.6.0

func (c *Coords) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error

UnmarshalXML unmarshals a Coords instance.

type Glyph added in v0.6.0

type Glyph struct {
	TextRegionBase
}

Glyph is a single character in a word.

type Metadata added in v0.6.0

type Metadata map[string]string

Metadata defines the metadata of a PcGts structure as a map from string keys to string values.

func (Metadata) UnmarshalXML added in v0.6.0

func (m Metadata) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error

UnmarshalXML unmarshals the Metadata of a PcGts structure.

type OrderedGroup added in v0.6.0

type OrderedGroup struct {
	ID               string `xml:"id,attr"`
	Caption          string `xml:"caption,attr"`
	RegionRefIndexed []RegionRefIndexed
}

OrderedGroup is a collection of regions.

type Page

type Page struct {
	ImageFilename string `xml:"imageFilename,attr"`
	ImageHeight   int    `xml:"imageHeight,attr"`
	ImageWidth    int    `xml:"imageWidth,attr"`
	Type          string `xml:"type,attr"`
	PrintSpace    PrintSpace
	ReadingOrder  ReadingOrder
	TextRegion    []TextRegion
}

Page is a page in a PcGts structure.

type PcGts added in v0.6.0

type PcGts struct {
	Attributes []xml.Attr
	Metadata   Metadata `xml:"Metadata"`
	Page       Page     `xml:"page"`
}

PcGts is the top level node of page XML files.

func OpenPcGts added in v0.6.0

func OpenPcGts(path string) (*PcGts, error)

OpenPcGts reads a new page xml file from the given file path.

func ReadPcGts added in v0.6.0

func ReadPcGts(r io.Reader) (*PcGts, error)

ReadPcGts reads a new page xml file from an input stream.

func (*PcGts) UnmarshalXML added in v0.6.0

func (p *PcGts) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error

UnmarshalXML unmarshals the top-level PcGts node of page xml files.

type PrintSpace added in v0.6.0

type PrintSpace struct {
	Coords Coords
}

PrintSpace defines the print space of a page.

type ReadingOrder added in v0.6.0

type ReadingOrder struct {
	OrderedGroup []OrderedGroup
}

ReadingOrder is a collection of ordered groups.

type RegionRefIndexed added in v0.6.0

type RegionRefIndexed struct {
	Index     int    `xml:"index,attr"`
	RegionRef string `xml:"regionRef,attr"`
}

RegionRefIndexed is an indexed reference to a region.

type TextEquiv added in v0.6.0

type TextEquiv struct {
	PlainText []string
	Unicode   []string
}

TextEquiv defines the text string of text regions.

type TextLine

type TextLine struct {
	TextRegionBase
	BaseLine Coords `xml:"Baseline"`
	Word     []Word
}

TextLine is a line of text in a text region.

type TextRegion added in v0.5.0

type TextRegion struct {
	TextRegionBase
	TextLine []TextLine
}

TextRegion is a region of text (paragraph, block, ...).

type TextRegionBase added in v0.6.0

type TextRegionBase struct {
	Type      string `xml:"type,attr"`
	ID        string `xml:"id,attr"`
	Custom    string `xml:"custom,attr"`
	Coords    Coords
	TextStyle TextStyle
	TextEquiv TextEquiv
}

TextRegionBase defines the base data structure for all text regions (TextRegion, TextLine, Word, Glyph) in a page XML document.

type TextStyle added in v0.6.0

type TextStyle struct {
	FontFamaily  string  `xml:"fontFamily,attr"`
	Serif        bool    `xml:"serif,attr"`
	Monospace    bool    `xml:"monospace,attr"`
	FontSize     float32 `xml:"fontSize,attr"`
	Kerning      int     `xml:"kerning,attr"`
	TextColor    string  `xml:"textColour,attr"`
	TextColorRGB int     `xml:"textColourRgb,attr"`
}

TextStyle specifies font information of any text region.

type Word

type Word struct {
	TextRegionBase
	Glyph []Glyph
}

Word is a token in a line.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL