fulltext

package
v0.0.0-...-7b63a7d Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 10, 2024 License: Apache-2.0 Imports: 9 Imported by: 1

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func GetFulltext

func GetFulltext(bucket string, key string, format string, ftype string, context string) (string, error)

GetFulltext ...

Types

type PAGEXML1

// PAGEXML1 binds the root <PcGts> element together with its <Metadata> and
// <Page> children. The package documentation associates this layout with "gdz".
type PAGEXML1 struct {
	XMLName  xml.Name         `xml:"PcGts"`
	Metadata PAGEXML1Metadata `xml:"Metadata"`
	Page     PAGEXML1Page     `xml:"Page"` // array?
}

PAGEXML1 ... gdz

type PAGEXML1Baseline

// PAGEXML1Baseline binds a <Baseline> element; its "points" attribute is kept
// as the raw, unparsed string.
type PAGEXML1Baseline struct {
	XMLName xml.Name `xml:"Baseline"`
	Points  string   `xml:"points,attr"`
}

PAGEXML1Baseline ..

type PAGEXML1Border

// PAGEXML1Border binds a <Border> element and its <Coords> child.
type PAGEXML1Border struct {
	XMLName xml.Name       `xml:"Border"`
	Coords  PAGEXML1Coords `xml:"Coords"` // array?
}

PAGEXML1Border ..

type PAGEXML1Coords

// PAGEXML1Coords binds a <Coords> element; its "points" attribute is kept as
// the raw, unparsed string.
type PAGEXML1Coords struct {
	XMLName xml.Name `xml:"Coords"`
	Points  string   `xml:"points,attr"`
}

PAGEXML1Coords ..

type PAGEXML1Metadata

// PAGEXML1Metadata binds the <Metadata> element. Creator, Created and
// LastChange are read from child elements of the same names and stored as
// plain strings (timestamps are not parsed).
type PAGEXML1Metadata struct {
	XMLName    xml.Name `xml:"Metadata"`
	Creator    string   `xml:"Creator"`
	Created    string   `xml:"Created"`
	LastChange string   `xml:"LastChange"`
}

PAGEXML1Metadata ..

type PAGEXML1OrderedGroup

// PAGEXML1OrderedGroup binds an <OrderedGroup> element: its "id" and
// "caption" attributes plus every <RegionRefIndexed> child.
type PAGEXML1OrderedGroup struct {
	XMLName xml.Name `xml:"OrderedGroup"`
	ID      string   `xml:"id,attr"`
	Caption string   `xml:"caption,attr"`

	RegionRefIndexed []PAGEXML1RegionRefIndexed `xml:"RegionRefIndexed"` // array?
}

PAGEXML1OrderedGroup ..

type PAGEXML1Page

// PAGEXML1Page binds a <Page> element. The image attributes (filename, size,
// resolution, type) are kept as raw strings; Border, ReadingOrder, Relations
// and TextRegion are read from child elements of the same names.
type PAGEXML1Page struct {
	XMLName          xml.Name `xml:"Page"`
	ImageFilename    string   `xml:"imageFilename,attr"`
	ImageWidth       string   `xml:"imageWidth,attr"`
	ImageHeight      string   `xml:"imageHeight,attr"`
	ImageXResolution string   `xml:"imageXResolution,attr"`
	ImageYResolution string   `xml:"imageYResolution,attr"`
	Type             string   `xml:"type,attr"`

	Border       PAGEXML1Border       `xml:"Border"`
	ReadingOrder PAGEXML1ReadingOrder `xml:"ReadingOrder"`
	Relations    PAGEXML1Relations    `xml:"Relations"`
	// NOTE(review): TextRegion is a single value, not a slice. If a page can
	// carry several <TextRegion> elements they all decode into this one
	// struct — confirm whether a slice is needed.
	TextRegion   PAGEXML1TextRegion   `xml:"TextRegion"`
}

PAGEXML1Page ..

type PAGEXML1Paragraph

// PAGEXML1Paragraph binds a <p> element with an "id" attribute and a list of
// words.
//
// NOTE(review): Words is tagged "w", but PAGEXML1Word declares its element
// name as "Word". encoding/xml rejects a <w> child when decoding into
// PAGEXML1Word, and <Word> children do not match this field's tag, so Words
// looks like it can never be populated. Confirm the intended element names.
type PAGEXML1Paragraph struct {
	XMLName xml.Name       `xml:"p"`
	ID      string         `xml:"id,attr"`
	Words   []PAGEXML1Word `xml:"w"`
}

PAGEXML1Paragraph ..

type PAGEXML1ReadingOrder

// PAGEXML1ReadingOrder binds the <ReadingOrder> element and its
// <OrderedGroup> child.
type PAGEXML1ReadingOrder struct {
	XMLName xml.Name `xml:"ReadingOrder"`

	OrderedGroup PAGEXML1OrderedGroup `xml:"OrderedGroup"` // array?
}

PAGEXML1ReadingOrder ..

type PAGEXML1RegionRefIndexed

// PAGEXML1RegionRefIndexed binds a <RegionRefIndexed> element; the "index"
// and "regionRef" attributes are kept as raw strings (index is not parsed to
// an integer).
type PAGEXML1RegionRefIndexed struct {
	XMLName   xml.Name `xml:"RegionRefIndexed"`
	Index     string   `xml:"index,attr"`
	RegionRef string   `xml:"regionRef,attr"`
}

PAGEXML1RegionRefIndexed ..

type PAGEXML1Relation

// PAGEXML1Relation binds a <Relation> element: its "id" and "type"
// attributes plus all <SourceRegionRef> and <TargetRegionRef> children.
type PAGEXML1Relation struct {
	XMLName xml.Name `xml:"Relation"`
	ID      string   `xml:"id,attr"`
	Type    string   `xml:"type,attr"`

	SourceRegionRef []PAGEXML1SourceRegionRef `xml:"SourceRegionRef"`
	TargetRegionRef []PAGEXML1TargetRegionRef `xml:"TargetRegionRef"`
}

PAGEXML1Relation ..

type PAGEXML1Relations

// PAGEXML1Relations binds the <Relations> element and its <Relation> child.
type PAGEXML1Relations struct {
	XMLName xml.Name `xml:"Relations"`

	Relation PAGEXML1Relation `xml:"Relation"` // array?
}

PAGEXML1Relations ..

type PAGEXML1SourceRegionRef

// PAGEXML1SourceRegionRef binds a <SourceRegionRef> element and its
// "regionRef" attribute.
type PAGEXML1SourceRegionRef struct {
	XMLName   xml.Name `xml:"SourceRegionRef"`
	RegionRef string   `xml:"regionRef,attr"`
}

PAGEXML1SourceRegionRef ..

type PAGEXML1TargetRegionRef

// PAGEXML1TargetRegionRef binds a <TargetRegionRef> element and its
// "regionRef" attribute.
type PAGEXML1TargetRegionRef struct {
	XMLName   xml.Name `xml:"TargetRegionRef"`
	RegionRef string   `xml:"regionRef,attr"`
}

PAGEXML1TargetRegionRef ..

type PAGEXML1TextEquiv

// PAGEXML1TextEquiv binds a <TextEquiv> element and its <Unicode> child,
// which carries the text content.
type PAGEXML1TextEquiv struct {
	XMLName xml.Name `xml:"TextEquiv"`

	Unicode PAGEXML1Unicode `xml:"Unicode"` // array?
}

PAGEXML1TextEquiv ..

type PAGEXML1TextLine

// PAGEXML1TextLine binds a <TextLine> element: its "id" and
// "primaryLanguage" attributes plus the <Coords>, <TextEquiv>, <Baseline>
// and <Word> children.
type PAGEXML1TextLine struct {
	XMLName         xml.Name `xml:"TextLine"`
	ID              string   `xml:"id,attr"`
	PrimaryLanguage string   `xml:"primaryLanguage,attr"`

	Coords    PAGEXML1Coords    `xml:"Coords"`    // array?
	TextEquiv PAGEXML1TextEquiv `xml:"TextEquiv"` // array?
	Baseline  PAGEXML1Baseline  `xml:"Baseline"`  // array?

	// NOTE(review): Word is a single value, not a slice; a line containing
	// several <Word> elements decodes them all into this one struct —
	// confirm whether a slice is needed.
	Word PAGEXML1Word `xml:"Word"` // array?
}

PAGEXML1TextLine ..

index this Coords + TextEquiv to index lines

type PAGEXML1TextRegion

// PAGEXML1TextRegion binds a <TextRegion> element: its "id", "type" and
// language attributes, one <Coords> child, and every <TextLine> and
// <TextEquiv> child.
type PAGEXML1TextRegion struct {
	XMLName           xml.Name `xml:"TextRegion"`
	ID                string   `xml:"id,attr"`
	Type              string   `xml:"type,attr"`
	PrimaryLanguage   string   `xml:"primaryLanguage,attr"`
	SecondaryLanguage string   `xml:"secondaryLanguage,attr"`

	Coords    PAGEXML1Coords      `xml:"Coords"`    // array?
	TextLine  []PAGEXML1TextLine  `xml:"TextLine"`  // array?
	TextEquiv []PAGEXML1TextEquiv `xml:"TextEquiv"` // array?
}

PAGEXML1TextRegion ..

type PAGEXML1Unicode

// PAGEXML1Unicode binds a <Unicode> element; Value receives the element's
// character data.
type PAGEXML1Unicode struct {
	XMLName xml.Name `xml:"Unicode"`

	Value string `xml:",chardata"` // array?
}

PAGEXML1Unicode ..

type PAGEXML1Word

// PAGEXML1Word binds a <Word> element: its "id" and "language" attributes
// plus the <Coords> and <TextEquiv> children.
type PAGEXML1Word struct {
	XMLName  xml.Name `xml:"Word"`
	ID       string   `xml:"id,attr"`
	Language string   `xml:"language,attr"`

	Coords    PAGEXML1Coords    `xml:"Coords"`    // array?
	TextEquiv PAGEXML1TextEquiv `xml:"TextEquiv"` // array?
}

PAGEXML1Word ..

index this Coords + TextEquiv to index words

type TEI2

// TEI2 binds the root <TEI.2> element with its <teiHeader> and <text>
// children. The package documentation associates it with "nlh-eai1" and
// "nlh-eai2".
type TEI2 struct {
	XMLName   xml.Name   `xml:"TEI.2"`
	TEIHeader TEI2Header `xml:"teiHeader"`
	Text      TEI2Text   `xml:"text"`
}

TEI2 ... nlh-eai1, nlh-eai2

type TEI2Body

// TEI2Body binds a <body> element and collects all of its <p> children.
type TEI2Body struct {
	XMLName    xml.Name        `xml:"body"`
	Paragraphs []TEI2Paragraph `xml:"p"`
}

TEI2Body ..

type TEI2Header

// TEI2Header binds the <teiHeader> element. No header content is mapped;
// only the element name is recorded.
type TEI2Header struct {
	XMLName xml.Name `xml:"teiHeader"`
}

TEI2Header ..

type TEI2Paragraph

// TEI2Paragraph binds a <p> element: its "id" attribute and all <w> children.
type TEI2Paragraph struct {
	XMLName xml.Name   `xml:"p"`
	ID      string     `xml:"id,attr"`
	Words   []TEI2Word `xml:"w"`
}

TEI2Paragraph ..

type TEI2Text

// TEI2Text binds the <text> element and its <body> child.
type TEI2Text struct {
	XMLName xml.Name `xml:"text"`
	Body    TEI2Body `xml:"body"`
}

TEI2Text ..

type TEI2Word

// TEI2Word binds a <w> element: its "style" attribute and its character
// data (the word text).
type TEI2Word struct {
	XMLName xml.Name `xml:"w"`
	Style   string   `xml:"style,attr"`
	Value   string   `xml:",chardata"`
}

TEI2Word ..

type TEI2a

// TEI2a binds the root <TEI.2> element with its <teiHeader> and <text>
// children, using the TEI2a* types (whose words carry a "function" attribute
// instead of "style"). The package documentation associates it with "gdz".
type TEI2a struct {
	XMLName   xml.Name    `xml:"TEI.2"`
	TEIHeader TEI2aHeader `xml:"teiHeader"`
	Text      TEI2aText   `xml:"text"`
}

TEI2a ... gdz

type TEI2aBody

// TEI2aBody binds a <body> element and collects all of its <p> children.
type TEI2aBody struct {
	XMLName    xml.Name         `xml:"body"`
	Paragraphs []TEI2aParagraph `xml:"p"`
}

TEI2aBody ..

type TEI2aHeader

// TEI2aHeader binds the <teiHeader> element. No header content is mapped;
// only the element name is recorded.
type TEI2aHeader struct {
	XMLName xml.Name `xml:"teiHeader"`
}

TEI2aHeader ..

type TEI2aParagraph

// TEI2aParagraph binds a <p> element: its "id" attribute and all <w>
// children.
type TEI2aParagraph struct {
	XMLName xml.Name    `xml:"p"`
	ID      string      `xml:"id,attr"`
	Words   []TEI2aWord `xml:"w"`
}

TEI2aParagraph ..

type TEI2aText

// TEI2aText binds the <text> element and its <body> child.
type TEI2aText struct {
	XMLName xml.Name  `xml:"text"`
	Body    TEI2aBody `xml:"body"`
}

TEI2aText ..

type TEI2aWord

// TEI2aWord binds a <w> element: its "function" attribute and its character
// data (the word text).
type TEI2aWord struct {
	XMLName  xml.Name `xml:"w"`
	Function string   `xml:"function,attr"`
	Value    string   `xml:",chardata"`
}

TEI2aWord ..

type TXT1

// TXT1 binds the root <articles> element and collects all of its <artInfo>
// children.
type TXT1 struct {
	XMLName  xml.Name          `xml:"articles"`
	Articles []TXT1ArticleInfo `xml:"artInfo"`
}

TXT1 ... nlh-tda1, nlh-tda2

type TXT1ArticleInfo

// TXT1ArticleInfo binds an <artInfo> element: its "id" attribute, the text
// of its <ProductLink> child, and its <ocrText> child.
type TXT1ArticleInfo struct {
	XMLName     xml.Name    `xml:"artInfo"`
	ID          string      `xml:"id,attr"`
	ProductLink string      `xml:"ProductLink"`
	OCRText     TXT1OCRText `xml:"ocrText"`
}

TXT1ArticleInfo ...

type TXT1OCRText

// TXT1OCRText binds an <ocrText> element; Value receives its character data.
type TXT1OCRText struct {
	XMLName xml.Name `xml:"ocrText"`
	Value   string   `xml:",chardata"`
}

TXT1OCRText ...

type TXT2

// TXT2 binds the root <page> element with its <pageInfo> and <pageContent>
// children. The package documentation associates it with "nlh-ecc".
type TXT2 struct {
	XMLName   xml.Name   `xml:"page"`
	TXTHeader TXT2Header `xml:"pageInfo"`
	Text      TXT2Text   `xml:"pageContent"`
}

TXT2 ... nlh-ecc

type TXT2Header

// TXT2Header binds the <pageInfo> element. Every field is read from a child
// element of the tagged name and stored as a plain string.
type TXT2Header struct {
	XMLName    xml.Name `xml:"pageInfo"`
	PageID     string   `xml:"pageID"`
	RecordID   string   `xml:"recordID"`
	SourcePage string   `xml:"sourcePage"`
	OCR        string   `xml:"ocr"`
	ImageLink  string   `xml:"imageLink"`
}

TXT2Header ...

type TXT2Paragraph

// TXT2Paragraph binds a <p> element: its "id" attribute and all of its <wd>
// word children.
//
// The Words tag must name the same element as TXT2Word.XMLName ("wd").
// With a differing tag, encoding/xml either rejects the child (element name
// does not match XMLName) or never routes it to this field, so Words would
// stay empty.
type TXT2Paragraph struct {
	XMLName xml.Name   `xml:"p"`
	ID      string     `xml:"id,attr"`
	Words   []TXT2Word `xml:"wd"`
}

TXT2Paragraph ...

type TXT2Text

// TXT2Text binds the <pageContent> element and collects all of its <p>
// children.
type TXT2Text struct {
	XMLName    xml.Name        `xml:"pageContent"`
	Paragraphs []TXT2Paragraph `xml:"p"`
}

TXT2Text ...

type TXT2Word

// TXT2Word binds a <wd> element: its "pos" attribute (kept as a raw string)
// and its character data (the word text).
type TXT2Word struct {
	XMLName xml.Name `xml:"wd"`
	Pos     string   `xml:"pos,attr"`
	Value   string   `xml:",chardata"`
}

TXT2Word ...

type TXT3

// TXT3 binds the root <page> element: the text of its <pageid> child and all
// <article> children. The package documentation associates it with "nlh-tls".
type TXT3 struct {
	XMLName  xml.Name      `xml:"page"`
	PageID   string        `xml:"pageid"`
	Articles []TXT3Article `xml:"article"`
}

TXT3 ... nlh-tls

type TXT3Article

// TXT3Article binds an <article> element: its "type" attribute, the text of
// its <id> child element (the tag has no ",attr" flag), and its <text> child.
type TXT3Article struct {
	XMLName xml.Name `xml:"article"`
	Type    string   `xml:"type,attr"`
	ID      string   `xml:"id"`
	Text    TXT3Text `xml:"text"`
}

TXT3Article ...

type TXT3Block

// TXT3Block holds the content of a text block: one <pg> child and all <p>
// children. It declares no XMLName, so the element name comes from the
// field tag of whichever struct embeds it.
type TXT3Block struct {
	Pg TXT3PG  `xml:"pg"`
	P  []TXT3P `xml:"p"`
}

TXT3Block ...

type TXT3P

// TXT3P binds a <p> element: its "type" attribute and all <wd> children.
type TXT3P struct {
	XMLName xml.Name `xml:"p"`
	Type    string   `xml:"type,attr"`
	WDs     []TXT3WD `xml:"wd"`
}

TXT3P ...

type TXT3PG

// TXT3PG binds a <pg> element; its "pgref", "clipref" and "pos" attributes
// are kept as raw strings.
type TXT3PG struct {
	XMLName xml.Name `xml:"pg"`
	Pgref   string   `xml:"pgref,attr"`
	Clipref string   `xml:"clipref,attr"`
	Pos     string   `xml:"pos,attr"`
}

TXT3PG ...

type TXT3Text

// TXT3Text binds a <text> element: one <text.title> block and all <text.cr>
// blocks.
type TXT3Text struct {
	XMLName    xml.Name    `xml:"text"`
	TitleBlock TXT3Block   `xml:"text.title"`
	TextBlocks []TXT3Block `xml:"text.cr"`
}

TXT3Text ...

type TXT3WD

// TXT3WD binds a <wd> element: its "pos" attribute (kept as a raw string)
// and its character data (the word text).
type TXT3WD struct {
	XMLName xml.Name `xml:"wd"`
	Pos     string   `xml:"pos,attr"`
	Value   string   `xml:",chardata"`
}

TXT3WD ...

type TXT3a

// TXT3a binds the root <page> element: the text of its <page-id> and
// <issue-id> children and all <text> children. The package documentation
// associates it with "nlh-ncn", "nlh-bcn" and "nlh-bln".
type TXT3a struct {
	XMLName xml.Name    `xml:"page"`
	PageID  string      `xml:"page-id"`
	IssueID string      `xml:"issue-id"`
	Texts   []TXT3aText `xml:"text"`
}

TXT3a ... nlh-ncn, nlh-bcn, nlh-bln

type TXT3aBlock

// TXT3aBlock holds the content of a text block: one <pg> child and all <p>
// children. It declares no XMLName, so the element name comes from the
// field tag of whichever struct embeds it.
type TXT3aBlock struct {
	Pg TXT3aPG  `xml:"pg"`
	P  []TXT3aP `xml:"p"`
}

TXT3aBlock ...

type TXT3aP

// TXT3aP binds a <p> element: its "type" attribute and all <wd> children.
//
// NOTE(review): WDs is typed []TXT3WD rather than []TXT3aWD. Both types
// declare the same element name and fields, so decoding is unaffected, but
// the mix looks unintentional — confirm before relying on TXT3aWD.
type TXT3aP struct {
	XMLName xml.Name `xml:"p"`
	Type    string   `xml:"type,attr"`
	WDs     []TXT3WD `xml:"wd"`
}

TXT3aP ...

type TXT3aPG

// TXT3aPG binds a <pg> element; its "pgref", "clipref" and "pos" attributes
// are kept as raw strings.
type TXT3aPG struct {
	XMLName xml.Name `xml:"pg"`
	Pgref   string   `xml:"pgref,attr"`
	Clipref string   `xml:"clipref,attr"`
	Pos     string   `xml:"pos,attr"`
}

TXT3aPG ...

type TXT3aText

// TXT3aText binds a <text> element: one <text.title> block and all
// <text.cr> blocks.
type TXT3aText struct {
	XMLName    xml.Name     `xml:"text"`
	TitleBlock TXT3aBlock   `xml:"text.title"`
	TextBlocks []TXT3aBlock `xml:"text.cr"`
}

TXT3aText ...

type TXT3aWD

// TXT3aWD binds a <wd> element: its "pos" attribute (kept as a raw string)
// and its character data (the word text).
type TXT3aWD struct {
	XMLName xml.Name `xml:"wd"`
	Pos     string   `xml:"pos,attr"`
	Value   string   `xml:",chardata"`
}

TXT3aWD ...

type TXT3b

// TXT3b binds the root <page> element for documents whose <text.cr> blocks
// sit directly under <page> (no intermediate <text> element): the text of
// the <page-id> and <issue-id> children plus all <text.cr> blocks. The
// package documentation associates it with "nlh-bln".
type TXT3b struct {
	XMLName    xml.Name     `xml:"page"`
	PageID     string       `xml:"page-id"`
	IssueID    string       `xml:"issue-id"`
	TextBlocks []TXT3aBlock `xml:"text.cr"`
}

TXT3b ... nlh-bln

type TXT4

// TXT4 binds the root <articles> element and collects all of its <text>
// children, reusing TXT3Text. The package documentation associates it with
// "nlh-ahn".
type TXT4 struct {
	XMLName xml.Name   `xml:"articles"`
	Texts   []TXT3Text `xml:"text"`
}

TXT4 ... nlh-ahn

type TXT5

// TXT5 binds the root <IMAGE> element: its "NAME" attribute and all <WD>
// children (element and attribute names are upper-case in this layout).
// The package documentation associates it with "nlh-nid".
type TXT5 struct {
	XMLName xml.Name `xml:"IMAGE"`
	Name    string   `xml:"NAME,attr"`
	WDs     []TXT5WD `xml:"WD"`
}

TXT5 ... nlh-nid

type TXT5WD

// TXT5WD binds a <WD> element: its "POS" attribute (kept as a raw string)
// and its character data (the word text).
type TXT5WD struct {
	XMLName xml.Name `xml:"WD"`
	Pos     string   `xml:"POS,attr"`
	Value   string   `xml:",chardata"`
}

TXT5WD ...

type Word

// Word captures the character data of whatever element it is decoded from;
// it declares no XMLName, so it does not constrain the element name.
type Word struct {
	Content string `xml:",chardata"`
}

Word ...

type XML

// XML binds the root <TEI.2> element and collects, as plain strings, the
// <p> elements found along the nested path text > body > p.
type XML struct {
	XMLName xml.Name `xml:"TEI.2"`
	Word    []string `xml:"text>body>p"` // `xml:"mdWrap>xmlData>mods>name"`
}

XML fulltext

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL