Documentation ¶
Index ¶
- Constants
- func SavePlan(confHansardType HansardType, workingDir string, sourcePDFPath string, ...)
- func SetupSplitPlanFixture(testDir string, fixtureDir string, scenarioDir string, sourcePDFPath string, ...) error
- func Split(t string, c string) []string
- type HansardDocument
- func (hd *HansardDocument) Finalize()
- func (hd *HansardDocument) PersistForSplit(absoluteRawDataPath string) error
- func (hd *HansardDocument) ProcessLinesExcerpt(pageNum int, linesExcerpt []string) error
- func (hd *HansardDocument) ShowQuestions()
- func (hd *HansardDocument) ShowState()
- func (hd *HansardDocument) SplitPDFByQuestions() error
- func (hd *HansardDocument) String()
- type HansardPage
- type HansardQuestion
- type HansardType
- type PDFDocument
- type PDFPage
- type QuestionStatus
- type SplitHansardDocumentPlan
Constants ¶
View Source
const (
MaxLineProcessed = 7
)
Variables ¶
This section is empty.
Functions ¶
func SavePlan ¶
func SavePlan(confHansardType HansardType, workingDir string, sourcePDFPath string, hansardDoc *HansardDocument)
func SetupSplitPlanFixture ¶
func SetupSplitPlanFixture(testDir string, fixtureDir string, scenarioDir string, sourcePDFPath string, ht HansardType) error
Helper Functions for Split Testing
Types ¶
type HansardDocument ¶
type HansardDocument struct { ParliamentSession string // Get this from the front page cover .. or the reference lookup .. HansardType HansardType HansardQuestions []HansardQuestion // contains filtered or unexported fields }
func LoadSplitHansardDocPlanFromFile ¶
func LoadSplitHansardDocPlanFromFile(confHansardType HansardType, workingDir string, sourcePDFPath string) *HansardDocument
func NewHansardDocument ¶
func NewHansardDocument(pdfPath string) (*HansardDocument, error)
func (*HansardDocument) Finalize ¶
func (hd *HansardDocument) Finalize()
Finalize cleans up all state and puts it back into the structure. TODO: Refactor this to be one clear structure.
func (*HansardDocument) PersistForSplit ¶
func (hd *HansardDocument) PersistForSplit(absoluteRawDataPath string) error
func (*HansardDocument) ProcessLinesExcerpt ¶
func (hd *HansardDocument) ProcessLinesExcerpt(pageNum int, linesExcerpt []string) error
ProcessLinesExcerpt takes the extracted excerpt and pulls out all the metadata.
func (*HansardDocument) ShowQuestions ¶
func (hd *HansardDocument) ShowQuestions()
func (*HansardDocument) ShowState ¶
func (hd *HansardDocument) ShowState()
ShowState is a debug function that dumps out the final state; it should be cleared after the run has finished.
func (*HansardDocument) SplitPDFByQuestions ¶
func (hd *HansardDocument) SplitPDFByQuestions() error
SplitPDFByQuestions writes the output to actual PDF files based on the derived data.
func (*HansardDocument) String ¶
func (hd *HansardDocument) String()
String dumps out the structure we have derived, ready to be passed to the pdfcpu split command.
type HansardPage ¶
type HansardPage struct {
// contains filtered or unexported fields
}
type HansardQuestion ¶
type HansardQuestion struct { QuestionNum string PageNumStart int PageNumEnd int // contains filtered or unexported fields }
func NewHansardQuestion ¶
func NewHansardQuestion(pageNumStart int, possibleQuestionNum string) (*HansardQuestion, error)
type PDFDocument ¶
func NewPDFDoc ¶
func NewPDFDoc(sourcePath string) (*PDFDocument, error)
type QuestionStatus ¶
type QuestionStatus int
const ( QUESTION_NOT_SEEN QuestionStatus = iota QUESTION_SEEN QUESTION_EXTRACTED )
type SplitHansardDocumentPlan ¶
type SplitHansardDocumentPlan struct {
// contains filtered or unexported fields
}
func NewSplitHansardDocumentPlan ¶
func NewSplitHansardDocumentPlan(confHansardType HansardType, workingDir string, sourcePDFPath string) *SplitHansardDocumentPlan
func (*SplitHansardDocumentPlan) ExecuteSplit ¶
func (shdp *SplitHansardDocumentPlan) ExecuteSplit(label string, hq HansardQuestion)
Click to show internal directories.
Click to hide internal directories.