filter

package
v0.0.0-...-a75fe09 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 14, 2022 License: GPL-3.0 Imports: 12 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Condition

type Condition struct {
	// contains filtered or unexported fields
}

Condition holds an optional key and value regex and determines if a value passes its requirements or not.

func NewCondition

func NewCondition(keyExpr *string, valueExpr *string) *Condition

func (*Condition) MatchOne

func (c *Condition) MatchOne(key *string, value any) bool

type ConditionInterpreter

type ConditionInterpreter interface {
	Interpret(data any) bool
}

ConditionInterpreter functions as a Mediator for Condition, allowing the data to be typed and matched.

type Criteria

type Criteria struct {
	Extractor Extractor

	Depth  int
	Parent *Criteria
	Child  *Criteria
	// contains filtered or unexported fields
}

Criteria defines if a set of data passes its requirements or not, and can optionally extract the matched data using Extractor.

func NewCriteria

func NewCriteria(extractor Extractor, interpreters ...ConditionInterpreter) *Criteria

func (*Criteria) Clone

func (c *Criteria) Clone() *Criteria

func (*Criteria) Match

func (c *Criteria) Match(data any) bool

func (*Criteria) Next

func (c *Criteria) Next() *Criteria

func (*Criteria) Previous

func (c *Criteria) Previous() *Criteria

type CriteriaBuilder

type CriteriaBuilder struct {
	// contains filtered or unexported fields
}

CriteriaBuilder simplifies the adding of children to a Criteria.

func NewCriteriaBuilder

func NewCriteriaBuilder(c *Criteria) *CriteriaBuilder

func (*CriteriaBuilder) AddChild

func (cb *CriteriaBuilder) AddChild(child *Criteria) *CriteriaBuilder

func (*CriteriaBuilder) Build

func (cb *CriteriaBuilder) Build() *Criteria

type Extractor

type Extractor interface {
	Extract(data any) map[string]any
}

Extractor attempts to map, organise and extract data and return it.

type Filter

type Filter interface {
	attribute.Taggable
	Clone() Filter
	Filter(s string) map[string]any
}

Filter defines the ability to filter a type of data and potentially extract any desired results using Criteria.

type HtmlAttributeExtractor

type HtmlAttributeExtractor struct {
	Clean *func(data map[string]any) map[string]any
	// contains filtered or unexported fields
}

HtmlAttributeExtractor implements Extractor and extracts attributes from the given *html.Token. An optional Clean function can be provided which is run on the results found before they are returned.

func NewHtmlAttributeExtractor

func NewHtmlAttributeExtractor(keyExpr string, clean ...func(map[string]any) map[string]any) *HtmlAttributeExtractor

func (*HtmlAttributeExtractor) Extract

func (hae *HtmlAttributeExtractor) Extract(data any) map[string]any

type HtmlFilter

type HtmlFilter struct {
	*Tracker
}

HtmlFilter implements Filter and iterates over HTML using HtmlTokenIterator. As it walks through the HTML document it searches for any matching Criteria, and extracts any found data if the Criteria has an Extractor.

func NewHtmlFilter

func NewHtmlFilter(criteria ...*Criteria) *HtmlFilter

func (*HtmlFilter) Clone

func (hf *HtmlFilter) Clone() Filter

func (*HtmlFilter) Filter

func (hf *HtmlFilter) Filter(s string) map[string]any

Filter iterates over all tags within the given HTML, and applies Criteria for every found start tag. Any fully matched Criteria that have an Extractor will extract data from the matched tag and return it once the filter is finished.

type HtmlTextExtractor

type HtmlTextExtractor struct {
	Clean *func(data map[string]any) map[string]any
	// contains filtered or unexported fields
}

HtmlTextExtractor implements Extractor and extracts text from the *html.Token. An optional Clean function can be provided which is run on the results found before they are returned.

func NewHtmlTextExtractor

func NewHtmlTextExtractor(id string, clean ...func(data map[string]any) map[string]any) *HtmlTextExtractor

func (*HtmlTextExtractor) Extract

func (hte *HtmlTextExtractor) Extract(data any) map[string]any

type HtmlTokenAttributeInterpreter

type HtmlTokenAttributeInterpreter struct {
	// contains filtered or unexported fields
}

HtmlTokenAttributeInterpreter implements ConditionInterpreter and allows an instance of *html.Token to be parsed, and checks if any matching attributes are found.

func NewHtmlTokenAttributeInterpreter

func NewHtmlTokenAttributeInterpreter(keyExpr string, valueExpr string) *HtmlTokenAttributeInterpreter

func (*HtmlTokenAttributeInterpreter) Interpret

func (htai *HtmlTokenAttributeInterpreter) Interpret(data any) bool

Interpret implements ConditionInterpreter.Interpret.

type HtmlTokenIterator

type HtmlTokenIterator struct {
	// contains filtered or unexported fields
}

HtmlTokenIterator walks through a given HTML document using html.Tokenizer. The depth of the HTML document is also tracked and is available through Depth().

func (*HtmlTokenIterator) Depth

func (ti *HtmlTokenIterator) Depth() int

func (*HtmlTokenIterator) Next

func (ti *HtmlTokenIterator) Next() html.TokenType

func (*HtmlTokenIterator) Token

func (ti *HtmlTokenIterator) Token() html.Token

type HtmlTokenTagInterpreter

type HtmlTokenTagInterpreter struct {
	// contains filtered or unexported fields
}

HtmlTokenTagInterpreter implements ConditionInterpreter and allows an instance of *html.Token to be parsed, and checks if its tag matches or not.

func NewHtmlTokenTagInterpreter

func NewHtmlTokenTagInterpreter(expr string) *HtmlTokenTagInterpreter

func (*HtmlTokenTagInterpreter) Interpret

func (htti *HtmlTokenTagInterpreter) Interpret(data any) bool

Interpret implements ConditionInterpreter.Interpret.

type JsonFilter

type JsonFilter struct {
	*Tracker
}

JsonFilter implements Filter and iterates over the given JSON. As it walks over the given JSON it uses Criteria to search for any matching data. These matches are optionally extracted using Extractor and returned once the Filter has finished running.

func NewJsonFilter

func NewJsonFilter(criteria ...*Criteria) *JsonFilter

func (*JsonFilter) Clone

func (jf *JsonFilter) Clone() Filter

func (*JsonFilter) Filter

func (jf *JsonFilter) Filter(s string) map[string]any

func (*JsonFilter) Walk

func (jf *JsonFilter) Walk(js map[string]any, data map[string]any, depths ...int)

type KeyValueExtractor

type KeyValueExtractor struct {
	Clean *func(data map[string]any) map[string]any
	// contains filtered or unexported fields
}

KeyValueExtractor implements Extractor and extracts data based on the provided key and value regexes. An optional Clean function can be provided which is run on the results found before they are returned.

func NewKeyValueExtractor

func NewKeyValueExtractor(keyExpr string, valueExpr string, clean ...func(data map[string]any) map[string]any) *KeyValueExtractor

func (*KeyValueExtractor) Extract

func (kve *KeyValueExtractor) Extract(data any) map[string]any

type KeyValueInterpreter

type KeyValueInterpreter struct {
	// contains filtered or unexported fields
}

KeyValueInterpreter implements ConditionInterpreter and allows a key value pair in the format map[string]any to be parsed.

func NewKeyValueInterpreter

func NewKeyValueInterpreter(keyExpr string, valueExpr string) *KeyValueInterpreter

func (*KeyValueInterpreter) Interpret

func (kvi *KeyValueInterpreter) Interpret(data any) bool

Interpret implements ConditionInterpreter.Interpret.

type Manager

type Manager struct {
	// contains filtered or unexported fields
}

Manager oversees all Filter instances and manages the workers that run them using supervisor.Supervisor.

func NewManager

func NewManager(db *database.Db) *Manager

func (*Manager) RegisterFilter

func (m *Manager) RegisterFilter(f Filter)

func (*Manager) RegisterFilters

func (m *Manager) RegisterFilters(filters []Filter)

func (*Manager) Start

func (m *Manager) Start(amountOfWorkers int)

Start starts an amount of Filter workers based on the amountOfWorkers parameter. All data in the "scraped_data" table will be queued to be filtered.

type Tracker

type Tracker struct {
	*attribute.Tag
	// contains filtered or unexported fields
}

Tracker is used to track and manage a Filter's Criteria.

func NewFilterTracker

func NewFilterTracker(criteria []*Criteria) *Tracker

func (*Tracker) SetTag

func (tr *Tracker) SetTag(t *attribute.Tag)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL