parser

package
v0.1.1-0...-c572a67 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 22, 2018 License: Apache-2.0 Imports: 16 Imported by: 1

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Sentinel errors for problems in a node conf. Each message names the
// offending field (xpath, rule_type, item_key).
var (
	// ErrEmptyXpath is the error for a node conf whose xpath is empty.
	ErrEmptyXpath      = errors.New("empty xpath of node conf")
	// ErrInvalidRuleType is the error for a node conf whose rule_type is invalid.
	ErrInvalidRuleType = errors.New("invalid rule_type of node conf")
	// ErrEmptyRuleType is the error for a node conf whose rule_type is empty.
	ErrEmptyRuleType   = errors.New("empty rule_type of node conf")
	// ErrEmptyItemKey is the error for a node conf whose item_key is empty.
	ErrEmptyItemKey    = errors.New("empty item_key of node conf")
)
View Source
// Parsers maps a string key to a Parser; it is initialized as an empty map.
// NOTE(review): presumably the registry that GetParser looks names up in —
// confirm against the implementation.
var Parsers = make(map[string]Parser)

Functions

func MakeAbsoluteUrl

func MakeAbsoluteUrl(href, baseurl string) (string, error)

func MatchRegex

func MatchRegex(content, pattern string) bool

func ParseHTML

func ParseHTML(content []byte, encoding string, options ...libxml2Parser.HTMLOption) (types.Document, error)

ParseHTML parses an HTML document from a byte slice. You can omit the options argument, or pass a single option value built by bitwise-OR'ing the desired flags.

func ParseHTMLReader

func ParseHTMLReader(in io.Reader, encoding string, options ...libxml2Parser.HTMLOption) (types.Document, error)

ParseHTMLReader parses an HTML document read from an io.Reader. You can omit the options argument, or pass a single option value built by bitwise-OR'ing the desired flags.

func ParseHTMLString

func ParseHTMLString(content string, encoding string, options ...libxml2Parser.HTMLOption) (types.Document, error)

ParseHTMLString parses an HTML document from a string. You can omit the options argument, or pass a single option value built by bitwise-OR'ing the desired flags.

func ParseRegex

func ParseRegex(content, pattern string) ([]string, error)

func UrlEncode

func UrlEncode(rawurl string) (string, error)

Types

type DOMNode

// DOMNode groups a name, an untyped node value, and a map of item fields.
// NOTE(review): the concrete type stored in Node and the keys used in Item
// are not visible here — confirm against the parser implementation.
type DOMNode struct {
	Name string // always starts with the name root
	Node interface{}
	Item map[string]interface{}
}

type HtmlParser

// HtmlParser is a parser identified by Name. Its Parse and String methods
// have the signatures required by the Parser interface.
type HtmlParser struct {
	Name string
}

func (HtmlParser) Parse

func (parser HtmlParser) Parse(
	page, pageUrl string,
	parseConf *types.ParseConf) ([]types.Task, []map[string]interface{}, error)

func (HtmlParser) String

func (parser HtmlParser) String() string

type JsonParser

// JsonParser is a parser identified by Name. Its Parse and String methods
// have the signatures required by the Parser interface.
type JsonParser struct {
	Name string
}

func (JsonParser) Parse

func (parser JsonParser) Parse(page, pageUrl string, parseConf *types.ParseConf) ([]types.Task, []map[string]interface{}, error)

func (JsonParser) String

func (parser JsonParser) String() string

type Parser

// Parser is the interface implemented by the page parsers in this package.
type Parser interface {
	// String returns a string describing the parser.
	// NOTE(review): presumably the parser's name — confirm.
	String() string
	// Parse processes page (with its URL in pageUrl) according to parseConf
	// and returns a slice of tasks, a slice of item maps, and an error.
	// NOTE(review): presumably the tasks are follow-up work discovered on the
	// page and the maps are extracted items — confirm against implementations.
	Parse(page, pageUrl string, parseConf *types.ParseConf) ([]types.Task, []map[string]interface{}, error)
}

func GetParser

func GetParser(name string) Parser

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL