scraper

package
v0.0.0-...-28926d1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 8, 2024 License: MIT Imports: 11 Imported by: 0

Documentation

Overview

DO NOT EDIT

Index

Constants

View Source
// String identifiers for document formats and query kinds.
// NOTE(review): where these values are consumed is not visible here —
// presumably they select how the Source and Query tasks build and parse a
// document; confirm against the implementation.
const (
	// HTML is the identifier "html".
	HTML      = "html"
	// JSON is the identifier "json".
	JSON      = "json"
	// JSONXpath is the identifier "json.xpath" (presumably XPath evaluated
	// over a JSON document — TODO confirm).
	JSONXpath = "json.xpath"
	// XML is the identifier "xml".
	XML       = "xml"
	// XPATH is the identifier "xpath".
	XPATH     = "xpath"
)
View Source
// Values for the Type member of a Field.
const (
	// FieldTypeCommon is the empty string, so a Field whose Type is left
	// unset has this type.
	FieldTypeCommon = ""
	// FieldTypeObject marks a field whose members are listed in Field.Object.
	FieldTypeObject = "object"
	// FieldTypeArray marks a field whose items are described by Field.Element.
	FieldTypeArray  = "array"
)
View Source
// Task type identifiers and request defaults.
// NOTE(review): the TaskType* values presumably match Task.Type and select
// between Task.Source, Task.Request and Task.Query — confirm in Task.Process.
const (
	// TaskTypeSource is the identifier "source".
	TaskTypeSource        = "source"
	// TaskTypeRequest is the identifier "request".
	TaskTypeRequest       = "request"
	// TaskTypeQuery is the identifier "query".
	TaskTypeQuery         = "query"
	// DefaultRequestTimeout is five seconds (presumably the timeout applied
	// to HTTP requests when none is configured — TODO confirm).
	DefaultRequestTimeout = 5 * time.Second

	// DefaultAcceptLanguage is a language preference list in Accept-Language
	// header syntax (presumably sent by Request by default — TODO confirm).
	DefaultAcceptLanguage = "en-US;q=0.6,en;q=0.4"
	// DefaultEncoding is the character encoding name "utf-8" (presumably
	// assumed when a response declares none — TODO confirm).
	DefaultEncoding       = "utf-8"
)

Variables

View Source
// Sentinel errors exported by the package; compare with errors.Is.
var (
	// ErrDocumentIsNil reports that a document is nil.
	// NOTE(review): presumably returned when a query runs before any Source
	// or Request task has produced a document — confirm against callers.
	ErrDocumentIsNil = errors.New("document is nil")
)

Functions

This section is empty.

Types

type Args

type Args struct {
	ResultBuilder *ResultBuilder
	ExecuteTime   time.Duration
	// contains filtered or unexported fields
}

Args is a struct for passing arguments to the scraper.

func NewArgs

func NewArgs(variables Variables, httpClient *http.Client) *Args

type Field

// Field describes a field in the result. Fields nest: an array field carries
// an Element description and an object field carries a list of member Fields.
type Field struct {
	// Path is the field's path (presumably the location in the result where
	// the value is stored, as passed to ResultBuilder.Set — TODO confirm).
	Path string `json:"path,omitempty"`
	// Type is the field type; see FieldTypeCommon, FieldTypeObject and
	// FieldTypeArray. The empty string equals FieldTypeCommon.
	Type string `json:"type,omitempty"`

	// Query is the selector used to locate the value in the document.
	Query string `json:"query,omitempty"` // XPath or JSONPath or CSS selector

	// Element is used for FieldTypeArray
	Element *Field `json:"element,omitempty"`

	// Object is used for FieldTypeObject
	Object []*Field `json:"object,omitempty"`

	// Transformers is a list of transformers to apply to the field.
	Transformers transform.Transformers `json:"transformers,omitempty"`
}

Field describes a field in the result.

type ResultBuilder

type ResultBuilder struct {
	// contains filtered or unexported fields
}

func NewResultBuilder

func NewResultBuilder() *ResultBuilder

func (*ResultBuilder) GetData

func (b *ResultBuilder) GetData() []byte

func (*ResultBuilder) GetString

func (b *ResultBuilder) GetString() string

func (*ResultBuilder) Set

func (b *ResultBuilder) Set(path string, value interface{}, transformers ...transform.Transformer)

type Scraper

// Scraper is a JSON-serializable scraping definition: a code plus a list of
// tasks. Its Scrape method takes an *Args and returns an error.
type Scraper struct {
	// Code is a string code for this scraper (presumably an identifier of
	// the service being scraped — TODO confirm).
	Code  string `json:"code,omitempty"`
	// Tasks is the list of tasks belonging to this scraper (presumably
	// processed in order by Scrape — TODO confirm).
	Tasks []Task `json:"tasks,omitempty"`
}

Scraper can scrape data from a delivery service.

func (*Scraper) Scrape

func (s *Scraper) Scrape(args *Args) error

type Task

// Task is a single JSON-serializable step of a Scraper. Its methods Process,
// Source, Request and Query each take an *Args and return an error.
type Task struct {
	// Type names the kind of task (presumably one of TaskTypeSource,
	// TaskTypeRequest or TaskTypeQuery — TODO confirm).
	Type    string                 `json:"type,omitempty"`
	// Payload is the task's string payload; Source is documented as creating
	// a document from it. How other task types use it is not visible here.
	Payload string                 `json:"payload,omitempty"`
	// Params holds free-form parameters; the recognized keys are not visible
	// here.
	Params  map[string]interface{} `json:"params,omitempty"`
	// Field describes the result field for this task (presumably used by
	// Query — TODO confirm).
	// NOTE(review): encoding/json's omitempty has no effect on a
	// struct-valued field, so "field" is always emitted when marshaling.
	Field   Field                  `json:"field,omitempty"`
}

func (*Task) Process

func (t *Task) Process(args *Args) error

func (*Task) Query

func (t *Task) Query(args *Args) error

Query parses the document with an XPath, JSONPath, or CSS selector.

func (*Task) Request

func (t *Task) Request(args *Args) error

Request makes an HTTP request.

func (*Task) Source

func (t *Task) Source(args *Args) error

Source creates a document from the payload.

type Variables

// Variables maps string keys to string values (presumably variable names to
// the values that ReplaceStringFromVariables substitutes into a string —
// TODO confirm the placeholder syntax against the implementation).
type Variables map[string]string

func (Variables) ReplaceStringFromVariables

func (v Variables) ReplaceStringFromVariables(str string) string

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL