scrapy

package module
v0.0.0-...-d6eb851 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 7, 2020 License: Apache-2.0 Imports: 12 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func RandomMobileUserAgent

func RandomMobileUserAgent(r *Request)

func RandomUserAgent

func RandomUserAgent(r *Request)

func Text

func Text(selection *goquery.Selection) string

Types

type ErrorCallback

type ErrorCallback func(*http.Response, error)

type Handler

type Handler struct {
	Priority int
	// contains filtered or unexported fields
}

func NewHandler

func NewHandler() *Handler

func (*Handler) AddRequest

func (h *Handler) AddRequest(req *Request)

func (*Handler) AddURL

func (h *Handler) AddURL(ctx context.Context, url string) error

func (*Handler) AddURLWithContext

func (h *Handler) AddURLWithContext(url string, ctx context.Context) error

func (*Handler) OnError

func (h *Handler) OnError(callback ErrorCallback)

func (*Handler) OnHtml

func (h *Handler) OnHtml(callback HtmlCallback)

func (*Handler) OnRequest

func (h *Handler) OnRequest(callback RequestCallback)

func (*Handler) OnResponse

func (h *Handler) OnResponse(callback ResponseCallback)

type HtmlCallback

type HtmlCallback func(*Request, *http.Response, *goquery.Document)

type Request

type Request struct {
	Request *http.Request
	Ctx     context.Context
	// contains filtered or unexported fields
}

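Request bundles an *http.Request with the context.Context carried alongside it (exposed as the Request and Ctx fields), plus unexported fields.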

type RequestCallback

type RequestCallback func(*Request)

type RequestQueue

type RequestQueue struct {
	// contains filtered or unexported fields
}

func NewQueue

func NewQueue(cap int) *RequestQueue

func (*RequestQueue) Len

func (r *RequestQueue) Len() int

func (*RequestQueue) PopRequest

func (r *RequestQueue) PopRequest() *Request

func (*RequestQueue) PushRequest

func (r *RequestQueue) PushRequest(req *Request)

type ResponseCallback

type ResponseCallback func(*http.Response)

type Scrapy

type Scrapy struct {
	Client  *http.Client
	Threads int
	Timeout time.Duration
	Queue   *RequestQueue
	// contains filtered or unexported fields
}

func NewScrapy

func NewScrapy(opts ...ScrapyOptions) *Scrapy

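NewScrapy returns a new *Scrapy, accepting zero or more ScrapyOptions values (such as the one returned by SetTimeout) to configure it.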

func (*Scrapy) AddHandler

func (s *Scrapy) AddHandler(h *Handler)

func (*Scrapy) Run

func (s *Scrapy) Run()

func (*Scrapy) Shutdown

func (s *Scrapy) Shutdown()

func (*Scrapy) ShutdownAfter

func (s *Scrapy) ShutdownAfter(t time.Duration)

func (*Scrapy) Wait

func (s *Scrapy) Wait()

type ScrapyOptions

type ScrapyOptions func(*Scrapy)

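ScrapyOptions is a function that takes a *Scrapy; values of this type are passed to NewScrapy as options, and SetTimeout returns one.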

func SetTimeout

func SetTimeout(timeout time.Duration) ScrapyOptions

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL