spider

package
v0.0.0-...-ce55088 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 8, 2022 License: MIT Imports: 14 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

Functions

This section is empty.

Types

type Context

type Context struct {
	Ctx      context.Context
	Cancel   context.CancelFunc
	Response *http.Response

	Temp map[string]interface{}
	// contains filtered or unexported fields
}

func (*Context) Abort

func (c *Context) Abort()

func (*Context) GetDom

func (c *Context) GetDom() (*goquery.Document, error)

type MiddlewareHandler

type MiddlewareHandler func(ctx *Context)

type MiddlewareHandlerErr

type MiddlewareHandlerErr func(ctx *Context) error

type Resource

type Resource struct {
	SpiderUniqueKey string
	*downloader.Request
}

func NewResource

func NewResource(key, rule, link string) Resource

type Schedule

type Schedule struct {
	ResourcePoolList chan Resource
	ConcurrentNum    int // 并发数量
	// contains filtered or unexported fields
}

func NewSchedule

func NewSchedule() *Schedule

func (*Schedule) AddResource

func (s *Schedule) AddResource(resource Resource) (err error)

func (*Schedule) Close

func (s *Schedule) Close()

func (*Schedule) Init

func (s *Schedule) Init()

func (*Schedule) Register

func (s *Schedule) Register(spider *Spider) *Schedule

func (*Schedule) UnReg

func (s *Schedule) UnReg(spider *Spider) *Schedule

type Spider

type Spider struct {
	UniqueKey string // 唯一标识符
	STATUS    uint   // 状态

	Downloader    downloader.Downloader          // 下载器
	RuleHandlers  map[string][]MiddlewareHandler // 规则中间件
	CloseCallback func(s *Spider)                // 回调关闭
	// contains filtered or unexported fields
}

func NewSpider

func NewSpider() *Spider

func (*Spider) SetConcurrent

func (s *Spider) SetConcurrent(num int) *Spider

func (*Spider) SetGlobalPreRun

func (s *Spider) SetGlobalPreRun(f MiddlewareHandlerErr) *Spider

func (*Spider) SetRules

func (s *Spider) SetRules(key string, h ...MiddlewareHandler) *Spider

SetRules 为爬虫设置规则中间件，key 为规则名。

func (*Spider) SetTimeTicker

func (s *Spider) SetTimeTicker(num int) *Spider

SetTimeTicker 设置探活时间，默认十秒。

func (*Spider) Start

func (s *Spider) Start()

func (*Spider) Stop

func (s *Spider) Stop()

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL