engine

package
v0.0.0-...-caa0154 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 23, 2023 | License: MIT | Imports: 13 | Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var Store = &CrawlerStore{
	list: []*spider.Task{},
	Hash: map[string]*spider.Task{},
}

Functions

func AddJsReq

func AddJsReq(jreq map[string]interface{}) []*spider.Request

AddJsReq is used to add requests for dynamic rules.

func AddJsReqs

func AddJsReqs(jsreqs []map[string]interface{}) []*spider.Request

func GetFields

func GetFields(taskName string, ruleName string) []string

Types

type Config

type Config struct {
	WorkCount int
	Fetcher   spider.Fetcher
	Logger    *zap.Logger
	Seeds     []*spider.Request
}

Config holds the configuration options.

type Crawler

type Crawler struct {
	Visited     map[string]bool
	VisitedLock sync.Mutex
	// contains filtered or unexported fields
}

func NewEngine

func NewEngine(opts ...Option) (*Crawler, error)

func (*Crawler) CreateWork

func (c *Crawler) CreateWork()

func (*Crawler) HandleFailure

func (c *Crawler) HandleFailure(req *spider.Request)

func (*Crawler) HandleResult

func (c *Crawler) HandleResult()

func (*Crawler) HasVisited

func (c *Crawler) HasVisited(r *spider.Request) bool

func (*Crawler) Run

func (c *Crawler) Run(id string, cluster bool)

func (*Crawler) Schedule

func (c *Crawler) Schedule()

func (*Crawler) StoreVisited

func (c *Crawler) StoreVisited(reqs ...*spider.Request)

type CrawlerStore

type CrawlerStore struct {
	Hash map[string]*spider.Task
	// contains filtered or unexported fields
}

func (*CrawlerStore) Add

func (c *CrawlerStore) Add(task *spider.Task)

func (*CrawlerStore) AddJsTask

func (c *CrawlerStore) AddJsTask(m *spider.TaskModule)

type Option

type Option func(opts *options)

func WithFetcher

func WithFetcher(fetcher spider.Fetcher) Option

func WithLogger

func WithLogger(logger *zap.Logger) Option

func WithRegistryUrl

func WithRegistryUrl(url string) Option

func WithScheduler

func WithScheduler(scheduler Scheduler) Option

func WithSeeds

func WithSeeds(seed []*spider.Task) Option

func WithStorage

func WithStorage(s spider.Storage) Option

func WithWorkCount

func WithWorkCount(workCount int) Option

type Schedule

type Schedule struct {
	Logger *zap.Logger
	// contains filtered or unexported fields
}

Schedule is the scheduling engine.

func NewSchedule

func NewSchedule() *Schedule

func (*Schedule) Pull

func (s *Schedule) Pull() *spider.Request

func (*Schedule) Push

func (s *Schedule) Push(reqs ...*spider.Request)

func (*Schedule) Schedule

func (s *Schedule) Schedule()

type Scheduler

type Scheduler interface {
	Schedule()
	Push(...*spider.Request)
	Pull() *spider.Request
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL