engine

package
v0.0.0-...-278ce41 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 28, 2024 License: Apache-2.0 Imports: 13 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Store is the package-level CrawlerStore instance for crawler (spider) tasks.
// It is initialized with an empty, non-nil task list and an empty Hash map,
// so both can be used without further allocation.
var Store = &CrawlerStore{
	list: make([]*spider.Task, 0),
	Hash: make(map[string]*spider.Task),
}

Store is the global crawler (spider) task instance.

Functions

func GetFields

func GetFields(taskName, ruleName string) []string

GetFields returns the configuration fields of a task rule.

Types

type Crawler

// Crawler is the engine type returned by NewEngine. Only its exported fields
// are shown here; the remaining fields are unexported.
type Crawler struct {
	// Visited is a set-like map from a string key to a visited flag.
	// NOTE(review): presumably keyed by a per-request identifier and used by
	// HasVisited / StoreVisited — confirm against the method bodies.
	Visited     map[string]bool
	// VisitedLock is a mutex stored alongside Visited.
	// NOTE(review): presumably guards access to Visited — confirm.
	// Because the struct holds a sync.Mutex by value, it must not be copied.
	VisitedLock sync.Mutex
	// contains filtered or unexported fields
}

func NewEngine

func NewEngine(opts ...Option) (*Crawler, error)

func (*Crawler) CreateWork

func (e *Crawler) CreateWork()

func (*Crawler) HandleResult

func (e *Crawler) HandleResult()

func (*Crawler) HasVisited

func (e *Crawler) HasVisited(r *spider.Request) bool

func (*Crawler) Run

func (e *Crawler) Run(id string, cluster bool)

func (*Crawler) Schedule

func (e *Crawler) Schedule()

func (*Crawler) SetFailure

func (e *Crawler) SetFailure(req *spider.Request)

func (*Crawler) StoreVisited

func (e *Crawler) StoreVisited(reqs ...*spider.Request)

type CrawlerStore

// CrawlerStore holds crawler tasks. Besides the exported Hash map it has an
// unexported task list (see the initializer of the package-level Store).
type CrawlerStore struct {
	// Hash maps a string key to a task.
	// NOTE(review): presumably keyed by task name — confirm against Add.
	Hash map[string]*spider.Task
	// contains filtered or unexported fields
}

func (*CrawlerStore) Add

func (c *CrawlerStore) Add(task *spider.Task)

func (*CrawlerStore) AddJSTask

func (c *CrawlerStore) AddJSTask(m *spider.TaskModel)

type Option

// Option is a functional option: a function that receives a pointer to the
// package's unexported options value. NewEngine accepts a variadic list of
// Option values, and the With* constructors in this package return them.
type Option func(opts *options)

func WithFetcher

func WithFetcher(fetcher spider.Fetcher) Option

func WithLogger

func WithLogger(logger *zap.Logger) Option

func WithRegistryURL

func WithRegistryURL(registryURL string) Option

func WithScheduler

func WithScheduler(scheduler Scheduler) Option

func WithSeeds

func WithSeeds(seeds []*spider.Task) Option

func WithStorage

func WithStorage(s spider.Storage) Option

func WithWorkCount

func WithWorkCount(workCount int) Option

type Schedule

// Schedule is a concrete scheduler type created by NewSchedule. Its Schedule,
// Push and Pull methods have the same signatures as the Scheduler interface.
type Schedule struct {
	// Logger is the zap logger held by this scheduler.
	Logger *zap.Logger
	// contains filtered or unexported fields
}

func NewSchedule

func NewSchedule() *Schedule

func (*Schedule) Pull

func (s *Schedule) Pull() *spider.Request

func (*Schedule) Push

func (s *Schedule) Push(reqQueue ...*spider.Request)

func (*Schedule) Schedule

func (s *Schedule) Schedule()

type Scheduler

// Scheduler is the request-scheduling interface accepted by WithScheduler.
type Scheduler interface {
	// Schedule takes no arguments and returns nothing.
	// NOTE(review): presumably runs the scheduling loop — confirm against
	// the implementation.
	Schedule()
	// Push hands zero or more requests to the scheduler.
	Push(reqQueue ...*spider.Request)
	// Pull returns a request from the scheduler.
	// NOTE(review): blocking behavior and nil results are not visible here — confirm.
	Pull() *spider.Request
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL