varys

package module
v0.0.0-...-face1fc Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 15, 2015 License: Apache-2.0 Imports: 15 Imported by: 0

README

varys

Documentation

Index

Constants

This section is empty.

Variables

View Source
var (
	ErrRunning = errors.New("already running")
)

Error var

Functions

This section is empty.

Types

type Crawler

type Crawler struct {
	Logger logo.Logger
	// contains filtered or unexported fields
}

Crawler struct

func NewCrawler

func NewCrawler(opts Options, queue Queue, fetcher Fetcher) (*Crawler, error)

NewCrawler creates a new instance of Crawler.

func (*Crawler) Crawl

func (c *Crawler) Crawl(startURLs ...string) error

Crawl starts crawling from the given start URLs.

func (*Crawler) RegisterSpider

func (c *Crawler) RegisterSpider(spider Spider, ms ...SpiderMiddleware)

RegisterSpider registers a spider together with its middlewares.

func (*Crawler) Running

func (c *Crawler) Running() bool

Running reports whether the crawler is currently running.

type Fetcher

type Fetcher interface {
	Fetch(url string) (body []byte, err error)
}

Fetcher interface

func NewFetcher

func NewFetcher(opts FetcherOptions) Fetcher

NewFetcher creates a new Fetcher instance.

type FetcherOptions

type FetcherOptions struct {
	Timeout time.Duration
	Prepare func(*http.Request)
}

FetcherOptions struct

type Options

type Options struct {
	MaxDepth int
	SleptMin int
	SleptMax int
}

Options holds the crawler options.

type Queue

type Queue interface {
	Enqueue(urls ...string) error
	Dequeue() (string, error)

	Repaire() error

	DoneURL(url string) error
	RetryURL(url string) error

	FailedURLs() []string

	Cleanup()
}

Queue interface

type RedisQueue

type RedisQueue struct {
	QueueReady   string
	QueuePending string
	QueueDone    string
	QueueFailed  string
	// contains filtered or unexported fields
}

RedisQueue is a Redis-based implementation of the Queue interface.

func NewRedisQueue

func NewRedisQueue(url, password, prefix string) *RedisQueue

NewRedisQueue creates a new RedisQueue instance.

func (*RedisQueue) Cleanup

func (q *RedisQueue) Cleanup()

func (*RedisQueue) Dequeue

func (q *RedisQueue) Dequeue() (url string, err error)

func (*RedisQueue) DoneURL

func (q *RedisQueue) DoneURL(url string) error

func (*RedisQueue) Enqueue

func (q *RedisQueue) Enqueue(urls ...string) error

Enqueue adds urls into ready queue.

func (*RedisQueue) FailedURLs

func (q *RedisQueue) FailedURLs() []string

func (*RedisQueue) Repaire

func (q *RedisQueue) Repaire() error

func (*RedisQueue) RetryURL

func (q *RedisQueue) RetryURL(url string) error

type Spider

type Spider interface {
	Parse(crawler *Crawler, url string, r io.Reader, err error) ([]string, error)
}

Spider interface.

func ReduceSpideMiddlewares

func ReduceSpideMiddlewares(spider Spider, ms ...SpiderMiddleware) Spider

ReduceSpideMiddlewares merges multiple SpiderMiddlewares and a spider into a single new Spider.

type SpiderFunc

type SpiderFunc func(crawler *Crawler, url string, r io.Reader, err error) ([]string, error)

SpiderFunc is a function type that implements the Spider interface.

func (SpiderFunc) Parse

func (sf SpiderFunc) Parse(crawler *Crawler, url string, r io.Reader, err error) ([]string, error)

Parse implements Spider interface.

type SpiderMiddleware

type SpiderMiddleware func(Spider) Spider

SpiderMiddleware type.

type URLFetcher

type URLFetcher struct {
	// contains filtered or unexported fields
}

URLFetcher struct

func (*URLFetcher) Fetch

func (f *URLFetcher) Fetch(url string) (body []byte, err error)

Fetch web page from url.

type Wrapper

type Wrapper struct {
	sync.WaitGroup
}

Wrapper is a wrapper of sync.WaitGroup.

func (*Wrapper) Wrap

func (w *Wrapper) Wrap(fn func())

Wrap executes fn in the context of the WaitGroup: it calls Add before running fn and Done after fn finishes.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL