nami

package module
v0.0.0-...-05d773b Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 21, 2020 License: MIT Imports: 11 Imported by: 0

README

nami

A simple web crawler that traverses pages breadth-first (BFS).

graphic

Todo
  • proxy
  • context
  • optimize fetch
  • error handling
  • logger, middleware

Credits

Library/Resource | Use
github.com/gocolly/colly | random user agent
github.com/modood/Administrative-divisions-of-China | example

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Context

type Context struct {
	MaxDepth int
	// contains filtered or unexported fields
}

Context provides a tiny layer for passing data between callbacks.

func NewContext

func NewContext() *Context

NewContext initializes a new Context instance.

func (*Context) ForEach

func (c *Context) ForEach(fn func(k string, v interface{}) interface{}) []interface{}

ForEach iterates over the Context.

func (*Context) Get

func (c *Context) Get(key string) string

Get retrieves a string value from the Context. Get returns an empty string if the key is not found.

func (*Context) GetAny

func (c *Context) GetAny(key string) interface{}

GetAny retrieves a value from the Context. GetAny returns nil if the key is not found.

func (*Context) Put

func (c *Context) Put(key string, value interface{})

Put stores a value of any type in the Context.

type Engine

type Engine struct {
	Pipeline chan Item
	Ctx      *Context

	QPS int
	// contains filtered or unexported fields
}

func NewEngine

func NewEngine(options ...Option) *Engine

func (*Engine) Process

func (e *Engine) Process(task Task) (Result, error)

func (*Engine) Run

func (e *Engine) Run(tasks ...Task)

type Item

type Item struct {
	Id   string
	Url  string
	Data interface{}
}

type Option

type Option func(*Engine)

func WithMaxDepth

func WithMaxDepth(maxDepth int) Option

func WithPipeline

func WithPipeline(pipeline chan Item) Option

func WithProxy

func WithProxy(ProxyURLs ...string) Option

func WithQPS

func WithQPS(QPS int) Option

func WithRandomUserAgent

func WithRandomUserAgent(randomUserAgent bool) Option

func WithScheduler

func WithScheduler(scheduler Scheduler) Option

func WithWorkerNum

func WithWorkerNum(num int) Option

type ParseFunc

type ParseFunc func(Response) Result

type ReadyNotifier

type ReadyNotifier interface {
	WorkerReady(chan Task)
}

type Request

type Request struct {
	Ctx   *Context
	Url   string
	Depth int
}

type Response

type Response struct {
	Ctx     *Context
	Body    []byte
	Request *Request
}

type Result

type Result struct {
	Tasks []Task
	Items []Item
}

func NilParser

func NilParser(_ Response) Result

func (*Result) AddTask

func (r *Result) AddTask(url string, parser ParseFunc, ctxMap ...map[string]interface{})

type Scheduler

type Scheduler interface {
	ReadyNotifier
	Run()
	Submit(Task)
	Worker() chan Task
}

func NewTaskScheduler

func NewTaskScheduler() Scheduler

type SimpleScheduler

type SimpleScheduler struct {
	// contains filtered or unexported fields
}

-------------------------------- SimpleScheduler -------------------------------------

func (*SimpleScheduler) Run

func (s *SimpleScheduler) Run()

func (*SimpleScheduler) Submit

func (s *SimpleScheduler) Submit(task Task)

func (*SimpleScheduler) Worker

func (s *SimpleScheduler) Worker() chan Task

func (*SimpleScheduler) WorkerReady

func (s *SimpleScheduler) WorkerReady(chan Task)

type Task

type Task struct {
	Request *Request
	ParseFunc
}

func NewTask

func NewTask(url string, parser ParseFunc) Task

type TaskScheduler

type TaskScheduler struct {
	// contains filtered or unexported fields
}

func (TaskScheduler) Run

func (s TaskScheduler) Run()

func (TaskScheduler) Submit

func (s TaskScheduler) Submit(task Task)

func (TaskScheduler) Worker

func (s TaskScheduler) Worker() chan Task

func (TaskScheduler) WorkerReady

func (s TaskScheduler) WorkerReady(w chan Task)

Directories

Path Synopsis
_examples

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL