package crawler

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Crawler

type Crawler struct {
	// contains filtered or unexported fields
}

func NewCrawler

func NewCrawler(opts *Options) *Crawler

NewCrawler returns a new Crawler configured by the given Options.

func (*Crawler) Run

func (crawler *Crawler) Run(shutdown chan os.Signal) error

Run starts the crawl and blocks until it completes or a signal is received on shutdown.

func (Crawler) Status

func (crawler Crawler) Status() *Status

Status returns a snapshot of the crawler's current state.
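
A minimal usage sketch (the import path below is a placeholder, and explicitly setting Matcher and Parser is an assumption; the package may supply defaults when those fields are nil):

	package main

	import (
		"log"
		"net/url"
		"os"
		"os/signal"

		crawler "example.com/crawler" // placeholder import path
	)

	func main() {
		seed, err := url.Parse("https://example.com/")
		if err != nil {
			log.Fatal(err)
		}

		c := crawler.NewCrawler(&crawler.Options{
			Seed:     seed,
			Logger:   log.New(os.Stderr, "crawl ", log.LstdFlags),
			MaxDepth: 2,
			Matcher:  crawler.DefaultMatcher{Seed: seed},
			Parser:   &crawler.DefaultParser{Seed: seed},
		})

		// Forward SIGINT so the crawl can be stopped cleanly.
		shutdown := make(chan os.Signal, 1)
		signal.Notify(shutdown, os.Interrupt)

		if err := c.Run(shutdown); err != nil {
			log.Fatal(err)
		}
	}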

type DefaultMatcher

type DefaultMatcher struct {
	Seed *url.URL
}

DefaultMatcher is the default Matcher implementation; it evaluates a URL against the Seed to determine whether it should be crawled.

func (DefaultMatcher) Match

func (m DefaultMatcher) Match(u *url.URL) bool

Match reports whether the given URL should be crawled.

type DefaultParser

type DefaultParser struct {
	Seed *url.URL
}

func (*DefaultParser) Links

func (p *DefaultParser) Links(res *http.Response) map[string]*url.URL

Links returns a map of unique, normalized URLs found in the response.

type Matcher

type Matcher interface {
	Match(*url.URL) bool
}

Matcher is used to evaluate a URL and determine whether it should be crawled.
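
Any type with a Match(*url.URL) bool method satisfies Matcher. As an illustration (HostMatcher is hypothetical, not part of this package), a matcher that restricts the crawl to a single host:

	import "net/url"

	// HostMatcher is a hypothetical Matcher that restricts the
	// crawl to a single host.
	type HostMatcher struct {
		Host string
	}

	// Match reports whether u should be crawled.
	func (m HostMatcher) Match(u *url.URL) bool {
		return u != nil && u.Host == m.Host
	}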

type Node

type Node struct {
	URL *url.URL
	Err error
	// contains filtered or unexported fields
}

func (*Node) Data

func (n *Node) Data() *ResponseData

Data returns the ResponseData recorded for the node.

func (Node) ID

func (n Node) ID() int

ID returns the node's integer identifier.

func (*Node) Record

func (n *Node) Record(d time.Duration, res *http.Response)

Record stores an HTTP response, along with the duration of the request, on the node.

type Nodes

type Nodes []*Node

Nodes is a sortable slice of Node pointers; it implements sort.Interface.

func (Nodes) Len

func (n Nodes) Len() int

func (Nodes) Less

func (n Nodes) Less(i, j int) bool

func (Nodes) Swap

func (n Nodes) Swap(i, j int)
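
Because Nodes provides Len, Less, and Swap, it can be ordered with the standard sort package; the sort key used by Less is unexported and not documented here. A sketch, reusing c from the Crawler example above:

	ns := crawler.Nodes(c.Status().Nodes)
	sort.Sort(ns)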

type Options

type Options struct {
	Seed     *url.URL
	Logger   *log.Logger
	MaxDepth int
	Matcher  Matcher
	Parser   Parser
	Debug    bool
}

Options configures a Crawler; the same Options value is also accepted by NewRecorder and NewScheduler.

type Parser

type Parser interface {
	Links(*http.Response) map[string]*url.URL
}

Parser extracts links from an HTTP response.
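
Parser can likewise be implemented outside the package. A minimal sketch using golang.org/x/net/html (AnchorParser is hypothetical; DefaultParser's normalization and filtering may differ):

	import (
		"net/http"
		"net/url"

		"golang.org/x/net/html"
	)

	// AnchorParser is a hypothetical Parser that collects href
	// targets from <a> elements, resolved against the request URL
	// and deduplicated by their string form.
	type AnchorParser struct{}

	// Links implements Parser.
	func (AnchorParser) Links(res *http.Response) map[string]*url.URL {
		links := make(map[string]*url.URL)
		if res == nil || res.Request == nil {
			return links
		}
		doc, err := html.Parse(res.Body)
		if err != nil {
			return links
		}
		var walk func(*html.Node)
		walk = func(n *html.Node) {
			if n.Type == html.ElementNode && n.Data == "a" {
				for _, a := range n.Attr {
					if a.Key != "href" {
						continue
					}
					if u, err := res.Request.URL.Parse(a.Val); err == nil {
						links[u.String()] = u
					}
				}
			}
			for c := n.FirstChild; c != nil; c = c.NextSibling {
				walk(c)
			}
		}
		walk(doc)
		return links
	}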

type Recorder

type Recorder struct {
	// contains filtered or unexported fields
}

Recorder saves the result of crawling a Node in a graph structure.

func NewRecorder

func NewRecorder(opts *Options) *Recorder

NewRecorder returns a new Recorder configured by the given Options.

func (*Recorder) Nodes

func (r *Recorder) Nodes() []*Node

Nodes returns the recorded nodes.

func (*Recorder) RecordLink

func (r *Recorder) RecordLink(parent *Node, url *url.URL) *Node

RecordLink creates (or updates) a node and adds an Edge to the Graph

func (*Recorder) RecordResponse

func (r *Recorder) RecordResponse(ctx *fetchbot.Context, res *http.Response, err error) *Node

RecordResponse creates (or updates) a node with the http.Response

type ResponseData

type ResponseData struct {
	Code     int
	Headers  http.Header
	Length   int64
	Duration time.Duration
}

type Scheduler

type Scheduler struct {
	// contains filtered or unexported fields
}

Scheduler schedules a Node for crawling.

func NewScheduler

func NewScheduler(opts *Options) *Scheduler

func (*Scheduler) Depth

func (s *Scheduler) Depth() int

Depth returns the current crawl depth.

func (*Scheduler) Run

func (s *Scheduler) Run(q *fetchbot.Queue)

Run runs the scheduler against the given fetchbot queue.

type Status

type Status struct {
	Running  bool
	Depth    int
	Options  *Options
	Nodes    []*Node
	Schedule []string
	Logs     []string
}

Status describes the current state of a crawl.
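
Status makes it possible to observe a crawl in progress. A sketch of periodic progress logging, again reusing c from the Crawler example (this assumes Status is safe to call concurrently with Run, which the documentation does not state):

	// Requires the log and time packages.
	go func() {
		for range time.Tick(5 * time.Second) {
			s := c.Status()
			log.Printf("running=%t depth=%d nodes=%d",
				s.Running, s.Depth, len(s.Nodes))
		}
	}()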
