crawler

package module
v0.0.0-...-055068c Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 5, 2016 License: MIT Imports: 7 Imported by: 0

README

crawler

A super simple Go web page crawler. It has no manners and is not concurrent.

$ go get github.com/adrianduke/crawler
$ go run cmd/main.go http://adeduke.com
/
	Static Assets:
		CSS:
			http://adeduke.com/index.xml
			http://adeduke.com/css/bootstrap.min.css
			http://adeduke.com/css/hc.css
			http://netdna.bootstrapcdn.com/font-awesome/4.0.3/css/font-awesome.css
		JS:
			http://adeduke.com/js/jquery-1.10.2.min.js
			http://adeduke.com/js/bootstrap.min.js
			http://adeduke.com/js/bootstrap.js
			http://adeduke.com/js/hc.js
	Internal Links:
		http://adeduke.com/2015/08/how-to-create-a-private-ethereum-chain/
		http://adeduke.com/projects
		http://adeduke.com/
		http://adeduke.com/2015/09/test-driven-development---a-guided-tour/

/
	Static Assets:
		CSS:
			http://adeduke.com/index.xml
			http://adeduke.com/css/bootstrap.min.css
			http://adeduke.com/css/hc.css
			http://netdna.bootstrapcdn.com/font-awesome/4.0.3/css/font-awesome.css
		JS:
			http://adeduke.com/js/jquery-1.10.2.min.js
			http://adeduke.com/js/bootstrap.min.js
			http://adeduke.com/js/bootstrap.js
			http://adeduke.com/js/hc.js
	Internal Links:
		http://adeduke.com/2015/09/test-driven-development---a-guided-tour/
		http://adeduke.com/2015/08/how-to-create-a-private-ethereum-chain/
		http://adeduke.com/projects
		http://adeduke.com/

/projects
	Static Assets:
		CSS:
			http://adeduke.com/css/bootstrap.min.css
			http://adeduke.com/css/hc.css
			http://netdna.bootstrapcdn.com/font-awesome/4.0.3/css/font-awesome.css
		JS:
			http://adeduke.com/js/jquery-1.10.2.min.js
			http://adeduke.com/js/bootstrap.min.js
			http://adeduke.com/js/bootstrap.js
			http://adeduke.com/js/hc.js
	Internal Links:
		http://adeduke.com/
		http://adeduke.com/projects

/2015/09/test-driven-development---a-guided-tour/
	Static Assets:
		CSS:
			http://adeduke.com/css/bootstrap.min.css
			http://adeduke.com/css/hc.css
			http://netdna.bootstrapcdn.com/font-awesome/4.0.3/css/font-awesome.css
		JS:
			http://adeduke.com/js/jquery-1.10.2.min.js
			http://adeduke.com/js/bootstrap.min.js
			http://adeduke.com/js/bootstrap.js
			http://adeduke.com/js/hc.js
	Internal Links:
		http://adeduke.com/
		http://adeduke.com/2015/09/test-driven-development---a-guided-tour/
		http://adeduke.com/2015/08/how-to-create-a-private-ethereum-chain/
		http://adeduke.com/projects

/2015/08/how-to-create-a-private-ethereum-chain/
	Static Assets:
		CSS:
			http://adeduke.com/css/bootstrap.min.css
			http://adeduke.com/css/hc.css
			http://netdna.bootstrapcdn.com/font-awesome/4.0.3/css/font-awesome.css
		JS:
			http://adeduke.com/js/jquery-1.10.2.min.js
			http://adeduke.com/js/bootstrap.min.js
			http://adeduke.com/js/bootstrap.js
			http://adeduke.com/js/hc.js
	Internal Links:
		http://adeduke.com/projects
		http://adeduke.com/
		http://adeduke.com/2015/09/test-driven-development---a-guided-tour/

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func EntryPoint

func EntryPoint(cliArgs []string, stdout, stderr io.Writer) int

Types

type CrawlerApp

type CrawlerApp struct {
	Output  io.Writer
	Visited map[string]bool
	Errors  chan error

	Fetcher
	// contains filtered or unexported fields
}

func NewCrawlerApp

func NewCrawlerApp(output io.Writer, fetcher Fetcher) *CrawlerApp

func (*CrawlerApp) Crawl

func (ca *CrawlerApp) Crawl(rootURL *url.URL, depth int)

func (*CrawlerApp) PrettyPrint

func (ca *CrawlerApp) PrettyPrint(rootURL *url.URL, results *PageResults)

func (*CrawlerApp) Run

func (ca *CrawlerApp) Run(rootURLString string, errorOutput io.Writer) error

type Fetcher

type Fetcher interface {
	Fetch(url string) (pageResults *PageResults, err error)
}

type FetcherAdapter

type FetcherAdapter func(string) (*PageResults, error)

func (FetcherAdapter) Fetch

func (f FetcherAdapter) Fetch(url string) (*PageResults, error)

type HTTPFetcher

type HTTPFetcher struct{}

func (*HTTPFetcher) Fetch

func (hf *HTTPFetcher) Fetch(url string) (*PageResults, error)

type PageResults

type PageResults struct {
	// contains filtered or unexported fields
}

func NewPageResults

func NewPageResults() *PageResults

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL