crawler

package
v0.0.0-...-973ba79 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 10, 2019 | License: Apache-2.0 | Imports: 10 | Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type ClickUrlFetcher

type ClickUrlFetcher struct {
}

A UrlFetcher that finds URLs by rendering any JavaScript and clicking around the page.

type CrawlParameters

type CrawlParameters struct {
	InputUrl string
	Depth    int64
}

type Crawler

type Crawler interface {
	Crawl(param CrawlParameters)
}

func NewCrawler

func NewCrawler(options CrawlerOptions) Crawler

type CrawlerImpl

type CrawlerImpl struct {
	// contains filtered or unexported fields
}

func (*CrawlerImpl) Crawl

func (c *CrawlerImpl) Crawl(param CrawlParameters)

type CrawlerOptions

type CrawlerOptions struct {
	InitialUrl         string
	FollowForeignHosts bool
	SaveDir            string
}

type Handler

type Handler interface {
	Handle(res *Resource) error
}

func NewPdfFileDownloadHandler

func NewPdfFileDownloadHandler(saveDir string) Handler

type PdfFileDownloadHandler

type PdfFileDownloadHandler struct {
	// contains filtered or unexported fields
}

func (*PdfFileDownloadHandler) Handle

func (h *PdfFileDownloadHandler) Handle(res *Resource) error

type PlainUrlFetcher

type PlainUrlFetcher struct {
}

A UrlFetcher that simply finds URLs on the HTML page.

func (*PlainUrlFetcher) Fetch

func (f *PlainUrlFetcher) Fetch(url string) (*Resource, error)

type RenderedUrlFetcher

type RenderedUrlFetcher struct {
}

A UrlFetcher that finds URLs by rendering any JavaScript.

type Resource

type Resource struct {
	// contains filtered or unexported fields
}

type UrlFetcher

type UrlFetcher interface {
	// Fetch fetches the requested URL and returns the resulting Resource
	// (presumably carrying the URLs found on that page; confirm against Resource).
	Fetch(url string) (*Resource, error)
}

func NewUrlFetcher

func NewUrlFetcher() UrlFetcher

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL