spider

package
v0.0.0-...-aa6b52a Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 16, 2023 License: Apache-2.0 Imports: 12 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type ISpider

type ISpider interface {
	RunManyAsync(targetUrls []string)
	RunManyAsyncAwait(targetUrls []string)
	RunMany(targetUrls []string)
	RunOneAsync(targetUrl string)
	RunOneAsyncAwait(targetUrl string)
	RunOne(targetUrl string)
	OnSuccess(func(*http.Response))
	OnError(func(string, error))
	OnHtml(func(*goquery.Document))
	OnXml(func(*goquery.Document))
	AddUrlFilter(string, func(string) bool)
}

type Spider

type Spider struct {
	ISpider
	// contains filtered or unexported fields
}

Basic spider struct implementing ISpider

func NewSpider

func NewSpider() *Spider

func (*Spider) AddUrlFilter

func (spider *Spider) AddUrlFilter(filterId string, fn func(targetUrl string) bool)

func (*Spider) OnError

func (spider *Spider) OnError(fn func(string, error))

func (*Spider) OnHtml

func (spider *Spider) OnHtml(fn func(*goquery.Document))

func (*Spider) OnSuccess

func (spider *Spider) OnSuccess(fn func(*http.Response))

func (*Spider) OnXml

func (spider *Spider) OnXml(fn func(*goquery.Document))

func (*Spider) RandomProxy

func (spider *Spider) RandomProxy() *url.URL

func (*Spider) RoundRobinProxy

func (spider *Spider) RoundRobinProxy() *url.URL

func (*Spider) RunMany

func (spider *Spider) RunMany(targetUrls []string)

func (*Spider) RunManyAsync

func (spider *Spider) RunManyAsync(targetUrls []string)

func (*Spider) RunManyAsyncAwait

func (spider *Spider) RunManyAsyncAwait(targetUrls []string)

func (*Spider) RunOne

func (spider *Spider) RunOne(targetUrl string)

func (*Spider) RunOneAsync

func (spider *Spider) RunOneAsync(targetUrl string)

func (*Spider) RunOneAsyncAwait

func (spider *Spider) RunOneAsyncAwait(targetUrl string)

type SpiderConfig

type SpiderConfig struct {
	IgnoreRobotTxt bool
	Proxies        []string
}

func DefaultConfig

func DefaultConfig() *SpiderConfig

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL