job

package
v0.6.3 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 16, 2019 License: MIT Imports: 23 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// ListOfSpider holds one pointer to a zero-value instance of each listed
// crawler type, stored as a Crawler.
// NOTE(review): presumably GetSpiders builds its result from this slice —
// confirm against the implementation.
var ListOfSpider = []Crawler{
	&IpHai{},
	&rudnkh{},
	&coolProxy{},
	&xici{},
	&spys{},
	&pubProxy{},
	&kuaiProxy{},
	&cn66{},
	&feiyi{},
	&ip89{},
	&goubanjia{},
	&freeip{},
	&ab57{},
	&clarketm{},
	&httptunnel{},
	&proxylist{},
	&proxylistplus{},
	&aliveProxy{},
	&proxyDb{},
	&usProxy{},
	&siteDigger{},
	&dogdev{},
	&newProxy{},
	&xseo{},
	&ultraProxies{},
	&premProxy{},
	&nntime{},
	&proxyListsLine{},
	&myProxy{},
	&proxyIpList{},
	&blackHat{},
	&proxyLists{},
	&ip3366{},
	&xiladaili{},
	&nimadaili{},
	&zdy{},
}
View Source
var (
	// MaxProxyReachedErr is a sentinel error carrying the message
	// "max proxy reached".
	// NOTE(review): Go convention names sentinel errors ErrXxx (for example
	// ErrMaxProxyReached); the current name is kept because it is exported.
	MaxProxyReachedErr = errors.New("max proxy reached")
)

Functions

func DecodeProxy

func DecodeProxy(vm *otto.Otto, js string) (proxy string, err error)

func DecodeString

func DecodeString(vm *otto.Otto, js string) (proxy string, err error)

func ParsePort

func ParsePort(vm *otto.Otto, PortJs string) (port string, err error)

Types

type Crawler

// Crawler is the method set required of every element of ListOfSpider.
type Crawler interface {
	// Run starts the crawler; it takes no arguments and returns nothing.
	Run()
	// StartUrl returns a list of strings, presumably the URLs the crawl
	// begins from — confirm against an implementation.
	StartUrl() []string
	// Cron returns a string, presumably a cron schedule expression — confirm.
	Cron() string
	// Name returns the crawler's name.
	Name() string
	// Retry returns an unsigned count, presumably the number of retry
	// attempts — confirm.
	Retry() uint
	// NeedRetry reports a boolean retry setting.
	NeedRetry() bool
	// Enabled reports a boolean enabled setting.
	Enabled() bool
	// Fetch takes a URL and a flag saying whether to use a proxy, and returns
	// a string (named body in Spider.Fetch) and an error.
	// NOTE(review): Spider.Fetch names the first parameter proxyURL, which
	// differs from the "url" wording here — confirm which one is meant.
	Fetch(string, bool) (string, error)
	// SetProxyChan stores a send-only channel of *model.HttpProxy.
	SetProxyChan(chan<- *model.HttpProxy)
	// GetProxyChan returns a send-only channel of *model.HttpProxy.
	GetProxyChan() chan<- *model.HttpProxy
	// Parse turns a string (named body in IpHai.Parse) into a slice of
	// *model.HttpProxy, or returns an error.
	Parse(string) ([]*model.HttpProxy, error)
}

func GetSpiders

func GetSpiders(ch chan<- *model.HttpProxy) []Crawler

type IpHai

// IpHai is a crawler type that embeds Spider. This page lists Run, Cron,
// GetReferer, Name, Parse and StartUrl as methods declared on *IpHai itself.
type IpHai struct {
	Spider
}

func (*IpHai) Cron

func (s *IpHai) Cron() string

func (*IpHai) GetReferer

func (s *IpHai) GetReferer() string

func (*IpHai) Name

func (s *IpHai) Name() string

func (*IpHai) Parse

func (s *IpHai) Parse(body string) (proxies []*model.HttpProxy, err error)

func (*IpHai) Run

func (s *IpHai) Run()

func (*IpHai) StartUrl

func (s *IpHai) StartUrl() []string

type Spider

// Spider is the type embedded by IpHai. Its listed methods cover every method
// of Crawler except Run, and it additionally exposes GetReferer, RandomDelay
// and TimeOut.
type Spider struct {
	// contains filtered or unexported fields
}

func (*Spider) Cron

func (s *Spider) Cron() string

func (*Spider) Enabled

func (s *Spider) Enabled() bool

func (*Spider) Fetch

func (s *Spider) Fetch(proxyURL string, useProxy bool) (body string, err error)

func (*Spider) GetProxyChan

func (s *Spider) GetProxyChan() chan<- *model.HttpProxy

func (*Spider) GetReferer

func (s *Spider) GetReferer() string

func (*Spider) Name

func (s *Spider) Name() string

func (*Spider) NeedRetry

func (s *Spider) NeedRetry() bool

func (*Spider) Parse

func (s *Spider) Parse(string) ([]*model.HttpProxy, error)

func (*Spider) RandomDelay

func (s *Spider) RandomDelay() bool

func (*Spider) Retry

func (s *Spider) Retry() uint

func (*Spider) SetProxyChan

func (s *Spider) SetProxyChan(ch chan<- *model.HttpProxy)

func (*Spider) StartUrl

func (s *Spider) StartUrl() []string

func (*Spider) TimeOut

func (s *Spider) TimeOut() int

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL