ziva

package module
v0.0.4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 21, 2023 License: MIT Imports: 25 Imported by: 0

README

ziva

一个go实现的多任务、多线程爬虫库

一个demo

package main

import (
	"fmt"
	"github.com/zituocn/ziva"
	"net/http"
)

// main builds a crawl job named "article" on top of an in-memory queue,
// registers success and failure callbacks, and runs the job.
func main() {
	job := ziva.NewJob("article", ziva.Options{
		// CreateQueue fills a fresh in-memory queue with one GET task per
		// article ID and hands it to the job.
		CreateQueue: func() ziva.TodoQueue {
			ids := []int{3263, 3262, 3261, 3260, 3259}
			queue := ziva.NewMemQueue()
			// A single header value is shared by every task added below.
			header := &http.Header{}
			header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36")
			for _, item := range ids {
				queue.Add(&ziva.Task{
					Url:    fmt.Sprintf("https://22v.net/article/%d", item),
					Method: http.MethodGet,
					Header: header,
				})
			}
			return queue
		},
		SucceedFunc: func(ctx *ziva.Context) {
			fmt.Println("成功的回调")
			fmt.Println("返回信息 :", ctx.Response.Status)
		},
		FailedFunc: func(ctx *ziva.Context) {
			fmt.Println("失败的回调")
			// A request that failed before any response arrived has no
			// *http.Response to inspect; report the error instead of
			// dereferencing a nil pointer.
			if ctx.Response == nil {
				fmt.Println("错误信息 :", ctx.Err)
				return
			}
			fmt.Println("返回状态 :", ctx.Response.StatusCode)
		},
		// NOTE(review): presumably a pause between tasks; the unit is not
		// shown on this page — confirm against the ziva source.
		SheepTime: 3000,
		// Num is the number of goroutines.
		Num: 1,
	})

	job.Do()
}

使用redis queue

package main

import (
	"fmt"
	"net/http"

	"github.com/zituocn/ziva"
	"github.com/zituocn/ziva/goredis"
)

// main builds a crawl job named "article" whose tasks are kept in a Redis
// queue under the key "article", registers success and failure callbacks,
// and runs the job with debug output enabled.
func main() {
	job := ziva.NewJob("article", ziva.Options{
		// CreateQueue opens the Redis-backed queue and adds one GET task per
		// article ID.
		CreateQueue: func() ziva.TodoQueue {
			ids := []int{3263, 3262, 3261, 3260, 3259}
			queue := ziva.NewRedisQueue("article", &goredis.RedisConfig{
				Name:     "127.0.0.1",
				Host:     "127.0.0.1",
				Port:     6379,
				Password: "123456",
				DB:       1,
			})
			// A single header value is shared by every task added below.
			header := &http.Header{}
			header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36")
			for _, item := range ids {
				queue.Add(&ziva.Task{
					Url:    fmt.Sprintf("https://22v.net/article/%d", item),
					Method: http.MethodGet,
					Header: header,
				})
			}
			return queue
		},
		SucceedFunc: func(ctx *ziva.Context) {
			fmt.Println("成功的回调")
			fmt.Println("返回信息 :", ctx.Response.Status)
		},
		FailedFunc: func(ctx *ziva.Context) {
			fmt.Println("失败的回调")
			// A request that failed before any response arrived has no
			// *http.Response to inspect; report the error instead of
			// dereferencing a nil pointer.
			if ctx.Response == nil {
				fmt.Println("错误信息 :", ctx.Err)
				return
			}
			fmt.Println("返回状态 :", ctx.Response.StatusCode)
		},
		// NOTE(review): presumably a pause between tasks; the unit is not
		// shown on this page — confirm against the ziva source.
		SheepTime: 3000,
		IsDebug:   true,
		// Num is the number of goroutines.
		Num: 1,
	})

	job.Do()
}

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func BuilderHeader

func BuilderHeader(s string) *http.Header

BuilderHeader converts a string of headers into an *http.Header.

func GetFuncName

func GetFuncName(i interface{}) string

GetFuncName return the function name by reflection

func GetRandSleepTime

func GetRandSleepTime(min, max int) int

GetRandSleepTime generates a random integer in the range from min to max,

including min and max

func GetStatusByCode

func GetStatusByCode(code int) string

func Str2Int64

func Str2Int64(s string) int64

Str2Int64 converts the string s to an int64.

Types

type CallbackFunc

// CallbackFunc is the signature of a job callback; it receives the *Context
// of the request it is being called for.
type CallbackFunc func(ctx *Context)

type Context

// Context bundles the request, the response and related state of one HTTP
// call. It is the value handed to every CallbackFunc and returned by the
// request helpers (DoRequest, DoTask, HttpGet, HttpPost, HttpPut).
type Context struct {

	// Request is the *http.Request for this call.
	Request *http.Request

	// Response is the *http.Response for this call.
	// NOTE(review): presumably nil when the request failed before a
	// response was received — confirm.
	Response *http.Response

	// Err is an error associated with this call.
	// NOTE(review): presumably set when the request fails — confirm.
	Err error

	// RespBody holds the bytes of http.Response.Body.
	RespBody []byte

	// Task is a *Task attached to this context.
	// NOTE(review): presumably the task that produced this request — confirm.
	Task *Task

	// Data is a free-form key/value map.
	// NOTE(review): presumably carried over from Task.Data — confirm.
	Data map[string]interface{}

	// Options is an Options value attached to this context.
	// NOTE(review): presumably the options of the owning job — confirm.
	Options Options
	// contains filtered or unexported fields
}

func DoRequest

func DoRequest(url, method string, vs ...interface{}) *Context

func DoTask

func DoTask(task *Task) *Context

func HttpGet

func HttpGet(url string, vs ...interface{}) *Context

func HttpPost

func HttpPost(url string, vs ...interface{}) *Context

func HttpPut

func HttpPut(url string, vs ...interface{}) *Context

func NewContext

func NewContext(req *http.Request, vs ...interface{}) *Context

func NewRequest

func NewRequest(urlStr, method string, vs ...interface{}) (*Context, error)

func (*Context) Do

func (c *Context) Do()

func (*Context) SetProxy

func (c *Context) SetProxy(httpProxy string) *Context

func (*Context) SetProxyLib added in v0.0.3

func (c *Context) SetProxyLib(lib *ProxyLib) *Context

func (*Context) SetTransport

func (c *Context) SetTransport(f func() *http.Transport) *Context

func (*Context) ToByte

func (c *Context) ToByte() []byte

func (*Context) ToFile added in v0.0.2

func (c *Context) ToFile(writer io.Writer) error

ToFile writes to a file.

It needs an io.Writer.

func (*Context) ToHTML

func (c *Context) ToHTML() string

func (*Context) ToJSON

func (c *Context) ToJSON(v interface{}) error

func (*Context) ToSection

func (c *Context) ToSection(path string) string

func (*Context) ToString

func (c *Context) ToString() string
// Cookie describes a cookie by its name, value, domain, path and HttpOnly
// flag.
// NOTE(review): how a Cookie is attached to a request is not shown on this
// page — confirm against the ziva source.
type Cookie struct {
	// Name is the cookie name.
	Name string
	// Value is the cookie value.
	Value string
	// Domain is the cookie's domain.
	Domain string
	// Path is the cookie's path.
	Path string

	// HttpOnly is the cookie's HttpOnly flag.
	HttpOnly bool
}

type CreateQueue

// CreateQueue is a function that returns a TodoQueue; it is the type of the
// Options.CreateQueue field.
type CreateQueue func() TodoQueue

type FormData

// FormData is a map of form field names to string values.
type FormData map[string]string

func BuilderFormData

func BuilderFormData(s string) FormData

BuilderFormData converts a form-data string into a FormData.

For example, page=1&limit=15&id=&nick_name=&mobile=&source_type=-100 is converted to a FormData.

func (FormData) Set

func (f FormData) Set(k, v string)

type Job

type Job struct {
	// contains filtered or unexported fields
}

func NewJob

func NewJob(name string, options Options) *Job

func (*Job) Do

func (j *Job) Do()

type MemQueue

type MemQueue struct {
	// contains filtered or unexported fields
}

func (*MemQueue) Add

func (m *MemQueue) Add(task *Task)

func (*MemQueue) AddTasks

func (m *MemQueue) AddTasks(list []*Task)

func (*MemQueue) Clear

func (m *MemQueue) Clear() bool

func (*MemQueue) IsEmpty

func (m *MemQueue) IsEmpty() bool

func (*MemQueue) Pop

func (m *MemQueue) Pop() *Task

func (*MemQueue) Print

func (m *MemQueue) Print()

func (*MemQueue) Size

func (m *MemQueue) Size() int

type Options

// Options configures a Job; it is the second argument of NewJob.
type Options struct {

	// Num is the number of goroutines.
	Num int

	// Queue holds all tasks.
	Queue TodoQueue

	// CreateQueue is a function that returns a TodoQueue.
	// NOTE(review): presumably called by the job to build its queue when
	// Queue is not set — confirm.
	CreateQueue CreateQueue

	// StartFunc is a callback.
	// NOTE(review): presumably invoked before a task's request starts — confirm.
	StartFunc CallbackFunc

	// SucceedFunc is a callback.
	// NOTE(review): presumably invoked when a task succeeds — confirm.
	SucceedFunc CallbackFunc

	// RetryFunc is a callback.
	// NOTE(review): presumably invoked when a task is retried — confirm.
	RetryFunc CallbackFunc

	// FailedFunc is a callback.
	// NOTE(review): presumably invoked when a task fails — confirm.
	FailedFunc CallbackFunc

	// CompleteFunc is a callback.
	// NOTE(review): presumably invoked when a task completes — confirm.
	CompleteFunc CallbackFunc

	// ProxyIP is an http or https proxy IP.
	ProxyIP string

	// ProxyLib is a set of proxy IPs.
	ProxyLib *ProxyLib

	// SheepTime is an integer setting.
	// NOTE(review): the name looks like a typo of "SleepTime", presumably a
	// pause between tasks; the unit is not shown here — confirm.
	SheepTime int

	// TimeOut is an integer setting.
	// NOTE(review): presumably the request timeout; the unit is not shown
	// here — confirm.
	TimeOut int

	// IsDebug is the debug flag.
	// NOTE(review): presumably enables debug output — confirm.
	IsDebug bool
}

type ProxyIP

// ProxyIP describes one proxy endpoint. Its fields mirror the parameters of
// NewProxyIP (ip, port, user, pass, isTLS).
type ProxyIP struct {
	// IP is the proxy's IP.
	IP string
	// Port is the proxy's port.
	Port int
	// User is the user name. NOTE(review): presumably for proxy
	// authentication — confirm.
	User string
	// Pass is the password. NOTE(review): presumably for proxy
	// authentication — confirm.
	Pass string
	// IsTLS is the TLS flag. NOTE(review): presumably selects an https
	// proxy rather than an http one — confirm.
	IsTLS bool
}

func NewProxyIP

func NewProxyIP(ip string, port int, user, pass string, isTLS bool) *ProxyIP

func (*ProxyIP) String

func (p *ProxyIP) String() string

type ProxyLib added in v0.0.3

type ProxyLib struct {
	// contains filtered or unexported fields
}

func NewProxyLib added in v0.0.3

func NewProxyLib() *ProxyLib

NewProxyLib return new ProxyLib

func (*ProxyLib) Add added in v0.0.3

func (p *ProxyLib) Add(proxyIP *ProxyIP)

Add proxyIP to ProxyLib

func (*ProxyLib) Del added in v0.0.3

func (p *ProxyLib) Del(n int)

Del deletes an IP by n

func (*ProxyLib) Get added in v0.0.3

func (p *ProxyLib) Get() (string, int32)

Get gets an IP

type RedisQueue

type RedisQueue struct {
	// contains filtered or unexported fields
}

RedisQueue task queue in redis

func (*RedisQueue) Add

func (q *RedisQueue) Add(task *Task)

Add adds a task to the queue

func (*RedisQueue) AddTasks

func (q *RedisQueue) AddTasks(list []*Task)

AddTasks adds multiple tasks to the queue

func (*RedisQueue) Clear

func (q *RedisQueue) Clear() bool

Clear clears all tasks

func (*RedisQueue) IsEmpty

func (q *RedisQueue) IsEmpty() bool

IsEmpty returns whether the queue is empty

func (*RedisQueue) Pop

func (q *RedisQueue) Pop() *Task

Pop gets a task while removing it from the queue

from left

func (*RedisQueue) Print

func (q *RedisQueue) Print()

func (*RedisQueue) Size

func (q *RedisQueue) Size() int

Size returns queue length

type StatusCode

// StatusCode is a map from an integer code to a string.
// NOTE(review): presumably HTTP status code to status text, as looked up by
// GetStatusByCode — confirm.
type StatusCode map[int]string

type Task

// Task describes one request to be queued: a URL, an HTTP method, and
// optional payload, form data, headers and user data.
type Task struct {
	// Url is the request URL.
	Url string

	// Method is the HTTP method, e.g. "GET".
	Method string

	// Payload is a byte payload.
	// NOTE(review): presumably sent as the raw request body — confirm.
	Payload []byte

	// FormData is a set of form fields.
	// NOTE(review): presumably sent as the form body of the request — confirm.
	FormData FormData

	// Header is the *http.Header attached to the task.
	Header *http.Header

	// Data is a free-form key/value map.
	// NOTE(review): presumably passed through to the callbacks' Context — confirm.
	Data map[string]interface{}
}

type TodoQueue

// TodoQueue is the task-queue abstraction used by a Job. NewMemQueue and
// NewRedisQueue both return a TodoQueue.
type TodoQueue interface {
	// Add adds a task to the queue.
	Add(task *Task)

	// AddTasks adds multiple tasks to the queue.
	AddTasks(list []*Task)

	// Pop gets a task while removing it from the queue.
	Pop() *Task

	// Clear clears all tasks.
	// NOTE(review): the meaning of the bool result is not shown on this
	// page — presumably it reports success; confirm.
	Clear() bool

	// Size returns the queue length.
	Size() int

	// IsEmpty returns whether the queue is empty.
	IsEmpty() bool

	// Print prints the queue.
	// NOTE(review): the output format and destination are not shown on this
	// page — confirm.
	Print()
}

func NewMemQueue

func NewMemQueue() TodoQueue

func NewRedisQueue

func NewRedisQueue(key string, rc *goredis.RedisConfig) TodoQueue

NewRedisQueue use redis configuration

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL