quotes

package
v0.5.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 23, 2023 License: MIT Imports: 9 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func NewQuotesEngine

func NewQuotesEngine(opts ...tegenaria.EngineOption) *tegenaria.CrawlEngine

NewQuotesEngine 创建引擎

Types

type ExampleSpider

// ExampleSpider defines an example spider.
type ExampleSpider struct {
	// Name is the spider's name.
	Name string
	// FeedUrls holds the seed URLs.
	FeedUrls []string
}

ExampleSpider 定义一个spider

func (*ExampleSpider) ErrorHandler

func (e *ExampleSpider) ErrorHandler(err *tegenaria.Context, req chan<- *tegenaria.Context)

ErrorHandler 异常处理函数,用于处理数据抓取过程中出现的错误

func (*ExampleSpider) GetFeedUrls

func (e *ExampleSpider) GetFeedUrls() []string

GetFeedUrls 获取种子urls

func (*ExampleSpider) GetName

func (e *ExampleSpider) GetName() string

GetName 获取爬虫名

func (*ExampleSpider) Parser

func (e *ExampleSpider) Parser(resp *tegenaria.Context, req chan<- *tegenaria.Context) error

Parser 默认的解析函数

func (*ExampleSpider) StartRequest

func (e *ExampleSpider) StartRequest(req chan<- *tegenaria.Context)

StartRequest 爬虫启动,请求种子urls

type HeadersDownloadMiddler

// HeadersDownloadMiddler is a download middleware that sets request headers.
type HeadersDownloadMiddler struct {
	// Priority is the middleware's priority.
	Priority int
	// Name is the middleware's name.
	Name string
}

HeadersDownloadMiddler 请求头设置下载中间件

func (HeadersDownloadMiddler) GetName

func (m HeadersDownloadMiddler) GetName() string

func (HeadersDownloadMiddler) GetPriority

func (m HeadersDownloadMiddler) GetPriority() int

GetPriority 获取优先级,数字越小优先级越高

func (HeadersDownloadMiddler) ProcessRequest

func (m HeadersDownloadMiddler) ProcessRequest(ctx *tegenaria.Context) error

ProcessRequest 处理request请求对象 此处用于增加请求头 按优先级执行

func (HeadersDownloadMiddler) ProcessResponse

func (m HeadersDownloadMiddler) ProcessResponse(ctx *tegenaria.Context, req chan<- *tegenaria.Context) error

ProcessResponse 用于处理请求成功之后的response,执行顺序为逆优先级,即优先级越高执行顺序越晚

type ProxyDownloadMiddler

// ProxyDownloadMiddler is a middleware that attaches a proxy.
//
// NOTE(review): Priority and Name presumably mirror the fields of the same
// names on HeadersDownloadMiddler (middleware priority and middleware name);
// confirm against the implementation.
type ProxyDownloadMiddler struct {
	Priority int
	Name     string
}

ProxyDownloadMiddler 代理挂载中间件

type QuotesbotItem

// QuotesbotItem is an example tegenaria item.
//
// NOTE(review): Text, Author and Tags presumably hold a scraped quote's text,
// its author and its tags; the encoding of Tags within a single string is not
// shown here — confirm against the parser.
type QuotesbotItem struct {
	Text   string
	Author string
	Tags   string
}

QuotesbotItem tegenaria item示例

type QuotesbotItemPipeline

// QuotesbotItemPipeline is an example of the tegenaria.PipelinesInterface
// interface: a pipeline used for item processing.
type QuotesbotItemPipeline struct {
	// Priority — presumably the value reported by GetPriority; TODO confirm.
	Priority int
}

QuotesbotItemPipeline tegenaria.PipelinesInterface 接口示例 用于item处理的pipeline

func (*QuotesbotItemPipeline) GetPriority

func (p *QuotesbotItemPipeline) GetPriority() int

GetPriority 获取该pipeline的优先级

func (*QuotesbotItemPipeline) ProcessItem

ProcessItem item处理函数

type QuotesbotItemPipeline2

// QuotesbotItemPipeline2 has the same shape and method set (GetPriority,
// ProcessItem) as QuotesbotItemPipeline.
//
// NOTE(review): presumably a second example item pipeline; no description is
// given for it — confirm its purpose against the implementation.
type QuotesbotItemPipeline2 struct {
	// Priority — presumably the value reported by GetPriority; TODO confirm.
	Priority int
}

func (*QuotesbotItemPipeline2) GetPriority

func (p *QuotesbotItemPipeline2) GetPriority() int

func (*QuotesbotItemPipeline2) ProcessItem

type QuotesbotItemPipeline3

// QuotesbotItemPipeline3 has the same shape and method set (GetPriority,
// ProcessItem) as QuotesbotItemPipeline.
//
// NOTE(review): presumably a third example item pipeline; no description is
// given for it — confirm its purpose against the implementation.
type QuotesbotItemPipeline3 struct {
	// Priority — presumably the value reported by GetPriority; TODO confirm.
	Priority int
}

func (*QuotesbotItemPipeline3) GetPriority

func (p *QuotesbotItemPipeline3) GetPriority() int

func (*QuotesbotItemPipeline3) ProcessItem

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL