configurable

package
v0.0.0-...-f35e05c Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 5, 2022 License: Apache-2.0 Imports: 16 Imported by: 1

Documentation

Index

Constants

View Source
// Package-level constants: the default worker count, the reserved "__conf.*"
// key names, the reserved start/end step names, and the reserved result key
// for the request URL.
const (
	DefaultThread = 12 // default number of goroutines

	CollyConfName      = "__conf.name"      // configuration name
	CollyConfStepName  = "__conf.step.name" // step name
	CollyConfExt       = "__conf.ext"       // extra information
	CollyConfStepStart = "start"            // name of the start step
	CollyConfStepEnd   = "final"            // name of the end step

	CollyPipelineURL = "_url" // result - the requested URL
)

Variables

View Source
var (
	// ErrNotStorage is a sentinel error carrying the message
	// "not found configurable storage".
	// NOTE(review): presumably reported when no Storage has been configured
	// (see WithConfigStorage) — confirm against the call sites.
	ErrNotStorage = errors.New("not found configurable storage")
)

Functions

This section is empty.

Types

type Collector

// Collector is the type returned by New and the receiver that every Option
// is applied to. Its fields are not part of this listing.
type Collector struct {
	// contains filtered or unexported fields
}

func New

func New(name string, opts ...Option) (*Collector, error)

func (*Collector) GracefulStop

func (c *Collector) GracefulStop(ctx context.Context) error

func (*Collector) Init

func (c *Collector) Init() (err error)

func (*Collector) Name

func (c *Collector) Name() string

func (*Collector) PackageName

func (c *Collector) PackageName() string

func (*Collector) Start

func (c *Collector) Start() error

func (*Collector) Stop

func (c *Collector) Stop() error

type DirStorage

// DirStorage is the type returned by NewDirStorage, which takes a path and an
// *embed.FS. Its GetConfig method has the same signature as the one required
// by the Storage interface. Its fields are not part of this listing.
type DirStorage struct {
	// contains filtered or unexported fields
}

func NewDirStorage

func NewDirStorage(path string, fs2 *embed.FS) (*DirStorage, error)

func (*DirStorage) GetConfig

func (d *DirStorage) GetConfig(name string) (IConfig, error)

type Element

// Element describes one extraction rule. It is used as the value type of
// Step.Ext, FileConfig.Final and IConfig.GetFinal. Every field carries both a
// mapstructure tag and a json tag with the same key.
type Element struct {
	CSSPath string `mapstructure:"css_path" json:"css_path"` // CSS path used for extraction
	Attr    string `mapstructure:"attr" json:"attr"`         // node content to extract
	ExtName string `mapstructure:"ext_name" json:"ext_name"` // extra node information
	List    bool   `mapstructure:"list" json:"list"`         // whether this is a list
}

type FileConfig

// FileConfig is a JSON-tagged configuration record. The methods listed for
// *FileConfig (GetBaseURL, GetBaseRequest, GetProxy, GetName, GetStep,
// GetFinal) have the same signatures as the IConfig method set.
//
// NOTE(review): Steps is presumably keyed by step name (GetStep takes a name
// and returns []Step) and Final presumably backs GetFinal — confirm against
// the method bodies, which are not visible here.
type FileConfig struct {
	Name           string             `json:"name"`
	BaseURL        string             `json:"url"`         // JSON key is "url", not "base_url"
	BaseHttpMethod string             `json:"http_method"` // JSON key is "http_method"
	Proxy          string             `json:"proxy"`
	Steps          map[string][]Step  `json:"steps"`
	Final          map[string]Element `json:"final"`
}

func (*FileConfig) GetBaseRequest

func (f *FileConfig) GetBaseRequest(urls ...string) *colly.Request

func (*FileConfig) GetBaseURL

func (f *FileConfig) GetBaseURL() string

func (*FileConfig) GetFinal

func (f *FileConfig) GetFinal() map[string]Element

func (*FileConfig) GetName

func (f *FileConfig) GetName() string

func (*FileConfig) GetProxy

func (f *FileConfig) GetProxy() string

func (*FileConfig) GetStep

func (f *FileConfig) GetStep(name string) []Step

type IConfig

// IConfig is the read-only view of a configuration. It is the type returned
// by Storage.GetConfig.
type IConfig interface {
	GetBaseURL() string                      // get the start URI
	GetBaseRequest(...string) *colly.Request // get the starting request
	GetProxy() string                        // get the proxy information
	GetName() string                         // get the configuration name
	GetStep(name string) []Step              // get one crawl step
	GetFinal() map[string]Element            // get the result definition; fields named _* are reserved
}

type Logger

// Logger is the logging interface accepted by WithLogger. It has four
// methods, one per level (debug, info, warn, error), each taking a format
// string followed by variadic arguments.
type Logger interface {
	Debugf(format string, v ...interface{})
	Infof(format string, args ...interface{})
	Warnf(format string, args ...interface{})
	Errorf(format string, args ...interface{})
}

type Option

// Option is a function that receives a *Collector. New accepts a variadic
// list of Options, and the With* functions in this package each return one.
type Option func(c *Collector)

func WithCollector

func WithCollector(collector *colly.Collector) Option

func WithConfigStorage

func WithConfigStorage(storage Storage) Option

func WithLogger

func WithLogger(l Logger) Option

func WithOnly

func WithOnly(only bool) Option

func WithPipeline

func WithPipeline(pipeLineFunc PipelineFunc) Option

func WithQueue

func WithQueue(q queue.Interface) Option

type PipelineFunc

// PipelineFunc is the callback type accepted by WithPipeline. It takes a name
// and an arbitrary value and returns an error.
// NOTE(review): what name identifies (configuration name vs. step name) and
// the concrete type of v are not visible here — confirm at the call site.
type PipelineFunc func(name string, v interface{}) error

type Step

// Step describes one crawl step. Every field carries both a mapstructure tag
// and a json tag with the same key. The listing also shows value-receiver
// accessors GetAttr and GetHttpMethod.
//
// NOTE(review): the original comment on Next refers to "start-end", while
// CollyConfStepEnd is declared as "final" — confirm which value Next expects
// for the last step.
type Step struct {
	HttpMethod string             `mapstructure:"http_method" json:"http_method"` // HTTP request method; default: GET
	CSSPath    string             `mapstructure:"css_path" json:"css_path"`       // CSS path used for extraction
	Attr       string             `mapstructure:"attr" json:"attr"`               // node content to extract
	Next       string             `mapstructure:"next" json:"next"`               // next step; start and end denote the start and the result extraction respectively
	Ext        map[string]Element `mapstructure:"ext" json:"ext"`                 // extra node information
}

func (Step) GetAttr

func (s Step) GetAttr() string

func (Step) GetHttpMethod

func (s Step) GetHttpMethod() string

type Storage

// Storage is the interface accepted by WithConfigStorage. It has a single
// method that looks up an IConfig by name and may return an error.
// (*DirStorage).GetConfig has the same signature.
type Storage interface {
	GetConfig(name string) (IConfig, error)
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL