webber

package module
v0.0.0-...-6ffd0bb Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 29, 2019 License: MIT Imports: 12 Imported by: 0

README

Webber

一个轻量级爬虫框架

Get Started

package main

import (
	"strings"
	"github.com/tzxyz/webber"
)

// main configures and starts a crawler that begins at the first listing page.
// The processor follows article links found on listing pages and, for every
// other page, collects the image URLs and the title as result items.
func main() {
	webber.New().
		Name("妹子图").
		StartUrls("http://www.meizitu.com/a/more_1.html").
		Processor(func(response *webber.Response) *webber.Result {
			// Listing page: queue every article link for crawling.
			// Response exposes Url() (not GetUrl()) in the documented API.
			if strings.HasPrefix(response.Url(), "http://www.meizitu.com/a/more_") {
				links := response.Html().Xpath("//h3[@class = 'tit']/a/@href")
				return webber.NewResult().PushUrls(links...)
			}
			// Detail page: extract the picture sources and the title.
			return webber.NewResult().
				PushItem("images", response.Html().Xpath("//div[@id='picture']/p/img/@src")).
				PushItem("title", response.Html().Xpath("//div[@class='metaRight']/h2/a/text()"))
		}).
		Start()
}

LICENSE

MIT

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Package-level default components.
// NOTE(review): presumably these are what New wires into a Webber when the
// caller does not override them via Downloader/Scheduler/Pipelines — confirm.
var (
	// DefaultDownloader is the HttpDownloader function value.
	DefaultDownloader = HttpDownloader
	// DefaultScheduler is the shared InMemoryScheduler instance.
	DefaultScheduler  = InMemoryScheduler
	// DefaultPipelines is a one-element slice holding ConsolePipeline.
	DefaultPipelines  = []Pipeline{ConsolePipeline}
)
View Source
// ConsolePipeline is a pipeline function that hands the given result to
// logger.Info and does nothing else.
var ConsolePipeline = func(result *Result) {
	logger.Info(result)
}
View Source
// HttpDownloader fetches request.url with a GET issued through requests.New()
// and wraps the outcome with newResponse.
//
// It first logs the URL held in request.req at debug level. If the GET reports
// one or more errors, it returns a nil response together with those errors;
// otherwise it returns the wrapped response and a nil error slice.
var HttpDownloader = func(request *Request) (*Response, []error) {
	target := request.req.URL.String()
	logger.Debug("Starting download url: " + target)

	// The second value returned by EndBytes is intentionally discarded
	// (presumably the raw body bytes — confirm against the requests library).
	httpResp, _, failures := requests.New().Get(request.url).EndBytes()
	if len(failures) > 0 {
		return nil, failures
	}

	return newResponse(request, httpResp), nil
}
View Source
// InMemoryScheduler is a package-level *QueueScheduler whose queue field is
// initialized with list.New(). Being a single shared pointer, every user of
// this variable operates on the same queue.
var InMemoryScheduler = &QueueScheduler{queue: list.New()}

Functions

func Debug

func Debug(args ...interface{})

func Error

func Error(args ...interface{})

func Fatal

func Fatal(args ...interface{})

func Info

func Info(args ...interface{})

func Panic

func Panic(args ...interface{})

func Warn

func Warn(args ...interface{})

Types

type Downloader

// Downloader is a function that takes a *Request and returns a *Response
// together with a slice of errors. HttpDownloader has this signature.
type Downloader func(request *Request) (*Response, []error)

type Items

// Items is a string-keyed map of arbitrary values; it is the type returned by
// Result.Items.
type Items map[string]interface{}

type Pipeline

// Pipeline is a function that receives a *Result and returns nothing.
// ConsolePipeline has this signature.
type Pipeline func(result *Result)

type Processor

// Processor is a function that turns a *Response into a *Result. It is the
// callback type accepted by Webber.Processor.
type Processor func(response *Response) *Result

type QueueScheduler

type QueueScheduler struct {
	// contains filtered or unexported fields
}

func (*QueueScheduler) Poll

func (s *QueueScheduler) Poll() *Request

func (*QueueScheduler) Push

func (s *QueueScheduler) Push(request *Request)

type Request

type Request struct {
	// contains filtered or unexported fields
}

type Response

type Response struct {
	// contains filtered or unexported fields
}

func (*Response) Html

func (r *Response) Html() *Response

func (*Response) Url

func (r *Response) Url() string

func (*Response) Xpath

func (r *Response) Xpath(path string) []string

type Result

type Result struct {
	// contains filtered or unexported fields
}

func NewResult

func NewResult() *Result

func (*Result) HasNextUrl

func (r *Result) HasNextUrl() bool

func (*Result) Items

func (r *Result) Items() Items

func (*Result) NextUrls

func (r *Result) NextUrls() []string

func (*Result) PushItem

func (r *Result) PushItem(key string, value interface{}) *Result

func (*Result) PushUrls

func (r *Result) PushUrls(urls ...string) *Result

type Scheduler

// Scheduler is the interface for a store of pending requests. *QueueScheduler
// provides both methods with matching signatures.
type Scheduler interface {
	// Push hands a request to the scheduler.
	Push(request *Request)
	// Poll returns a request from the scheduler.
	// NOTE(review): behavior when nothing is pending (e.g. a nil return) is
	// not visible here — confirm against the implementation.
	Poll() *Request
}

type TraceHook

type TraceHook struct{}

func (*TraceHook) Fire

func (h *TraceHook) Fire(entry *logrus.Entry) error

func (*TraceHook) Levels

func (h *TraceHook) Levels() []logrus.Level

type Webber

type Webber struct {
	// contains filtered or unexported fields
}

func New

func New() *Webber

func (*Webber) Downloader

func (w *Webber) Downloader(downloader Downloader) *Webber

func (*Webber) Name

func (w *Webber) Name(name string) *Webber

func (*Webber) Pipelines

func (w *Webber) Pipelines(pipelines ...Pipeline) *Webber

func (*Webber) Processor

func (w *Webber) Processor(processor Processor) *Webber

func (*Webber) Scheduler

func (w *Webber) Scheduler(scheduler Scheduler) *Webber

func (*Webber) Start

func (w *Webber) Start()

func (*Webber) StartUrls

func (w *Webber) StartUrls(urls ...string) *Webber

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL