robin

package module
v0.0.0-...-e26e778 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 25, 2016 License: MIT Imports: 10 Imported by: 0

README

robin

A JSON-configurable scraper built in Go. #golang

Documentation

Index

Constants

View Source
// Log levels, numbered by iota from 0 (LogLevelFatal) to 3 (LogLevelDebug).
// DefaultRunnerOptions uses LogLevelFatal as its LogLevel.
// NOTE(review): presumably a higher value enables more verbose logging — confirm.
const (
	LogLevelFatal = iota // 0
	LogLevelError        // 1
	LogLevelInfo         // 2
	LogLevelDebug        // 3
)

Variables

View Source
// DefaultRunnerOptions is a shared *RunnerOptions value with ExportersSize set
// to 1 and LogLevel set to LogLevelFatal.
// NOTE(review): presumably what NewRunner falls back to when given nil
// options — confirm against the NewRunner implementation.
var DefaultRunnerOptions = &RunnerOptions{
	ExportersSize: 1,
	LogLevel:      LogLevelFatal,
}

Functions

func NewAttributeExtractor

func NewAttributeExtractor(selector string, attributeName string) *attributeExtractor

func NewFollower

func NewFollower(name string, selector string, de DataExtractors) *follower

func NewTextExtractor

func NewTextExtractor(selector string) *textExtractor

Types

type AppLogger

// AppLogger is the logging interface passed to Fetcher.Fetch and
// Scraper.Scrape. It has one method per LogLevel* constant, each taking a
// single string.
// NOTE(review): the parameter is named f, but no variadic args are accepted,
// so it is presumably a finished message rather than a format string — confirm.
type AppLogger interface {
	Fatal(f string)
	Error(f string)
	Info(f string)
	Debug(f string)
}

type ConfigDecodeError

// ConfigDecodeError is an error type; *ConfigDecodeError implements error
// through its Error method.
// NOTE(review): presumably returned when a configuration cannot be decoded — confirm.
type ConfigDecodeError struct {
	// contains filtered or unexported fields
}

func (*ConfigDecodeError) Error

func (e *ConfigDecodeError) Error() string

type ConfigFieldError

// ConfigFieldError is an error type; *ConfigFieldError implements error
// through its Error method.
// NOTE(review): presumably reports an invalid or missing configuration field — confirm.
type ConfigFieldError struct {
	// contains filtered or unexported fields
}

func (*ConfigFieldError) Error

func (e *ConfigFieldError) Error() string

type DataExtractor

// DataExtractor produces a single string value from a goquery selection.
type DataExtractor interface {
	// Extract returns the string extracted from the given selection.
	Extract(*goquery.Selection) string
}

type DataExtractors

// DataExtractors maps a string key to a DataExtractor.
// NOTE(review): keys are presumably the Item field names to populate — confirm.
type DataExtractors map[string]DataExtractor

type Exporter

// Exporter consumes scraped items one at a time.
type Exporter interface {
	// Export receives a single Item; it returns nothing, so failures cannot
	// be reported to the caller through this method.
	Export(Item)
}

func NewStdoutExporter

func NewStdoutExporter() Exporter

type Extractor

// Extractor is a named component that turns a goquery selection into Items.
type Extractor interface {
	// Name returns the extractor's name.
	Name() string
	// Extract returns the Items found in sel.
	Extract(sel *goquery.Selection) Items
}

type Extractors

// Extractors maps a string key to an Extractor.
// NOTE(review): keys presumably match Extractor.Name — confirm.
type Extractors map[string]Extractor

type Fetcher

// Fetcher retrieves the content behind a URL.
type Fetcher interface {
	// Fetch performs the retrieval, logging through the given AppLogger, and
	// returns a Response.
	Fetch(AppLogger) Response
	// New returns a Fetcher for the given URL.
	New(*url.URL) Fetcher
	// URL returns the URL associated with this Fetcher.
	URL() *url.URL
}

func NewFetcher

func NewFetcher(u string) (Fetcher, error)

type Follower

// Follower is a named component that collects links from a goquery selection.
type Follower interface {
	// Name returns the follower's name.
	Name() string
	// Links returns the links found in the given selection as strings.
	// NOTE(review): whether these are absolute or relative URLs is not visible here.
	Links(*goquery.Selection) []string
}

type Followers

// Followers maps a string key to a Follower.
// NOTE(review): keys presumably match Follower.Name — confirm.
type Followers map[string]Follower

type HTTPFetcher

// HTTPFetcher is a fetcher created with NewHTTPFetcher from a *url.URL.
// *HTTPFetcher has Fetch, New and URL methods matching the Fetcher interface.
type HTTPFetcher struct {
	// contains filtered or unexported fields
}

func NewHTTPFetcher

func NewHTTPFetcher(u *url.URL) *HTTPFetcher

func (*HTTPFetcher) Fetch

func (f *HTTPFetcher) Fetch(log AppLogger) Response

func (*HTTPFetcher) New

func (f *HTTPFetcher) New(u *url.URL) Fetcher

func (*HTTPFetcher) URL

func (f *HTTPFetcher) URL() *url.URL

type Item

// Item is a single scraped record: a map from string keys to string values.
type Item map[string]string

type ItemExtractor

// ItemExtractor is a named extractor; *ItemExtractor has Name and Extract
// methods matching the Extractor interface.
type ItemExtractor struct {
	// Selector is a selector string.
	// NOTE(review): presumably a CSS selector applied via goquery — confirm.
	Selector string
	// DataExtractors holds the per-key extractors used by this ItemExtractor.
	DataExtractors DataExtractors
	// contains filtered or unexported fields
}

func NewItemExtractor

func NewItemExtractor(name string, de DataExtractors) *ItemExtractor

func (*ItemExtractor) Extract

func (e *ItemExtractor) Extract(sel *goquery.Selection) Items

func (*ItemExtractor) Name

func (e *ItemExtractor) Name() string

type Items

// Items is a slice of Item, as returned by Extractor.Extract.
type Items []Item

type Response

// Response is the result of Fetcher.Fetch.
type Response interface {
	// ReadCloser returns the response content as an io.ReadCloser.
	// NOTE(review): the caller is presumably responsible for closing it — confirm.
	ReadCloser() io.ReadCloser
	// Error returns the error associated with the response, if any.
	// Note that this signature differs from the built-in error interface,
	// whose Error method returns a string.
	Error() error
}

type Runner

// Runner holds a set of scrapers; it is created with NewRunner and executed
// with Run, which takes a name and an Exporter.
type Runner struct {
	// Scrapers maps a string key to a *Scraper.
	// NOTE(review): presumably keyed by the name passed to Run — confirm.
	Scrapers map[string]*Scraper
	// contains filtered or unexported fields
}

func NewRunner

func NewRunner(scrapers map[string]*Scraper, opts *RunnerOptions) *Runner

func (*Runner) Run

func (r *Runner) Run(name string, exp Exporter) error

type RunnerOptions

// RunnerOptions configures a Runner; see DefaultRunnerOptions for a ready-made
// value (ExportersSize 1, LogLevel LogLevelFatal).
type RunnerOptions struct {
	// ExportersSize is an integer setting related to exporters.
	// NOTE(review): presumably the number of concurrent exporters or an
	// export queue size — confirm against the Runner implementation.
	ExportersSize int
	// LogLevel is set from the LogLevel* constants in DefaultRunnerOptions.
	LogLevel int
}

type Scraper

// Scraper bundles a name with the Fetcher, Exporter, Extractors and Followers
// used by its Scrape method.
type Scraper struct {
	// Name is the scraper's name.
	Name string
	// Fetcher retrieves content for this scraper.
	Fetcher Fetcher
	// Exporter is an Exporter stored on the scraper.
	// NOTE(review): Scrape also takes an Exporter argument; which one takes
	// precedence is not visible here.
	Exporter Exporter
	// Extractors holds this scraper's extractors by key.
	Extractors Extractors
	// Followers holds this scraper's followers by key.
	Followers Followers
}

func (*Scraper) Scrape

func (s *Scraper) Scrape(log AppLogger, exp Exporter, q *exportQueue) error

type ScrapersConfig

// ScrapersConfig maps a string key to an unexported scraperConfig. It is
// produced by NewConfigFromFile and NewConfigFromReader, and its Scrapers
// method converts it to a map[string]*Scraper.
// NOTE(review): keys are presumably scraper names — confirm.
type ScrapersConfig map[string]scraperConfig

func NewConfigFromFile

func NewConfigFromFile(p string) (ScrapersConfig, error)

func NewConfigFromReader

func NewConfigFromReader(r io.Reader) (ScrapersConfig, error)

func (ScrapersConfig) Scrapers

func (ssc ScrapersConfig) Scrapers() (map[string]*Scraper, error)

Directories

Path Synopsis
cmd

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL