generic

package
v0.0.0-...-7641ad8 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 2, 2023 License: MIT Imports: 13 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func New

func New(conf *Configuration) source.Source

New creates a new scraper with the given configuration.

Types

type ChapterExtractor

type ChapterExtractor struct {
	// Selector is the CSS selector used to find the chapter elements.
	Selector string
	// Name returns the chapter name from an element found by Selector.
	Name func(*goquery.Selection) string
	// URL returns the chapter URL from an element found by Selector.
	URL func(*goquery.Selection) string
	// Volume returns the volume from an element found by Selector.
	Volume func(*goquery.Selection) string
	// Date returns the published date of the chapter, if available.
	// NOTE(review): presumably nil means no date was found — confirm against the caller.
	Date func(*goquery.Selection) *time.Time
}

ChapterExtractor is responsible for finding chapter elements by selector and extracting the required data from them.

type Configuration

type Configuration struct {
	// Name is the name of the scraper.
	Name string
	// Delay is the delay between requests.
	Delay time.Duration
	// Parallelism is the parallelism of the scraper.
	// NOTE(review): presumably the maximum number of concurrent requests — confirm.
	Parallelism uint8

	// ReverseChapters, if true, makes chapters be shown in reverse order.
	// NOTE(review): reversed relative to the order found on the page, presumably — confirm.
	ReverseChapters bool

	// NeedsHeadlessBrowser, if true, makes a headless browser proxy every request.
	NeedsHeadlessBrowser bool

	// BaseURL is the base URL of the source.
	BaseURL string
	// GenerateSearchURL builds the search URL from the base URL and the query.
	// E.g. "one piece" -> "https://manganelo.com/search/story/one%20piece"
	GenerateSearchURL func(baseUrl string, query string) string

	// MangaExtractor finds manga elements and extracts the required data from them.
	MangaExtractor *MangaExtractor
	// ChapterExtractor finds chapter elements and extracts the required data from them.
	ChapterExtractor *ChapterExtractor
	// PageExtractor finds page elements and extracts the required data from them.
	PageExtractor *PageExtractor
}

Configuration is a generic scraper configuration that defines behavior of the scraper

func (*Configuration) ID

func (c *Configuration) ID() string

type MangaExtractor

type MangaExtractor struct {
	// Selector is the CSS selector used to find the manga elements.
	Selector string
	// Name returns the manga name from an element found by Selector.
	Name func(*goquery.Selection) string
	// URL returns the manga URL from an element found by Selector.
	URL func(*goquery.Selection) string
	// Cover returns the cover from an element found by Selector.
	// NOTE(review): presumably the cover image URL — confirm.
	Cover func(*goquery.Selection) string
}

MangaExtractor is responsible for finding manga elements by selector and extracting the required data from them.

type MangaResult

// MangaResult holds a list of mangas and carries a JSON tag for serialization.
type MangaResult struct {
	// Mangas is encoded under the "mangas" JSON key and omitted when empty.
	Mangas []*source.Manga `json:"mangas,omitempty"`
}

type PageExtractor

type PageExtractor struct {
	// Selector is the CSS selector used to find the page elements.
	Selector string
	// Name returns the page name from an element found by Selector.
	Name func(*goquery.Selection) string
	// URL returns the page URL from an element found by Selector.
	URL func(*goquery.Selection) string
}

PageExtractor is responsible for finding page elements by selector and extracting the required data from them.

type Scraper

type Scraper struct {
	// contains filtered or unexported fields
	// No exported fields are listed, so callers outside the package cannot
	// read or set a Scraper's state directly.
}

Scraper is a generic scraper that downloads HTML pages and parses them.

func (*Scraper) ID

func (s *Scraper) ID() string

ID returns the ID of the scraper.

func (*Scraper) LoadChaptersOf

func (s *Scraper) LoadChaptersOf(manga *source.Manga) error

LoadChaptersOf loads the chapters of the given source.Manga.

func (*Scraper) LoadPagesOf

func (s *Scraper) LoadPagesOf(chapter *source.Chapter) error

LoadPagesOf loads the pages of the given source.Chapter.

func (*Scraper) Name

func (s *Scraper) Name() string

Name returns the name of the scraper.

func (*Scraper) Search

func (s *Scraper) Search(query string) ([]*source.Manga, error)

Search searches for mangas matching the given query.

Directories

Path Synopsis
rod

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL