scrape

package
v0.0.0-...-fc280d6 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 16, 2022 License: BSD-2-Clause-Views Imports: 16 Imported by: 0

README

This package contains extremely ugly code. If problems occur, look into the functions that do *not* use the walkDeep function to traverse the DOM, and rewrite them to use it instead.

Also, I'm not sure when it started, but for some reason relation chapter orders keep changing over time (only observed with the Touhou relation so far). I don't know what's up with that, and there's nothing I can do about it.
A possible way to get around it seems to be to fetch really quickly.

I think I'll fetch by chapter instead

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Sentinel errors describing a chapter page whose expected pieces (the
// script element with the chapter content, the chapter-title element, or the
// chapter-details element) are either absent (ErrNo*) or malformed (ErrBad*),
// as stated by their messages. Compare against them with errors.Is.
var (
	ErrNoContentScript   = errors.New("script element with chapter content not found")
	ErrNoChapterTitle    = errors.New("no chapter-title element")
	ErrNoChapterDetails  = errors.New("no chapter-details element")
	ErrBadContentScript  = errors.New("malformed script element with chapter content")
	ErrBadChapterTitle   = errors.New("malformed chapter-title element")
	ErrBadChapterDetails = errors.New("malformed chapter-details element")
)
View Source
// Sentinel errors for responses that are not regular content: per their
// messages, the server returned an error page (ErrServer) or a rate-limiting
// page (ErrRateLimit).
var (
	ErrServer    = errors.New("server returned error page")
	ErrRateLimit = errors.New("server returned rate-limiting page")
)
View Source
// Sentinel errors concerning the tag list and the paginator: per their
// messages, an invalid path inside the tag list, a tag list or paginator that
// was not found (ErrNo*), or one that was found but malformed (ErrBad*).
var (
	ErrInvalidPath  = errors.New("invalid path in tag list")
	ErrNoTagList    = errors.New("tag list not found")
	ErrBadTagList   = errors.New("malformed tag list")
	ErrNoPaginator  = errors.New("paginator not found")
	ErrBadPaginator = errors.New("malformed paginator")
)
View Source
// Sentinel errors concerning the element with id "image-lists": it was
// either not found (ErrNoImageLists) or malformed (ErrBadImageLists).
var (
	ErrNoImageLists  = errors.New("element with id image-lists not found")
	ErrBadImageLists = errors.New("malformed element with id image-lists")
)
View Source
// Sentinel errors concerning the elements of a relation page, identified in
// the messages by their class names. ErrNo* values report an element that was
// not found; ErrBad* values report an element that was malformed.
// Not every ErrBad* value has an ErrNo* counterpart.
var (
	ErrNoURL           = errors.New("element holding relation URL not found")
	ErrNoChapterList   = errors.New("element with class chapter-list not found")
	ErrNoTagTitle      = errors.New("element with class tag-title not found")
	ErrNoImage         = errors.New("element with class image not found")
	ErrBadURL          = errors.New("malformed element holding relation URL")
	ErrBadChapterList  = errors.New("malformed element with class chapter-list")
	ErrBadTagTitle     = errors.New("malformed element with class tag-title")
	// Note: the identifier is singular, while the class named in the message
	// is the plural "cover-chapters".
	ErrBadCoverChapter = errors.New("malformed element with class cover-chapters")
	ErrBadTagTags      = errors.New("malformed element with class tag-tags")
	ErrBadAliases      = errors.New("malformed element with class aliases")
	ErrBadImage        = errors.New("malformed element with class image")
	ErrBadDescription  = errors.New("malformed element with class description")
	ErrBadThumbnail    = errors.New("malformed element with class thumbnail")
)
View Source
// ErrNoMainContainer reports, per its message, that the main container was
// not found.
var (
	ErrNoMainContainer = errors.New("main container not found")
)
View Source
// Verbose is an exported package-level boolean; its zero value is false.
// NOTE(review): presumably enables more detailed output when set — its uses
// are not visible here, confirm against the implementation.
var Verbose bool

Functions

func By

// By fetches content for the relation identified by relType and relation.
// NOTE(review): the roles of the remaining parameters are not visible from
// the signature alone — presumably workers is the number of concurrent
// workers, tries a retry limit, update/force/skip behavior flags, q the
// queue of pending database writes, s the database instance, and progress
// the sink for progress reporting. Confirm against the implementation.
func By(ctx context.Context, relType db.RelationType, relation string, workers int, tries uint64, update, force, skip bool, q *db.Queue, s *db.Instance, progress ProgressInteraction) error

By recursively fetches content by a relation

func CommitQueue

// CommitQueue commits the queue q.
// NOTE(review): presumably println receives log messages, workers bounds the
// concurrency, tries is a retry limit, and moreOutput enables additional
// logging — none of this is visible from the signature; confirm against the
// implementation.
func CommitQueue(ctx context.Context, q *db.Queue, println LogFunc, workers int, tries uint64, moreOutput bool) error

CommitQueue commits the queue to the database concurrently.

func Ping

// Ping takes no arguments and reports its outcome solely through the
// returned error.
// NOTE(review): what is being pinged is not visible here — presumably the
// site being scraped; confirm against the implementation.
func Ping() error

func UserAgent

// UserAgent accepts a string and returns nothing.
// NOTE(review): presumably sets the User-Agent value used for outgoing
// requests — confirm against the implementation.
func UserAgent(str string)

Types

type LogFunc

// LogFunc is a function that receives a single message string and returns
// nothing. CommitQueue accepts one as its println parameter.
type LogFunc func(msg string)

type ProgressInteraction

// ProgressInteraction is the interface through which By is handed a means of
// creating progress representations and printing messages.
type ProgressInteraction interface {
	// NewRepresentation returns a ProgressRepresentation for the given total
	// and description.
	// NOTE(review): presumably total is the number of expected steps and desc
	// a label shown to the user — confirm with an implementation.
	NewRepresentation(total int, desc string) ProgressRepresentation
	// Print receives a message string.
	// NOTE(review): presumably displays it without disturbing any active
	// progress output — confirm with an implementation.
	Print(msg string)
}

type ProgressRepresentation

// ProgressRepresentation is the value returned by
// ProgressInteraction.NewRepresentation.
type ProgressRepresentation interface {
	// Increment takes no arguments.
	// NOTE(review): presumably advances the progress by one step — confirm
	// with an implementation.
	Increment()
	// Set receives an integer value.
	// NOTE(review): presumably sets the current progress to val — confirm
	// with an implementation.
	Set(val int)
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL