crawler

package
v0.0.0-...-24e6800 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 15, 2022 License: AGPL-3.0 Imports: 20 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Crawler

type Crawler struct {
	Client        *http.Client
	Domain        string
	DomainID      int
	Authoritative bool
	Exclude       []*regexp.Regexp
	Delay         time.Duration
	RetryAfter    time.Duration
	Robots        *robotstxt.Group
	UserAgent     string
	Start         time.Time
	// contains filtered or unexported fields
}

func NewCrawler

func NewCrawler(ua string, db *sql.DB, domain string) *Crawler

func (*Crawler) Crawl

func (c *Crawler) Crawl()

func (*Crawler) Get

func (c *Crawler) Get(ctx context.Context, url *url.URL) (*http.Response, error)

func (*Crawler) Head

func (c *Crawler) Head(ctx context.Context, url *url.URL) (*http.Response, error)

func (*Crawler) Index

func (c *Crawler) Index(ctx context.Context, url *url.URL) error

func (*Crawler) Schedule

func (c *Crawler) Schedule(url *url.URL)

func (*Crawler) ScheduleLinks

func (c *Crawler) ScheduleLinks(from *url.URL, node *html.Node)

type Metadata

type Metadata struct {
	Title       *string
	Robots      []string
	Author      *string
	Description *string
	Canonical   *url.URL
	JavaScript  bool
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL