xcrawl3r

package
v0.0.0-...-417e374 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 2, 2023 License: MIT Imports: 18 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Crawler

// Crawler bundles crawl settings with two colly collectors and three compiled
// regular expressions.
//
// NOTE(review): every field above PageCollector has the same name and type as
// a field of Options; presumably New copies them across — confirm against the
// implementation, which is not shown here.
type Crawler struct {
	Domain            string
	IncludeSubdomains bool
	Seeds             []string

	// NOTE(review): Options documents Timeout as seconds; presumably the same
	// unit applies here — confirm.
	Depth     int
	Headless  bool
	Headers   []string
	Proxies   []string
	Render    bool
	Timeout   int
	UserAgent string

	// NOTE(review): Options documents Delay and MaxRandomDelay as seconds;
	// presumably the same unit applies here — confirm.
	Concurrency    int
	Delay          int
	MaxRandomDelay int
	Parallelism    int

	Debug bool

	// Collectors and compiled patterns.
	// NOTE(review): how each collector and regexp is used is not visible from
	// this declaration; the names suggest page vs. file crawling and URL
	// matching/filtering — verify before relying on that.
	PageCollector         *colly.Collector
	FileURLsRegex         *regexp.Regexp
	FileCollector         *colly.Collector
	URLsNotToRequestRegex *regexp.Regexp
	URLsRegex             *regexp.Regexp
}

func New

// New takes a pointer to Options and returns a *Crawler together with an error.
// NOTE(review): the body is not shown here; presumably err is non-nil when the
// crawler cannot be set up, and options must not be nil — confirm.
func New(options *Options) (crawler *Crawler, err error)

func (*Crawler) Crawl

// Crawl returns a channel whose element type is URL.
// NOTE(review): the body is not shown here; presumably the channel carries the
// URLs found during the crawl. Whether it is buffered, and whether it is
// closed when crawling finishes, must be confirmed against the implementation.
func (crawler *Crawler) Crawl() (URLsChannel chan URL)

func (*Crawler) IsInScope

// IsInScope takes a URL string and returns a boolean verdict for it.
// NOTE(review): the body is not shown here; presumably the decision is based on
// the crawler's Domain and IncludeSubdomains fields — confirm.
func (crawler *Crawler) IsInScope(URL string) (isInScope bool)

type Options

// Options is the configuration value accepted by New.
//
// Timeout, Delay and MaxRandomDelay are expressed in seconds.
// NOTE(review): the meaning and defaults of the remaining fields are not
// visible from this declaration — consult the implementation of New.
type Options struct {
	Domain            string
	IncludeSubdomains bool
	Seeds             []string

	Depth     int
	Headless  bool
	Headers   []string
	Proxies   []string
	Render    bool
	Timeout   int // seconds
	UserAgent string

	Concurrency    int
	Delay          int // seconds
	MaxRandomDelay int // seconds
	Parallelism    int

	Debug bool
}

type URL

// URL is the element type of the channel returned by Crawler.Crawl.
type URL struct {
	// NOTE(review): presumably identifies where Value was found — confirm.
	Source string
	// NOTE(review): presumably the URL string itself — confirm.
	Value string
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL