Documentation ¶
Index ¶
- Constants
- Variables
- type Crawler
- type DirsPolicy
- type Option
- func WithBruteMode(v bool) Option
- func WithDelay(v time.Duration) Option
- func WithDirsPolicy(v DirsPolicy) Option
- func WithExtraCookies(v []string) Option
- func WithExtraHeaders(v []string) Option
- func WithIgnored(v []string) Option
- func WithMaxCrawlDepth(v int) Option
- func WithProxyAuth(v string) Option
- func WithRobotsPolicy(v RobotsPolicy) Option
- func WithScanCSS(v bool) Option
- func WithScanJS(v bool) Option
- func WithSkipSSL(v bool) Option
- func WithTagsFilter(v []string) Option
- func WithTimeout(v time.Duration) Option
- func WithUserAgent(v string) Option
- func WithWorkersCount(v int) Option
- func WithoutHeads(v bool) Option
- type RobotsPolicy
Constants ¶
const (
	// TaskDefault marks result for printing only.
	TaskDefault taskFlag = iota
	// TaskCrawl marks result as to-be-crawled.
	TaskCrawl
	// TaskDone marks result as final - crawling ends here.
	TaskDone
)
const (
	// DefaultRobotsPolicy is a default policy name for robots handling.
	DefaultRobotsPolicy = "ignore"
	// DefaultDirsPolicy is a default policy name for non-resource URLs.
	DefaultDirsPolicy = "show"
)
Variables ¶
var ErrUnknownPolicy = errors.New("unknown policy")
ErrUnknownPolicy is returned when the requested policy is unknown.
Functions ¶
This section is empty.
Types ¶
type Crawler ¶
type Crawler struct {
// contains filtered or unexported fields
}
Crawler holds crawling process config and state.
func (*Crawler) DumpConfig ¶
DumpConfig returns internal config representation.
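A minimal usage sketch follows. The import path, the New constructor, the Run callback signature, and DumpConfig returning a printable value are assumptions not shown in this section; only the With* options are documented below.

package main

import (
	"fmt"
	"log"
	"time"

	"github.com/s0rg/crawley/pkg/crawler"
)

func main() {
	// Build a Crawler from functional options (see type Option below).
	c := crawler.New(
		crawler.WithMaxCrawlDepth(2),
		crawler.WithDelay(150*time.Millisecond),
		crawler.WithSkipSSL(true),
	)

	// DumpConfig returns the internal config representation for inspection.
	log.Println(c.DumpConfig())

	// Run visits the target and hands every discovered URL to the callback.
	if err := c.Run("http://example.com/", func(u string) {
		fmt.Println(u)
	}); err != nil {
		log.Fatal(err)
	}
}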
type DirsPolicy ¶
type DirsPolicy byte
DirsPolicy is a policy for non-resource URLs.
const (
	// DirsShow shows directories.
	DirsShow DirsPolicy = 0
	// DirsHide hides directories from output.
	DirsHide DirsPolicy = 1
	// DirsOnly shows only directories in output.
	DirsOnly DirsPolicy = 2
)
func ParseDirsPolicy ¶
func ParseDirsPolicy(s string) (p DirsPolicy, err error)
ParseDirsPolicy parses dirs policy from string.
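A small sketch of mapping a user-supplied name to a DirsPolicy with ParseDirsPolicy, using the ErrUnknownPolicy sentinel from the Variables section; the import path and the dirsPolicyFromString helper are illustrative assumptions.

package main

import (
	"errors"

	"github.com/s0rg/crawley/pkg/crawler"
)

// dirsPolicyFromString maps a user-supplied name to a DirsPolicy, falling
// back to the documented default ("show") when the name is not recognized.
func dirsPolicyFromString(s string) crawler.DirsPolicy {
	p, err := crawler.ParseDirsPolicy(s)
	if errors.Is(err, crawler.ErrUnknownPolicy) {
		p, _ = crawler.ParseDirsPolicy(crawler.DefaultDirsPolicy)
	}

	return p
}

func main() {
	_ = dirsPolicyFromString("hide")  // assumed valid name -> DirsHide
	_ = dirsPolicyFromString("bogus") // unknown name -> DirsShow (default)
}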
type Option ¶
type Option func(*config)
Option is a configuration func.
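Because an Option is just a function over the unexported config, options compose naturally as a slice assembled before construction. A sketch, assuming a variadic constructor such as crawler.New(opts ...Option) and an illustrative buildOptions helper:

package main

import "github.com/s0rg/crawley/pkg/crawler"

// buildOptions assembles the option list conditionally; each Option mutates
// the crawler's unexported config when it is applied by the constructor.
func buildOptions(brute bool) []crawler.Option {
	opts := []crawler.Option{
		crawler.WithUserAgent("my-scanner/1.0"),
		crawler.WithWorkersCount(8),
	}

	if brute {
		opts = append(opts, crawler.WithBruteMode(true))
	}

	return opts
}

func main() {
	_ = buildOptions(true) // e.g. crawler.New(buildOptions(true)...)
}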
func WithBruteMode ¶
func WithBruteMode(v bool) Option
WithBruteMode enables "brute-mode" - scanning of HTML comments.
func WithDirsPolicy ¶
func WithDirsPolicy(v DirsPolicy) Option
WithDirsPolicy sets DirsPolicy for crawler.
func WithExtraCookies ¶
func WithExtraCookies(v []string) Option
WithExtraCookies adds cookies to requests.
func WithExtraHeaders ¶
func WithExtraHeaders(v []string) Option
WithExtraHeaders adds extra HTTP headers to requests.
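Both WithExtraCookies and WithExtraHeaders take raw strings. The exact formats are not specified in this section; the "Name: value" header form and "name=value" cookie form below are assumptions.

package main

import "github.com/s0rg/crawley/pkg/crawler"

func main() {
	// Header and cookie values are passed as raw strings; the "Name: value"
	// and "name=value" formats below are assumptions, not confirmed here.
	opts := []crawler.Option{
		crawler.WithExtraHeaders([]string{
			"X-Api-Key: secret-token",
			"Accept-Language: en-US",
		}),
		crawler.WithExtraCookies([]string{
			"session=deadbeef",
		}),
	}

	_ = opts // pass to the crawler constructor along with other options
}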
func WithIgnored ¶
func WithIgnored(v []string) Option
WithIgnored applies a URL skip filter to the crawler.
func WithMaxCrawlDepth ¶
func WithMaxCrawlDepth(v int) Option
WithMaxCrawlDepth sets the maximum crawl depth.
func WithRobotsPolicy ¶
func WithRobotsPolicy(v RobotsPolicy) Option
WithRobotsPolicy sets RobotsPolicy for crawler.
func WithSkipSSL ¶
func WithSkipSSL(v bool) Option
WithSkipSSL tells crawley to ignore any SSL handshake errors.
func WithTagsFilter ¶
func WithTagsFilter(v []string) Option
WithTagsFilter applies a tag filter to the crawler.
func WithoutHeads ¶
func WithoutHeads(v bool) Option
WithoutHeads disables pre-flight HEAD requests.
type RobotsPolicy ¶
type RobotsPolicy byte
RobotsPolicy is a policy for robots.txt.
const (
	// RobotsIgnore ignores robots.txt completely.
	RobotsIgnore RobotsPolicy = 0
	// RobotsCrawl crawls urls from robots.txt, ignoring its rules.
	RobotsCrawl RobotsPolicy = 1
	// RobotsRespect same as above, but respects given rules.
	RobotsRespect RobotsPolicy = 2
)
func ParseRobotsPolicy ¶
func ParseRobotsPolicy(s string) (p RobotsPolicy, err error)
ParseRobotsPolicy parses robots policy from string.
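A sketch wiring ParseRobotsPolicy into WithRobotsPolicy from a command-line flag; the flag handling and the crawler.New constructor are illustrative assumptions, while DefaultRobotsPolicy ("ignore") comes from the Constants section above.

package main

import (
	"flag"
	"log"

	"github.com/s0rg/crawley/pkg/crawler"
)

func main() {
	robots := flag.String("robots", crawler.DefaultRobotsPolicy, "robots.txt policy name")
	flag.Parse()

	// ParseRobotsPolicy reports ErrUnknownPolicy for unrecognized names.
	policy, err := crawler.ParseRobotsPolicy(*robots)
	if err != nil {
		log.Fatalf("robots policy %q: %v", *robots, err)
	}

	c := crawler.New(crawler.WithRobotsPolicy(policy))
	_ = c // configure further and run as needed
}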