core

package
v1.1.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 19, 2023 License: GPL-3.0 Imports: 13 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// GeneralRegex is the source text of a regular expression that matches
// absolute http:// and https:// URLs: a scheme, a dotted host made of word
// characters and hyphens, and an optional run of path/query/fragment
// characters that must not end in '.', ',' or ':'.
//
// It is kept as an uncompiled pattern string; callers compile it themselves.
var GeneralRegex = `((?:https?)://[\w\-]+(?:\.[\w\-]+)+[\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])`

Functions

This section is empty.

Types

type Cache

// Cache holds a set of visited entries together with a mutex.
//
// NOTE(review): the AddVisited, IsVisited and Flush method bodies are not
// visible here; the field notes below are inferred from names and signatures
// and should be confirmed against the implementation.
type Cache struct {
	// Visited is a string-keyed set; presumably the keys are URLs, since
	// AddVisited and IsVisited both take a url string — confirm.
	// NOTE(review): writing to a nil map panics; where this map is
	// initialised is not visible here.
	Visited map[string]bool
	// Lock is presumably held while Visited is read or written — confirm.
	// A sync.Mutex must not be copied after first use, so Cache values
	// should be shared by pointer rather than copied.
	Lock    sync.Mutex
}

func (*Cache) AddVisited

func (c *Cache) AddVisited(url string)

func (*Cache) Flush

func (c *Cache) Flush()

func (*Cache) IsVisited

func (c *Cache) IsVisited(url string) bool

type Crawler

// Crawler bundles the configuration and shared state for a crawl: the
// starting URL, crawl options, an HTTP client and a visited-entry Cache.
//
// NOTE(review): only the field names and the NewCrawler signature are visible
// here; the per-field notes below are assumptions to confirm against the
// method implementations.
type Crawler struct {
	// RootURL is presumably the URL the crawl starts from (NewCrawler's
	// url argument) — confirm.
	RootURL        string
	// Level is presumably the crawl depth limit — confirm units and whether
	// the root counts as level 0 or 1.
	Level          int
	// LiveMode presumably selects between CrawlNodeLive and CrawlNodeBlock —
	// confirm.
	LiveMode       bool
	// ExportFile is presumably the output path used when exporting results —
	// confirm how the export format is chosen.
	ExportFile     string
	// RegexMap maps strings to strings; presumably a label to a regular
	// expression pattern used by AddMatches — confirm.
	RegexMap       map[string]string
	// ExcludedStatus is presumably a list of HTTP status codes whose pages
	// are skipped (NewCrawler's statusResponses argument) — confirm.
	ExcludedStatus []int
	// IncludedUrls is presumably a filter on which URLs are followed
	// (NewCrawler's includes argument) — confirm matching semantics.
	IncludedUrls   []string
	// Client is the HTTP client; presumably used by Fetch — confirm.
	Client         *http.Client
	// Cache is held by value and contains a sync.Mutex, so copying a Crawler
	// copies the lock; pass *Crawler around rather than Crawler values.
	Cache          Cache
}

func NewCrawler

func NewCrawler(url string, level int, liveMode bool, exportFile string, regexMap map[string]string, statusResponses []int, includes []string) *Crawler

func (*Crawler) AddMatches

func (c *Crawler) AddMatches(page webtree.Page)

func (*Crawler) Crawl

func (c *Crawler) Crawl()

func (*Crawler) CrawlNodeBlock

func (c *Crawler) CrawlNodeBlock(w *webtree.Node)

func (*Crawler) CrawlNodeLive

func (c *Crawler) CrawlNodeLive(w *webtree.Node)

func (*Crawler) Export

func (c *Crawler) Export(tree webtree.Node, format string, filename string) error

func (*Crawler) ExportJSON

func (c *Crawler) ExportJSON(root webtree.Node, filename string) error

func (*Crawler) ExportTXT

func (c *Crawler) ExportTXT(root webtree.Node, filename string) error

func (*Crawler) ExportXML

func (c *Crawler) ExportXML(tree webtree.Node, filename string) error

func (*Crawler) ExtractLinks

func (c *Crawler) ExtractLinks(page *webtree.Page) (links []string)

func (*Crawler) Fetch

func (c *Crawler) Fetch(page *webtree.Page)

func (*Crawler) IsSkipablePage

func (c *Crawler) IsSkipablePage(page webtree.Page) bool

func (*Crawler) SaveResults

func (c *Crawler) SaveResults(root webtree.Node)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL