internal

package
v0.0.0-...-decc0ca Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 6, 2023 License: Apache-2.0 Imports: 10 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func Crawl

func Crawl()

func CreateConfigFile

func CreateConfigFile(cfg CrawlerVars)

func WriteReports

func WriteReports(failRep bool, contRep bool, csvRep bool, htmlRep bool) error

Types

type ContentEl

// ContentEl wraps a single DomNode and is the element type of Html.Content.
// NOTE(review): presumably one entry per element matched by a "get-content"
// selector — confirm against the crawler implementation.
type ContentEl struct {
	// Element is the captured DOM node, encoded under the JSON key "element".
	Element DomNode `json:"element"`
}

type CrawlerVars

// CrawlerVars is the YAML-tagged configuration structure accepted by
// CreateConfigFile. Every field maps to the YAML key given in its tag.
type CrawlerVars struct {
	// StarterURL maps to the "starterURL" key.
	// NOTE(review): presumably the URL the crawl begins at — confirm in Crawl.
	StarterURL     string   `yaml:"starterURL"`
	// AllowedDomains maps to the "allowedDomains" key.
	// NOTE(review): presumably limits which hosts are visited — confirm in Crawl.
	AllowedDomains []string `yaml:"allowedDomains"`
	// Selectors groups two selector lists under the "selectors" key.
	Selectors      struct {
		// GetContent maps to "get-content".
		// NOTE(review): presumably selectors whose matches are recorded as
		// page content — confirm.
		GetContent []string `yaml:"get-content"`
		// CheckLinks maps to "check-links".
		// NOTE(review): presumably selectors whose links are followed/checked —
		// confirm.
		CheckLinks []string `yaml:"check-links"`
	} `yaml:"selectors"`
	// Colly groups crawler tuning values.
	// NOTE(review): the YAML key here is "Colly" with a capital C, unlike the
	// lower-camel keys used elsewhere in this struct; existing config files
	// depend on it, so confirm before normalising.
	Colly struct {
		// MaxDepth maps to "maxDepth".
		MaxDepth         int  `yaml:"maxDepth"`
		// Async maps to "async".
		Async            bool `yaml:"async"`
		// ParallelRequests maps to "parallelRequests".
		ParallelRequests int  `yaml:"parallelRequests"`
	} `yaml:"Colly"`
}

type DomNode

// DomNode is a flat, string-only description of an HTML element. It is
// embedded by value in ContentEl, Link and Page.
type DomNode struct {
	// Tag is encoded as "tag".
	Tag     string `json:"tag"`
	// Text is encoded as "text".
	Text    string `json:"text"`
	// Classes is encoded as "classes". It is a single string, not a slice.
	// NOTE(review): presumably the raw class attribute value — confirm.
	Classes string `json:"classes"`
	// ID is encoded as "id".
	ID      string `json:"id"`
	// Inner is encoded as "innerHtml" (the JSON key differs from the field name).
	Inner   string `json:"innerHtml"`
}

type Html

// Html holds the links and content elements recorded for one page; it is the
// type of Page.Html.
type Html struct {
	// Links is encoded as "links".
	Links   []Link      `json:"links"`
	// Content is encoded as "content".
	Content []ContentEl `json:"content"`
}

type HtmlReport

// HtmlReport is the value returned (by pointer) from GetHtmlReport. It carries
// the recorded pages plus two tables of strings.
type HtmlReport struct {
	// Pages is encoded as "pages".
	Pages    []Page     `json:"pages"`
	// Failures is encoded as "failures"; each inner slice is one row of strings.
	// NOTE(review): column layout is not visible here — confirm against the
	// report writer.
	Failures [][]string `json:"failures"`
	// Content is encoded as "content"; each inner slice is one row of strings.
	// NOTE(review): column layout is not visible here — confirm against the
	// report writer.
	Content  [][]string `json:"content"`
}

func GetHtmlReport

func GetHtmlReport() *HtmlReport

type Link

// Link pairs a DOM node with an href string; it is the element type of
// Html.Links.
type Link struct {
	// Element is the DOM node associated with the link, encoded as "element".
	Element DomNode `json:"element"`
	// Href is encoded as "href".
	Href    string  `json:"href"`
}

type Page

// Page is one entry in Reporter.Pages and HtmlReport.Pages. Its fields
// correspond one-to-one with the arguments of (*Reporter).AddPage.
type Page struct {
	// Origin is encoded as "origin".
	// NOTE(review): presumably the URL of the page that linked here — confirm.
	Origin   string          `json:"origin"`
	// OriginEl is encoded as "originEl".
	// NOTE(review): presumably the element on the origin page that produced
	// this visit — confirm.
	OriginEl DomNode         `json:"originEl"`
	// Url is encoded as "url".
	Url      string          `json:"url"`
	// Html holds the links and content recorded for this page, encoded as "html".
	Html     Html            `json:"html"`
	// Response is the colly response pointer for this page, encoded as
	// "response". It is a pointer and may therefore be nil.
	Response *colly.Response `json:"response"`
}

type Reporter

// Reporter accumulates Page records. It is the receiver of AddPage,
// AddLinkToPage, AddContentToPage and WriteReport, and is returned by
// InitReporter and ReadReport.
type Reporter struct {
	// Pages is encoded under the JSON key "allPages" (not "pages").
	Pages []Page `json:"allPages"`
}

func InitReporter

func InitReporter() *Reporter

func ReadReport

func ReadReport() (*Reporter, error)

func (*Reporter) AddContentToPage

func (r *Reporter) AddContentToPage(pageURL string, el DomNode) *ContentEl

func (*Reporter) AddLinkToPage

func (r *Reporter) AddLinkToPage(pageURL string, el DomNode, link string) *Link

func (*Reporter) AddPage

func (r *Reporter) AddPage(org string, orgEl DomNode, url string, html Html, res *colly.Response) *Page

func (*Reporter) WriteReport

func (r *Reporter) WriteReport() error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL