services

package
v0.0.0-...-eb77424 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 25, 2024 License: MIT Imports: 33 Imported by: 0

Documentation

Overview

The report_manager takes care of running the issue reporters against the crawled pages. There are two different types of issue reporters. On one hand there are the PageIssueReporters, which are run against single pages as they are crawled. These checks can detect issues in the headers and body of the PageReport, such as wrong headers or missing tags. On the other hand there are the MultipageIssueReporters, which can run checks that affect multiple pages, such as duplicated titles.

Index

Constants

View Source
const (
	UserKey     contextKey = "user"
	SessionName string     = "SESSION_ID"
)
View Source
const (
	MaxPageReports  = 20000 // Max number of page reports that will be created
	LastCrawlsLimit = 5     // Max number returned by GetLastCrawls
)
View Source
const (
	Critical = iota + 1
	Alert
	Warning
)

Variables

This section is empty.

Functions

This section is empty.

Types

type Broker

type Broker struct {
	// contains filtered or unexported fields
}

PubSub broker service struct keeps a map of subscribers.

func NewPubSubBroker

func NewPubSubBroker() *Broker

func (*Broker) NewSubscriber

func (b *Broker) NewSubscriber(topic string, c func(*models.Message) error) *subscriber

Returns a new subscriber to the topic.

func (*Broker) Publish

func (b *Broker) Publish(topic string, m *models.Message)

Publishes a message to all subscribers of a topic.

func (*Broker) Unsubscribe

func (b *Broker) Unsubscribe(s *subscriber)

Unsubscribes a subscriber.

type CSVWriter

type CSVWriter struct {
	// contains filtered or unexported fields
}

func NewCSVWriter

func NewCSVWriter(f io.Writer) *CSVWriter

func (*CSVWriter) Write

func (cw *CSVWriter) Write(r *models.PageReport)

type Container

type Container struct {
	Config             *config.Config
	PubSubBroker       *Broker
	IssueService       *IssueService
	ReportService      *ReportService
	ReportManager      *ReportManager
	UserService        *UserService
	DashboardService   *DashboardService
	ProjectService     *ProjectService
	ProjectViewService *ProjectViewService
	ExportService      *Exporter
	CrawlerService     *CrawlerService
	Renderer           *Renderer
	CookieSession      *CookieSession
	// contains filtered or unexported fields
}

func NewContainer

func NewContainer(configFile string) *Container

func (*Container) InitConfig

func (c *Container) InitConfig(configFile string)

Load config file using the parameters in configFile.

func (*Container) InitCookieSession

func (c *Container) InitCookieSession()

Create cookie session handler

func (*Container) InitCrawlerService

func (c *Container) InitCrawlerService()

Create Crawler service.

func (*Container) InitDB

func (c *Container) InitDB()

Create the sql database connection.

func (*Container) InitDashboardService

func (c *Container) InitDashboardService()

Create the dashboard service.

func (*Container) InitExportService

func (c *Container) InitExportService()

Create the Export service.

func (*Container) InitIssueService

func (c *Container) InitIssueService()

Create the issue service.

func (*Container) InitProjectService

func (c *Container) InitProjectService()

Create the Project service.

func (*Container) InitProjectViewService

func (c *Container) InitProjectViewService()

Create the ProjectView service.

func (*Container) InitPubSubBroker

func (c *Container) InitPubSubBroker()

Create the PubSub broker.

func (*Container) InitRenderer

func (c *Container) InitRenderer()

Create html renderer.

func (*Container) InitReportManager

func (c *Container) InitReportManager()

Create the report manager and add all the available reporters.

func (*Container) InitReportService

func (c *Container) InitReportService()

Create the report service.

func (*Container) InitRepositories

func (c *Container) InitRepositories()

Create the data repositories.

func (*Container) InitUserService

func (c *Container) InitUserService()

Create the user service.

type CookieSession

type CookieSession struct {
	// contains filtered or unexported fields
}

func NewCookieSession

func NewCookieSession(s CookieSessionStorage) *CookieSession

func (*CookieSession) Auth

func (s *CookieSession) Auth(f func(w http.ResponseWriter, r *http.Request)) http.HandlerFunc

Auth is a middleware function that wraps the provided handler function and enforces authentication. It checks if the user is authenticated based on the session data.

func (*CookieSession) DestroySession

func (s *CookieSession) DestroySession(w http.ResponseWriter, r *http.Request) error

Destroys a user authentication session to deauthenticate a user.

func (*CookieSession) GetUser

func (s *CookieSession) GetUser(c context.Context) (*models.User, bool)

GetUser takes a context as input and retrieves the associated User value from it, if present.

func (*CookieSession) SetSession

func (s *CookieSession) SetSession(user *models.User, w http.ResponseWriter, r *http.Request) error

Sets a user authentication session with the user Id.

type CookieSessionStorage

type CookieSessionStorage interface {
	FindUserByEmail(email string) (*models.User, error)
}

type CrawlerManager

type CrawlerManager struct {
	// contains filtered or unexported fields
}

func (*CrawlerManager) AddCrawler

func (s *CrawlerManager) AddCrawler(p *models.Project) (*crawler.Crawler, error)

AddCrawler creates a new project crawler and adds it to the crawlers map. It returns the crawler on success otherwise it returns an error indicating the crawler already exists or there was an error creating it.

func (*CrawlerManager) RemoveCrawler

func (s *CrawlerManager) RemoveCrawler(p *models.Project)

RemoveCrawler removes a project's crawler from the crawlers map.

func (*CrawlerManager) StopCrawler

func (s *CrawlerManager) StopCrawler(p *models.Project)

StopCrawler stops a crawler. If the crawler does not exist it will just return.

type CrawlerService

type CrawlerService struct {
	// contains filtered or unexported fields
}

func (*CrawlerService) GetLastCrawls

func (s *CrawlerService) GetLastCrawls(p models.Project) []models.Crawl

Get a slice with 'LastCrawlsLimit' number of the crawls

func (*CrawlerService) StartCrawler

func (s *CrawlerService) StartCrawler(p models.Project) error

StartCrawler creates a new crawler and crawls the project's URL. It adds a new crawler for the project, it returns an error if there's one already running or if there's an error creating it. A crawl is created and it is updated with the crawler's data as urls are crawled. Finally the previous crawl's data is removed and the crawl is returned.

func (*CrawlerService) StopCrawler

func (s *CrawlerService) StopCrawler(p models.Project)

Get the crawler from the crawlers map and stop it. In case the crawler is not running it just returns.

type CrawlerServiceStorage

type CrawlerServiceStorage interface {
	SaveCrawl(models.Project) (*models.Crawl, error)
	GetLastCrawl(p *models.Project) models.Crawl
	GetLastCrawls(models.Project, int) []models.Crawl
	DeleteCrawlData(c *models.Crawl)

	CountIssuesByPriority(int64, int) int
	UpdateCrawl(*models.Crawl)

	SavePageReport(*models.PageReport, int64) (*models.PageReport, error)
}

type CrawlerServicesContainer

type CrawlerServicesContainer struct {
	Broker        *Broker
	ReportManager *ReportManager
	Config        *config.CrawlerConfig
}

type DashboardService

type DashboardService struct {
	// contains filtered or unexported fields
}

func NewDashboardService

func NewDashboardService(store DashboardServiceStorage) *DashboardService

func (*DashboardService) GetCanonicalCount

func (s *DashboardService) GetCanonicalCount(crawlId int64) *models.CanonicalCount

Returns a count of PageReports that are canonical or not.

func (*DashboardService) GetImageAltCount

func (s *DashboardService) GetImageAltCount(crawlId int64) *models.AltCount

Returns the count of images with and without the alt attribute.

func (*DashboardService) GetMediaCount

func (s *DashboardService) GetMediaCount(crawlId int64) *models.Chart

Returns a Chart with the PageReport's media type chart data.

func (*DashboardService) GetSchemeCount

func (s *DashboardService) GetSchemeCount(crawlId int64) *models.SchemeCount

Returns the count of PageReports with and without https.

func (*DashboardService) GetStatusCodeByDepth

func (s *DashboardService) GetStatusCodeByDepth(crawlId int64) []models.StatusCodeByDepth

GetStatusCodeByDepth returns a slice of StatusCodeByDepth models with the total number of pagereports by depth and status code.

func (*DashboardService) GetStatusCount

func (s *DashboardService) GetStatusCount(crawlId int64) *models.Chart

Returns a Chart with the PageReport's status code chart data.

type DashboardServiceStorage

type DashboardServiceStorage interface {
	CountByMediaType(int64) *models.CountList
	CountByStatusCode(int64) *models.CountList

	CountByCanonical(int64) int
	CountImagesAlt(int64) *models.AltCount
	CountScheme(int64) *models.SchemeCount
	CountByNonCanonical(int64) int
	GetStatusCodeByDepth(crawlId int64) []models.StatusCodeByDepth
}

type ExportStorage

type ExportStorage interface {
	ExportLinks(*models.Crawl) <-chan *models.ExportLink
	ExportExternalLinks(*models.Crawl) <-chan *models.ExportLink
	ExportImages(crawl *models.Crawl) <-chan *models.ExportImage
	ExportScripts(crawl *models.Crawl) <-chan *models.Script
	ExportStyles(crawl *models.Crawl) <-chan *models.Style
	ExportIframes(crawl *models.Crawl) <-chan *models.Iframe
	ExportAudios(crawl *models.Crawl) <-chan *models.Audio
	ExportVideos(crawl *models.Crawl) <-chan *models.Video
	ExportHreflangs(crawl *models.Crawl) <-chan *models.ExportHreflang
}

type Exporter

type Exporter struct {
	// contains filtered or unexported fields
}

func NewExporter

func NewExporter(s ExportStorage) *Exporter

func (*Exporter) ExportAudios

func (e *Exporter) ExportAudios(f io.Writer, crawl *models.Crawl)

Export all audio as a CSV file

func (e *Exporter) ExportExternalLinks(f io.Writer, crawl *models.Crawl)

Export external links as a CSV file

func (*Exporter) ExportHreflangs

func (e *Exporter) ExportHreflangs(f io.Writer, crawl *models.Crawl)

Export all hreflangs as a CSV file

func (*Exporter) ExportIframes

func (e *Exporter) ExportIframes(f io.Writer, crawl *models.Crawl)

Export all iframes as a CSV file

func (*Exporter) ExportImages

func (e *Exporter) ExportImages(f io.Writer, crawl *models.Crawl)

Export all images as a CSV file

func (e *Exporter) ExportLinks(f io.Writer, crawl *models.Crawl)

Export internal links as a CSV file

func (*Exporter) ExportScripts

func (e *Exporter) ExportScripts(f io.Writer, crawl *models.Crawl)

Export all scripts as a CSV file

func (*Exporter) ExportStyles

func (e *Exporter) ExportStyles(f io.Writer, crawl *models.Crawl)

Export all CSS styles as a CSV file

func (*Exporter) ExportVideos

func (e *Exporter) ExportVideos(f io.Writer, crawl *models.Crawl)

Export all video as a CSV file

type IssueService

type IssueService struct {
	// contains filtered or unexported fields
}

func NewIssueService

func NewIssueService(s IssueServiceStorage) *IssueService

func (*IssueService) GetIssuesCount

func (s *IssueService) GetIssuesCount(crawlID int64) *models.IssueCount

GetIssuesCount returns an IssueCount with the number of issues by type.

func (*IssueService) GetPaginatedReportsByIssue

func (s *IssueService) GetPaginatedReportsByIssue(crawlId int64, currentPage int, issueId string) (models.PaginatorView, error)

Returns a PaginatorView with the corresponding page reports.

type IssueServiceStorage

type IssueServiceStorage interface {
	GetNumberOfPagesForIssues(int64, string) int
	FindPageReportIssues(int64, int, string) []models.PageReport
	FindIssuesByTypeAndPriority(int64, int) []models.IssueGroup
}

type ProjectService

type ProjectService struct {
	// contains filtered or unexported fields
}

func NewProjectService

func NewProjectService(s ProjectServiceStorage) *ProjectService

func (*ProjectService) DeleteProject

func (s *ProjectService) DeleteProject(p *models.Project)

Delete a project and its related data.

func (*ProjectService) FindProject

func (s *ProjectService) FindProject(id, uid int) (models.Project, error)

Return a project specified by id and user. It populates the Host field from the project's URL.

func (*ProjectService) SaveProject

func (s *ProjectService) SaveProject(project *models.Project, userId int) error

SaveProject stores a new project.

func (*ProjectService) UpdateProject

func (s *ProjectService) UpdateProject(p *models.Project) error

Update project details.

type ProjectServiceStorage

type ProjectServiceStorage interface {
	SaveProject(*models.Project, int)
	DeleteProject(*models.Project)
	DisableProject(*models.Project)
	UpdateProject(p *models.Project) error
	FindProjectById(id int, uid int) (models.Project, error)

	DeleteProjectCrawls(*models.Project)
}

type ProjectViewService

type ProjectViewService struct {
	// contains filtered or unexported fields
}

func (*ProjectViewService) GetProjectView

func (s *ProjectViewService) GetProjectView(id, uid int) (*models.ProjectView, error)

GetProjectView returns a new ProjectView with the specified project and the project's last crawl.

func (*ProjectViewService) GetProjectViews

func (s *ProjectViewService) GetProjectViews(uid int) []models.ProjectView

GetProjectViews returns a slice of ProjectViews with all of the user's projects and its last crawls.

type ProjectViewServiceStorage

type ProjectViewServiceStorage interface {
	FindProjectsByUser(int) []models.Project
	FindProjectById(id int, uid int) (models.Project, error)

	GetLastCrawl(*models.Project) models.Crawl
}

type Renderer

type Renderer struct {
	// contains filtered or unexported fields
}

func NewRenderer

func NewRenderer(config *RendererConfig) (*Renderer, error)

NewRenderer will load a translation file and return a new template renderer.

func (*Renderer) RenderTemplate

func (r *Renderer) RenderTemplate(w io.Writer, t string, v interface{})

Render a template with the specified PageView data.

func (*Renderer) ToKByte

func (r *Renderer) ToKByte(b int) string

Returns an int formatted as KB.

type RendererConfig

type RendererConfig struct {
	TemplatesFolder  string
	TranslationsFile string
}

type ReportManager

type ReportManager struct {
	// contains filtered or unexported fields
}

func NewReportManager

func NewReportManager(s ReportManagerStorage) *ReportManager

Create a new ReportManager with no issue reporters.

func (*ReportManager) AddMultipageReporter

func (rm *ReportManager) AddMultipageReporter(reporter models.MultipageCallback)

Add a multi-page issue reporter to the ReportManager. Multi-page reporters are used to detect issues that affect multiple pages. It will be used when creating the multi page issues once all the pages have been crawled.

func (*ReportManager) AddPageReporter

func (rm *ReportManager) AddPageReporter(reporter *models.PageIssueReporter)

Add a page issue reporter to the ReportManager. It will be used to create issues on each crawled page.

func (*ReportManager) CreateMultipageIssues

func (r *ReportManager) CreateMultipageIssues(crawl *models.Crawl)

CreateMultipageIssues uses the Reporters to create and save issues found in a crawl.

func (*ReportManager) CreatePageIssues

func (r *ReportManager) CreatePageIssues(p *models.PageReport, htmlNode *html.Node, header *http.Header, crawl *models.Crawl)

CreatePageIssues loops the page reporters calling the callback function and creating the issues found in the PageReport.

type ReportManagerStorage

type ReportManagerStorage interface {
	SaveIssues(<-chan *models.Issue)
}

type ReportService

type ReportService struct {
	// contains filtered or unexported fields
}

func NewReportService

func NewReportService(store ReportServiceStorage) *ReportService

func (*ReportService) GetPageReporsByIssueType

func (s *ReportService) GetPageReporsByIssueType(crawlId int64, eid string) <-chan *models.PageReport

Return channel of PageReports by error type.

func (*ReportService) GetPageReport

func (s *ReportService) GetPageReport(rid int, crawlId int64, tab string, page int) *models.PageReportView

Returns a PageReportView by PageReport Id and Crawl Id. It also loads the data specified in the tab parameter.

func (*ReportService) GetPaginatedReports

func (s *ReportService) GetPaginatedReports(crawlId int64, currentPage int, term string) (models.PaginatorView, error)

Returns a PaginatorView with the corresponding page reports.

func (*ReportService) GetSitemapPageReports

func (s *ReportService) GetSitemapPageReports(crawlId int64) <-chan *models.PageReport

Returns a channel of crawlable PageReports that can be included in a sitemap.

type ReportServiceStorage

type ReportServiceStorage interface {
	FindPageReportById(int) models.PageReport
	FindErrorTypesByPage(int, int64) []string
	FindInLinks(string, int64, int) []models.InternalLink
	FindPageReportsRedirectingToURL(string, int64, int) []models.PageReport
	FindAllPageReportsByCrawlIdAndErrorType(int64, string) <-chan *models.PageReport
	FindAllPageReportsByCrawlId(int64) <-chan *models.PageReport
	FindSitemapPageReports(int64) <-chan *models.PageReport
	FindLinks(pageReport *models.PageReport, cid int64, page int) []models.InternalLink
	FindExternalLinks(pageReport *models.PageReport, cid int64, p int) []models.Link
	FindPaginatedPageReports(cid int64, p int, term string) []models.PageReport

	FindPageReportStyles(pageReport *models.PageReport, cid int64) []string
	FindPageReportScripts(pageReport *models.PageReport, cid int64) []string
	FindPageReportVideos(pageReport *models.PageReport, cid int64) []string
	FindPageReportAudios(pageReport *models.PageReport, cid int64) []string
	FindPageReportIframes(pageReport *models.PageReport, cid int64) []string
	FindPageReportImages(pageReport *models.PageReport, cid int64) []models.Image
	FindPageReportHreflangs(pageReport *models.PageReport, cid int64) []models.Hreflang

	GetNumberOfPagesForPageReport(cid int64, term string) int
	GetNumberOfPagesForInlinks(*models.PageReport, int64) int
	GetNumberOfPagesForRedirecting(*models.PageReport, int64) int
	GetNumberOfPagesForLinks(*models.PageReport, int64) int
	GetNumberOfPagesForExternalLinks(pageReport *models.PageReport, cid int64) int
}

type UserService

type UserService struct {
	// contains filtered or unexported fields
}

func NewUserService

func NewUserService(s UserServiceStorage) *UserService

func (*UserService) DeleteUser

func (s *UserService) DeleteUser(user *models.User)

Delete a User and all its associated projects and crawl data.

func (*UserService) SignIn

func (s *UserService) SignIn(email, password string) (*models.User, error)

SignIn validates the provided email and password combination for user authentication. It compares the provided password with the user's hashed password. If the passwords do not match, it returns an error.

func (*UserService) SignUp

func (s *UserService) SignUp(email, password string) (*models.User, error)

SignUp validates the user email and password; if they are both valid, it creates a password hash before storing it. If the storage is successful it returns the new user.

func (*UserService) UpdatePassword

func (s *UserService) UpdatePassword(email, password string) error

UpdatePassword updates the password for the user with the given email. It validates the new password and generates a hashed password using bcrypt before storing it.

type UserServiceStorage

type UserServiceStorage interface {
	UserSignup(string, string) (*models.User, error)
	FindUserByEmail(string) (*models.User, error)
	UserUpdatePassword(email, hashedPassword string) error
	DeleteUser(*models.User) error
	DisableUser(*models.User) error

	DeleteProjectCrawls(*models.Project)

	DeleteProject(*models.Project)
	FindProjectsByUser(uid int) []models.Project
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL