Documentation ¶
Overview ¶
The report_manager takes care of running the issue reporters against the crawled pages. There are two different types of issue reporters. On one hand there are the PageIssueReporters, which are run against single pages as they are crawled. These checks can detect issues in the headers and body of the PageReport, such as wrong headers or missing tags. On the other hand there are the MultipageIssueReporters, which can run checks that affect multiple pages, such as duplicated titles.
Index ¶
- Constants
- type Broker
- type CSVWriter
- type Container
- func (c *Container) InitConfig(configFile string)
- func (c *Container) InitCookieSession()
- func (c *Container) InitCrawlerService()
- func (c *Container) InitDB()
- func (c *Container) InitDashboardService()
- func (c *Container) InitExportService()
- func (c *Container) InitIssueService()
- func (c *Container) InitProjectService()
- func (c *Container) InitProjectViewService()
- func (c *Container) InitPubSubBroker()
- func (c *Container) InitRenderer()
- func (c *Container) InitReportManager()
- func (c *Container) InitReportService()
- func (c *Container) InitRepositories()
- func (c *Container) InitUserService()
- type CookieSession
- func (s *CookieSession) Auth(f func(w http.ResponseWriter, r *http.Request)) http.HandlerFunc
- func (s *CookieSession) DestroySession(w http.ResponseWriter, r *http.Request) error
- func (s *CookieSession) GetUser(c context.Context) (*models.User, bool)
- func (s *CookieSession) SetSession(user *models.User, w http.ResponseWriter, r *http.Request) error
- type CookieSessionStorage
- type CrawlerManager
- type CrawlerService
- type CrawlerServiceStorage
- type CrawlerServicesContainer
- type DashboardService
- func (s *DashboardService) GetCanonicalCount(crawlId int64) *models.CanonicalCount
- func (s *DashboardService) GetImageAltCount(crawlId int64) *models.AltCount
- func (s *DashboardService) GetMediaCount(crawlId int64) *models.Chart
- func (s *DashboardService) GetSchemeCount(crawlId int64) *models.SchemeCount
- func (s *DashboardService) GetStatusCodeByDepth(crawlId int64) []models.StatusCodeByDepth
- func (s *DashboardService) GetStatusCount(crawlId int64) *models.Chart
- type DashboardServiceStorage
- type ExportStorage
- type Exporter
- func (e *Exporter) ExportAudios(f io.Writer, crawl *models.Crawl)
- func (e *Exporter) ExportExternalLinks(f io.Writer, crawl *models.Crawl)
- func (e *Exporter) ExportHreflangs(f io.Writer, crawl *models.Crawl)
- func (e *Exporter) ExportIframes(f io.Writer, crawl *models.Crawl)
- func (e *Exporter) ExportImages(f io.Writer, crawl *models.Crawl)
- func (e *Exporter) ExportLinks(f io.Writer, crawl *models.Crawl)
- func (e *Exporter) ExportScripts(f io.Writer, crawl *models.Crawl)
- func (e *Exporter) ExportStyles(f io.Writer, crawl *models.Crawl)
- func (e *Exporter) ExportVideos(f io.Writer, crawl *models.Crawl)
- type IssueService
- type IssueServiceStorage
- type ProjectService
- type ProjectServiceStorage
- type ProjectViewService
- type ProjectViewServiceStorage
- type Renderer
- type RendererConfig
- type ReportManager
- func (rm *ReportManager) AddMultipageReporter(reporter models.MultipageCallback)
- func (rm *ReportManager) AddPageReporter(reporter *models.PageIssueReporter)
- func (r *ReportManager) CreateMultipageIssues(crawl *models.Crawl)
- func (r *ReportManager) CreatePageIssues(p *models.PageReport, htmlNode *html.Node, header *http.Header, ...)
- type ReportManagerStorage
- type ReportService
- func (s *ReportService) GetPageReporsByIssueType(crawlId int64, eid string) <-chan *models.PageReport
- func (s *ReportService) GetPageReport(rid int, crawlId int64, tab string, page int) *models.PageReportView
- func (s *ReportService) GetPaginatedReports(crawlId int64, currentPage int, term string) (models.PaginatorView, error)
- func (s *ReportService) GetSitemapPageReports(crawlId int64) <-chan *models.PageReport
- type ReportServiceStorage
- type UserService
- type UserServiceStorage
Constants ¶
const ( UserKey contextKey = "user" SessionName string = "SESSION_ID" )
const ( MaxPageReports = 20000 // Max number of page reports that will be created LastCrawlsLimit = 5 // Max number returned by GetLastCrawls )
const ( Critical = iota + 1 Alert Warning )
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Broker ¶
type Broker struct {
// contains filtered or unexported fields
}
PubSub broker service struct keeps a map of subscribers.
func NewPubSubBroker ¶
func NewPubSubBroker() *Broker
func (*Broker) NewSubscriber ¶
Returns a new subscriber to the topic.
type CSVWriter ¶
type CSVWriter struct {
// contains filtered or unexported fields
}
func NewCSVWriter ¶
func (*CSVWriter) Write ¶
func (cw *CSVWriter) Write(r *models.PageReport)
type Container ¶
type Container struct { Config *config.Config PubSubBroker *Broker IssueService *IssueService ReportService *ReportService ReportManager *ReportManager UserService *UserService DashboardService *DashboardService ProjectService *ProjectService ProjectViewService *ProjectViewService ExportService *Exporter CrawlerService *CrawlerService Renderer *Renderer CookieSession *CookieSession // contains filtered or unexported fields }
func NewContainer ¶
func (*Container) InitConfig ¶
Load config file using the parameters in configFile.
func (*Container) InitCookieSession ¶
func (c *Container) InitCookieSession()
Create cookie session handler
func (*Container) InitCrawlerService ¶
func (c *Container) InitCrawlerService()
Create Crawler service.
func (*Container) InitDashboardService ¶
func (c *Container) InitDashboardService()
Create the dashboard service.
func (*Container) InitExportService ¶
func (c *Container) InitExportService()
Create the Export service.
func (*Container) InitIssueService ¶
func (c *Container) InitIssueService()
Create the issue service.
func (*Container) InitProjectService ¶
func (c *Container) InitProjectService()
Create the Project service.
func (*Container) InitProjectViewService ¶
func (c *Container) InitProjectViewService()
Create the ProjectView service.
func (*Container) InitPubSubBroker ¶
func (c *Container) InitPubSubBroker()
Create the PubSub broker.
func (*Container) InitReportManager ¶
func (c *Container) InitReportManager()
Create the report manager and add all the available reporters.
func (*Container) InitReportService ¶
func (c *Container) InitReportService()
Create the report service.
func (*Container) InitRepositories ¶
func (c *Container) InitRepositories()
Create the data repositories.
type CookieSession ¶
type CookieSession struct {
// contains filtered or unexported fields
}
func NewCookieSession ¶
func NewCookieSession(s CookieSessionStorage) *CookieSession
func (*CookieSession) Auth ¶
func (s *CookieSession) Auth(f func(w http.ResponseWriter, r *http.Request)) http.HandlerFunc
Auth is a middleware function that wraps the provided handler function and enforces authentication. It checks if the user is authenticated based on the session data.
func (*CookieSession) DestroySession ¶
func (s *CookieSession) DestroySession(w http.ResponseWriter, r *http.Request) error
Destroys a user authentication session to deauthenticate a user.
func (*CookieSession) GetUser ¶
GetUser takes a context as input and retrieves the associated User value from it, if present.
func (*CookieSession) SetSession ¶
func (s *CookieSession) SetSession(user *models.User, w http.ResponseWriter, r *http.Request) error
Sets a user authentication session with the user Id.
type CookieSessionStorage ¶
type CrawlerManager ¶
type CrawlerManager struct {
// contains filtered or unexported fields
}
func (*CrawlerManager) AddCrawler ¶
AddCrawler creates a new project crawler and adds it to the crawlers map. It returns the crawler on success otherwise it returns an error indicating the crawler already exists or there was an error creating it.
func (*CrawlerManager) RemoveCrawler ¶
func (s *CrawlerManager) RemoveCrawler(p *models.Project)
RemoveCrawler removes a project's crawler from the crawlers map.
func (*CrawlerManager) StopCrawler ¶
func (s *CrawlerManager) StopCrawler(p *models.Project)
StopCrawler stops a crawler. If the crawler does not exist it will just return.
type CrawlerService ¶
type CrawlerService struct {
// contains filtered or unexported fields
}
func NewCrawlerService ¶
func NewCrawlerService(s CrawlerServiceStorage, services CrawlerServicesContainer) *CrawlerService
func (*CrawlerService) GetLastCrawls ¶
func (s *CrawlerService) GetLastCrawls(p models.Project) []models.Crawl
Get a slice with 'LastCrawlsLimit' number of the crawls
func (*CrawlerService) StartCrawler ¶
func (s *CrawlerService) StartCrawler(p models.Project) error
StartCrawler creates a new crawler and crawls the project's URL. It adds a new crawler for the project, it returns an error if there's one already running or if there's an error creating it. A crawl is created and it is updated with the crawler's data as urls are crawled. Finally the previous crawl's data is removed and the crawl is returned.
func (*CrawlerService) StopCrawler ¶
func (s *CrawlerService) StopCrawler(p models.Project)
Get the crawler from the crawlers map and stop it. In case the crawler is not running it just returns.
type CrawlerServiceStorage ¶
type CrawlerServiceStorage interface { SaveCrawl(models.Project) (*models.Crawl, error) GetLastCrawl(p *models.Project) models.Crawl GetLastCrawls(models.Project, int) []models.Crawl DeleteCrawlData(c *models.Crawl) CountIssuesByPriority(int64, int) int UpdateCrawl(*models.Crawl) SavePageReport(*models.PageReport, int64) (*models.PageReport, error) }
type CrawlerServicesContainer ¶
type CrawlerServicesContainer struct { Broker *Broker ReportManager *ReportManager Config *config.CrawlerConfig }
type DashboardService ¶
type DashboardService struct {
// contains filtered or unexported fields
}
func NewDashboardService ¶
func NewDashboardService(store DashboardServiceStorage) *DashboardService
func (*DashboardService) GetCanonicalCount ¶
func (s *DashboardService) GetCanonicalCount(crawlId int64) *models.CanonicalCount
Returns a count of PageReports that are canonical or not.
func (*DashboardService) GetImageAltCount ¶
func (s *DashboardService) GetImageAltCount(crawlId int64) *models.AltCount
Returns the count of images with and without the alt attribute.
func (*DashboardService) GetMediaCount ¶
func (s *DashboardService) GetMediaCount(crawlId int64) *models.Chart
Returns a Chart with the PageReport's media type chart data.
func (*DashboardService) GetSchemeCount ¶
func (s *DashboardService) GetSchemeCount(crawlId int64) *models.SchemeCount
Returns the count of PageReports with and without https.
func (*DashboardService) GetStatusCodeByDepth ¶
func (s *DashboardService) GetStatusCodeByDepth(crawlId int64) []models.StatusCodeByDepth
GetStatusCodeByDepth returns a slice of StatusCodeByDepth models with the total number of pagereports by depth and status code.
func (*DashboardService) GetStatusCount ¶
func (s *DashboardService) GetStatusCount(crawlId int64) *models.Chart
Returns a Chart with the PageReport's status code chart data.
type DashboardServiceStorage ¶
type DashboardServiceStorage interface { CountByMediaType(int64) *models.CountList CountByStatusCode(int64) *models.CountList CountByCanonical(int64) int CountImagesAlt(int64) *models.AltCount CountScheme(int64) *models.SchemeCount CountByNonCanonical(int64) int GetStatusCodeByDepth(crawlId int64) []models.StatusCodeByDepth }
type ExportStorage ¶
type ExportStorage interface { ExportLinks(*models.Crawl) <-chan *models.ExportLink ExportExternalLinks(*models.Crawl) <-chan *models.ExportLink ExportImages(crawl *models.Crawl) <-chan *models.ExportImage ExportScripts(crawl *models.Crawl) <-chan *models.Script ExportStyles(crawl *models.Crawl) <-chan *models.Style ExportIframes(crawl *models.Crawl) <-chan *models.Iframe ExportAudios(crawl *models.Crawl) <-chan *models.Audio ExportVideos(crawl *models.Crawl) <-chan *models.Video ExportHreflangs(crawl *models.Crawl) <-chan *models.ExportHreflang }
type Exporter ¶
type Exporter struct {
// contains filtered or unexported fields
}
func NewExporter ¶
func NewExporter(s ExportStorage) *Exporter
func (*Exporter) ExportAudios ¶
Export all audio as a CSV file
func (*Exporter) ExportExternalLinks ¶
Export external links as a CSV file
func (*Exporter) ExportHreflangs ¶
Export all hreflangs as a CSV file
func (*Exporter) ExportIframes ¶
Export all iframes as a CSV file
func (*Exporter) ExportImages ¶
Export all images as a CSV file
func (*Exporter) ExportLinks ¶
Export internal links as a CSV file
func (*Exporter) ExportScripts ¶
Export all scripts as a CSV file
func (*Exporter) ExportStyles ¶
Export all CSS styles as a CSV file
type IssueService ¶
type IssueService struct {
// contains filtered or unexported fields
}
func NewIssueService ¶
func NewIssueService(s IssueServiceStorage) *IssueService
func (*IssueService) GetIssuesCount ¶
func (s *IssueService) GetIssuesCount(crawlID int64) *models.IssueCount
GetIssuesCount returns an IssueCount with the number of issues by type.
func (*IssueService) GetPaginatedReportsByIssue ¶
func (s *IssueService) GetPaginatedReportsByIssue(crawlId int64, currentPage int, issueId string) (models.PaginatorView, error)
Returns a PaginatorView with the corresponding page reports.
type IssueServiceStorage ¶
type ProjectService ¶
type ProjectService struct {
// contains filtered or unexported fields
}
func NewProjectService ¶
func NewProjectService(s ProjectServiceStorage) *ProjectService
func (*ProjectService) DeleteProject ¶
func (s *ProjectService) DeleteProject(p *models.Project)
Delete a project and its related data.
func (*ProjectService) FindProject ¶
func (s *ProjectService) FindProject(id, uid int) (models.Project, error)
Return a project specified by id and user. It populates the Host field from the project's URL.
func (*ProjectService) SaveProject ¶
func (s *ProjectService) SaveProject(project *models.Project, userId int) error
SaveProject stores a new project.
func (*ProjectService) UpdateProject ¶
func (s *ProjectService) UpdateProject(p *models.Project) error
Update project details.
type ProjectServiceStorage ¶
type ProjectViewService ¶
type ProjectViewService struct {
// contains filtered or unexported fields
}
func NewProjectViewService ¶
func NewProjectViewService(s ProjectViewServiceStorage) *ProjectViewService
func (*ProjectViewService) GetProjectView ¶
func (s *ProjectViewService) GetProjectView(id, uid int) (*models.ProjectView, error)
GetProjectView returns a new ProjectView with the specified project and the project's last crawl.
func (*ProjectViewService) GetProjectViews ¶
func (s *ProjectViewService) GetProjectViews(uid int) []models.ProjectView
GetProjectViews returns a slice of ProjectViews with all of the user's projects and its last crawls.
type Renderer ¶
type Renderer struct {
// contains filtered or unexported fields
}
func NewRenderer ¶
func NewRenderer(config *RendererConfig) (*Renderer, error)
NewRenderer will load a translation file and return a new template renderer.
func (*Renderer) RenderTemplate ¶
Render a template with the specified PageView data.
type RendererConfig ¶
type ReportManager ¶
type ReportManager struct {
// contains filtered or unexported fields
}
func NewReportManager ¶
func NewReportManager(s ReportManagerStorage) *ReportManager
Create a new ReportManager with no issue reporters.
func (*ReportManager) AddMultipageReporter ¶
func (rm *ReportManager) AddMultipageReporter(reporter models.MultipageCallback)
Add a multi-page issue reporter to the ReportManager. Multi-page reporters are used to detect issues that affect multiple pages. It will be used when creating the multi page issues once all the pages have been crawled.
func (*ReportManager) AddPageReporter ¶
func (rm *ReportManager) AddPageReporter(reporter *models.PageIssueReporter)
Add a page issue reporter to the ReportManager. It will be used to create issues on each crawled page.
func (*ReportManager) CreateMultipageIssues ¶
func (r *ReportManager) CreateMultipageIssues(crawl *models.Crawl)
CreateMultipageIssues uses the Reporters to create and save issues found in a crawl.
func (*ReportManager) CreatePageIssues ¶
func (r *ReportManager) CreatePageIssues(p *models.PageReport, htmlNode *html.Node, header *http.Header, crawl *models.Crawl)
CreatePageIssues loops the page reporters calling the callback function and creating the issues found in the PageReport.
type ReportManagerStorage ¶
type ReportService ¶
type ReportService struct {
// contains filtered or unexported fields
}
func NewReportService ¶
func NewReportService(store ReportServiceStorage) *ReportService
func (*ReportService) GetPageReporsByIssueType ¶
func (s *ReportService) GetPageReporsByIssueType(crawlId int64, eid string) <-chan *models.PageReport
Returns a channel of PageReports by error type.
func (*ReportService) GetPageReport ¶
func (s *ReportService) GetPageReport(rid int, crawlId int64, tab string, page int) *models.PageReportView
Returns a PageReportView by PageReport Id and Crawl Id. It also loads the data specified in the tab parameter.
func (*ReportService) GetPaginatedReports ¶
func (s *ReportService) GetPaginatedReports(crawlId int64, currentPage int, term string) (models.PaginatorView, error)
Returns a PaginatorView with the corresponding page reports.
func (*ReportService) GetSitemapPageReports ¶
func (s *ReportService) GetSitemapPageReports(crawlId int64) <-chan *models.PageReport
Returns a channel of crawlable PageReports that can be included in a sitemap.
type ReportServiceStorage ¶
type ReportServiceStorage interface { FindPageReportById(int) models.PageReport FindErrorTypesByPage(int, int64) []string FindInLinks(string, int64, int) []models.InternalLink FindPageReportsRedirectingToURL(string, int64, int) []models.PageReport FindAllPageReportsByCrawlIdAndErrorType(int64, string) <-chan *models.PageReport FindAllPageReportsByCrawlId(int64) <-chan *models.PageReport FindSitemapPageReports(int64) <-chan *models.PageReport FindLinks(pageReport *models.PageReport, cid int64, page int) []models.InternalLink FindExternalLinks(pageReport *models.PageReport, cid int64, p int) []models.Link FindPaginatedPageReports(cid int64, p int, term string) []models.PageReport FindPageReportStyles(pageReport *models.PageReport, cid int64) []string FindPageReportScripts(pageReport *models.PageReport, cid int64) []string FindPageReportVideos(pageReport *models.PageReport, cid int64) []string FindPageReportAudios(pageReport *models.PageReport, cid int64) []string FindPageReportIframes(pageReport *models.PageReport, cid int64) []string FindPageReportImages(pageReport *models.PageReport, cid int64) []models.Image FindPageReportHreflangs(pageReport *models.PageReport, cid int64) []models.Hreflang GetNumberOfPagesForPageReport(cid int64, term string) int GetNumberOfPagesForInlinks(*models.PageReport, int64) int GetNumberOfPagesForRedirecting(*models.PageReport, int64) int GetNumberOfPagesForLinks(*models.PageReport, int64) int GetNumberOfPagesForExternalLinks(pageReport *models.PageReport, cid int64) int }
type UserService ¶
type UserService struct {
// contains filtered or unexported fields
}
func NewUserService ¶
func NewUserService(s UserServiceStorage) *UserService
func (*UserService) DeleteUser ¶
func (s *UserService) DeleteUser(user *models.User)
Delete a User and all its associated projects and crawl data.
func (*UserService) SignIn ¶
func (s *UserService) SignIn(email, password string) (*models.User, error)
SignIn validates the provided email and password combination for user authentication. It compares the provided password with the user's hashed password. If the passwords do not match, it returns an error.
func (*UserService) SignUp ¶
func (s *UserService) SignUp(email, password string) (*models.User, error)
SignUp validates the user email and password; if they are both valid it creates a password hash before storing it. If the storage is successful it returns the new user.
func (*UserService) UpdatePassword ¶
func (s *UserService) UpdatePassword(email, password string) error
UpdatePassword updates the password for the user with the given email. It validates the new password and generates a hashed password using bcrypt before storing it.
type UserServiceStorage ¶
type UserServiceStorage interface { UserSignup(string, string) (*models.User, error) FindUserByEmail(string) (*models.User, error) UserUpdatePassword(email, hashedPassword string) error DeleteUser(*models.User) error DisableUser(*models.User) error DeleteProjectCrawls(*models.Project) DeleteProject(*models.Project) FindProjectsByUser(uid int) []models.Project }