Documentation ¶
Index ¶
- Variables
- func GetSelenium() (selenium.WebDriver, error)
- func LoadConfig() error
- type Config
- type Factory
- type FactoryRegistry
- type ListCrawler
- type MatchType
- type NotificationSender
- type Offer
- type PageCrawler
- type SitesProcessor
- func (s *SitesProcessor) GetProcessingState() (admin.ProcessorState, error)
- func (s *SitesProcessor) MapOfferToDB(offer *Offer, url string) *model.Offer
- func (s *SitesProcessor) Process() error
- func (s *SitesProcessor) ProcessSite(ctx context.Context, wd selenium.WebDriver, url string) error
- func (s *SitesProcessor) ProcessSiteList(ctx context.Context, wd selenium.WebDriver, url string) error
- func (s *SitesProcessor) Run() error
- func (s *SitesProcessor) RunProcessing() error
- func (s *SitesProcessor) StopProcessing() error
Constants ¶
This section is empty.
Variables ¶
View Source
var AlreadyRunningError = errors.New("processor already running")
View Source
var (
ConfigNotLoadedError = errors.New("config not loaded")
)
View Source
var CrawlerNotFoundError = errors.New("crawler not found")
View Source
var NotRunningError = errors.New("processor not running")
View Source
var NotStartedError = errors.New("processor not started")
Functions ¶
func GetSelenium ¶
func LoadConfig ¶
func LoadConfig() error
Types ¶
type Config ¶
type Config struct { DatabaseUrl string `mapstructure:"DATABASE_URL"` DatabaseName string `mapstructure:"DATABASE_NAME"` CrawlerPeriod time.Duration `mapstructure:"CRAWLER_PERIOD"` CrawlerPagesCount int `mapstructure:"CRAWLER_PAGES_COUNT"` SeleniumUrl string `mapstructure:"SELENIUM_URL"` ServerEnabled bool `mapstructure:"SERVER_ENABLED"` ServerPort int `mapstructure:"SERVER_PORT"` ServerIp string `mapstructure:"SERVER_IP"` }
type Factory ¶
type Factory interface { // NewPageCrawler should return struct implementing PageCrawler interface NewPageCrawler() PageCrawler // NewListCrawler should return struct implementing ListCrawler interface NewListCrawler() ListCrawler // MatchUrl should return MatchType for the given url // CrawlerMatchPage if the given url is supported by NewPageCrawler // CrawlerMatchList if the given url is supported by NewListCrawler // CrawlerNotMatch if the given url is not supported MatchUrl(url string) MatchType }
Factory is an interface for creating new crawlers and determining whether the given URL is supported.
type FactoryRegistry ¶
type FactoryRegistry struct {
// contains filtered or unexported fields
}
func NewCrawlerFactoryRegistry ¶
func NewCrawlerFactoryRegistry() *FactoryRegistry
func (*FactoryRegistry) GetCrawler ¶
func (r *FactoryRegistry) GetCrawler(url string) (PageCrawler, ListCrawler)
func (*FactoryRegistry) Register ¶
func (r *FactoryRegistry) Register(factory Factory)
type ListCrawler ¶
type ListCrawler interface { // GetUrls should return list of urls from the given url or error GetUrls(wd selenium.WebDriver, url string) ([]string, error) // NextPage should return next page url or error NextPage(wd selenium.WebDriver, url string) (string, error) }
ListCrawler is an interface for crawling a list of offers.
type NotificationSender ¶
type PageCrawler ¶
type PageCrawler interface { // CrawlOffer should return Offer struct with all the data from the given url or error CrawlOffer(wd selenium.WebDriver, url string) (*Offer, error) }
PageCrawler is an interface for crawling a single offer page.
type SitesProcessor ¶
type SitesProcessor struct {
// contains filtered or unexported fields
}
func NewSitesProcessor ¶
func NewSitesProcessor(ctx context.Context, registry *FactoryRegistry, sender NotificationSender, watchUrlRepo db.WatchUrlRepository, offerRepo db.OfferRepository) *SitesProcessor
func (*SitesProcessor) GetProcessingState ¶
func (s *SitesProcessor) GetProcessingState() (admin.ProcessorState, error)
func (*SitesProcessor) MapOfferToDB ¶
func (s *SitesProcessor) MapOfferToDB(offer *Offer, url string) *model.Offer
func (*SitesProcessor) Process ¶
func (s *SitesProcessor) Process() error
Process runs the processor once
func (*SitesProcessor) ProcessSite ¶
func (*SitesProcessor) ProcessSiteList ¶
func (*SitesProcessor) Run ¶
func (s *SitesProcessor) Run() error
Run starts the processor with periodic jobs
func (*SitesProcessor) RunProcessing ¶
func (s *SitesProcessor) RunProcessing() error
func (*SitesProcessor) StopProcessing ¶
func (s *SitesProcessor) StopProcessing() error
Click to show internal directories.
Click to hide internal directories.