Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
// Queue-related error values, each created once with errors.New so callers
// can compare a returned error against them (for example with errors.Is).
// NOTE(review): presumably returned by CrawlQueue operations — the methods'
// bodies are not visible here, confirm against the implementation.
var (
	// QueueEmpty carries the message "Queue is empty".
	QueueEmpty = errors.New("Queue is empty")
	// QueueClosed carries the message "Queue was closed".
	QueueClosed = errors.New("Queue was closed")
)
View Source
// Package-level error values. Each is created once with errors.New, so a
// returned error can be matched against them by identity (for example with
// errors.Is).
//
// NOTE(review): the ALL_CAPS names and capitalized messages differ from the
// usual Go conventions (ErrXxx names, lowercase messages). They are part of
// the exported API and of the runtime output, so they are kept verbatim.
var (
	// ERR_MANY_REDIRECT carries "Many Redirect Error"; presumably signals
	// that too many redirects were followed — confirm at the use site.
	ERR_MANY_REDIRECT = errors.New("Many Redirect Error")
	// ERR_TIMEOUT reports that a request timed out.
	ERR_TIMEOUT = errors.New("Request timed out")
	// ERR_DATABASE reports that the database returned an error.
	ERR_DATABASE = errors.New("Database returned an error")
	// ERR_DOWNLOAD reports that downloading a page failed.
	ERR_DOWNLOAD = errors.New("Failed to download a page")
	// ERR_INTERNAL reports an internal error.
	ERR_INTERNAL = errors.New("Occur a internal error")
	// ERR_INVALIDURL reports that a URL is invalid.
	ERR_INVALIDURL = errors.New("URL is invalid")
	// ERR_INVALID_ROBOTS reports that a robots.txt has an invalid format.
	ERR_INVALID_ROBOTS = errors.New("Robots.txt is invalid format")
	// ERR_NOT_HTML reports that a page is not written in HTML.
	ERR_NOT_HTML = errors.New("This page is not written in HTML")
	// ERR_HTML_PARSE_ERROR reports that parsing HTML failed.
	ERR_HTML_PARSE_ERROR = errors.New("Failed to parse HTML")
)
Functions ¶
Types ¶
type CrawlQueue ¶
func NewCrawlQueue ¶
func NewCrawlQueue(duration time.Duration) *CrawlQueue
func (*CrawlQueue) Close ¶
func (q *CrawlQueue) Close()
func (*CrawlQueue) Flush ¶
func (q *CrawlQueue) Flush() []*urlparse.URL
func (CrawlQueue) Len ¶
func (q CrawlQueue) Len() int
func (CrawlQueue) Swap ¶
func (q CrawlQueue) Swap(i int, j int)
type CrawlingState ¶
type Page ¶
// Page groups the data kept for a single page: its URL, content type, raw
// body bytes, redirect target, and a CrawlingState value. Every named field
// carries a `riak` struct tag, and riak.Model is embedded.
//
// NOTE(review): the tags are presumably the keys used when the value is
// stored through the riak package, and the embedded riak.Model presumably
// supplies the persistence behaviour — confirm against that package.
type Page struct {
	URL         string        `riak:"url"`
	ContentType string        `riak:"contentType"`
	Body        []byte        `riak:"body"`
	RedirectTo  string        `riak:"redirectTo"`
	State       CrawlingState `riak:"state"`
	riak.Model
}
type PageStore ¶
// PageStore shows no exported fields in this listing.
// NOTE(review): presumably a store for Page values, judging by the name
// only — its fields and methods are not visible here; confirm.
type PageStore struct {
	// contains filtered or unexported fields
}
func NewPageStore ¶
type QueueElement ¶
// QueueElement shows no exported fields in this listing.
// NOTE(review): presumably the entry type held by CrawlQueue, judging by
// the name only — its fields are not visible here; confirm.
type QueueElement struct {
	// contains filtered or unexported fields
}
Click to show internal directories.
Click to hide internal directories.