Documentation ¶
Index ¶
- Variables
- func ClearCache() error
- func ConvertResourceType(contentType string) network.ResourceType
- func Instance() *browser
- func IsValidStatus(statuscode int) bool
- func New() *browser
- func NewPagenation(tab *Tab, pagerule string, spdierrule string, data interface{}, f func() *Tab) (*pagenation, error)
- func SimpleGet(url string) (res []byte, contentType string, statuscode int, err error)
- type DocumentInfo
- type Resource
- type Tab
- func (self *Tab) Close()
- func (self *Tab) DisableCrawlResource() *resourceParams
- func (self *Tab) Evaluate(rule string, v interface{}) error
- func (self *Tab) GetAllLinks() ([]string, error)
- func (self *Tab) GetDocument() (res []byte, err error)
- func (self *Tab) GetPdfBytes(url string) ([]byte, error)
- func (self *Tab) GetSnapShot(url string) (string, error)
- func (self *Tab) Navigate(rawUrl string) (doc DocumentInfo, err error)
- func (self *Tab) NavigateEvaluate(rawUrl string, rule string, v interface{}) (err error)
- func (self *Tab) NewPagenation(pagerule string, spdierrule string, data interface{}) (page *pagenation, err error)
- func (self *Tab) SetLoadTimeOut(loadtime int) *Tab
- func (self *Tab) SetWaitTime(waittime int) *Tab
Constants ¶
This section is empty.
Variables ¶
View Source
var Crawler_CacheDirectory = ""
指定缓存目录
View Source
var Crawler_Capacity int = 10
标签页数量上限
View Source
var Crawler_Headless = true
全局配置参数:无头模式
View Source
var Crawler_LoadTimeOut = 30
全局超时时间(秒)
View Source
var Crawler_WaitTime = 0
全局页面加载完成后等待时间(毫秒)
View Source
var Default_ResourceType_Allow = map[network.ResourceType]struct{}{network.ResourceTypeImage: struct{}{}, network.ResourceTypeScript: struct{}{}, network.ResourceTypeStylesheet: struct{}{}, network.ResourceTypeFont: struct{}{}}
View Source
var ERR_INVALID_RESPONSE error = errors.New("无效的响应")
View Source
var ERR_INVALID_URL error = errors.New("无效的网站")
View Source
var ERR_URL_LOAD_FAIL error = errors.New("网站加载失败")
View Source
var ERR_URL_TIMEOUT error = errors.New("网站已超时")
Functions ¶
func ConvertResourceType ¶
func ConvertResourceType(contentType string) network.ResourceType
func IsValidStatus ¶
func NewPagenation ¶
Types ¶
type DocumentInfo ¶
type Tab ¶
type Tab struct {
	LoadTimeOut  int  // 秒
	WaitTime     int  // 毫秒
	AcceptDialog bool // true表示在js弹出窗中按确认, false表示取消(默认)
	// contains filtered or unexported fields
}
func (*Tab) DisableCrawlResource ¶
func (self *Tab) DisableCrawlResource() *resourceParams
func (*Tab) Navigate ¶
func (self *Tab) Navigate(rawUrl string) (doc DocumentInfo, err error)
跳转页面并获取各种页面信息
func (*Tab) NavigateEvaluate ¶
跳转到一个页面,并执行脚本,将返回的数据赋给 v
Click to show internal directories.
Click to hide internal directories.