Documentation ¶
Index ¶
- func ConvertToString(src string, srcCode string, tagCode string) string
- func GbkToUtf8(src string) string
- type AbstractSpider
- func (this *AbstractSpider) AddCaption(c *Caption)
- func (this *AbstractSpider) DelCaption(name string)
- func (this *AbstractSpider) GetCaption(name string) *Caption
- func (this *AbstractSpider) GetCaptionByIndex(index int) *Caption
- func (this *AbstractSpider) GetCaptionCount() int
- func (this *AbstractSpider) GetDocument(url string) *goquery.Document
- func (this *AbstractSpider) GetUrl(url string) (*html.Node, error)
- func (this *AbstractSpider) Run(f grapfunc)
- func (this *AbstractSpider) SetPath(p string)
- type Caption
- type CaptionManager
- type Collecter
- type FileCollectManager
- func (this *FileCollectManager) AddCaption(c *Caption)
- func (this *FileCollectManager) AddCollecter(key string, c Collecter)
- func (this *FileCollectManager) GetCaption(name string) *Caption
- func (this *FileCollectManager) GetFileName(name string, index int) string
- func (this *FileCollectManager) Init()
- func (this *FileCollectManager) JobHandle()
- func (this *FileCollectManager) Load()
- type WuxianSpider
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ConvertToString ¶
字符编码转换
Types ¶
type AbstractSpider ¶
type AbstractSpider struct {
	CaptionPath string //配置文件的路径
	WorkPath    string //抓取文件存储路径
	// contains filtered or unexported fields
}
标准的爬虫
func (*AbstractSpider) AddCaption ¶
func (this *AbstractSpider) AddCaption(c *Caption)
增加抓取主题(加入队列中)
func (*AbstractSpider) DelCaption ¶
func (this *AbstractSpider) DelCaption(name string)
移除抓取的主题(并不删除文件,只是不在此队列中)
func (*AbstractSpider) GetCaption ¶
func (this *AbstractSpider) GetCaption(name string) *Caption
获取可用的主题
func (*AbstractSpider) GetCaptionByIndex ¶
func (this *AbstractSpider) GetCaptionByIndex(index int) *Caption
func (*AbstractSpider) GetCaptionCount ¶
func (this *AbstractSpider) GetCaptionCount() int
func (*AbstractSpider) GetDocument ¶
func (this *AbstractSpider) GetDocument(url string) *goquery.Document
通过url获取内容,并将内容转换成goquery的document对象
func (*AbstractSpider) GetUrl ¶
func (this *AbstractSpider) GetUrl(url string) (*html.Node, error)
获取url的内容
func (*AbstractSpider) SetPath ¶
func (this *AbstractSpider) SetPath(p string)
type Caption ¶
type Caption struct {
	Name    string //标题名称
	Title   string //书籍标题
	Index   int    //当前的索引号
	Enabled bool   //是否启用
	// Url string //对应的url
	IndexUrl string //索引对应的地址
	Collect  string //收集者
	Fix      string //文件后缀类型
}
标题记录
type CaptionManager ¶
type CaptionManager interface {
	GetCaption(name string) *Caption
	//获取对应索引的文件名称
	GetFileName(name string, index int) string
}
标题管理库
type FileCollectManager ¶
type FileCollectManager struct {
	Root string `Value:"collect.workpath"`
	// contains filtered or unexported fields
}
func (*FileCollectManager) AddCaption ¶
func (this *FileCollectManager) AddCaption(c *Caption)
增加收集的主题
func (*FileCollectManager) AddCollecter ¶
func (this *FileCollectManager) AddCollecter(key string, c Collecter)
注册收集器
func (*FileCollectManager) GetCaption ¶
func (this *FileCollectManager) GetCaption(name string) *Caption
func (*FileCollectManager) GetFileName ¶
func (this *FileCollectManager) GetFileName(name string, index int) string
获取对应索引的文件名称
func (*FileCollectManager) Init ¶
func (this *FileCollectManager) Init()
func (*FileCollectManager) Load ¶
func (this *FileCollectManager) Load()
type WuxianSpider ¶
type WuxianSpider struct { Collecter *FileCollectManager `Inject:""` AbstractSpider }
无限小说抓取
func (*WuxianSpider) Init ¶
func (this *WuxianSpider) Init()
func (*WuxianSpider) Run ¶
func (this *WuxianSpider) Run()
Click to show internal directories.
Click to hide internal directories.