collect

package

Version: v0.0.0-...-65d0321 (latest) · Published: Aug 29, 2020 · License: MIT · Imports: 12 · Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/aosfather/kingreading

Links

Open Source Insights

Documentation ¶

Index ¶

func ConvertToString(src string, srcCode string, tagCode string) string
func GbkToUtf8(src string) string
type AbstractSpider
type Caption
- func (this *Caption) Load(filename string)
- func (this *Caption) Save(path string)
type CaptionManager
type Collecter
type FileCollectManager
type WuxianSpider

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func ConvertToString ¶

func ConvertToString(src string, srcCode string, tagCode string) string

字符编码转换 (character-encoding conversion).

func GbkToUtf8 ¶

func GbkToUtf8(src string) string

Types ¶

type AbstractSpider ¶

// AbstractSpider is the standard spider (crawler) type.
type AbstractSpider struct {
	CaptionPath string // path to the configuration files
	WorkPath    string // path where grabbed files are stored
	// contains filtered or unexported fields
}

标准的爬虫

func (*AbstractSpider) AddCaption ¶

func (this *AbstractSpider) AddCaption(c *Caption)

增加抓取主题(加入队列中)

func (*AbstractSpider) DelCaption ¶

func (this *AbstractSpider) DelCaption(name string)

移除抓取的主题（并不删除文件，只是不再在此队列中）

func (*AbstractSpider) GetCaption ¶

func (this *AbstractSpider) GetCaption(name string) *Caption

获取可用的主题

func (*AbstractSpider) GetCaptionByIndex ¶

func (this *AbstractSpider) GetCaptionByIndex(index int) *Caption

func (*AbstractSpider) GetCaptionCount ¶

func (this *AbstractSpider) GetCaptionCount() int

func (*AbstractSpider) GetDocument ¶

func (this *AbstractSpider) GetDocument(url string) *goquery.Document

通过url获取内容，并将内容转换成goquery的document对象

func (*AbstractSpider) GetUrl ¶

func (this *AbstractSpider) GetUrl(url string) (*html.Node, error)

获取url的内容

func (*AbstractSpider) Run ¶

func (this *AbstractSpider) Run(f grapfunc)

执行抓取

func (*AbstractSpider) SetPath ¶

func (this *AbstractSpider) SetPath(p string)

type Caption ¶

// Caption is a title record.
type Caption struct {
	Name    string // caption name
	Title   string // book title
	Index   int    // current index number
	Enabled bool   // whether the caption is enabled
	//
	Url      string // corresponding URL
	IndexUrl string // address corresponding to the index
	Collect  string // collector (presumably the key of the Collecter handling this caption — TODO confirm)
	Fix      string // file suffix type

}

标题记录

func (*Caption) Load ¶

func (this *Caption) Load(filename string)

从文件加载数据

func (*Caption) Save ¶

func (this *Caption) Save(path string)

保存

type CaptionManager ¶

// CaptionManager is the caption management library: it looks up a caption by
// name and maps a caption name plus index to a file name.
type CaptionManager interface {
	GetCaption(name string) *Caption
	// GetFileName returns the file name for the given index.
	GetFileName(name string, index int) string
}

标题管理库

type Collecter ¶

// Collecter is the interface a collector implements: it accepts captions to
// grab, returns them by name, runs the grab, and takes a working directory.
type Collecter interface {
	AddCaption(c *Caption)           // add a caption (subject) to grab
	GetCaption(name string) *Caption // get a caption
	Run()                            // execute the grab
	SetPath(p string)                // set the working directory
}

type FileCollectManager ¶

// FileCollectManager exposes a Root path; its remaining fields are unexported
// and not shown in this listing.
type FileCollectManager struct {
	// Root carries the struct tag Value:"collect.workpath"; presumably it is
	// populated from that configuration key by an injection framework —
	// TODO confirm against the code that reads the tag.
	Root string `Value:"collect.workpath"`
	// contains filtered or unexported fields
}

func (*FileCollectManager) AddCaption ¶

func (this *FileCollectManager) AddCaption(c *Caption)

增加收集的主题

func (*FileCollectManager) AddCollecter ¶

func (this *FileCollectManager) AddCollecter(key string, c Collecter)

注册收集器

func (*FileCollectManager) GetCaption ¶

func (this *FileCollectManager) GetCaption(name string) *Caption

func (*FileCollectManager) GetFileName ¶

func (this *FileCollectManager) GetFileName(name string, index int) string

获取对应索引的文件名称

func (*FileCollectManager) Init ¶

func (this *FileCollectManager) Init()

func (*FileCollectManager) JobHandle ¶

func (this *FileCollectManager) JobHandle()

定时收集任务处理

func (*FileCollectManager) Load ¶

func (this *FileCollectManager) Load()

type WuxianSpider ¶

// WuxianSpider is the Wuxian novel grabber. It embeds AbstractSpider, so that
// type's exported fields and methods are promoted onto WuxianSpider.
type WuxianSpider struct {
	// Collecter carries an empty Inject struct tag; presumably it is set by a
	// dependency-injection container — TODO confirm.
	Collecter *FileCollectManager `Inject:""`
	AbstractSpider
}

无限小说抓取

func (*WuxianSpider) GrabCaption ¶

func (this *WuxianSpider) GrabCaption(c *Caption)

抓取

func (*WuxianSpider) Init ¶

func (this *WuxianSpider) Init()

func (*WuxianSpider) Run ¶

func (this *WuxianSpider) Run()

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL