Documentation ¶
Index ¶
Constants ¶
View Source
// BooklistRe is a regular-expression pattern that matches an opening <a> tag
// whose href attribute is immediately followed (after a single space) by a
// title attribute. Capture group 1 is the href value and capture group 2 is
// the title value; both must be non-empty and contain no double quote.
// NOTE(review): presumably consumed by ParseBookList to pull book links out
// of a listing page — confirm against that function's body.
const BooklistRe = `<a.*?href="([^"]+)" title="([^"]+)"`
Variables ¶
View Source
var BookTask = &spider.Task{ Options: spider.Options{ Name: "douban_book_list", WaitTime: 2, MaxDepth: 5, Cookie: "__Secure-3PAPISID=syRZh49k3Ly4GbH3/AOnbR3RsAI0yA8PDW; __Secure-3PSID=Twhd6jxVkHKO2ttDC0GLtozGuZdvukYa8gfvQ60Z38cf_DgXtxhCGUNKbE0uZnqBJp9INw.; 1P_JAR=2023-03-16-07; NID=511=nY--paEemdRdrxnDh45vRVS9CwdCxWrAB_WcGd881Noe38RsZgIe6O0alR-37fe7S1BmOGQhtUtBOjrlfdwJqD9GCobLvAdAZyHarnyVHoF9bITePzu_z2tgdu-82nVgorFtsksZ7lrJaS6pVljyG3wuyE-LWmV3rFnHns8cqgvEz0xnwTwgNsb-bUoJtXxHEc4kSyThUcIzPSgAIbO06Ae8-3RlViW5HT1dYKkHI1mIG-z1m9bIZNj_-NwRYljwZQs3YtOsN1xiJtFe4aXtNXiLc-1JNcHl2lHncjIRzuzrO7rTkqNwrUXJ; __Secure-3PSIDCC=AFvIBn9ifnhgfsJGfVreP-uldS_QSO5ydc1vRUCNQO4xgJgVl6UwYrCbK-CJVpXThvRKf7q3FA", Reload: true, Limiter: limiter.Multi( rate.NewLimiter(limiter.Per(1, 3*time.Second), 1), rate.NewLimiter(limiter.Per(20, 60*time.Second), 20), ), }, Rule: spider.RuleTree{ Root: func() ([]*spider.Request, error) { roots := []*spider.Request{ { Priority: 1, URL: "https://book.douban.com", Method: "GET", RuleName: "数据tag", }, } return roots, nil }, Trunk: map[string]*spider.Rule{ "数据tag": {ParseFunc: ParseTag}, "书籍列表": {ParseFunc: ParseBookList}, "书籍简介": { ItemFields: []string{ "book_name", "author", "page", "press", "score", "price", "description", }, ParseFunc: ParseBookDetail, }, }, }, }
Functions ¶
func ParseBookDetail ¶
func ParseBookDetail(ctx *spider.Context) (spider.ParseResult, error)
func ParseBookList ¶
func ParseBookList(ctx *spider.Context) (spider.ParseResult, error)
Types ¶
This section is empty.
Click to show internal directories.
Click to hide internal directories.