douban

package
v0.0.0-...-caa0154 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 23, 2023 License: MIT Imports: 7 Imported by: 0

Documentation

Index

Constants

View Source
// BooklistRe is a regular-expression pattern (uncompiled source text) that
// matches an opening anchor tag whose href attribute is followed, after a
// single space, by a title attribute. Capture group 1 is the href value and
// capture group 2 is the title value; both must be non-empty and contain no
// double quote.
const BooklistRe = `<a.*?href="([^"]+)" title="([^"]+)"`

Variables

View Source
// BookTask is the spider task definition named "douban_book_list". Its rule
// tree seeds a single request to https://book.douban.com and registers three
// named parse rules backed by ParseTag, ParseBookList and ParseBookDetail.
var BookTask = &spider.Task{
	Options: spider.Options{
		Name:     "douban_book_list",
		// NOTE(review): the unit of WaitTime is not visible here (presumably
		// seconds) — confirm against spider.Options.
		WaitTime: 2,
		MaxDepth: 5,
		// NOTE(review): a live-looking session cookie is hard-coded and
		// committed to source. The cookie names (__Secure-3PSID, NID, 1P_JAR)
		// look like Google cookies rather than Douban ones — confirm it is
		// actually needed, rotate it, and load it from configuration or the
		// environment instead of the source tree.
		Cookie:   "__Secure-3PAPISID=syRZh49k3Ly4GbH3/AOnbR3RsAI0yA8PDW; __Secure-3PSID=Twhd6jxVkHKO2ttDC0GLtozGuZdvukYa8gfvQ60Z38cf_DgXtxhCGUNKbE0uZnqBJp9INw.; 1P_JAR=2023-03-16-07; NID=511=nY--paEemdRdrxnDh45vRVS9CwdCxWrAB_WcGd881Noe38RsZgIe6O0alR-37fe7S1BmOGQhtUtBOjrlfdwJqD9GCobLvAdAZyHarnyVHoF9bITePzu_z2tgdu-82nVgorFtsksZ7lrJaS6pVljyG3wuyE-LWmV3rFnHns8cqgvEz0xnwTwgNsb-bUoJtXxHEc4kSyThUcIzPSgAIbO06Ae8-3RlViW5HT1dYKkHI1mIG-z1m9bIZNj_-NwRYljwZQs3YtOsN1xiJtFe4aXtNXiLc-1JNcHl2lHncjIRzuzrO7rTkqNwrUXJ; __Secure-3PSIDCC=AFvIBn9ifnhgfsJGfVreP-uldS_QSO5ydc1vRUCNQO4xgJgVl6UwYrCbK-CJVpXThvRKf7q3FA",
		Reload:   true,
		// Two rate limiters are combined through limiter.Multi. Assuming
		// limiter.Per(n, d) means "n events per duration d" and the second
		// argument of rate.NewLimiter is the burst size, this allows 1 request
		// per 3s and at most 20 requests per 60s — TODO confirm.
		Limiter: limiter.Multi(
			rate.NewLimiter(limiter.Per(1, 3*time.Second), 1),
			rate.NewLimiter(limiter.Per(20, 60*time.Second), 20),
		),
	},
	Rule: spider.RuleTree{
		// Root returns the seed requests: one GET of the Douban books home
		// page, labelled with the "数据tag" ("data tag") rule name. It never
		// returns a non-nil error.
		Root: func() ([]*spider.Request, error) {
			roots := []*spider.Request{
				{
					Priority: 1,
					URL:      "https://book.douban.com",
					Method:   "GET",
					RuleName: "数据tag",
				},
			}
			return roots, nil
		},
		// Trunk maps rule names to their rules. The keys are Chinese:
		// "数据tag" = "data tag", "书籍列表" = "book list",
		// "书籍简介" = "book introduction".
		Trunk: map[string]*spider.Rule{
			"数据tag": {ParseFunc: ParseTag},
			"书籍列表":  {ParseFunc: ParseBookList},
			"书籍简介": {
				// Field names declared for this rule; presumably the columns
				// of each extracted book record — confirm against
				// ParseBookDetail.
				ItemFields: []string{
					"book_name",
					"author",
					"page",
					"press",
					"score",
					"price",
					"description",
				},
				ParseFunc: ParseBookDetail,
			},
		},
	},
}

Functions

func ExtraString

func ExtraString(contents []byte, re *regexp.Regexp) string

func ParseBookDetail

func ParseBookDetail(ctx *spider.Context) (spider.ParseResult, error)

func ParseBookList

func ParseBookList(ctx *spider.Context) (spider.ParseResult, error)

func ParseTag

func ParseTag(ctx *spider.Context) (spider.ParseResult, error)

Types

This section is empty.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL