pholcus_lib

package
v1.0.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 22, 2023 License: Apache-2.0 Imports: 8 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// BaiduNews is the spider definition for the Baidu RSS news feeds.
//
// Flow, as wired up in the rule tree below:
//
//  1. Root registers a 5-minute timer for every key of rss_BaiduNews and
//     triggers the "LOOP" rule once per key.
//  2. "LOOP" queues a request for the feed URL stored under that key, to be
//     parsed by the "XML列表页" rule.
//  3. "XML列表页" unmarshals the response into BaiduNewsRss and queues one
//     "新闻详情" request per item; on exit it runs the key's timer and then
//     triggers "LOOP" again, which is what keeps the polling going.
//  4. "新闻详情" extracts the article body via baiduNewsFn.prase and outputs
//     one record per article.
var BaiduNews = &Spider{
	Name:        "百度RSS新闻",
	Description: "百度RSS新闻,实现轮询更新 [Auto Page] [news.baidu.com]",

	EnableCookie: false,

	Namespace: nil,

	// SubNamespace derives the sub-namespace from the record's "分类" field.
	// It returns "" instead of panicking when the data cell does not have the
	// expected shape.
	// NOTE(review): presumably an empty result means "no sub-namespace" to the
	// framework — confirm against the Spider implementation.
	SubNamespace: func(self *Spider, dataCell map[string]interface{}) string {
		data, ok := dataCell["Data"].(map[string]interface{})
		if !ok {
			return ""
		}
		category, _ := data["分类"].(string)
		return category
	},
	RuleTree: &RuleTree{
		// Root starts one polling loop per feed in rss_BaiduNews.
		Root: func(ctx *Context) {
			for k := range rss_BaiduNews {
				ctx.SetTimer(k, time.Minute*5, nil)
				ctx.Aid(map[string]interface{}{"loop": k}, "LOOP")
			}
		},

		Trunk: map[string]*Rule{
			"LOOP": {
				// AidFunc queues a fetch of the feed identified by aid["loop"].
				// It always returns nil.
				AidFunc: func(ctx *Context, aid map[string]interface{}) interface{} {
					k, ok := aid["loop"].(string)
					if !ok {
						logs.Log.Error("LOOP: aid[\"loop\"] is not a string: %v", aid["loop"])
						return nil
					}
					// An unknown key would otherwise enqueue a request with
					// the zero-value URL, so skip it.
					v, ok := rss_BaiduNews[k]
					if !ok {
						logs.Log.Error("LOOP: unknown feed key %q", k)
						return nil
					}

					ctx.AddQueue(&request.Request{
						Url:    v,
						Rule:   "XML列表页",
						Header: http.Header{"Content-Type": []string{"application/xml"}},
						Temp:   map[string]interface{}{"src": k},

						Reloadable: true,
					})
					return nil
				},
			},
			"XML列表页": {
				// ParseFunc parses the RSS document and queues one detail
				// request per item.
				ParseFunc: func(ctx *Context) {
					src := ctx.GetTemp("src", "").(string)
					// Deferred so the loop is re-armed on every exit path,
					// including the XML error return below.
					defer func() {
						ctx.RunTimer(src)
						ctx.Aid(map[string]interface{}{"loop": src}, "LOOP")
					}()

					page := ctx.GetText()
					rss := new(BaiduNewsRss)
					if err := xml.Unmarshal([]byte(page), rss); err != nil {
						logs.Log.Error("XML列表页: %v", err)
						return
					}
					for _, item := range rss.Channel.Item {
						ctx.AddQueue(&request.Request{
							Url:  item.Link,
							Rule: "新闻详情",
							// Feed metadata travels with the request so the
							// detail rule can emit it alongside the body.
							Temp: map[string]interface{}{
								"title":       CleanHtml(item.Title, 4),
								"description": CleanHtml(item.Description, 4),
								"src":         src,
								"releaseTime": CleanHtml(item.PubDate, 4),
								"author":      CleanHtml(item.Author, 4),
							},
						})
					}
				},
			},

			"新闻详情": {

				// Output indices in ParseFunc follow this order.
				ItemFields: []string{
					"标题",
					"描述",
					"内容",
					"发布时间",
					"分类",
					"作者",
				},
				// ParseFunc emits one record combining the extracted article
				// text with the metadata carried in the request's Temp.
				ParseFunc: func(ctx *Context) {
					var title = ctx.GetTemp("title", "").(string)

					// When prase reports a reload, nothing is output for this
					// response.
					infoStr, isReload := baiduNewsFn.prase(ctx)
					if isReload {
						return
					}

					ctx.Output(map[int]interface{}{
						0: title,
						1: ctx.GetTemp("description", ""),
						2: infoStr,
						3: ctx.GetTemp("releaseTime", ""),
						4: ctx.GetTemp("src", ""),
						5: ctx.GetTemp("author", ""),
					})
				},
			},
		},
	},
}

Functions

This section is empty.

Types

type BaiduNewsData

// BaiduNewsData is the decoded content of an RSS <channel> element: the list
// of its <item> children. It is the type of BaiduNewsRss.Channel.
type BaiduNewsData struct {
	Item []BaiduNewsItem `xml:"item"`
}

type BaiduNewsItem

// BaiduNewsItem is one RSS <item> entry. Each field is filled from the child
// element named in its xml tag.
type BaiduNewsItem struct {
	Title       string `xml:"title"`
	Link        string `xml:"link"`
	Description string `xml:"description"`
	PubDate     string `xml:"pubDate"`
	Author      string `xml:"author"`
}

type BaiduNewsRss

// BaiduNewsRss is the top-level target for xml.Unmarshal of a feed document;
// it captures only the <channel> element.
type BaiduNewsRss struct {
	Channel BaiduNewsData `xml:"channel"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL