site

package
v0.0.0-...-c266f6d Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 24, 2023 License: MIT Imports: 13 Imported by: 0

Documentation

Index

Constants

View Source
// Crawl-type and card-style constants. All of them are untyped integer
// constants.
const (
	// Crawl types.
	// NOTE(review): presumably the values stored in Site.CrawType — confirm
	// against the spider constructors.
	CrawApi  = 1
	CrawHtml = 2

	// Rendering styles.
	// NOTE(review): presumably the values stored in Hot.Card — confirm.
	CardText     = 0
	CardRichText = 1
	CardVideo    = 2
)
View Source
// SITE_CT is the identifier string "chouti"; it is one of the entries of
// Avaiable.
const SITE_CT = "chouti"
View Source
// SITE_GITHUB is the identifier string "github"; it is one of the entries of
// Avaiable.
const SITE_GITHUB = "github"
View Source
// SITE_GUANGGU is the identifier string "guanggu"; it is one of the entries
// of Avaiable.
const SITE_GUANGGU = "guanggu"
View Source
// SITE_HACKER is the identifier string "hacker"; it is one of the entries of
// Avaiable.
const SITE_HACKER = "hacker"
View Source
// SITE_REDDIT is the identifier string "reddit"; it is one of the entries of
// Avaiable.
const SITE_REDDIT = "reddit"
View Source
// SITE_TIEBA is the identifier string "tieba"; it is one of the entries of
// Avaiable.
const SITE_TIEBA = "tieba"
View Source
// SITE_V2EX is the identifier string "v2ex"; it is one of the entries of
// Avaiable.
const SITE_V2EX = "v2ex"
View Source
// SITE_WBVIDEO is the identifier string "wbvideo"; it is one of the entries
// of Avaiable.
const SITE_WBVIDEO = "wbvideo"
View Source
// SITE_WEIBO is the identifier string "weibo"; it is one of the entries of
// Avaiable.
const SITE_WEIBO = "weibo"
View Source
// SITE_ZAOBAO is the identifier string "zaobao"; it is one of the entries of
// Avaiable.
const SITE_ZAOBAO = "zaobao"
View Source
// SITE_ZHIHU is the identifier string "zhihu"; it is one of the entries of
// Avaiable.
const SITE_ZHIHU = "zhihu"

Variables

View Source
var (
	// Site configuration: Spider values keyed by string. The map starts
	// empty.
	// NOTE(review): presumably populated through RegistSite using the
	// SITE_* names as keys — confirm.
	SiteMap = make(map[string]Spider)
	// Available sites, as an ordered list of SITE_* identifiers.
	Avaiable = []string{
		SITE_V2EX,
		SITE_CT,
		SITE_ZHIHU,
		SITE_WEIBO,
		SITE_WBVIDEO,
		SITE_GUANGGU,
		SITE_HACKER,
		SITE_GITHUB,
		SITE_TIEBA,
		SITE_REDDIT,
		SITE_ZAOBAO,
	}
)
View Source
// ChoutiTabs holds three tabs: the new hot list, the 24-hour top list and
// the 3-day top list. Every Url here is a path starting with "/" rather than
// an absolute URL.
// NOTE(review): presumably joined with the site's root URL before
// fetching — confirm in Chouti.BuildUrl.
var ChoutiTabs = []SiteTab{
	{Tag: "hot", Name: "新热榜", Url: "/link/hot"},
	{Tag: "24hr", Name: "24小时最热", Url: "/top/24hr"},
	{Tag: "72hr", Name: "3天最热", Url: "/top/72hr"},
}
View Source
// GithubTabs holds three tabs, each pointing at a github.com/trending page:
// the overall page plus the daily PHP and Go pages.
var GithubTabs = []SiteTab{
	{Tag: "trending", Name: "Trending", Url: "https://github.com/trending"},
	{Tag: "trending-php", Name: "Trending-PHP", Url: "https://github.com/trending/php?since=daily"},
	{Tag: "trending-go", Name: "Trending-Go", Url: "https://github.com/trending/go?since=daily"},
}
View Source
// GuangGuTabs holds two tabs, "default" and "latest". Both URLs are on
// www.guozaoke.com.
var GuangGuTabs = []SiteTab{
	{Tag: "default", Name: "默认", Url: "https://www.guozaoke.com/"},
	{Tag: "latest", Name: "最新", Url: "https://www.guozaoke.com/?tab=latest"},
}
View Source
// HackerTabs holds two tabs on news.ycombinator.com: "new" (the front page
// URL) and "show" (the /shownew page).
var HackerTabs = []SiteTab{
	{Tag: "new", Name: "最新", Url: "https://news.ycombinator.com/"},
	{Tag: "show", Name: "作品展示", Url: "https://news.ycombinator.com/shownew"},
}
View Source
// RedditTabs holds three tabs. Unlike most other tab lists in this package,
// Url carries a bare name (equal to Tag) instead of an absolute URL.
// NOTE(review): presumably a subreddit name that Reddit.BuildUrl expands
// into a request URL — confirm.
var RedditTabs = []SiteTab{
	{Tag: "AskReddit", Name: "AskReddit", Url: "AskReddit"},
	{Tag: "Jokes", Name: "Jokes", Url: "Jokes"},
	{Tag: "leagueoflegends", Name: "lol", Url: "leagueoflegends"},
}
View Source
// TiebaTabs holds two tabs, each a tieba.baidu.com forum URL selected by the
// "kw" query parameter.
// NOTE(review): the kw values are raw, non-percent-encoded Chinese text;
// verify that the HTTP layer sends them in a form the server accepts.
var TiebaTabs = []SiteTab{
	{Tag: "beiguo", Name: "抗压背锅吧", Url: "https://tieba.baidu.com/f?ie=utf-8&kw=抗压背锅&fr=search"},
	{Tag: "ruozhi", Name: "弱智吧", Url: "https://tieba.baidu.com/f?ie=utf-8&kw=弱智&fr=search"},
}
View Source
// V2exTabs holds two tabs, "all" and "hot". Neither entry sets Url, so it is
// the empty string.
// NOTE(review): presumably V2ex.BuildUrl derives the request URL from
// Tag — confirm.
var V2exTabs = []SiteTab{
	{Tag: "all", Name: "全部"},
	{Tag: "hot", Name: "最热"},
}
View Source
// WbvideoTabs holds two tabs for the weibo.com/tv billboard component API:
// the site-wide board and the "funny" board. Each entry also carries a
// single "cid" argument in Args.
// NOTE(review): presumably Args is sent as request parameters by
// Wbvideo.CrawPage — confirm.
var WbvideoTabs = []SiteTab{
	{
		Tag:  "all",
		Name: "全站",
		Url:  "https://weibo.com/tv/api/component?page=%2Ftv%2Fbillboard",
		Args: map[string]string{"cid": "4418213501411061"},
	},
	{
		Tag:  "funny",
		Name: "搞笑幽默",
		Url:  "https://weibo.com/tv/api/component?page=%2Ftv%2Fbillboard%2F4418219809678869",
		Args: map[string]string{"cid": "4418219809678869"},
	},
}
View Source
// WeiboTabs holds a single tab, "hot", pointing at the s.weibo.com realtime
// hot-search summary page.
var WeiboTabs = []SiteTab{
	{Tag: "hot", Name: "热搜", Url: "https://s.weibo.com/top/summary?cate=realtimehot"},
}
View Source
// ZaobaoTabs holds a single tab, "focus", pointing at the www.zaobao.com
// home page. Note that the URL uses plain http.
var ZaobaoTabs = []SiteTab{
	{Tag: "focus", Name: "今日焦点", Url: "http://www.zaobao.com/"},
}
View Source
// ZhihuTabs holds a single tab, "all", pointing at the www.zhihu.com/hot
// page.
var ZhihuTabs = []SiteTab{
	{Tag: "all", Name: "知乎热榜", Url: "https://www.zhihu.com/hot"},
}

Functions

func RegistSite

func RegistSite(name string, s Spider)

Types

type Chouti

// Chouti embeds Site, so Site's fields and methods are promoted onto it.
// NOTE(review): presumably the Spider implementation for the SITE_CT
// ("chouti") site — confirm in NewChouti.
type Chouti struct {
	Site
}

func NewChouti

func NewChouti() *Chouti

func (*Chouti) BuildUrl

func (c *Chouti) BuildUrl() ([]Link, error)

func (*Chouti) CrawPage

func (c *Chouti) CrawPage(link Link, headers map[string]string) (Page, error)

func (*Chouti) FetchKey

func (c *Chouti) FetchKey(link string) string

func (*Chouti) GetSite

func (c *Chouti) GetSite() *Site

type ChoutiList

// ChoutiList is a JSON envelope with the keys "data", "code" and "success".
// NOTE(review): presumably the response body of the chouti list API —
// confirm against Chouti.CrawPage.
type ChoutiList struct {
	// Data is a list of generic JSON objects.
	Data    []map[string]interface{} `json:"data"`
	Code    int                      `json:"code"`
	Success bool                     `json:"success"`
}

type Github

// Github embeds Site, so Site's fields and methods are promoted onto it.
// NOTE(review): presumably the Spider implementation for the SITE_GITHUB
// ("github") site — confirm in NewGithub.
type Github struct {
	Site
}

func NewGithub

func NewGithub() *Github

func (*Github) BuildUrl

func (g *Github) BuildUrl() ([]Link, error)

func (*Github) CrawPage

func (g *Github) CrawPage(link Link, headers map[string]string) (Page, error)

func (*Github) FetchKey

func (g *Github) FetchKey(link string) string

func (*Github) GetSite

func (g *Github) GetSite() *Site

type Guanggu

// Guanggu embeds Site, so Site's fields and methods are promoted onto it.
// NOTE(review): presumably the Spider implementation for the SITE_GUANGGU
// ("guanggu") site — confirm in NewGuanggu.
type Guanggu struct {
	Site
}

func NewGuanggu

func NewGuanggu() *Guanggu

func (*Guanggu) BuildUrl

func (g *Guanggu) BuildUrl() ([]Link, error)

func (*Guanggu) CrawPage

func (g *Guanggu) CrawPage(link Link, headers map[string]string) (Page, error)

func (*Guanggu) FetchKey

func (g *Guanggu) FetchKey(link string) string

func (*Guanggu) GetSite

func (g *Guanggu) GetSite() *Site

type Hacker

// Hacker embeds Site, so Site's fields and methods are promoted onto it.
// NOTE(review): presumably the Spider implementation for the SITE_HACKER
// ("hacker") site — confirm in NewHacker.
type Hacker struct {
	Site
}

func NewHacker

func NewHacker() *Hacker

func (*Hacker) BuildUrl

func (h *Hacker) BuildUrl() ([]Link, error)

func (*Hacker) CrawPage

func (h *Hacker) CrawPage(link Link, headers map[string]string) (Page, error)

func (*Hacker) FetchKey

func (h *Hacker) FetchKey(link string) string

func (*Hacker) GetSite

func (h *Hacker) GetSite() *Site

type Hot

// Hot is one hot-list entry. The struct tags give the JSON key used for each
// field.
type Hot struct {
	Key       string            `json:"key"`
	Title     string            `json:"title"`
	Desc      string            `json:"desc"`
	Rank      float64           `json:"rank"`
	OriginUrl string            `json:"origin_url"`
	// Card is serialized under the key "card_type".
	// NOTE(review): presumably one of CardText, CardRichText or
	// CardVideo — confirm.
	Card      uint8             `json:"card_type"`
	Ext       map[string]string `json:"ext"`
}

热榜

// Link holds the information for one link. It is the unit returned by
// Spider.BuildUrl and accepted by Spider.CrawPage.
type Link struct {
	Key    string
	Url    string
	Tag    string
	// NOTE(review): presumably the HTTP method used to fetch Url — confirm.
	Method string
}

链接信息

type Page

// Page holds the information for one crawled page.
type Page struct {
	// Link is a Link value stored with the page.
	// NOTE(review): presumably the link that was fetched — confirm.
	Link Link

	// NOTE(review): Content, Doc and Json look like alternative forms of the
	// fetched body (raw text, parsed HTML document, decoded JSON objects);
	// confirm which are populated in Site.FetchData.
	Content string
	Doc     *goquery.Document
	Json    []map[string]interface{}

	// List holds Hot entries.
	// NOTE(review): presumably the entries extracted from this page, with T
	// being the crawl time — confirm.
	List []Hot
	T    time.Time
}

抓取的页面信息

type Reddit

// Reddit embeds Site, so Site's fields and methods are promoted onto it.
// NOTE(review): presumably the Spider implementation for the SITE_REDDIT
// ("reddit") site — confirm in NewReddit.
type Reddit struct {
	Site
}

func NewReddit

func NewReddit() *Reddit

func (*Reddit) BuildUrl

func (r *Reddit) BuildUrl() ([]Link, error)

func (*Reddit) CrawPage

func (r *Reddit) CrawPage(link Link, headers map[string]string) (Page, error)

func (*Reddit) FetchKey

func (r *Reddit) FetchKey(key string) string

func (*Reddit) GetSite

func (r *Reddit) GetSite() *Site

type RedditList

// RedditList is a JSON envelope with a single "posts" key.
// NOTE(review): presumably the response body decoded by Reddit.CrawPage —
// confirm.
type RedditList struct {
	// Posts maps a string key to a generic JSON object.
	// NOTE(review): the key is presumably the post id — confirm.
	Posts map[string]map[string]interface{} `json:"posts"`
}

type Site

// Site holds the information for one site. It is embedded by each of the
// per-site types in this package (Chouti, Github, Guanggu, ...).
type Site struct {
	Name     string
	Key      string
	Root     string
	Desc     string
	// NOTE(review): presumably CrawApi or CrawHtml — confirm.
	CrawType int8
	// Tabs lists the SiteTab entries of this site.
	Tabs     []SiteTab
}

站点信息

func (*Site) FetchData

func (s *Site) FetchData(link Link, params map[string]string, headers map[string]string) (res Page, err error)

type SiteTab

// SiteTab describes one tab of a site. It is the element type of Site.Tabs
// and of the package-level *Tabs variables.
type SiteTab struct {
	// Tag is a short identifier such as "hot" or "all".
	Tag  string
	// Name is the display name.
	Name string
	// Url is set to an absolute URL, a path, or a bare name depending on the
	// site; some tabs leave it empty.
	Url  string
	// Args holds optional string key/value pairs; only some tabs set it.
	Args map[string]string
}

type Spider

// Spider is the interface stored in SiteMap and accepted by RegistSite.
type Spider interface {
	// GetSite returns a pointer to a Site.
	GetSite() *Site
	// BuildUrl returns a list of Link values, or an error.
	BuildUrl() ([]Link, error)
	// CrawPage takes one Link and a set of headers and returns a Page, or an
	// error.
	CrawPage(link Link, headers map[string]string) (Page, error)
	// FetchKey maps a string to a string.
	// NOTE(review): presumably derives an entry key from a link — confirm.
	FetchKey(link string) string
}

type Tieba

// Tieba embeds Site, so Site's fields and methods are promoted onto it.
// NOTE(review): presumably the Spider implementation for the SITE_TIEBA
// ("tieba") site — confirm in NewTieba.
type Tieba struct {
	Site
}

func NewTieba

func NewTieba() *Tieba

func (*Tieba) BuildUrl

func (t *Tieba) BuildUrl() ([]Link, error)

func (*Tieba) CrawPage

func (t *Tieba) CrawPage(link Link, headers map[string]string) (Page, error)

func (*Tieba) FetchKey

func (t *Tieba) FetchKey(link string) string

func (*Tieba) GetSite

func (t *Tieba) GetSite() *Site

type V2ex

// V2ex embeds Site, so Site's fields and methods are promoted onto it.
// NOTE(review): presumably the Spider implementation for the SITE_V2EX
// ("v2ex") site — confirm in NewV2ex.
type V2ex struct {
	Site
}

func NewV2ex

func NewV2ex() *V2ex

func (*V2ex) BuildUrl

func (v *V2ex) BuildUrl() ([]Link, error)

func (*V2ex) CrawPage

func (v *V2ex) CrawPage(link Link, headers map[string]string) (Page, error)

func (*V2ex) FetchKey

func (v *V2ex) FetchKey(link string) string

func (*V2ex) GetSite

func (v *V2ex) GetSite() *Site

type WbVideoList

// WbVideoList is a JSON envelope with the keys "code", "msg" and "data".
// The video list sits under data -> "Component_Billboard_Billboardlist".
// NOTE(review): presumably the response body decoded by Wbvideo.CrawPage —
// confirm.
type WbVideoList struct {
	Code string `json:"code"`
	Msg  string `json:"msg"`
	Data struct {
		Videos struct {
			// Next is decoded from "next_cursor".
			Next int `json:"next_cursor"`
			// List holds one element per video.
			List []struct {
				Title     string `json:"title"`
				Cover     string `json:"cover_image"`
				// Id is decoded from "mid" as a 64-bit integer, while Oid
				// stays a string.
				Id        int64  `json:"mid"`
				Oid       string `json:"oid"`
				Date      string `json:"date"`
				// PlayCount is kept as a string, not a number.
				PlayCount string `json:"play_count"`
			} `json:"list"`
		} `json:"Component_Billboard_Billboardlist"`
	} `json:"data"`
}

type Wbvideo

// Wbvideo embeds Site, so Site's fields and methods are promoted onto it.
// NOTE(review): presumably the Spider implementation for the SITE_WBVIDEO
// ("wbvideo") site — confirm in NewWbvideo.
type Wbvideo struct {
	Site
}

func NewWbvideo

func NewWbvideo() *Wbvideo

func (*Wbvideo) BuildUrl

func (w *Wbvideo) BuildUrl() ([]Link, error)

func (*Wbvideo) CrawPage

func (w *Wbvideo) CrawPage(link Link, headers map[string]string) (res Page, err error)

func (*Wbvideo) FetchKey

func (w *Wbvideo) FetchKey(key string) string

func (*Wbvideo) GetSite

func (w *Wbvideo) GetSite() *Site

type Weibo

// Weibo embeds Site, so Site's fields and methods are promoted onto it.
// NOTE(review): presumably the Spider implementation for the SITE_WEIBO
// ("weibo") site — confirm in NewWeibo.
type Weibo struct {
	Site
}

func NewWeibo

func NewWeibo() *Weibo

func (*Weibo) BuildUrl

func (w *Weibo) BuildUrl() ([]Link, error)

func (*Weibo) CrawPage

func (w *Weibo) CrawPage(link Link, headers map[string]string) (Page, error)

func (*Weibo) FetchKey

func (w *Weibo) FetchKey(link string) string

func (*Weibo) GetSite

func (w *Weibo) GetSite() *Site

type Zaobao

// Zaobao embeds Site, so Site's fields and methods are promoted onto it.
// NOTE(review): presumably the Spider implementation for the SITE_ZAOBAO
// ("zaobao") site — confirm in NewZaobao.
type Zaobao struct {
	Site
}

func NewZaobao

func NewZaobao() *Zaobao

func (*Zaobao) BuildUrl

func (z *Zaobao) BuildUrl() ([]Link, error)

func (*Zaobao) CrawPage

func (z *Zaobao) CrawPage(link Link, headers map[string]string) (Page, error)

func (*Zaobao) FetchKey

func (z *Zaobao) FetchKey(link string) string

func (*Zaobao) GetSite

func (z *Zaobao) GetSite() *Site

type Zhihu

// Zhihu embeds Site, so Site's fields and methods are promoted onto it.
// NOTE(review): presumably the Spider implementation for the SITE_ZHIHU
// ("zhihu") site — confirm in NewZhihu.
type Zhihu struct {
	Site
}

func NewZhihu

func NewZhihu() *Zhihu

func (*Zhihu) BuildUrl

func (z *Zhihu) BuildUrl() ([]Link, error)

func (*Zhihu) CrawPage

func (z *Zhihu) CrawPage(link Link, headers map[string]string) (Page, error)

func (*Zhihu) FetchKey

func (z *Zhihu) FetchKey(link string) string

func (*Zhihu) GetSite

func (z *Zhihu) GetSite() *Site

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL