grabber

package module
v0.0.0-...-51ac1af Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 20, 2024 License: MIT Imports: 10 Imported by: 0

README

Head Grabber

Installation

go get -u github.com/gowool/grabber

License

Distributed under the MIT License. See the license file in the repository for more details.

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Package-level defaults.
// NOTE(review): presumably consumed by NewRequest and Do — their bodies are
// not visible here, so confirm before relying on this.
var (
	// Client is initialised to http.DefaultClient.
	// NOTE(review): http.DefaultClient has no Timeout set; if Do sends requests
	// through this variable, callers may want to assign a client with a
	// timeout — confirm against Do.
	Client         = http.DefaultClient
	// DefaultHeaders maps header names to the values shown below.
	// NOTE(review): when "Accept-Encoding: gzip" is set explicitly on a
	// net/http request, the transport does not decompress the response
	// automatically — confirm that the response body is gunzipped elsewhere.
	DefaultHeaders = map[string]string{
		"Accept":          "text/html",
		"Accept-Encoding": "gzip",
		"User-Agent":      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/111.0",
	}
)
View Source
var DateFormats = []string{}/* 126 elements not displayed */

DateFormats taken from github.com/mjibson/goread

View Source
// DateFormatsWithNamedZone lists time layouts that end in a named zone
// abbreviation (the "MST" reference token). They are kept apart from
// DateFormats because named zones cannot be consistently loaded.
//
// Every layout uses Go's reference time (Mon Jan 2 15:04:05 MST 2006); the
// hour token is therefore always "15" (24-hour) or "3" (12-hour with PM).
var DateFormatsWithNamedZone = []string{
	"Mon, January 02, 2006, 15:04:05 MST",
	"Mon, January 02, 2006 15:04:05 MST",
	"Mon, Jan 2, 2006 15:04 MST",
	"Mon, Jan 2 2006 15:04 MST",
	"Mon, Jan 2, 2006 15:04:05 MST",
	"Mon Jan 2 15:04:05 2006 MST",
	"Mon, Jan 02,2006 15:04:05 MST",
	"Monday, January 2, 2006 15:04:05 MST",
	"Monday, 2 January 2006 15:04:05 MST",
	"Monday, 2 Jan 2006 15:04:05 MST",
	"Monday, 02 January 2006 15:04:05 MST",
	"Mon, 2 January 2006 15:04 MST",
	"Mon, 2 January 2006, 15:04:05 MST",
	"Mon, 2 January 2006 15:04:05 MST",
	"Mon, 2 Jan 2006 15:4:5 MST",
	"Mon, 2 Jan 2006 15:04 MST",
	"Mon, 2 Jan 2006 15:04:05MST",
	"Mon, 2 Jan 2006 15:04:05 MST",
	"Mon 2 Jan 2006 15:04:05 MST",
	"mon,2 Jan 2006 15:04:05 MST",
	"Mon, 2 Jan 15:04:05 MST",
	"Mon, 2 Jan 06 15:04:05 MST",
	// Hour token corrected from "14" to "15": "14" is read by the time
	// package as numeric month "1" followed by minute "4", not as an hour.
	"Mon,02 January 2006 15:04:05 MST",
	"Mon, 02 Jan 2006 3:04:05 PM MST",
	"Mon,02 Jan 2006 15:04 MST",
	"Mon, 02 Jan 2006 15:04 MST",
	"Mon, 02 Jan 2006, 15:04:05 MST",
	"Mon, 02 Jan 2006 15:04:05MST",
	"Mon, 02 Jan 2006 15:04:05 MST",
	"Mon , 02 Jan 2006 15:04:05 MST",
	"Mon, 02 Jan 06 15:04:05 MST",
	"January 2, 2006 15:04:05 MST",
	"January 02, 2006 15:04:05 MST",
	"Jan 2, 2006 3:04:05 PM MST",
	"Jan 2, 2006 15:04:05 MST",
	"2 January 2006 15:04:05 MST",
	"2 Jan 2006 15:04:05 MST",
	"2006-01-02 15:04:05 MST",
	"1/2/2006 3:04:05 PM MST",
	"1/2/2006 15:04:05 MST",
	"02 Jan 2006 15:04 MST",
	"02 Jan 2006 15:04:05 MST",
	"02/01/2006 15:04 MST",
	"02-01-2006 15:04:05 MST",
	"01/02/2006 15:04:05 MST",
}

DateFormatsWithNamedZone lists the date formats that include a named zone. Named zones cannot be consistently loaded, so these formats are handled separately.

Functions

func NewRequest

func NewRequest(url string) (*http.Request, error)

func ParseDate

func ParseDate(ds string) (t time.Time, err error)

ParseDate parses a given date string using a large list of commonly found feed date formats.

func ParseDateP

func ParseDateP(ds string) *time.Time

Types

type Article

// Article groups article-level metadata of a page.
// NOTE(review): presumably populated from the "article:*" meta properties
// that the Meta.IsArticle* predicates detect — confirm in Meta.Contribute.
// Every field carries omitempty, so zero values and nil pointers are left
// out of the JSON encoding.
type Article struct {
	// PublishedTime is a pointer so that an absent value stays nil.
	PublishedTime *time.Time `json:"published_time,omitempty"`
	// ModifiedTime is a pointer so that an absent value stays nil.
	ModifiedTime  *time.Time `json:"modified_time,omitempty"`
	Publisher     string     `json:"publisher,omitempty"`
	Author        string     `json:"author,omitempty"`
	// Section is a slice, so more than one section value can be stored.
	Section       []string   `json:"section,omitempty"`
}

type Audio

// Audio holds one "og:audio" entry; each field is omitted from the JSON
// encoding when empty.
type Audio struct {
	// URL corresponds to the "url" JSON key.
	URL       string `json:"url,omitempty"`
	// SecureURL corresponds to the "secure_url" JSON key.
	SecureURL string `json:"secure_url,omitempty"`
	Type      string `json:"type,omitempty"` // Content-Type
}

Audio represents a structure of "og:audio". "og:audio" might have the following properties:

  • og:audio:url
  • og:audio:secure_url
  • og:audio:type

type Image

// Image holds one "og:image" entry; each field is omitted from the JSON
// encoding when it has its zero value.
type Image struct {
	// URL corresponds to the "url" JSON key.
	URL       string `json:"url,omitempty"`
	// SecureURL corresponds to the "secure_url" JSON key.
	SecureURL string `json:"secure_url,omitempty"`
	Type      string `json:"type,omitempty"` // Content-Type
	// Width and Height are integers; a value of 0 is dropped by omitempty.
	// NOTE(review): units are presumably pixels — confirm against the parser.
	Width     int    `json:"width,omitempty"`
	Height    int    `json:"height,omitempty"`
	// Alt corresponds to the "alt" JSON key.
	Alt       string `json:"alt,omitempty"`
}

Image represents a structure of "og:image". "og:image" might have the following properties:

  • og:image:url
  • og:image:secure_url
  • og:image:type
  • og:image:width
  • og:image:height
  • og:image:alt

type Link

// Link holds three string attributes of a link tag. It has no JSON tags.
// NOTE(review): presumably mirrors the rel/ref/href attributes of an HTML
// <link> element as collected by LinkTag — confirm against LinkTag's body.
type Link struct {
	Rel  string
	Ref  string
	Href string
}

func LinkTag

func LinkTag(attrs []html.Attribute) *Link

LinkTag constructs a Link from the given HTML attributes.

func (*Link) Contribute

func (link *Link) Contribute(p *Page)

func (*Link) IsIcon

func (link *Link) IsIcon() bool

type Meta

// Meta holds three string attributes of a meta tag. It has no JSON tags.
// NOTE(review): presumably mirrors the name/property/content attributes of an
// HTML <meta> element as collected by MetaTag — confirm against MetaTag's body.
type Meta struct {
	Name     string
	Property string
	Content  string
}

func MetaTag

func MetaTag(attrs []html.Attribute) *Meta

MetaTag constructs a Meta from the given HTML attributes.

func (*Meta) Contribute

func (meta *Meta) Contribute(p *Page) (err error)

func (*Meta) IsArticleAuthor

func (meta *Meta) IsArticleAuthor() bool

IsArticleAuthor returns if it can be "article:author"

func (*Meta) IsArticleModifiedTime

func (meta *Meta) IsArticleModifiedTime() bool

IsArticleModifiedTime returns if it can be "article:modified_time"

func (*Meta) IsArticlePublishedTime

func (meta *Meta) IsArticlePublishedTime() bool

IsArticlePublishedTime returns if it can be "article:published_time"

func (*Meta) IsArticlePublisher

func (meta *Meta) IsArticlePublisher() bool

IsArticlePublisher returns if it can be "article:publisher"

func (*Meta) IsArticleSection

func (meta *Meta) IsArticleSection() bool

IsArticleSection returns if it can be "article:section"

func (*Meta) IsAuthor

func (meta *Meta) IsAuthor() bool

IsAuthor returns if it can be "author".

func (*Meta) IsDescription

func (meta *Meta) IsDescription() bool

IsDescription returns if it can be "description".

func (*Meta) IsKeywords

func (meta *Meta) IsKeywords() bool

IsKeywords returns if it can be "keywords".

func (*Meta) IsOGAudio

func (meta *Meta) IsOGAudio() bool

IsOGAudio returns if it can be a root of "og:audio"

func (*Meta) IsOGDescription

func (meta *Meta) IsOGDescription() bool

IsOGDescription returns if it can be "description" of OGP

func (*Meta) IsOGImage

func (meta *Meta) IsOGImage() bool

IsOGImage returns if it can be a root of "og:image"

func (*Meta) IsOGLocale

func (meta *Meta) IsOGLocale() bool

IsOGLocale returns if it can be "og:locale"

func (*Meta) IsOGSiteName

func (meta *Meta) IsOGSiteName() bool

IsOGSiteName returns if it can be "og:site_name"

func (*Meta) IsOGTitle

func (meta *Meta) IsOGTitle() bool

IsOGTitle returns if it can be "title" of OGP

func (*Meta) IsOGType

func (meta *Meta) IsOGType() bool

IsOGType returns if it can be "og:type"

func (*Meta) IsOGURL

func (meta *Meta) IsOGURL() bool

IsOGURL returns if it can be "og:url"

func (*Meta) IsOGUpdatedTime

func (meta *Meta) IsOGUpdatedTime() bool

IsOGUpdatedTime returns if it can be "og:updated_time"

func (*Meta) IsOGVideo

func (meta *Meta) IsOGVideo() bool

IsOGVideo returns if it can be a root of "og:video"

func (*Meta) IsPropertyOf

func (meta *Meta) IsPropertyOf(name string) bool

IsPropertyOf returns if it can be a property of specified struct

type OpenGraph

// OpenGraph groups the Open Graph ("og:*") metadata of a page. Every field
// carries omitempty, so zero values, nil pointers and empty slices are left
// out of the JSON encoding.
type OpenGraph struct {
	Title       string     `json:"title,omitempty"`
	Type        string     `json:"type,omitempty"`
	URL         string     `json:"url,omitempty"`
	Description string     `json:"description,omitempty"`
	Locale      string     `json:"locale,omitempty"`
	SiteName    string     `json:"site_name,omitempty"`
	// UpdatedTime is a pointer so that an absent value stays nil.
	UpdatedTime *time.Time `json:"updated_time,omitempty"`
	// Video, Image and Audio are slices, so a page can carry several entries
	// of each kind.
	Video       []Video    `json:"video,omitempty"`
	Image       []Image    `json:"image,omitempty"`
	Audio       []Audio    `json:"audio,omitempty"`
}

type Page

// Page is the top-level result type: plain page metadata plus optional
// OpenGraph and Article sections. Every field carries omitempty.
type Page struct {
	URL         string     `json:"url,omitempty"`
	Title       string     `json:"title,omitempty"`
	Description string     `json:"description,omitempty"`
	// Keywords is stored as a single string.
	// NOTE(review): presumably the raw content of the "keywords" meta tag,
	// not split into items — confirm in Meta.Contribute.
	Keywords    string     `json:"keywords,omitempty"`
	Author      string     `json:"author,omitempty"`
	// Favicon is a slice, so several icon URLs can be recorded.
	Favicon     []string   `json:"favicon,omitempty"`
	// OpenGraph and Article are pointers; nil sections are omitted from JSON.
	OpenGraph   *OpenGraph `json:"open_graph,omitempty"`
	Article     *Article   `json:"article,omitempty"`
}

func Do

func Do(req *http.Request) (*Page, error)

func NewPage

func NewPage(url string) *Page

func (*Page) Parse

func (p *Page) Parse(body io.Reader) error

Parse parses http.Response.Body and constructs the Page information. The caller should close body after it has been parsed.

func (*Page) ToAbs

func (p *Page) ToAbs() error

ToAbs converts all relative URLs to absolute URLs.

type Video

// Video holds one "og:video" entry; each field is omitted from the JSON
// encoding when it has its zero value.
type Video struct {
	// URL corresponds to the "url" JSON key.
	URL       string `json:"url,omitempty"`
	// SecureURL corresponds to the "secure_url" JSON key.
	SecureURL string `json:"secure_url,omitempty"`
	Type      string `json:"type,omitempty"` // Content-Type
	// Width and Height are integers; a value of 0 is dropped by omitempty.
	// NOTE(review): units are presumably pixels — confirm against the parser.
	Width     int    `json:"width,omitempty"`
	Height    int    `json:"height,omitempty"`
	// Duration in seconds
	Duration int      `json:"duration,omitempty"`
	// Tag is a slice, so several tag values can be stored per video.
	Tag      []string `json:"tag,omitempty"`
}

Video represents a structure of "og:video". "og:video" might have the following properties:

  • og:video:url
  • og:video:secure_url
  • og:video:type
  • og:video:width
  • og:video:height
  • og:video:tag

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL