resource

package
v0.8.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 5, 2024 License: GPL-3.0 Imports: 8 Imported by: 0

Documentation

Index

Constants

View Source
// Keys accepted by (*WebPage).SkipWhenMarshaling. Each constant is named after
// a WebPage field and holds that field's snake_case name as its value.
// NOTE(review): presumably a skipped field is left out of the MarshalJSON
// output until ClearSkipWhenMarshaling is called — confirm against the
// implementation.
const (
	CanonicalURL skippable = "canonical_url"
	ContentText  skippable = "content_text"
	OriginalURL  skippable = "original_url"
	FetchTime    skippable = "fetch_time"
	TTL          skippable = "ttl"
)

Variables

View Source
var (
	// ErrNoTTL is a sentinel error with the message "TTL not set".
	// NOTE(review): presumably returned by WebPage.ExpireTime when no TTL is
	// available — confirm against the implementation.
	ErrNoTTL = errors.New("TTL not set")
	// DefaultTTL is 30 days (720 hours).
	DefaultTTL = 30 * 24 * time.Hour
)

Functions

func CleanURL

func CleanURL(url *nurl.URL) *nurl.URL

Types

type Feed

// Feed embeds gofeed.Feed, so the exported fields and methods of the embedded
// type are promoted onto Feed.
type Feed struct {
	gofeed.Feed
}
func (f Feed) ItemLinks() []string

type FetchMethod added in v0.8.0

// FetchMethod enumerates the ways a page can be fetched. It is stored in
// WebPage.FetchMethod.
type FetchMethod int

const (
	// Client is the zero value of FetchMethod.
	// NOTE(review): presumably a plain HTTP client fetch — confirm.
	Client FetchMethod = iota
	// Headless has the value 1.
	// NOTE(review): presumably a fetch through a headless browser — confirm.
	Headless
)

func (FetchMethod) String added in v0.8.0

func (f FetchMethod) String() string

type WebPage

// WebPage represents a web page that was fetched, including metadata from the
// page itself, text content, and information about the fetch operation.
//
// RequestedURL, CanonicalURL and TTL are tagged `json:"-"`, so the default
// struct encoding ignores them; WebPage also declares its own MarshalJSON and
// UnmarshalJSON methods.
type WebPage struct {
	RequestedURL *nurl.URL     `json:"-"`                      // The page that was actually fetched
	CanonicalURL *nurl.URL     `json:"-"`                      // The canonical URL of the page
	OriginalURL  string        `json:"original_url,omitempty"` // NOTE(review): presumably the URL as originally requested — confirm
	TTL          time.Duration `json:"-"`                      // Time to live for the resource
	FetchTime    *time.Time    `json:"fetch_time,omitempty"`   // When the returned source was fetched
	FetchMethod  FetchMethod   `json:"fetch_method,omitempty"` // Method used to fetch the page
	Hostname     string        `json:"hostname,omitempty"`     // Hostname of the page
	StatusCode   int           `json:"status_code,omitempty"`  // HTTP status code
	Error        error         `json:"error,omitempty"`        // Error that occurred during fetching
	Title        string        `json:"title,omitempty"`        // Title of the page
	Description  string        `json:"description,omitempty"`  // Description of the page
	Sitename     string        `json:"sitename,omitempty"`     // Name of the site
	Authors      []string      `json:"authors,omitempty"`      // Authors of the page
	Date         *time.Time    `json:"date,omitempty"`         // Date of the page
	Categories   []string      `json:"categories,omitempty"`   // Categories of the page
	Tags         []string      `json:"tags,omitempty"`         // Tags of the page
	Language     string        `json:"language,omitempty"`     // Language of the page
	Image        string        `json:"image,omitempty"`        // Image of the page
	PageType     string        `json:"page_type,omitempty"`    // Type of the page
	License      string        `json:"license,omitempty"`      // License of the page
	ID           string        `json:"id,omitempty"`           // ID of the page
	Fingerprint  string        `json:"fingerprint,omitempty"`  // Fingerprint of the page
	ContentText  string        `json:"content_text,omitempty"` // Text content of the page
	// contains filtered or unexported fields
}

WebPage represents a web page that was fetched, including metadata from the page itself, its text content, and information about the fetch operation.

func NewWebPage added in v0.8.0

func NewWebPage(url nurl.URL) *WebPage

func (*WebPage) ClearSkipWhenMarshaling added in v0.8.0

func (r *WebPage) ClearSkipWhenMarshaling()

func (WebPage) ExpireTime added in v0.8.0

func (r WebPage) ExpireTime() (time.Time, error)

func (WebPage) MarshalJSON

func (r WebPage) MarshalJSON() ([]byte, error)

func (*WebPage) MergeTrafilaturaResult added in v0.8.0

func (r *WebPage) MergeTrafilaturaResult(tr *trafilatura.ExtractResult)

func (*WebPage) SkipWhenMarshaling added in v0.8.0

func (r *WebPage) SkipWhenMarshaling(skip ...skippable)

func (*WebPage) UnmarshalJSON added in v0.7.0

func (r *WebPage) UnmarshalJSON(data []byte) error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL