goscraper

package module
v0.0.0-...-44a43d8 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 18, 2021 License: MIT Imports: 10 Imported by: 0

README

goscraper

A Go package that quickly returns a preview of a webpage: you can easily get its title, description, and images.

Usage

// main scrapes https://www.w3.org/ and prints each field of the resulting
// preview, one per line.
func main() {
	// Scrape takes (uri, maxRedirect, options). The zero ScraperOptions value
	// is passed here.
	// NOTE(review): confirm that the zero value selects usable defaults.
	s, err := goscraper.Scrape("https://www.w3.org/", 5, goscraper.ScraperOptions{})
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("Icon : %s\n", s.Preview.Icon)
	fmt.Printf("Name : %s\n", s.Preview.Name)
	fmt.Printf("Title : %s\n", s.Preview.Title)
	fmt.Printf("Description : %s\n", s.Preview.Description)
	// Images is a slice and may be empty; indexing [0] unconditionally would
	// panic for such a page.
	if len(s.Preview.Images) > 0 {
		fmt.Printf("Image: %s\n", s.Preview.Images[0])
	}
	fmt.Printf("Url : %s\n", s.Preview.Link)
}

output:

Icon : https://www.w3.org/favicon.ico
Name : www.w3.org
Title : World Wide Web Consortium (W3C)
Description : The World Wide Web Consortium (W3C) is an international community where Member organizations, a full-time staff, and the public work together to develop Web standards.
Image: https://www.w3.org/2008/site/images/logo-w3c-mobile-lg
Url : https://www.w3.org/

License

Goscraper is licensed under the MIT License.

Documentation

Index

Constants

This section is empty.

Variables

View Source
// EscapedFragment holds the literal "_escaped_fragment_=".
// NOTE(review): presumably the query-string key used when building
// Scraper.EscapedFragmentUrl — confirm against the implementation.
var EscapedFragment = "_escaped_fragment_="

Functions

This section is empty.

Types

type Document

// Document is the result type returned by Scrape and by the ScrapeService
// methods.
type Document struct {
	// NOTE(review): presumably the fetched page content — confirm.
	Body      bytes.Buffer
	// Preview carries the extracted summary fields (title, description,
	// images, ...) shown in the README example.
	Preview   DocumentPreview
	// NOTE(review): presumably populated from the HTTP response — confirm.
	ResHeader ResHeaders
}

func Scrape

func Scrape(uri string, maxRedirect int, options ScraperOptions) (*Document, error)

type DocumentPreview

// DocumentPreview is the summary of a scraped page. The example values below
// are taken from the README run against https://www.w3.org/.
type DocumentPreview struct {
	// Icon is an icon URL, e.g. "https://www.w3.org/favicon.ico".
	Icon        string
	// Name is the site name, e.g. "www.w3.org".
	Name        string
	// Title is the page title, e.g. "World Wide Web Consortium (W3C)".
	Title       string
	// Description is the page's descriptive text.
	Description string
	// NOTE(review): meaning not shown in this documentation — confirm
	// against the implementation.
	Type        string
	// Images lists image URLs found for the page; it may be empty, so check
	// the length before indexing.
	Images      []string
	// Link is the page URL, e.g. "https://www.w3.org/".
	Link        string
}

type ResHeaders

// ResHeaders is the type of Document.ResHeader.
type ResHeaders struct {
	// NOTE(review): presumably the value of the Content-Type response
	// header — confirm.
	ContentType string
}

type ScrapeBuilder

// ScrapeBuilder configures a scrape step by step. Every setter returns a
// ScrapeBuilder so calls can be chained; Build produces the ScrapeService or
// an error.
// NOTE(review): the setters appear to mirror the fields of Scraper and
// ScraperOptions — confirm against the implementation.
type ScrapeBuilder interface {
	SetUserAgent(string) ScrapeBuilder
	SetMaxDocumentLength(int64) ScrapeBuilder
	SetUrl(string) ScrapeBuilder
	SetMaxRedirect(int) ScrapeBuilder
	Build() (ScrapeService, error)
	SetMaxTokenDepth(int) ScrapeBuilder
}

func NewScrapeBuilder

func NewScrapeBuilder() ScrapeBuilder

type ScrapeService

// ScrapeService is the interface returned by ScrapeBuilder.Build. *Scraper
// declares methods with these same three signatures.
type ScrapeService interface {
	Scrape() (*Document, error)
	GetDocument() (*Document, error)
	// ParseDocument takes a Document and returns a Document.
	// NOTE(review): whether the returned pointer is the same as doc is not
	// shown here — confirm.
	ParseDocument(doc *Document) (*Document, error)
}

type Scraper

// Scraper holds the settings for one scrape; its pointer type provides the
// Scrape, GetDocument and ParseDocument methods.
type Scraper struct {
	// Url is the parsed target URL.
	Url                *url.URL
	// NOTE(review): presumably Url with the EscapedFragment query key
	// applied — confirm.
	EscapedFragmentUrl *url.URL
	// NOTE(review): presumably the maximum number of HTTP redirects to
	// follow — confirm.
	MaxRedirect        int
	Options            ScraperOptions
}

func (*Scraper) GetDocument

func (scraper *Scraper) GetDocument() (*Document, error)

func (*Scraper) ParseDocument

func (scraper *Scraper) ParseDocument(doc *Document) (*Document, error)

func (*Scraper) Scrape

func (scraper *Scraper) Scrape() (*Document, error)

type ScraperOptions

// ScraperOptions is the options value accepted by Scrape and stored in
// Scraper.Options.
type ScraperOptions struct {
	// NOTE(review): presumably an upper bound on the document size; the unit
	// and the meaning of zero are not shown here — confirm.
	MaxDocumentLength int64
	// NOTE(review): presumably sent as the User-Agent request header —
	// confirm.
	UserAgent         string
	// NOTE(review): presumably limits how deep the HTML tokenizer descends —
	// confirm.
	MaxTokenDepth     int
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL