html

package module
v1.0.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 12, 2023 License: MIT Imports: 11 Imported by: 5

README

HTML

test Coverage Status

A simple HTML parser and data-fetching library written in Go and released under the MIT License.

Require

  • Golang (version >= 1.12)
  • golang.org/x/net

Install

go get github.com/wmentor/html

Usage

Fetch data from URL
package main

import (
  "fmt"
  "time"

  "github.com/wmentor/html"
)

// main fetches a page over HTTP, prints the bytes returned by Text, and then
// prints every link, image and iframe URL reported by the parser.
func main() {
	src := "https://edition.cnn.com"

	parser := html.New()

	opts := &html.GetOpts{
		Agent:   "Mozilla/5.0 (compatible; MSIE 10.0)",
		Timeout: time.Second * 60,
	}

	// Get returns an error; check it so a failed request is reported
	// instead of going unnoticed.
	if err := parser.Get(src, opts); err != nil {
		fmt.Println("get failed:", err)
		return
	}

	fmt.Println(string(parser.Text()))

	parser.EachLink(func(link string) {
		fmt.Println("url=" + link)
	})

	parser.EachImage(func(link string) {
		fmt.Println("img=" + link)
	})

	parser.EachIframe(func(link string) {
		fmt.Println("iframe=" + link)
	})
}
Fetch data from file/stdin
package main

import (
  "fmt"
  "os"

  "github.com/wmentor/html"
)

// main parses HTML read from standard input, prints the bytes returned by
// Text, and then prints each link, image and iframe URL behind a short label.
func main() {
	doc := html.New()
	doc.Parse(os.Stdin) // Parse accepts any io.Reader

	text := doc.Text()
	fmt.Println(string(text))

	// printer builds a callback that prints each URL prefixed with label.
	printer := func(label string) func(string) {
		return func(u string) {
			fmt.Println(label + u)
		}
	}

	doc.EachLink(printer("url="))
	doc.EachImage(printer("img="))
	doc.EachIframe(printer("iframe="))
}

Documentation

Index

Constants

This section is empty.

Variables

View Source
var (
	// ErrGetFailed is an exported package-level error value.
	// NOTE(review): presumably returned by (*HTML).Get when a fetch fails — confirm.
	ErrGetFailed error
)

Functions

This section is empty.

Types

type GetOpts

// GetOpts holds the options passed to (*HTML).Get.
type GetOpts struct {
	Timeout time.Duration     // request timeout
	Agent   string            // user agent string
	Headers map[string]string // request headers
	// NOTE(review): the field name suggests that true disables decoding, but
	// the original comment read "decode to utf-8 if charset is not utf-8" —
	// confirm which polarity is correct.
	NoDecode bool
}

type HTML

// HTML is the parser type returned by New. Its methods load input (Get,
// Parse, ParseString) and read results back (Text, EachLink, EachImage,
// EachIframe).
type HTML struct {
	// contains filtered or unexported fields
}

func New

func New() *HTML

func (*HTML) EachIframe

func (h *HTML) EachIframe(callback func(string))

func (*HTML) EachImage

func (h *HTML) EachImage(callback func(string))

func (*HTML) EachLink

func (h *HTML) EachLink(callback func(string))

func (*HTML) Get

func (h *HTML) Get(pageUrl string, opts *GetOpts) error

func (*HTML) Parse

func (h *HTML) Parse(r io.Reader)

func (*HTML) ParseString

func (h *HTML) ParseString(str string)

func (*HTML) ResetUrl

func (h *HTML) ResetUrl()

func (*HTML) SetUrl

func (h *HTML) SetUrl(pageUrl string) error

func (*HTML) Text

func (h *HTML) Text() []byte

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL