htmlx

package module
v1.0.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 20, 2021 License: MIT Imports: 10 Imported by: 0

README

htmlx

htmlx is a library that provides a set of extensions to Go's golang.org/x/net/html library.

Go Reference

// Load and parse the HTML file.
doc, err := htmlx.Load("test.html")
if err != nil {
    panic(err)
}

// Look up the element with id "test" and print the href of every <a> found in it.
div := doc.Find(htmlx.ID("test"))
for _, a := range div.FindAll(htmlx.TagName("a")) {
    if href, ok := a.Attr("href"); ok {
        fmt.Println(href)
    }
}

// Replace the element's children with the parsed HTML fragment.
err = div.SetHTML(`<em>no links</em>`)
if err != nil {
    panic(err)
}
fmt.Println("html:", div)

Documentation

Overview

Package htmlx is a library that provides a set of extensions to Go's golang.org/x/net/html library.

Example
package main

import (
	"fmt"
	"strings"

	"github.com/mdigger/htmlx"
)

// main parses a small in-memory HTML list, prints the href of every link
// inside the element with id "test", then replaces that element's children
// and prints the resulting markup.
func main() {
	const markup = `<ul id="test">
	<li><a href="test1.html">test1</a></li>
	<li><a href="test2.html">test2</a></li>
	<li><a href="test3.html">test3</a></li>
</ul>`

	root, err := htmlx.Parse(strings.NewReader(markup))
	if err != nil {
		panic(err)
	}

	// Collect the anchors first, then print only those that carry an href.
	list := root.Find(htmlx.ID("test"))
	anchors := list.FindAll(htmlx.TagName("a"))
	for i := range anchors {
		href, found := anchors[i].Attr("href")
		if !found {
			continue
		}
		fmt.Println(href)
	}

	// Swap the list content for a single item without links.
	if err := list.SetHTML(`<li>no links</li>`); err != nil {
		panic(err)
	}

	fmt.Println(list)
}
Output:

test1.html
test2.html
test3.html
<ul id="test"><li>no links</li></ul>

Index

Examples

Constants

View Source
// Names of the attributes that the package refers to by constant.
const (
	// AttrID is the "id" attribute name.
	AttrID    = "id"
	// AttrClass is the "class" attribute name.
	AttrClass = "class"
)

Predefined attribute names.

Variables

TextIgnoreAtom specifies the list of items whose contents are ignored when working with text nodes.

Functions

func AddAttrWord

func AddAttrWord(attrs []html.Attribute, key, word string) []html.Attribute

AddAttrWord adds a new word to the attribute value.

func AttrVal

func AttrVal(attrs []html.Attribute, key string) (val string, ok bool)

AttrVal returns the value of the attribute with the specified key name. If the attribute is not present, the second return value is false.

func Find

func Find(n *html.Node, m Matcher) *html.Node

Find finds the first match in the element, including the element itself, and returns it.

func FindAll

func FindAll(n *html.Node, m Matcher) (result []*html.Node)

FindAll finds and returns all matches for the specified matcher.

func FindNext

func FindNext(n *html.Node, m Matcher) *html.Node

FindNext finds the next sibling element.

func FindPrev

func FindPrev(n *html.Node, m Matcher) *html.Node

FindPrev finds the previous sibling element.

func HTML

func HTML(n *html.Node, self bool) (string, error)

HTML returns a string with HTML representation.

func HasAttrWord

func HasAttrWord(attrs []html.Attribute, key, word string) bool

HasAttrWord returns true if the attribute with the specified key name is found and its value contains the specified word.

func Remove

func Remove(n *html.Node)

Remove removes the specified element from the HTML tree.

func RemoveAttr

func RemoveAttr(attrs []html.Attribute, key string) []html.Attribute

RemoveAttr removes the attribute with the specified key name.

func RemoveChilds

func RemoveChilds(n *html.Node)

RemoveChilds removes all child elements, if there are any.

func Rename

func Rename(n *html.Node, name string)

Rename HTML element.

func SetAttr

func SetAttr(attrs []html.Attribute, key, val string) []html.Attribute

SetAttr sets the new attribute value with the specified key name.

func SetHTML

func SetHTML(n *html.Node, data string) (err error)

SetHTML parses an HTML fragment in the context of the current element and replaces the element's child elements with the result.

func SetText

func SetText(n *html.Node, text string) error

SetText replaces the text of the element with the new one.

func Stats

func Stats(n *html.Node) (c wstat.Counter)

Stats returns statistics on the text.

func Text

func Text(n *html.Node) string

Text returns only the text representation, without HTML elements. Elements from the TextIgnoreAtom list are ignored together with all of their child elements.

func WriteText

func WriteText(w io.StringWriter, n *html.Node, ignore map[atom.Atom]bool) error

WriteText walks all nested text nodes and writes their text to the specified StringWriter. Comments are ignored.

BUG: <noscript> parsed as text

Types

type Matcher

type Matcher = func(*html.Node) bool

Matcher is an alias for the function type used for searching and selecting HTML elements.

func Class

func Class(name string) Matcher

Class is used to select elements with a specified style class.

func HasAttrVal

func HasAttrVal(name, value string) Matcher

HasAttrVal is used to find an element with a specified attribute value.

func ID

func ID(id string) Matcher

ID is used to find an element with a specified unique identifier.

func Tag

func Tag(a atom.Atom) Matcher

Tag is used to search for elements by tag name identifier (atom).

func TagName

func TagName(name string) Matcher

TagName is used to search for elements by tag name.

type Node

// Node wraps an *html.Node so that additional methods can be attached to it.
type Node struct {
	// Embedded pointer to the wrapped node; embedding promotes its fields
	// and methods to Node.
	*html.Node
}

Node extends html.Node with additional methods.

func Get

func Get(url string) (Node, error)

Get loads and parses an HTML document at the specified url address.

func Load

func Load(path string) (Node, error)

Load loads and parses an HTML document from the file.

func New

func New(node *html.Node) Node

New wraps an html.Node, adding new functionality to it.

func Parse

func Parse(r io.Reader) (n Node, err error)

Parse returns a parsed HTML tree representation.

func String

func String(data string) (Node, error)

String returns a parsed HTML tree representation from the string.

func (Node) AddClass

func (n Node) AddClass(name string)

AddClass adds a new style name to the element attribute list.

func (Node) Attr

func (n Node) Attr(name string) (val string, ok bool)

Attr returns the attribute value with the specified name and the flag that the attribute was specified for this item.

func (Node) Find

func (n Node) Find(m Matcher) Node

Find returns the first element that is suitable for the specified conditions.

func (Node) FindAll

func (n Node) FindAll(m Matcher) []Node

FindAll returns all the elements suitable for the specified conditions.

func (Node) FindNext

func (n Node) FindNext(m Matcher) Node

FindNext finds the next sibling element.

func (Node) FindPrev

func (n Node) FindPrev(m Matcher) Node

FindPrev finds the previous sibling element.

func (Node) FirstChild

func (n Node) FirstChild() Node

FirstChild returns the first child element.

func (Node) HasClass

func (n Node) HasClass(name string) (ok bool)

HasClass returns true if the element has the style class with the specified name.

func (Node) ID

func (n Node) ID() string

ID returns the unique identifier of the element.

func (Node) InnerHTML

func (n Node) InnerHTML() (string, error)

InnerHTML returns a string with the inner HTML representation.

func (Node) IsEmpty

func (n Node) IsEmpty() bool

IsEmpty returns true if the node is not specified.

func (Node) LastChild

func (n Node) LastChild() Node

LastChild returns the last child element.

func (Node) NextSibling

func (n Node) NextSibling() Node

NextSibling returns the next sibling element.

func (Node) OuterHTML

func (n Node) OuterHTML() (string, error)

OuterHTML returns a string with the HTML representation, including the element's own tag.

func (Node) Parent

func (n Node) Parent() Node

Parent returns the parent element.

func (Node) PrevSibling

func (n Node) PrevSibling() Node

PrevSibling returns the previous sibling element.

func (Node) Remove

func (n Node) Remove()

Remove removes the specified element from the HTML tree.

func (Node) RemoveAttr

func (n Node) RemoveAttr(name, value string)

RemoveAttr removes the attribute value with the specified name.

func (Node) RemoveChilds

func (n Node) RemoveChilds()

RemoveChilds removes all child elements, if any exist.

func (Node) Rename

func (n Node) Rename(name string)

Rename renames HTML element.

func (Node) SetAttr

func (n Node) SetAttr(name, value string)

SetAttr sets the new attribute value with the specified name.

func (Node) SetHTML

func (n Node) SetHTML(data string) error

SetHTML parses an HTML fragment in the context of the current element and replaces the element's child elements with the result.

func (Node) SetText

func (n Node) SetText(text string) error

SetText replaces the text of the element with the new text and removes any child elements.

func (Node) Stats

func (n Node) Stats() (c wstat.Counter)

Stats returns statistics on the text.

func (Node) String

func (n Node) String() string

String returns a string with HTML representation. Possible error is ignored.

func (Node) Text

func (n Node) Text() string

Text returns only a text representation, without HTML elements.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL