Documentation ¶
Index ¶
Examples ¶
Constants ¶
This section is empty.
Variables ¶
var ( A = Tag("A") B = Tag("b") Body = Tag("body") Div = Tag("div") Em = Tag("em") Form = Tag("form") H1 = Tag("h1") H2 = Tag("h2") Head = Tag("head") I = Tag("i") Img = Tag("img") Input = Tag("input") Label = Tag("label") Li = Tag("li") Option = Tag("option") P = Tag("p") Select = Tag("select") Span = Tag("span") Svg = Tag("svg") Table = Tag("table") Td = Tag("td") Th = Tag("th") Title = Tag("title") Tr = Tag("tr") Ul = Tag("ul") )
These variables are used to represent common tags.
var True everything
True is a special value that matches any node.
Functions ¶
This section is empty.
Types ¶
type Attributes ¶
type Attributes interface { // Range calls the provided function for each key-value pair in the Attributes. // Iteration stops if the function returns false for any pair. Range(func(key, value string) bool) // Get returns the value associated with the specified key and // a boolean indicating whether the key exists in the Attributes. Get(key string) (value string, exists bool) }
Attributes is an interface that describes a node's attributes with methods for getting and iterating over key-value pairs.
type Filter ¶
type Filter interface { // IsAttribute returns true if the filter represents an attribute filter. IsAttribute() bool // IsMatch returns true if the filter matches the given node. IsMatch(node Node) bool }
Filter is an interface that describes a filter that can be used to select nodes.
func Attr ¶
Attr returns a new attribute filter with the specified name and value.
Example ¶
node, err := ParseHTML(`<div data-foo="value">foo!</div>`) if err != nil { log.Fatal(err) } if nodes := node.FindAll(0, nil, Attr("data-foo", "value")); len(nodes) != 1 { log.Fatalf("expected nodes %d; got %d", 1, len(nodes)) } else { fmt.Println(nodes[0].Readable()) } node, err = ParseHTML(`<input name="email"/>`) if err != nil { log.Fatal(err) } if nodes := node.SelectAll(`[name="email"]`); len(nodes) != 1 { log.Fatalf("expected nodes %d; got %d", 1, len(nodes)) } else { fmt.Println(nodes[0].Readable()) } if nodes := node.XPath(`//*[@name="email"]`); len(nodes) != 1 { log.Fatalf("expected nodes %d; got %d", 1, len(nodes)) } else { fmt.Println(nodes[0].Readable()) }
Output: <div data-foo="value">foo!</div> <input name="email"/> <input name="email"/>
func Class ¶
Class returns a new class filter with the specified value. This filter is an attribute filter.
Example ¶
node, err := ParseHTML(`<p class="body strikeout"></p>`) if err != nil { log.Fatal(err) } if nodes := node.FindAll(0, nil, Class("body strikeout")); len(nodes) != 1 { log.Fatalf("expected nodes %d; got %d", 1, len(nodes)) } else { fmt.Println(nodes[0].Readable()) } if nodes := node.FindAll(0, nil, Class("strikeout body")); len(nodes) != 1 { log.Fatalf("expected nodes %d; got %d", 1, len(nodes)) } else { fmt.Println(nodes[0].Readable()) } if nodes := node.FindAll(0, nil, ClassStrict("body strikeout")); len(nodes) != 1 { log.Fatalf("expected nodes %d; got %d", 1, len(nodes)) } else { fmt.Println(nodes[0].Readable()) } if nodes := node.FindAll(0, nil, ClassStrict("strikeout body")); len(nodes) != 0 { log.Fatalf("expected nodes %d; got %d", 0, len(nodes)) } else { fmt.Println(nodes) }
Output: <p class="body strikeout"></p> <p class="body strikeout"></p> <p class="body strikeout"></p> []
func ClassStrict ¶
ClassStrict returns a new strict class filter with the specified string. This filter is an attribute filter.
type FindMethod ¶
type FindMethod int
FindMethod represents the method used to search for nodes in the parse tree.
const ( // Descendant represents a search for nodes that are descendants of the current node. Descendant FindMethod = iota // NoRecursive represents a search for nodes that are direct children of the current node. NoRecursive // Parent represents a search for the parent node of the current node. Parent // PrevSibling represents a search for the previous sibling node of the current node. PrevSibling // NextSibling represents a search for the next sibling node of the current node. NextSibling // Previous represents a search for the previous node in the parse tree. Previous // Next represents a search for the next node in the parse tree. Next )
type Finder ¶
type Finder interface { // Find searches for the first matched node in the parse tree based on the specified find method and filters. Find(FindMethod, TagFilter, ...Filter) Node // FindN searches for up to n nodes in the parse tree based on the specified find method and filters. FindN(FindMethod, int, TagFilter, ...Filter) []Node // FindAll searches for all nodes in the parse tree based on the specified find method and filters. FindAll(FindMethod, TagFilter, ...Filter) []Node // FindString searches for the first matched text node in the parse tree based on the specified find method and filter. FindString(FindMethod, StringFilter) TextNode // FindStringN searches for up to n text nodes in the parse tree based on the specified find method and filter. FindStringN(FindMethod, int, StringFilter) []TextNode // FindAllString searches for all text nodes in the parse tree based on the specified find method and filter. FindAllString(FindMethod, StringFilter) []TextNode // Select searches for the first matched node in the parse tree based on the CSS selector. // It panics if the selector cannot be parsed. Select(string) Node // SelectAll searches for all nodes in the parse tree based on the CSS selector. // It panics if the selector cannot be parsed. SelectAll(string) []Node // XPath searches for all nodes that match the specified XPath expression. It panics if the expression cannot be parsed. XPath(string) []Node // Evaluate returns the result of the XPath expression. // The result type of the expression is one of the following: bool, float64, string, *xpath.NodeIterator. Evaluate(string) (any, error) }
Finder represents a set of methods for finding nodes.
type HtmlNode ¶
type HtmlNode interface { // Raw returns the original *html.Node. Raw() *html.Node // ToNode converts HtmlNode to Node. ToNode() Node // ToTextNode converts HtmlNode to TextNode. // It panics if the node is not a text node. ToTextNode() TextNode // Type returns a NodeType. Type() html.NodeType // Data returns the tag name for an element node or the content for a text node. Data() string // Attrs returns an Attributes interface for an element node. Attrs() Attributes // HasAttr reports whether the node has the specified attribute. HasAttr(string) bool // HTML renders the node's parse tree as HTML code. HTML() string // Readable renders unescaped HTML code. Readable() string // Parent returns the parent of this node. Parent() Node // FirstChild returns the first child of this node. FirstChild() Node // LastChild returns the last child of this node. LastChild() Node // PrevSibling returns the previous node that is on the same level of the parse tree. PrevSibling() Node // NextSibling returns the next node that is on the same level of the parse tree. NextSibling() Node // PrevNode returns the node that was parsed immediately before this node. PrevNode() Node // NextNode returns the node that was parsed immediately after this node. NextNode() Node // Parents returns all of this node's parents, recursively. Parents() []Node // Children returns all of this node's direct children. Children() []Node // Descendants returns all of this node's children, recursively. Descendants() []Node // PrevSiblings returns all of this node's previous nodes that are on the same level of the parse tree. PrevSiblings() []Node // NextSiblings returns all of this node's next nodes that are on the same level of the parse tree. NextSiblings() []Node // PrevNodes returns all of the nodes that were parsed before this node. PrevNodes() []Node // NextNodes returns all of the nodes that were parsed after this node. NextNodes() []Node // Finder includes a set of find methods. Finder }
HtmlNode is an interface representing an HTML node.
Example ¶
node, err := ParseHTML("<a><b>text1</b><c>text2</c></a>") if err != nil { log.Fatal(err) } fmt.Println(node.Find(0, B).NextSibling().Readable()) fmt.Println(node.Find(0, Tag("c")).PrevSibling().Readable()) fmt.Println(node.Find(0, B).PrevSibling()) fmt.Println(node.Find(0, Tag("c")).NextSibling()) fmt.Println(node.Find(0, B).String().String()) fmt.Println(node.Find(0, B).String().NextSibling())
Output: <c>text2</c> <b>text1</b> <nil> <nil> text1 <nil>
type Node ¶
type Node interface { HtmlNode // String returns a TextNode if the node has only one child whose type is text, otherwise returns nil. String() TextNode // Strings returns all of the text nodes inside this node. Strings() []TextNode // StrippedStrings returns a list of strings generated by Strings, where strings consisting entirely of // whitespace are ignored, and whitespace at the beginning and end of strings is removed. StrippedStrings() []string // GetText concatenates all of the text nodes' content. GetText() string }
Node is an interface representing an HTML node; it extends HtmlNode with methods for accessing the node's text content.
func ParseWithOptions ¶
ParseWithOptions is like Parse, with options.
type StringFilter ¶
StringFilter interface extends the Filter interface and defines a method for checking if the filter represents a string filter.
func String ¶
func String[T Value](t T) StringFilter
String returns a StringFilter with the specified value.
type TagFilter ¶
TagFilter is an interface that can be used to filter nodes based on the element's tag.
type Value ¶
type Value interface { // Value can be one of the following types: // - string: a simple string value // - []string: a slice of strings // - *regexp.Regexp: a regular expression // - everything: a special value that matches any node // - func(string, Node) bool: a function that takes a string and a node and returns true or false string | []string | *regexp.Regexp | everything | func(string, Node) bool }
Value is an interface that represents a value that can be used as a filter.