htmlselector

package module
v1.6.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 26, 2020 License: MIT Imports: 4 Imported by: 2

README

go-html-selector

GoDoc Go Report Card Build Status codecov

A library for collecting text, specified HTML tags, and their attributes from an HTML document.

Features

  • collecting from an HTML document:
    • HTML tags;
    • HTML attributes;
    • text (optional);
  • options:
    • filters:
      • filtering a result:
        • by specified HTML tags:
          • with support of the universal tag (*);
        • by specified HTML attributes;
      • friendly representation of filters:
        • for parsing from JSON;
        • for definition as a code literal;
    • skipping empty entities (separately optional):
      • tags without attributes;
      • attributes with an empty value;
      • whitespace-only text;
  • representing a result:
    • using the builder interface for building a result;
    • built-in builders:
      • with grouping HTML attributes by their tags;
      • with collecting only values of HTML attributes;
      • with collecting only text:
        • with support of merging with other builders;
  • optimizations:
    • of searching for the right filter among others:
      • by removing attribute filters that duplicate filters already specified for the universal tag;
    • of conversion from a byte slice to a string;
    • by the number:
      • of memory allocations;
      • of string copies.

Installation

Prepare the directory:

$ mkdir --parents "$(go env GOPATH)/src/github.com/thewizardplusplus/"
$ cd "$(go env GOPATH)/src/github.com/thewizardplusplus/"

Clone this repository:

$ git clone https://github.com/thewizardplusplus/go-html-selector.git
$ cd go-html-selector

Install dependencies with the dep tool:

$ dep ensure -vendor-only

Examples

htmlselector.SelectTags() with the structural builder:

package main

import (
	"fmt"
	"log"
	"strings"

	htmlselector "github.com/thewizardplusplus/go-html-selector"
	"github.com/thewizardplusplus/go-html-selector/builders"
)

// main collects the href attribute of <a> tags and the src and poster
// attributes of <video> tags from a sample document, skipping tags without
// attributes and attributes with an empty value, then prints the result
// grouped by tag.
func main() {
	const markup = `
		<ul>
			<li>
				<a href="http://example.com/1">1</a>
				<video
					src="http://example.com/1.1"
					poster="http://example.com/1.2">
				</video>
			</li>
			<li>
				<a href="http://example.com/2">2</a>
				<video
					src="http://example.com/2.1"
					poster="http://example.com/2.2">
				</video>
			</li>
			<li>
				<a href="">3</a>
				<video src="" poster=""></video>
			</li>
			<li>
				<a>4</a>
				<video></video>
			</li>
		</ul>
	`

	// Describe which attributes are wanted for which tags, then convert the
	// description to the optimized form SelectTags expects.
	wanted := htmlselector.FilterGroup{
		"a":     {"href"},
		"video": {"src", "poster"},
	}
	optimized := htmlselector.OptimizeFilters(wanted)

	var collected builders.StructuralBuilder
	if err := htmlselector.SelectTags(
		strings.NewReader(markup),
		optimized,
		&collected,
		htmlselector.SkipEmptyTags(),
		htmlselector.SkipEmptyAttributes(),
	); err != nil {
		log.Fatal(err)
	}

	for _, element := range collected.Tags() {
		fmt.Printf("<%s>:\n", element.Name)
		for _, attr := range element.Attributes {
			fmt.Printf("  %s=%q\n", attr.Name, attr.Value)
		}
	}

	// Output:
	// <a>:
	//   href="http://example.com/1"
	// <video>:
	//   src="http://example.com/1.1"
	//   poster="http://example.com/1.2"
	// <a>:
	//   href="http://example.com/2"
	// <video>:
	//   src="http://example.com/2.1"
	//   poster="http://example.com/2.2"
}

htmlselector.SelectTags() with the flatten builder:

package main

import (
	"fmt"
	"log"
	"strings"

	htmlselector "github.com/thewizardplusplus/go-html-selector"
	"github.com/thewizardplusplus/go-html-selector/builders"
)

// main collects the values of the href attribute of <a> tags and the src and
// poster attributes of <video> tags from a sample document, skipping
// attributes with an empty value, then prints each value quoted on its own
// line.
func main() {
	const markup = `
		<ul>
			<li>
				<a href="http://example.com/1">1</a>
				<video
					src="http://example.com/1.1"
					poster="http://example.com/1.2">
				</video>
			</li>
			<li>
				<a href="http://example.com/2">2</a>
				<video
					src="http://example.com/2.1"
					poster="http://example.com/2.2">
				</video>
			</li>
			<li>
				<a href="">3</a>
				<video src="" poster=""></video>
			</li>
			<li>
				<a>4</a>
				<video></video>
			</li>
		</ul>
	`

	wanted := htmlselector.FilterGroup{
		"a":     {"href"},
		"video": {"src", "poster"},
	}
	optimized := htmlselector.OptimizeFilters(wanted)

	// The flatten builder keeps only attribute values, without tag grouping.
	var collected builders.FlattenBuilder
	if err := htmlselector.SelectTags(
		strings.NewReader(markup),
		optimized,
		&collected,
		htmlselector.SkipEmptyAttributes(),
	); err != nil {
		log.Fatal(err)
	}

	for _, value := range collected.AttributeValues() {
		fmt.Printf("%q\n", value)
	}

	// Output:
	// "http://example.com/1"
	// "http://example.com/1.1"
	// "http://example.com/1.2"
	// "http://example.com/2"
	// "http://example.com/2.1"
	// "http://example.com/2.2"
}

htmlselector.SelectTags() with the universal tag:

package main

import (
	"fmt"
	"log"
	"strings"

	htmlselector "github.com/thewizardplusplus/go-html-selector"
	"github.com/thewizardplusplus/go-html-selector/builders"
)

// main demonstrates the universal tag: the title and href attributes are
// requested for every tag, the src and poster attributes only for <video>
// tags. Tags without selected attributes are skipped, and the result is
// printed grouped by tag.
func main() {
	const markup = `
		<ul>
			<li>
				<a href="http://example.com/1" title="link #1">1</a>
				<video
					src="http://example.com/1.1"
					poster="http://example.com/1.2"
					title="video #1">
				</video>
			</li>
			<li>
				<a href="http://example.com/2" title="link #2">2</a>
				<video
					src="http://example.com/2.1"
					poster="http://example.com/2.2"
					title="video #2">
				</video>
			</li>
		</ul>
	`

	wanted := htmlselector.FilterGroup{
		htmlselector.UniversalTag: {"title", "href"},
		"video":                   {"src", "poster"},
	}
	optimized := htmlselector.OptimizeFilters(wanted)

	var collected builders.StructuralBuilder
	if err := htmlselector.SelectTags(
		strings.NewReader(markup),
		optimized,
		&collected,
		htmlselector.SkipEmptyTags(),
	); err != nil {
		log.Fatal(err)
	}

	for _, element := range collected.Tags() {
		fmt.Printf("<%s>:\n", element.Name)
		for _, attr := range element.Attributes {
			fmt.Printf("  %s=%q\n", attr.Name, attr.Value)
		}
	}

	// Output:
	// <a>:
	//   href="http://example.com/1"
	//   title="link #1"
	// <video>:
	//   src="http://example.com/1.1"
	//   poster="http://example.com/1.2"
	//   title="video #1"
	// <a>:
	//   href="http://example.com/2"
	//   title="link #2"
	// <video>:
	//   src="http://example.com/2.1"
	//   poster="http://example.com/2.2"
	//   title="video #2"
}

htmlselector.SelectTags() with the text builder:

package main

import (
	"bytes"
	"fmt"
	"log"
	"strings"

	htmlselector "github.com/thewizardplusplus/go-html-selector"
	"github.com/thewizardplusplus/go-html-selector/builders"
)

// main collects both attributes and text from a sample document in a single
// pass: a structural builder gathers the href attribute of <a> tags and the
// src and poster attributes of <video> tags, while a text builder gathers the
// text parts (whitespace-only text is skipped). Both results are printed.
func main() {
	const markup = `
		<ul>
			<li>
				link #1: <a href="http://example.com/1">one</a>
				<video
					src="http://example.com/1.1"
					poster="http://example.com/1.2">
					Unable to embed video #1.
				</video>
			</li>
			<li>
				link #2: <a href="http://example.com/2">two</a>
				<video
					src="http://example.com/2.1"
					poster="http://example.com/2.2">
					Unable to embed video #2.
				</video>
			</li>
		</ul>
	`

	wanted := htmlselector.FilterGroup{
		"a":     {"href"},
		"video": {"src", "poster"},
	}
	optimized := htmlselector.OptimizeFilters(wanted)

	var tagCollector builders.StructuralBuilder
	var textCollector builders.TextBuilder
	// MultiBuilder lets SelectTags feed both collectors at once.
	combined := htmlselector.MultiBuilder{
		Builder:     &tagCollector,
		TextBuilder: &textCollector,
	}
	if err := htmlselector.SelectTags(
		strings.NewReader(markup),
		optimized,
		combined,
		htmlselector.SkipEmptyText(),
	); err != nil {
		log.Fatal(err)
	}

	for _, element := range tagCollector.Tags() {
		fmt.Printf("<%s>:\n", element.Name)
		for _, attr := range element.Attributes {
			fmt.Printf("  %s=%q\n", attr.Name, attr.Value)
		}
	}

	fmt.Println("text parts:")
	for _, part := range textCollector.TextParts() {
		// Text parts keep their surrounding whitespace; trim it for display.
		trimmed := bytes.TrimSpace(part)
		fmt.Printf("  %q\n", trimmed)
	}

	// Output:
	// <a>:
	//   href="http://example.com/1"
	// <video>:
	//   src="http://example.com/1.1"
	//   poster="http://example.com/1.2"
	// <a>:
	//   href="http://example.com/2"
	// <video>:
	//   src="http://example.com/2.1"
	//   poster="http://example.com/2.2"
	// text parts:
	//   "link #1:"
	//   "one"
	//   "Unable to embed video #1."
	//   "link #2:"
	//   "two"
	//   "Unable to embed video #2."
}

Benchmarks

With Structural Builder

htmlselector.SelectTags() with a simple markup:

BenchmarkSelectTags/structural_builder/simple_markup/10_tags/430B-8         	  200000	     12572 ns/op	     8124 B/op	      35 allocs/op
BenchmarkSelectTags/structural_builder/simple_markup/100_tags/4.4K-8        	   20000	     93708 ns/op	    49167 B/op	     305 allocs/op
BenchmarkSelectTags/structural_builder/simple_markup/1000_tags/45.7K-8      	    3000	    716388 ns/op	   367374 B/op	    3005 allocs/op
BenchmarkSelectTags/structural_builder/simple_markup/10000_tags/476.3K-8    	     300	   7021473 ns/op	  2772749 B/op	   30005 allocs/op
BenchmarkSelectTags/structural_builder/simple_markup/100000_tags/4.8M-8     	      20	  59393679 ns/op	  8104370 B/op	  300005 allocs/op
BenchmarkSelectTags/structural_builder/simple_markup/1000000_tags/50.3M-8   	       1	1166593360 ns/op	998999904 B/op	 3000006 allocs/op

htmlselector.SelectTags() with a complex markup:

BenchmarkSelectTags/structural_builder/complex_markup/10_tags/1020B-8       	  100000	     18614 ns/op	     8117 B/op	     106 allocs/op
BenchmarkSelectTags/structural_builder/complex_markup/100_tags/10.4K-8      	   10000	    194201 ns/op	   155986 B/op	    1006 allocs/op
BenchmarkSelectTags/structural_builder/complex_markup/1000_tags/108.8K-8    	    1000	   1725249 ns/op	   379632 B/op	   10006 allocs/op
BenchmarkSelectTags/structural_builder/complex_markup/10000_tags/1.1M-8     	     100	  23956279 ns/op	 18179332 B/op	  100006 allocs/op
BenchmarkSelectTags/structural_builder/complex_markup/100000_tags/11.6M-8   	      10	 184968462 ns/op	 38395632 B/op	 1000006 allocs/op
BenchmarkSelectTags/structural_builder/complex_markup/1000000_tags/120.6M-8 	       1	1857636429 ns/op	383995632 B/op	10000006 allocs/op

htmlselector.SelectTags() with the universal tag:

BenchmarkSelectTags/structural_builder/universal_tag/10_tags/1020B-8         	   50000	     25557 ns/op	    18370 B/op	     126 allocs/op
BenchmarkSelectTags/structural_builder/universal_tag/100_tags/10.4K-8        	   10000	    311815 ns/op	   168108 B/op	    1206 allocs/op
BenchmarkSelectTags/structural_builder/universal_tag/1000_tags/108.8K-8      	    1000	   2407620 ns/op	  1437633 B/op	   12006 allocs/op
BenchmarkSelectTags/structural_builder/universal_tag/10000_tags/1.1M-8       	     100	  22065997 ns/op	 11180076 B/op	  120006 allocs/op
BenchmarkSelectTags/structural_builder/universal_tag/100000_tags/11.6M-8     	      10	 445495615 ns/op	244945105 B/op	 1200006 allocs/op
BenchmarkSelectTags/structural_builder/universal_tag/1000000_tags/120.6M-8   	       1	2307457875 ns/op	383996112 B/op	12000006 allocs/op
With Flatten Builder

htmlselector.SelectTags() with a simple markup:

BenchmarkSelectTags/flatten_builder/simple_markup/10_tags/430B-8            	  200000	      6244 ns/op	     5971 B/op	      15 allocs/op
BenchmarkSelectTags/flatten_builder/simple_markup/100_tags/4.4K-8           	   30000	     54042 ns/op	    21038 B/op	     105 allocs/op
BenchmarkSelectTags/flatten_builder/simple_markup/1000_tags/45.7K-8         	    3000	    530844 ns/op	   191624 B/op	    1005 allocs/op
BenchmarkSelectTags/flatten_builder/simple_markup/10000_tags/476.3K-8       	     300	   5116874 ns/op	  1402546 B/op	   10005 allocs/op
BenchmarkSelectTags/flatten_builder/simple_markup/100000_tags/4.8M-8        	      20	  54404882 ns/op	 23420178 B/op	  100005 allocs/op
BenchmarkSelectTags/flatten_builder/simple_markup/1000000_tags/50.3M-8      	       2	 663629704 ns/op	284703000 B/op	 1000005 allocs/op

htmlselector.SelectTags() with a complex markup:

BenchmarkSelectTags/flatten_builder/complex_markup/10_tags/1020B-8          	  100000	     15029 ns/op	     11411 B/op	      46 allocs/op
BenchmarkSelectTags/flatten_builder/complex_markup/100_tags/10.4K-8         	   10000	    150423 ns/op	     90183 B/op	     406 allocs/op
BenchmarkSelectTags/flatten_builder/complex_markup/1000_tags/108.8K-8       	    1000	   1268568 ns/op	     74576 B/op	    4006 allocs/op
BenchmarkSelectTags/flatten_builder/complex_markup/10000_tags/1.1M-8        	     100	  19005918 ns/op	  10593772 B/op	   40006 allocs/op
BenchmarkSelectTags/flatten_builder/complex_markup/100000_tags/11.6M-8      	      10	 138218081 ns/op	   7442576 B/op	  400006 allocs/op
BenchmarkSelectTags/flatten_builder/complex_markup/1000000_tags/120.6M-8    	       1	2141494094 ns/op	1313346192 B/op	 4000007 allocs/op

htmlselector.SelectTags() with the universal tag:

BenchmarkSelectTags/flatten_builder/universal_tag/10_tags/1020B-8            	  100000	     17471 ns/op	     9823 B/op	      46 allocs/op
BenchmarkSelectTags/flatten_builder/universal_tag/100_tags/10.4K-8           	   10000	    140121 ns/op	    57790 B/op	     406 allocs/op
BenchmarkSelectTags/flatten_builder/universal_tag/1000_tags/108.8K-8         	    1000	   1577272 ns/op	   802345 B/op	    4006 allocs/op
BenchmarkSelectTags/flatten_builder/universal_tag/10000_tags/1.1M-8          	     100	  15151013 ns/op	  5776548 B/op	   40006 allocs/op
BenchmarkSelectTags/flatten_builder/universal_tag/100000_tags/11.6M-8        	      10	 164593709 ns/op	 70617641 B/op	  400006 allocs/op
BenchmarkSelectTags/flatten_builder/universal_tag/1000000_tags/120.6M-8      	       1	1620865858 ns/op	869134992 B/op	 4000007 allocs/op
With Structural & Text Builders

htmlselector.SelectTags() with a simple markup:

BenchmarkSelectTags/text_builder/simple_markup/10_tags/430B-8         	  122985	     10339 ns/op	     9466 B/op	      45 allocs/op
BenchmarkSelectTags/text_builder/simple_markup/100_tags/4.4K-8        	   15040	     78519 ns/op	    48475 B/op	     405 allocs/op
BenchmarkSelectTags/text_builder/simple_markup/1000_tags/45.7K-8      	    1990	    813642 ns/op	   510223 B/op	    4005 allocs/op
BenchmarkSelectTags/text_builder/simple_markup/10000_tags/476.3K-8    	     160	   6728975 ns/op	  3817896 B/op	   40005 allocs/op
BenchmarkSelectTags/text_builder/simple_markup/100000_tags/4.8M-8     	      19	  75982499 ns/op	 56745964 B/op	  400005 allocs/op
BenchmarkSelectTags/text_builder/simple_markup/1000000_tags/50.3M-8   	       2	 664071080 ns/op	290158432 B/op	 4000005 allocs/op

htmlselector.SelectTags() with a complex markup:

BenchmarkSelectTags/text_builder/complex_markup/10_tags/1020B-8       	   68089	     20964 ns/op	    21599 B/op	     116 allocs/op
BenchmarkSelectTags/text_builder/complex_markup/100_tags/10.4K-8      	    7014	    167202 ns/op	    41712 B/op	    1106 allocs/op
BenchmarkSelectTags/text_builder/complex_markup/1000_tags/108.8K-8    	     717	   1663950 ns/op	   387312 B/op	   11006 allocs/op
BenchmarkSelectTags/text_builder/complex_markup/10000_tags/1.1M-8     	      69	  23245678 ns/op	 27798263 B/op	  110006 allocs/op
BenchmarkSelectTags/text_builder/complex_markup/100000_tags/11.6M-8   	       6	 173781906 ns/op	 38403312 B/op	 1100006 allocs/op
BenchmarkSelectTags/text_builder/complex_markup/1000000_tags/120.6M-8 	       1	1740418254 ns/op	384003328 B/op	11000006 allocs/op

htmlselector.SelectTags() with the universal tag:

BenchmarkSelectTags/text_builder/universal_tag/10_tags/1020B-8        	   61375	     19060 ns/op	     8168 B/op	     136 allocs/op
BenchmarkSelectTags/text_builder/universal_tag/100_tags/10.4K-8       	    6638	    258572 ns/op	   407046 B/op	    1306 allocs/op
BenchmarkSelectTags/text_builder/universal_tag/1000_tags/108.8K-8     	     615	   1775232 ns/op	   387360 B/op	   13006 allocs/op
BenchmarkSelectTags/text_builder/universal_tag/10000_tags/1.1M-8      	      57	  27496277 ns/op	 43162804 B/op	  130006 allocs/op
BenchmarkSelectTags/text_builder/universal_tag/100000_tags/11.6M-8    	       6	 187202961 ns/op	 38403378 B/op	 1300006 allocs/op
BenchmarkSelectTags/text_builder/universal_tag/1000000_tags/120.6M-8  	       1	1854043284 ns/op	384003408 B/op	13000006 allocs/op

License

The MIT License (MIT)

Copyright © 2020 thewizardplusplus

Documentation

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func SelectTags

func SelectTags(
	reader io.Reader,
	filters OptimizedFilterGroup,
	builder Builder,
	options ...Option,
) error

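SelectTags reads an HTML document from reader and passes the tags and attributes that match filters to builder; options control which empty entities are skipped.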

Example (WithFlattenBuilder)
package main

import (
	"fmt"
	"log"
	"strings"

	htmlselector "github.com/thewizardplusplus/go-html-selector"
	"github.com/thewizardplusplus/go-html-selector/builders"
)

// main collects the values of the href attribute of <a> tags and the src and
// poster attributes of <video> tags from a sample document, skipping
// attributes with an empty value, then prints each value quoted on its own
// line.
func main() {
	const markup = `
		<ul>
			<li>
				<a href="http://example.com/1">1</a>
				<video
					src="http://example.com/1.1"
					poster="http://example.com/1.2">
				</video>
			</li>
			<li>
				<a href="http://example.com/2">2</a>
				<video
					src="http://example.com/2.1"
					poster="http://example.com/2.2">
				</video>
			</li>
			<li>
				<a href="">3</a>
				<video src="" poster=""></video>
			</li>
			<li>
				<a>4</a>
				<video></video>
			</li>
		</ul>
	`

	wanted := htmlselector.FilterGroup{
		"a":     {"href"},
		"video": {"src", "poster"},
	}
	optimized := htmlselector.OptimizeFilters(wanted)

	// The flatten builder keeps only attribute values, without tag grouping.
	var collected builders.FlattenBuilder
	if err := htmlselector.SelectTags(
		strings.NewReader(markup),
		optimized,
		&collected,
		htmlselector.SkipEmptyAttributes(),
	); err != nil {
		log.Fatal(err)
	}

	for _, value := range collected.AttributeValues() {
		fmt.Printf("%q\n", value)
	}
}
Output:

"http://example.com/1"
"http://example.com/1.1"
"http://example.com/1.2"
"http://example.com/2"
"http://example.com/2.1"
"http://example.com/2.2"
Example (WithStructuralBuilder)
package main

import (
	"fmt"
	"log"
	"strings"

	htmlselector "github.com/thewizardplusplus/go-html-selector"
	"github.com/thewizardplusplus/go-html-selector/builders"
)

// main collects the href attribute of <a> tags and the src and poster
// attributes of <video> tags from a sample document, skipping tags without
// attributes and attributes with an empty value, then prints the result
// grouped by tag.
func main() {
	const markup = `
		<ul>
			<li>
				<a href="http://example.com/1">1</a>
				<video
					src="http://example.com/1.1"
					poster="http://example.com/1.2">
				</video>
			</li>
			<li>
				<a href="http://example.com/2">2</a>
				<video
					src="http://example.com/2.1"
					poster="http://example.com/2.2">
				</video>
			</li>
			<li>
				<a href="">3</a>
				<video src="" poster=""></video>
			</li>
			<li>
				<a>4</a>
				<video></video>
			</li>
		</ul>
	`

	wanted := htmlselector.FilterGroup{
		"a":     {"href"},
		"video": {"src", "poster"},
	}
	optimized := htmlselector.OptimizeFilters(wanted)

	var collected builders.StructuralBuilder
	if err := htmlselector.SelectTags(
		strings.NewReader(markup),
		optimized,
		&collected,
		htmlselector.SkipEmptyTags(),
		htmlselector.SkipEmptyAttributes(),
	); err != nil {
		log.Fatal(err)
	}

	for _, element := range collected.Tags() {
		fmt.Printf("<%s>:\n", element.Name)
		for _, attr := range element.Attributes {
			fmt.Printf("  %s=%q\n", attr.Name, attr.Value)
		}
	}
}
Output:

<a>:
  href="http://example.com/1"
<video>:
  src="http://example.com/1.1"
  poster="http://example.com/1.2"
<a>:
  href="http://example.com/2"
<video>:
  src="http://example.com/2.1"
  poster="http://example.com/2.2"
Example (WithTextBuilder)
package main

import (
	"bytes"
	"fmt"
	"log"
	"strings"

	htmlselector "github.com/thewizardplusplus/go-html-selector"
	"github.com/thewizardplusplus/go-html-selector/builders"
)

// main collects both attributes and text from a sample document in a single
// pass: a structural builder gathers the href attribute of <a> tags and the
// src and poster attributes of <video> tags, while a text builder gathers the
// text parts (whitespace-only text is skipped). Both results are printed.
func main() {
	const markup = `
		<ul>
			<li>
				link #1: <a href="http://example.com/1">one</a>
				<video
					src="http://example.com/1.1"
					poster="http://example.com/1.2">
					Unable to embed video #1.
				</video>
			</li>
			<li>
				link #2: <a href="http://example.com/2">two</a>
				<video
					src="http://example.com/2.1"
					poster="http://example.com/2.2">
					Unable to embed video #2.
				</video>
			</li>
		</ul>
	`

	wanted := htmlselector.FilterGroup{
		"a":     {"href"},
		"video": {"src", "poster"},
	}
	optimized := htmlselector.OptimizeFilters(wanted)

	var tagCollector builders.StructuralBuilder
	var textCollector builders.TextBuilder
	// MultiBuilder lets SelectTags feed both collectors at once.
	combined := htmlselector.MultiBuilder{
		Builder:     &tagCollector,
		TextBuilder: &textCollector,
	}
	if err := htmlselector.SelectTags(
		strings.NewReader(markup),
		optimized,
		combined,
		htmlselector.SkipEmptyText(),
	); err != nil {
		log.Fatal(err)
	}

	for _, element := range tagCollector.Tags() {
		fmt.Printf("<%s>:\n", element.Name)
		for _, attr := range element.Attributes {
			fmt.Printf("  %s=%q\n", attr.Name, attr.Value)
		}
	}

	fmt.Println("text parts:")
	for _, part := range textCollector.TextParts() {
		// Text parts keep their surrounding whitespace; trim it for display.
		trimmed := bytes.TrimSpace(part)
		fmt.Printf("  %q\n", trimmed)
	}
}
Output:

<a>:
  href="http://example.com/1"
<video>:
  src="http://example.com/1.1"
  poster="http://example.com/1.2"
<a>:
  href="http://example.com/2"
<video>:
  src="http://example.com/2.1"
  poster="http://example.com/2.2"
text parts:
  "link #1:"
  "one"
  "Unable to embed video #1."
  "link #2:"
  "two"
  "Unable to embed video #2."
Example (WithUniversalTag)
package main

import (
	"fmt"
	"log"
	"strings"

	htmlselector "github.com/thewizardplusplus/go-html-selector"
	"github.com/thewizardplusplus/go-html-selector/builders"
)

// main demonstrates the universal tag: the title and href attributes are
// requested for every tag, the src and poster attributes only for <video>
// tags. Tags without selected attributes are skipped, and the result is
// printed grouped by tag.
func main() {
	const markup = `
		<ul>
			<li>
				<a href="http://example.com/1" title="link #1">1</a>
				<video
					src="http://example.com/1.1"
					poster="http://example.com/1.2"
					title="video #1">
				</video>
			</li>
			<li>
				<a href="http://example.com/2" title="link #2">2</a>
				<video
					src="http://example.com/2.1"
					poster="http://example.com/2.2"
					title="video #2">
				</video>
			</li>
		</ul>
	`

	wanted := htmlselector.FilterGroup{
		htmlselector.UniversalTag: {"title", "href"},
		"video":                   {"src", "poster"},
	}
	optimized := htmlselector.OptimizeFilters(wanted)

	var collected builders.StructuralBuilder
	if err := htmlselector.SelectTags(
		strings.NewReader(markup),
		optimized,
		&collected,
		htmlselector.SkipEmptyTags(),
	); err != nil {
		log.Fatal(err)
	}

	for _, element := range collected.Tags() {
		fmt.Printf("<%s>:\n", element.Name)
		for _, attr := range element.Attributes {
			fmt.Printf("  %s=%q\n", attr.Name, attr.Value)
		}
	}
}
Output:

<a>:
  href="http://example.com/1"
  title="link #1"
<video>:
  src="http://example.com/1.1"
  poster="http://example.com/1.2"
  title="video #1"
<a>:
  href="http://example.com/2"
  title="link #2"
<video>:
  src="http://example.com/2.1"
  poster="http://example.com/2.2"
  title="video #2"

Types

type AttributeName

// AttributeName is the name of an HTML attribute, as used in filter groups.
type AttributeName string

AttributeName ...

type Builder

// Builder receives the tags and attributes that SelectTags selects from
// a document and accumulates them into a result.
type Builder interface {
	// AddTag is given the name of a selected tag.
	AddTag(name []byte)
	// AddAttribute is given the name and value of a selected attribute.
	// NOTE(review): presumably the attribute belongs to the tag most recently
	// passed to AddTag; confirm against the SelectTags implementation.
	AddAttribute(name []byte, value []byte)
}

Builder ...

type FilterGroup

// FilterGroup maps a tag name to the names of the attributes to select from
// tags with that name.
type FilterGroup map[TagName][]AttributeName

FilterGroup ...

type MultiBuilder added in v1.6.1

// MultiBuilder combines a Builder and a TextBuilder by embedding both, so
// a single value provides AddTag, AddAttribute, and AddText.
type MultiBuilder struct {
	Builder
	TextBuilder
}

MultiBuilder ...

type OptimizedAttributeFilterGroup

// OptimizedAttributeFilterGroup is a set of attribute names, represented as
// a map with empty-struct values.
type OptimizedAttributeFilterGroup map[AttributeName]struct{}

OptimizedAttributeFilterGroup ...

type OptimizedFilterGroup

// OptimizedFilterGroup maps a tag name to the set of attribute names to
// select from tags with that name. It is returned by OptimizeFilters and
// accepted by SelectTags.
type OptimizedFilterGroup map[TagName]OptimizedAttributeFilterGroup

OptimizedFilterGroup ...

func OptimizeFilters

func OptimizeFilters(
	filters FilterGroup,
	options ...Option,
) OptimizedFilterGroup

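OptimizeFilters converts a FilterGroup into an OptimizedFilterGroup, in which the attribute filters of each tag are stored as a set.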

type Option

// Option is a function that adjusts an OptionConfig. SelectTags and
// OptimizeFilters each accept a variadic list of options.
type Option func(config *OptionConfig)

Option ...

func SkipEmptyAttributes added in v1.6.1

func SkipEmptyAttributes() Option

SkipEmptyAttributes returns an Option that skips attributes with an empty value.

func SkipEmptyTags

func SkipEmptyTags() Option

SkipEmptyTags returns an Option that skips tags without attributes.

func SkipEmptyText added in v1.6.1

func SkipEmptyText() Option

SkipEmptyText returns an Option that skips whitespace-only text.

type OptionConfig

// OptionConfig holds the settings that Option functions modify. All of its
// fields are unexported, so it can only be changed through options.
type OptionConfig struct {
	// contains filtered or unexported fields
}

OptionConfig ...

type TagName

// TagName is the name of an HTML tag, used as the key of filter groups.
type TagName string

TagName ...

const (
	// UniversalTag is the "*" tag name. Attribute filters registered under it
	// are applied to tags of any name.
	UniversalTag TagName = "*"
)

...

type TextBuilder added in v1.6.1

// TextBuilder receives the text collected from a document.
type TextBuilder interface {
	// AddText is given one piece of text found in the document.
	// NOTE(review): whether the slice may be retained after the call is not
	// visible here; confirm against the SelectTags implementation.
	AddText(text []byte)
}

TextBuilder ...

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL