adblock

package
v0.0.0-...-edfb97a Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 28, 2017 License: MIT Imports: 8 Imported by: 16

Documentation

Overview

Package adblock implements a parser and a matcher for AdBlockPlus rules.

The syntax of AdBlockPlus rules is partially defined in https://adblockplus.org/en/filter-cheatsheet and https://adblockplus.org/en/filters.

To parse rules and build a matcher:

matcher := adblock.NewMatcher()
fp, err := os.Open("easylist.txt")
...
rules, err := adblock.ParseRules(fp)
for _, rule := range rules {
	err = matcher.AddRule(rule, 0)
	...
}

To match HTTP requests:

host := r.URL.Host
if host == "" {
	host = r.Host
}
rq := adblock.Request{
	URL: r.URL.String(),
	Domain: host,
	// possibly fill OriginDomain from the Referer header
	// and ContentType from HTTP response Content-Type.
	Timeout: 200 * time.Millisecond,
}
matched, id, err := matcher.Match(&rq)
if err != nil {
	...
}
if matched {
	// Use the rule identifier to print which rule was matched
}

Index

Constants

View Source
const (
	Exact        = iota // string to match
	Wildcard     = iota // *
	Separator    = iota // ^
	StartAnchor  = iota // |
	DomainAnchor = iota // ||

	Root      = iota
	Substring = iota // Wildcard + Exact
)

Variables

View Source
var (
	NullOpts = RuleOpts{}
)

Functions

This section is empty.

Types

type InterruptedError

type InterruptedError struct {
	Duration time.Duration
	Rule     string
}

func (*InterruptedError) Error

func (e *InterruptedError) Error() string

type Request

type Request struct {
	// URL is matched against rule parts. Mandatory.
	URL string
	// Domain is matched against optional domain or third-party rules
	Domain string
	// ContentType is matched against optional content rules. This
	// information is often available only in client responses. Filters
	// may be applied twice, once at request time, once at response time.
	ContentType string
	// OriginDomain is matched against optional third-party rules.
	OriginDomain string

	// Timeout is the maximum amount of time a single matching can take.
	Timeout   time.Duration
	CheckFreq int

	// GenericBlock is true if rules not matching a specific domain are to be
	// ignored. If nil, the matcher will determine it internally based on
	// $genericblock options.
	GenericBlock *bool
}

Request defines client request properties to be matched against a set of rules.

func (*Request) HasGenericBlock

func (rq *Request) HasGenericBlock() bool

type Rule

type Rule struct {
	// The original string representation
	Raw string
	// Exception is true for exclusion rules (prefixed with "@@")
	Exception bool
	// Parts is the sequence of RulePart matching URLs
	Parts []RulePart
	// Opts are optional rules applied to content
	Opts RuleOpts
}

Rule represents a complete adblockplus rule.

func ParseRule

func ParseRule(s string) (*Rule, error)

ParseRule parses a single rule.

func ParseRules

func ParseRules(r io.Reader) ([]*Rule, error)

ParseRules returns the sequence of rules extracted from the supplied reader's content.

func (*Rule) HasContentOpts

func (r *Rule) HasContentOpts() bool

func (*Rule) HasUnsupportedOpts

func (r *Rule) HasUnsupportedOpts() bool

type RuleMatcher

type RuleMatcher struct {
	// contains filtered or unexported fields
}

RuleMatcher implements a complete set of include and exclude AdblockPlus rules.

func NewMatcher

func NewMatcher() *RuleMatcher

NewMatcher returns a new empty matcher.

func NewMatcherFromFiles

func NewMatcherFromFiles(paths ...string) (*RuleMatcher, int, error)

func (*RuleMatcher) AddRule

func (m *RuleMatcher) AddRule(rule *Rule, ruleId int) error

AddRule adds a rule to the matcher. The supplied rule identifier will be returned by Match() when this rule matches.

func (*RuleMatcher) Match

func (m *RuleMatcher) Match(rq *Request) (bool, int, error)

Match applies include and exclude rules to the supplied request. If the request is accepted, it returns true and the matching rule identifier.

func (*RuleMatcher) String

func (m *RuleMatcher) String() string

String returns a textual representation of the include and exclude rules, matching request with or without content.

type RuleOpts

type RuleOpts struct {
	Raw              string
	Collapse         *bool
	Document         bool
	Domains          []string
	ElemHide         bool
	Font             *bool
	GenericBlock     bool
	GenericHide      bool
	Image            *bool
	Media            *bool
	Object           *bool
	ObjectSubRequest *bool
	Other            *bool
	Ping             *bool
	Popup            *bool
	Script           *bool
	Stylesheet       *bool
	SubDocument      *bool
	ThirdParty       *bool
	Websocket        *bool
	WebRTC           *bool
	XmlHttpRequest   *bool
}

RuleOpts defines custom rules applied to content once the URL part has been matched by the RuleParts.

func NewRuleOpts

func NewRuleOpts(s string) (RuleOpts, error)

NewRuleOpts parses the rule part following the '$' separator and returns content matching options.

type RulePart

type RulePart struct {
	// Rule type, like Exact, Wildcard, etc.
	Type int
	// Rule part string representation
	Value string
}

RulePart is the base component of rules. It represents a single matching element, like an exact match, a wildcard, a domain anchor...

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL