htmlcheck

package module
v1.0.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 26, 2023 License: GPL-3.0 Imports: 10 Imported by: 0

README

htmlcheck

simple, fast and easy HTML validator in Go


About The Project

htmlcheck is a lightweight and efficient Golang package designed to simplify the validation of HTML content in your Go applications. Whether you're working with complete HTML documents or HTML snippets, this package provides a straightforward interface to check the validity of your markup.

You can specify valid HTML tags, their attributes, and permissible attribute values, providing a comprehensive solution for HTML validation.

This package is a clone of the original htmlcheck package, which has not been maintained for a long time.

Table of Contents

Installation

go get github.com/anilsenay/htmlcheck

Usage

Import the package, register the tags and attributes you consider valid, and then validate your HTML:

package main

import (
	"fmt"
	"github.com/anilsenay/htmlcheck"
)

func main() {
	validator := htmlcheck.Validator{}

	validator.AddValidTag(htmlcheck.ValidTag{
		Name:  "a",
		Attrs: []htmlcheck.Attribute{
			{Name: "id"},
			{Name: "href", Value: &htmlcheck.AttributeValue{
				// valid regex for href attribute value
				Regex: "^(http(s|))://.*"
			}},
			{Name: "target", Value: &htmlcheck.AttributeValue{
				// valid values for target attribute value
				List: []string{"_target", "_blank"}
			}},
		},
		IsSelfClosing: false,
	})

	html := "<a href='http://hello.world'>Hello, World!</a>"
	errors := htmlcheck.Validate(html)
	if len(errors) == 0 {
		fmt.Println("HTML is not valid.")
	} else {
		fmt.Println("HTML is valid!")
	}
}

Examples

package main

import (
	"fmt"
	"github.com/anilsenay/htmlcheck"
)

// main runs two validations against a validator that only accepts <a> tags
// with the id, href and target attributes.
func main() {
	validator := htmlcheck.Validator{}

	// valid regex for href attribute value
	hrefValue := &htmlcheck.AttributeValue{Regex: "^(http(s|))://.*"}
	// valid values for target attribute value
	targetValue := &htmlcheck.AttributeValue{List: []string{"_target", "_blank"}}

	validator.AddValidTag(htmlcheck.ValidTag{
		Name: "a",
		Attrs: []htmlcheck.Attribute{
			{Name: "id"},
			{Name: "href", Value: hrefValue},
			{Name: "target", Value: targetValue},
		},
		IsSelfClosing: false,
	})

	// first check: all reported errors are joined into a single error value
	if err := validator.ValidateHtmlString("<a href='http://google.com'>m</a>").Join(); err != nil {
		fmt.Println(err)
	} else {
		fmt.Println("ok")
	}

	// second check: notice the missing / in the second <a>
	if errs := validator.ValidateHtmlString("<a href='http://google.com'>m<a>"); len(errs) != 0 {
		fmt.Println(errs)
	} else {
		fmt.Println("ok")
	}
}

output:

ok
tag 'a' is not properly closed

Documentation

Error Handling

The validation functions return a slice of errors ([]error) of type ValidationErrorList.

errors := validator.ValidateHtmlString("<a href='http://google.com'>m<a>")
if len(errors) > 0 {
	fmt.Println("NOT valid")
}

You can join all errors as one by using Join() function:

err := validator.ValidateHtmlString("<a href='http://google.com'>m<a>").Join()
if err != nil {
	fmt.Println("NOT valid")
}
Stop after first error

When StopAfterFirstError is set to true, validation returns as soon as the first error occurs.

validator := htmlcheck.Validator{
	StopAfterFirstError: true,
}
errors := validator.ValidateHtmlString("<a href='http://google.com'>m<a>")
if len(errors) > 0 {
	err := errors[0] // its the only error
}
Check error types

You can check type of errors:

Example
var err error
for _, e := range validationErrors {
  switch v := e.(type) {
  case htmlcheck.ErrInvAttribute:
    err = errors.Join(err, fmt.Errorf("inv attr: '%s'", v.AttributeName))
  case htmlcheck.ErrInvAttributeValue:
    err = errors.Join(err, fmt.Errorf("inv attr val: '%s'", v.AttributeValue))
  case htmlcheck.ErrInvClosedBeforeOpened:
    err = errors.Join(err, fmt.Errorf("closed before opened: '%s'", v.TagName))
  case htmlcheck.ErrInvDuplicatedAttribute:
    err = errors.Join(err, fmt.Errorf("dup attr: '%s'", v.AttributeName))
  case htmlcheck.ErrInvTag:
    err = errors.Join(err, fmt.Errorf("inv tag: '%s'", v.TagName))
  case htmlcheck.ErrInvNotProperlyClosed:
    err = errors.Join(err, fmt.Errorf("not properly closed: '%s'", v.TagName))
  case htmlcheck.ErrInvEOF:
    err = errors.Join(err, fmt.Errorf("EOF"))
  default:
    err = errors.Join(err, fmt.Errorf("Validation error: '%s'", e.Error()))
  }
}
Register Callback
v.RegisterCallback(func(tagName string, attributeName string, value string, reason ErrorReason) error {
	if reason == InvTag || reason == InvAttribute {
		return fmt.Errorf("validation error: tag '%s', attr: %s", tagName, attributeName)
	}
	return nil
})
Validator Functions
AddValidTag
validator := htmlcheck.Validator{}
// ValidTag is qualified with the package name because this code lives outside htmlcheck.
validator.AddValidTag(htmlcheck.ValidTag{
	Name:          "b",
	IsSelfClosing: false,
})
AddValidTags
validator := htmlcheck.Validator{}
validator.AddValidTags([]*htmlcheck.ValidTag{
	{ Name: "div" },
	{ Name: "p" },
})
AddGroup / AddGroups

You can group attributes under a name and then reference that group by name in valid tags.

validator := htmlcheck.Validator{}
// consider it should only accept http/https urls in some attributes in this example
httpRegex := &htmlcheck.AttributeValue{Regex: "^(http(s|))://.*"}
validator.AddGroup(&htmlcheck.TagGroup{
	Name:  "valid-links",
	Attrs: []htmlcheck.Attribute{
		{Name: "href", Value: httpRegex},
		{Name: "src", Value: httpRegex},
	},
})
validator.AddValidTag(htmlcheck.ValidTag{ Name: "a", Groups: []string{"valid-links"} })
validator.AddValidTag(htmlcheck.ValidTag{ Name: "img", Groups: []string{"valid-links"} })
Types
ValidTag
Field Type Description
Name string Name of tag such as div, a, p, span, etc.
Attrs []Attribute Valid Attribute list for the tag
AttrRegex string Attributes that match the regex are valid
AttrStartsWith string Attributes that start with the given input are valid
Groups []string Group list
IsSelfClosing bool If true, tag will be valid without closing tag, default: false
Example
validator.AddValidTags([]*htmlcheck.ValidTag{
  { Name: "div", Attrs: []htmlcheck.Attribute{ {Name: "id"} } },
  { Name: "p", AttrStartsWith: "data-" },
  { Name: "a", AttrRegex: "^(data-).+" },
})
Attribute
Field Type Description
Name string Name of attribute such as href, class, style, etc.
Value *AttributeValue Valid values for the attribute
Example
validLink := htmlcheck.ValidTag{
  Name:  "a",
  Attrs: []htmlcheck.Attribute{
    {Name: "id"},
    {Name: "href", Value: &htmlcheck.AttributeValue{Regex: "^(http(s|))://.*"}}, // valid regex for href attribute value
    {Name: "target", Value: &htmlcheck.AttributeValue{List: []string{"_target", "_blank"}}}, // valid values for target attribute value
  },
}
AttributeValue
Field Type Description
List []string List of valid attribute values (for example valid class names)
Regex string Attribute values that match the regex are valid
StartsWith string Attribute values that start with the given input are valid
Example
validLink := htmlcheck.ValidTag{
  Name:  "a",
  Attrs: []htmlcheck.Attribute{
    {Name: "id"},
    {Name: "href", Value: &htmlcheck.AttributeValue{Regex: "^(http(s|))://.*"}}, // valid regex for href attribute value
    {Name: "target", Value: &htmlcheck.AttributeValue{List: []string{"_target", "_blank"}}}, // valid values for target attribute value
  },
}
TagGroup
Field Type Description
Name string Name of group
Attrs []Attribute Valid Attribute list for the group
Example
// consider it should only accept http/https urls in some attributes in this example
httpRegex := &htmlcheck.AttributeValue{Regex: "^(http(s|))://.*"}
validator.AddGroup(&htmlcheck.TagGroup{
  Name:  "valid-links",
  Attrs: []htmlcheck.Attribute{
    {Name: "href", Value: httpRegex}, 
    {Name: "src", Value: httpRegex},
  },
})
Error Types
Type Description
ErrInvTag Tag is not valid
ErrInvClosedBeforeOpened Tag closed before opened e.g: <div></p></div>
ErrInvNotProperlyClosed Tag is opened but not closed e.g: <div><p></div>
ErrInvAttribute An attribute in tag is not valid
ErrInvAttributeValue Value of the attribute is not valid
ErrInvDuplicatedAttribute Duplicate attribute e.g: <a href='..' href='..'></a>
ErrInvEOF This error occurs when parsing is done. It will not be added to the output error list

Contributing

Anyone can contribute by opening an issue or a pull request.

License

Distributed under the GPL-3.0 License. See LICENSE for more information.

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Attribute

// Attribute describes one attribute that is allowed on a tag or in a tag group.
type Attribute struct {
	// Name is the name of the attribute, such as href, class or style.
	Name  string
	// Value holds the valid values for the attribute.
	// NOTE(review): the README examples leave Value unset for attributes such
	// as "id"; presumably a nil Value means any value is accepted — confirm.
	Value *AttributeValue
}

type AttributeValue

// AttributeValue describes which values are valid for an attribute.
type AttributeValue struct {
	// List is the list of valid attribute values (for example valid class names).
	List       []string
	// Regex is a pattern; attribute values that match it are valid.
	Regex      string
	// StartsWith is a prefix; presumably attribute values starting with it are
	// valid — the README wording is ambiguous, confirm against the implementation.
	StartsWith string
}

type ErrInvAttribute

type ErrInvAttribute struct{ ErrorDetails }

func (ErrInvAttribute) Error

func (e ErrInvAttribute) Error() string

type ErrInvAttributeValue

type ErrInvAttributeValue struct{ ErrorDetails }

func (ErrInvAttributeValue) Error

func (e ErrInvAttributeValue) Error() string

type ErrInvClosedBeforeOpened

type ErrInvClosedBeforeOpened struct{ ErrorDetails }

func (ErrInvClosedBeforeOpened) Error

func (e ErrInvClosedBeforeOpened) Error() string

type ErrInvDuplicatedAttribute

type ErrInvDuplicatedAttribute struct{ ErrorDetails }

func (ErrInvDuplicatedAttribute) Error

func (e ErrInvDuplicatedAttribute) Error() string

type ErrInvEOF

type ErrInvEOF struct{ ErrorDetails }

func (ErrInvEOF) Error

func (e ErrInvEOF) Error() string

type ErrInvNotProperlyClosed

type ErrInvNotProperlyClosed struct{ ErrorDetails }

func (ErrInvNotProperlyClosed) Error

func (e ErrInvNotProperlyClosed) Error() string

type ErrInvTag

type ErrInvTag struct{ ErrorDetails }

func (ErrInvTag) Error

func (e ErrInvTag) Error() string

type ErrorCallback

type ErrorCallback func(tagName string, attributeName string, value string, reason ErrorReason) error

type ErrorDetails

type ErrorDetails struct {
	TagName        string
	AttributeName  string
	AttributeValue string
	Reason         ErrorReason
}

func (ErrorDetails) Details

func (d ErrorDetails) Details() ErrorDetails

type ErrorReason

// ErrorReason identifies the kind of validation problem. It is stored in
// ErrorDetails.Reason and passed to an ErrorCallback.
//
// NOTE(review): the per-constant descriptions below are taken from the README's
// "Error Types" table for the correspondingly named ErrInv* types — confirm
// the mapping against the implementation.
type ErrorReason int
const (
	// InvTag: the tag is not valid.
	InvTag                 ErrorReason = 0
	// InvAttribute: an attribute in the tag is not valid.
	InvAttribute           ErrorReason = 1
	// InvClosedBeforeOpened: a tag was closed before it was opened, e.g. <div></p></div>.
	InvClosedBeforeOpened  ErrorReason = 2
	// InvNotProperlyClosed: a tag was opened but not closed, e.g. <div><p></div>.
	InvNotProperlyClosed   ErrorReason = 3
	// InvDuplicatedAttribute: an attribute is duplicated, e.g. <a href='..' href='..'></a>.
	InvDuplicatedAttribute ErrorReason = 4
	// InvEOF: parsing is done; per the README this is not added to the output error list.
	InvEOF                 ErrorReason = 5
	// InvAttributeValue: the value of the attribute is not valid.
	InvAttributeValue      ErrorReason = 6
)

type TagGroup

// TagGroup is a named set of attributes that valid tags can reference by
// group name (see ValidTag.Groups).
type TagGroup struct {
	// Name is the name of the group.
	Name  string
	// Attrs is the valid attribute list for the group.
	Attrs []Attribute
}

type TagsFile

type TagsFile struct {
	Groups []*TagGroup
	Tags   []*ValidTag
}

type ValidTag

// ValidTag describes a tag that the validator accepts, together with the
// attributes that are allowed on it.
type ValidTag struct {
	// Name is the name of the tag, such as div, a, p or span.
	Name           string
	// Attrs is the valid attribute list for the tag.
	Attrs          []Attribute
	// AttrRegex is a pattern; attributes that match it are valid.
	AttrRegex      string
	// AttrStartsWith is a prefix; attributes that start with it are valid.
	AttrStartsWith string
	// Groups is the list of group names (see TagGroup) used by this tag.
	Groups         []string
	// IsSelfClosing, if true, makes the tag valid without a closing tag.
	// The default is false.
	IsSelfClosing  bool
}

func (*ValidTag) HasGroup

func (tag *ValidTag) HasGroup(groupName string) bool

type ValidationError

type ValidationError interface {
	Error() string
	Details() ErrorDetails
}

type ValidationErrorList

type ValidationErrorList []error

func (ValidationErrorList) Join

func (el ValidationErrorList) Join() error

type Validator

// Validator checks HTML against the tags and groups registered on it.
// The README examples construct it as a plain htmlcheck.Validator{} literal.
type Validator struct {
	// StopAfterFirstError, when true, makes validation return after the first
	// error occurs, so the returned list holds only that error.
	StopAfterFirstError bool
	// contains filtered or unexported fields
}

func (*Validator) AddGroup

func (v *Validator) AddGroup(group *TagGroup)

func (*Validator) AddGroups

func (v *Validator) AddGroups(groups []*TagGroup)

func (*Validator) AddValidTag

func (v *Validator) AddValidTag(validTag ValidTag)

func (*Validator) AddValidTags

func (v *Validator) AddValidTags(validTags []*ValidTag)

func (*Validator) IsValidSelfClosingTag

func (v *Validator) IsValidSelfClosingTag(tagName string) bool

func (*Validator) IsValidTag

func (v *Validator) IsValidTag(tagName string) bool

func (*Validator) LoadTagsFromFile

func (v *Validator) LoadTagsFromFile(path string) error

func (*Validator) RegisterCallback

func (v *Validator) RegisterCallback(f ErrorCallback)

func (*Validator) ValidateHtml

func (v *Validator) ValidateHtml(r io.Reader) ValidationErrorList

func (*Validator) ValidateHtmlString

func (v *Validator) ValidateHtmlString(str string) ValidationErrorList

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL