parser

package
v0.0.0-...-e569bb0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 11, 2020 License: MIT Imports: 10 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var (
	// ErrEmptyURL is the error for the case where an empty URL is provided.
	ErrEmptyURL = errors.New("empty URL provided")
	// ErrInvalidURL is the error for the case where an invalid URL is provided.
	ErrInvalidURL = errors.New("invalid URL provided")
	// ErrEmptyURLHost is the error for the case where the provided URL has an empty host.
	ErrEmptyURLHost = errors.New("empty URL host provided")
)
View Source
// GetLinksTestSuite lists the test cases for GetLinks(): each entry pairs a
// URL with the list of links and the error expected for it.
var GetLinksTestSuite = []GetLinksTestCase{
	{
		Description:   "http://blank.org/",
		URL:           "http://blank.org/",
		ExpectedList:  []string{"http://blank.org/blank.html"},
		ExpectedError: nil,
	},
	{
		Description: "http://www.blankwebsite.com/",
		URL:         "http://www.blankwebsite.com/",
		// Kept as an explicit empty (non-nil) slice, exactly as declared originally.
		ExpectedList:  []string{},
		ExpectedError: nil,
	},
}

GetLinksTestSuite holds the test cases for GetLinks().

View Source
// ValidateURLTestSuite lists the test cases for ValidateURL(): each entry
// pairs an input URL with the error expected for it (nil when none is expected).
var ValidateURLTestSuite = []ValidateURLTestCase{
	// Inputs expected to produce no error.
	{
		Description:   "Validate Valid URL",
		URL:           "http://www.google.com",
		ExpectedError: nil,
	},
	{
		Description:   "Validate URL With Port",
		URL:           "https://www.google.com:443",
		ExpectedError: nil,
	},
	// Empty input.
	{
		Description:   "Validate Empty URL",
		URL:           "",
		ExpectedError: ErrEmptyURL,
	},
	// Inputs expected to be reported as having an empty host.
	{
		Description:   "Validate URL With Only Path",
		URL:           "/testing-path",
		ExpectedError: ErrEmptyURLHost,
	},
	{
		Description:   "Validate Empty Host URL",
		URL:           "https://",
		ExpectedError: ErrEmptyURLHost,
	},
	// Inputs expected to be reported as invalid.
	{
		Description:   "Invalid Case",
		URL:           "alskjff#?asf//dfas",
		ExpectedError: ErrInvalidURL,
	},
	{
		Description:   "Invalid Case",
		URL:           "https",
		ExpectedError: ErrInvalidURL,
	},
	{
		Description:   "Invalid Case",
		URL:           "google",
		ExpectedError: ErrInvalidURL,
	},
	{
		Description:   "Invalid Case",
		URL:           "google.com",
		ExpectedError: ErrInvalidURL,
	},
	{
		Description:   "Invalid Case",
		URL:           "testing-path",
		ExpectedError: ErrInvalidURL,
	},
}

ValidateURLTestSuite holds the test cases for ValidateURL().

Functions

func FilterHyperlinks(URL string, links []string) []string

FilterHyperlinks filters out links that lead away from the domain of the original URL.

func GetHyperlinks(BaseURL string, response *http.Response) []string

GetHyperlinks processes a given URL and the response to the request for that URL to obtain all the links in it. It does so by tokenizing the resulting <html> body and identifying the hyperlinks in it.

func GetLinkHelper

func GetLinkHelper(tokenAttribute []html.Attribute) string

GetLinkHelper iterates over the token attributes to search for the link type token

func GetLinks(URL string) ([]string, error)

GetLinks sends an HTTP GET request to the given URL to obtain the resulting HTML body and then parses it.

func InsertLinks(URL string, links []string) error

InsertLinks persists the obtained links to the database.

func Parse

func Parse(URL string) ([]string, error)

Parse is the main function: it takes a given URL and obtains all the URLs present in it.

func PublishLinks(links []string) error

PublishLinks publishes the links as messages to RabbitMQ.

func ValidateURL

func ValidateURL(URL string) error

ValidateURL checks if the URL is semantically valid

Types

type GetLinksTestCase

// GetLinksTestCase describes one test case for GetLinks().
type GetLinksTestCase struct {
	Description, URL string   // label of the case and the input URL
	ExpectedList     []string // list expected for the URL
	ExpectedError    error    // error expected for the URL; nil when none
}

GetLinksTestCase is the test case struct definition for GetLinks()

type ValidateURLTestCase

// ValidateURLTestCase describes one test case for ValidateURL().
type ValidateURLTestCase struct {
	Description, URL string // label of the case and the input URL
	ExpectedError    error  // error expected for the URL; nil when none
}

ValidateURLTestCase is the test case struct definition for ValidateURL()

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL