netspy

package module
v0.1.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 3, 2023 License: Apache-2.0 Imports: 7 Imported by: 0

README

netspy v0.1.0

Changelog

  • Init

Libraries

Examples

package main

import (
	"fmt"
	"net/http"
	
	"github.com/CURVoid/netspy"
)

// handler is the per-response callback given to the crawler. It reports the
// URL of the request that produced the response, rendered as a string.
func handler(r *http.Response) string {
	requested := r.Request.URL
	return requested.String()
}

// main runs a crawl that starts at the books.toscrape.com index page, is
// restricted to that host, and prints how many results were collected followed
// by each result (the string produced by handler for a response).
func main() {
	start := "https://books.toscrape.com/index.html"
	hosts := []string{"books.toscrape.com"}
	// NewRule takes an allow list and a deny list, in that order.
	// NOTE(review): how the patterns are matched against a URL is defined by
	// Rule.Pass in the netspy package — confirm there.
	rules := []netspy.Rule{
		netspy.NewRule([]string{"category"}, []string{}),
		netspy.NewRule([]string{"catalogue"}, []string{"category"}),
	}

	// The package is imported as netspy; the type argument fixes the handler's
	// result type to string.
	crawler := netspy.NewCrawler[string](start, hosts, rules, handler)

	outputs, err := crawler.Crawl()
	if err != nil {
		panic(err)
	}

	fmt.Printf("passed - %d\n", len(outputs))
	// Use fmt.Println rather than the builtin println so the results go to
	// stdout, the same stream as the summary line above.
	for _, output := range outputs {
		fmt.Println(output)
	}
}

Roadmap

  • Add proxy support

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Crawler

type Crawler[T any] struct {
	// contains filtered or unexported fields
}

func NewCrawler

func NewCrawler[T any](startURL string, allowed_hosts []string, rules []Rule, handler func(*http.Response) T) Crawler[T]

func (*Crawler[T]) Crawl

func (crawler *Crawler[T]) Crawl() ([]T, error)

type Rule

type Rule struct {
	// contains filtered or unexported fields
}

func NewRule

func NewRule(allow []string, deny []string) Rule

func (*Rule) Pass

func (rule *Rule) Pass(parsedURL *url.URL) bool

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL