spynet

package module
v0.1.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 3, 2023 License: Apache-2.0 Imports: 8 Imported by: 0

README

spynet v0.1.0

Changelog

  • Init

Libraries

Examples

package main

import (
	"fmt"
	"net/http"
	
	"github.com/CURVoid/spynet/rule"
	"github.com/CURVoid/spynet"
)

// handler is the per-response callback given to the crawler. It returns the
// URL of the request that produced r, rendered as a string.
func handler(r *http.Response) string {
	requested := r.Request.URL
	return requested.String()
}

func main() {
	start := "https://books.toscrape.com/index.html"
	hosts := []string{"books.toscrape.com"}
	rules := []rule.Rule{F
		rule.New([]string{"category"}, []string{}),
		rule.New([]string{"catalogue"}, []string{"category"}),
	}

	crawler := spynet.New[string](start, hosts, rules, handler)

	outputs, err := crawler.Crawl()
	if err != nil {
		panic(err)
	}

	fmt.Printf("passed - %d\n", len(outputs))
	for _, output := range outputs {
		println(output)
	}
}

Roadmap

  • Add proxy support

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Crawler

// Crawler is parameterized by T, the type of value the handler passed to New
// produces for an *http.Response. Its Crawl method returns a []T and an error.
type Crawler[T any] struct {
	// contains filtered or unexported fields
}

func New

func New[T any](startURL string, allowed_hosts []string, rules []rule.Rule, handler func(*http.Response) T) Crawler[T]

func (*Crawler[T]) Crawl

func (self *Crawler[T]) Crawl() ([]T, error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL