scraper

package module
v0.0.0-...-62de8c0
Published: Mar 16, 2022 License: MIT Imports: 14 Imported by: 0

README

Scraper

Scraper is a simple library that scrapes and monitors multiple URLs and exposes Prometheus metrics.

Usage

package main

import (
	"fmt"
	"log"
	"net/http"
	"net/url"
	"os"
	"strings"
	"time"

	"github.com/arriqaaq/scraper"
	"github.com/gorilla/mux"
)

var (

	// server settings
	port         = 8080
	readTimeout  = 5 * time.Second
	writeTimeout = 5 * time.Second
	// targets to scrape
	targets []*scraper.Target
	// scraper settings
	storeSize = 10
)

// healthzHandler for reporting health
func healthzHandler(w http.ResponseWriter, r *http.Request) {

	switch r.Method {
	case http.MethodGet:
		w.WriteHeader(http.StatusOK)
		w.Write([]byte("OK"))
	default:
		w.WriteHeader(http.StatusMethodNotAllowed)
	}
}

func parseURLs(urls []string) []*scraper.Target {
	var targets []*scraper.Target

	for _, u := range urls {
		u, err := url.ParseRequestURI(u)
		if err != nil {
			continue
		}
		targets = append(targets, scraper.NewTarget(u))
	}
	return targets
}

func main() {

	// URLs to monitor
	var urls = []string{"http://google.com", "http://cloudflare.com", "http://reddit.com"}

	targets = parseURLs(urls)

	fmt.Printf("URLs to monitor: %+v\n", targets)

	scrapePool, err := scraper.NewScrapePool(
		&scraper.ScrapeConfig{
			ScrapeInterval: 3 * time.Second,
			ScrapeTimeout:  2 * time.Second,
			StoreSize:      storeSize,
		},
	)
	if err != nil {
		panic(err)
	}

	scraper.RegisterExporter(scrapePool)

	// start scraping the targets
	scrapePool.Start(targets)

	// create Router
	router := mux.NewRouter()

	// register handlers
	router.Handle("/metrics", scraper.PrometheusHandler())
	router.HandleFunc("/healthz", healthzHandler)

	// configure the HTTP server and start it
	s := &http.Server{
		Addr:           fmt.Sprintf(":%d", port),
		ReadTimeout:    readTimeout,
		WriteTimeout:   writeTimeout,
		MaxHeaderBytes: http.DefaultMaxHeaderBytes,
		Handler:        router,
	}

	log.Fatal(s.ListenAndServe())
}

Metrics Screenshots

[Screenshots: Dashboard, All Stats, Prometheus Graph]

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func PrometheusHandler

func PrometheusHandler() http.Handler

PrometheusHandler returns the HTTP handler that serves the Prometheus metrics.

func RegisterExporter

func RegisterExporter(e *ScrapePool)

RegisterExporter registers the exporter with Prometheus.
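
Together, RegisterExporter and PrometheusHandler are all that is needed to expose the scraped stats over HTTP. A minimal sketch using only net/http (the port, interval, and timeout values are illustrative, and pool construction is covered under ScrapePool below):

package main

import (
	"log"
	"net/http"
	"time"

	"github.com/arriqaaq/scraper"
)

func main() {
	// Build a pool; the config values here are placeholders.
	pool, err := scraper.NewScrapePool(&scraper.ScrapeConfig{
		ScrapeInterval: 5 * time.Second,
		ScrapeTimeout:  2 * time.Second,
		StoreSize:      10,
	})
	if err != nil {
		log.Fatal(err)
	}

	// RegisterExporter wires the pool's exporter into the Prometheus registry.
	scraper.RegisterExporter(pool)

	// PrometheusHandler serves the registered metrics; the README example
	// mounts it on gorilla/mux, but plain net/http works as well.
	http.Handle("/metrics", scraper.PrometheusHandler())
	log.Fatal(http.ListenAndServe(":8080", nil))
}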

Types

type Exporter

type Exporter struct {
	// contains filtered or unexported fields
}

Exporter exports stats in Prometheus format.

func NewExporter

func NewExporter(options Metrics, chSize int) *Exporter

NewExporter creates a new exporter

func (*Exporter) Collect

func (e *Exporter) Collect(ch chan<- prometheus.Metric)

Collect collects data to be consumed by Prometheus.

func (*Exporter) Describe

func (e *Exporter) Describe(ch chan<- *prometheus.Desc)

Describe describes the metrics for Prometheus.

type Metrics

type Metrics struct {
	TargetURLStatus       *prometheus.GaugeVec
	TargetURLResponseTime *prometheus.HistogramVec
}

Metrics is a collection of the URL metrics.

func NewMetrics

func NewMetrics() Metrics

NewMetrics builds a new set of metric options.
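
Because Collect and Describe have the prometheus.Collector method set, an Exporter built from NewMetrics can presumably also be registered with a Prometheus registry directly. A sketch under that assumption (the usual path is RegisterExporter above; the chSize value is illustrative):

package main

import (
	"log"
	"net/http"

	"github.com/arriqaaq/scraper"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
	// NewMetrics builds the status gauge and response-time histogram vectors.
	m := scraper.NewMetrics()

	// NewExporter wraps them in a collector; chSize is assumed to size the
	// exporter's internal channel, mirroring StoreSize in ScrapeConfig.
	e := scraper.NewExporter(m, 10)

	// Collect and Describe satisfy prometheus.Collector, so the exporter can
	// be registered with a dedicated registry and served via promhttp.
	reg := prometheus.NewRegistry()
	reg.MustRegister(e)

	http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))
	log.Fatal(http.ListenAndServe(":8080", nil))
}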

type ScrapeConfig

type ScrapeConfig struct {
	// How frequently to scrape the targets of this scrape config.
	ScrapeInterval time.Duration
	// The timeout for scraping targets of this config.
	ScrapeTimeout time.Duration
	// The channel size for the storage.
	StoreSize int
	// Jitter seed
	JitterSeed uint64
}

ScrapeConfig describes the config for the scraper pool.

type ScrapePool

type ScrapePool struct {
	*Exporter
	// contains filtered or unexported fields
}

ScrapePool manages scrapes for sets of targets.

func NewScrapePool

func NewScrapePool(
	cfg *ScrapeConfig,
) (*ScrapePool, error)

func (*ScrapePool) Start

func (sp *ScrapePool) Start(targets []*Target)

Start starts scrape loops for new targets.

func (*ScrapePool) Stop

func (sp *ScrapePool) Stop()

Stop terminates all scrape loops and returns once they have all terminated.
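
A sketch of the full pool lifecycle, including the Stop call that the README example omits. The URL, timing values, and jitter seed are illustrative; JitterSeed is assumed to offset scrape start times, as in Prometheus:

package main

import (
	"log"
	"net/url"
	"time"

	"github.com/arriqaaq/scraper"
)

func main() {
	u, err := url.ParseRequestURI("http://example.com")
	if err != nil {
		log.Fatal(err)
	}

	pool, err := scraper.NewScrapePool(&scraper.ScrapeConfig{
		ScrapeInterval: 10 * time.Second,
		ScrapeTimeout:  3 * time.Second,
		StoreSize:      50,
		JitterSeed:     42,
	})
	if err != nil {
		log.Fatal(err)
	}

	// Start launches a scrape loop per target and returns immediately.
	pool.Start([]*scraper.Target{scraper.NewTarget(u)})

	// Let the loops run for a while, then shut them down cleanly.
	time.Sleep(time.Minute)
	pool.Stop()
}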

type Storage

type Storage struct {
	// contains filtered or unexported fields
}

Storage stores the target responses collected by the scrape loops.

func (*Storage) Add

func (t *Storage) Add(url *url.URL, health TargetHealth, duration time.Duration) error

Add implements Store.

func (*Storage) Commit

func (t *Storage) Commit() []TargetResponse

Commit implements Store.

type Store

type Store interface {
	// Add adds a target response for the given target.
	Add(url *url.URL, health TargetHealth, duration time.Duration) error
	// Commit commits the entries and clears the store. This should be called when all the entries are committed/reported.
	Commit() []TargetResponse
}

Store provides append operations against a storage.

func NewStorage

func NewStorage(chSize int) Store

NewStorage creates a storage for storing target responses.
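
The scrape pool uses a Store internally, but NewStorage can presumably also be used on its own to accumulate and drain responses. A sketch under that assumption (the URL, health value, and duration are illustrative):

package main

import (
	"fmt"
	"log"
	"net/url"
	"time"

	"github.com/arriqaaq/scraper"
)

func main() {
	// chSize is assumed to bound how many responses are buffered before Commit.
	store := scraper.NewStorage(16)

	u, err := url.ParseRequestURI("http://example.com")
	if err != nil {
		log.Fatal(err)
	}

	// Record one successful probe that took 120ms.
	if err := store.Add(u, scraper.HealthGood, 120*time.Millisecond); err != nil {
		log.Fatal(err)
	}

	// Commit drains the store and returns everything recorded so far.
	for _, resp := range store.Commit() {
		fmt.Printf("%s status=%v took=%s\n", resp.URL, resp.Status, resp.ResponseTime)
	}
}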

type Target

type Target struct {
	// contains filtered or unexported fields
}

Target refers to a singular HTTP or HTTPS endpoint.

func NewTarget

func NewTarget(url *url.URL) *Target

NewTarget creates a target for querying.

func (*Target) URL

func (t *Target) URL() *url.URL

URL returns the target's URL.

type TargetHealth

type TargetHealth float64

TargetHealth describes the health state of a target.

const (
	HealthUnknown TargetHealth = -1
	HealthGood    TargetHealth = 1
	HealthBad     TargetHealth = 0
)

The possible health states of a target based on the last performed scrape.

type TargetResponse

type TargetResponse struct {
	URL          *url.URL      `json:"url"`
	Status       TargetHealth  `json:"status"`
	ResponseTime time.Duration `json:"response_time"`
}

TargetResponse refers to the query response from the target
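
Taken together, a TargetResponse pairs a Target's URL with its last observed health and response time. A small sketch that builds one by hand and prints it (the URL and latency are illustrative):

package main

import (
	"fmt"
	"log"
	"net/url"
	"time"

	"github.com/arriqaaq/scraper"
)

func main() {
	u, err := url.ParseRequestURI("https://example.com")
	if err != nil {
		log.Fatal(err)
	}

	// NewTarget wraps the parsed URL; URL() returns it back.
	t := scraper.NewTarget(u)

	resp := scraper.TargetResponse{
		URL:          t.URL(),
		Status:       scraper.HealthGood, // 1; HealthBad is 0, HealthUnknown is -1
		ResponseTime: 87 * time.Millisecond,
	}

	fmt.Printf("%s health=%v latency=%s\n", resp.URL, resp.Status, resp.ResponseTime)
}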
