scraper

package module
v0.0.0-...-62de8c0
Published: Mar 16, 2022 License: MIT Imports: 14 Imported by: 0

README

Scraper

Scraper is a simple library that scrapes and monitors multiple URLs and exposes Prometheus metrics.

Usage

package main

import (
	"fmt"
	"log"
	"net/http"
	"net/url"
	"os"
	"strings"
	"time"

	"github.com/arriqaaq/scraper"
	"github.com/gorilla/mux"
)

var (

	// server settings
	port         = 8080
	readTimeout  = 5 * time.Second
	writeTimeout = 5 * time.Second
	// targets to scrape
	targets []*scraper.Target
	// scraper settings
	storeSize = 10
)

// healthzHandler for reporting health
func healthzHandler(w http.ResponseWriter, r *http.Request) {

	switch r.Method {
	case http.MethodGet:
		w.WriteHeader(http.StatusOK)
		w.Write([]byte("OK"))
	default:
		w.WriteHeader(http.StatusMethodNotAllowed)
	}
}

func parseURLs(urls []string) []*scraper.Target {
	var targets []*scraper.Target

	for _, u := range urls {
		u, err := url.ParseRequestURI(u)
		if err != nil {
			continue
		}
		targets = append(targets, scraper.NewTarget(u))
	}
	return targets
}

func main() {

	// URLs to monitor
	var urls = []string{"http://google.com", "http://cloudflare.com", "http://reddit.com"}

	targets = parseURLs(urls)

	fmt.Printf("URLs to monitor: %+v\n", targets)

	scrapePool, err := scraper.NewScrapePool(
		&scraper.ScrapeConfig{
			ScrapeInterval: 3 * time.Second,
			ScrapeTimeout:  2 * time.Second,
			StoreSize:      storeSize,
		},
	)
	if err != nil {
		panic(err)
	}

	scraper.RegisterExporter(scrapePool)

	// start scraping the targets
	scrapePool.Start(targets)

	// create Router
	router := mux.NewRouter()

	// register handlers
	router.Handle("/metrics", scraper.PrometheusHandler())
	router.HandleFunc("/healthz", healthzHandler)

	// configure the HTTP server and start it
	s := &http.Server{
		Addr:           fmt.Sprintf(":%d", port),
		ReadTimeout:    readTimeout,
		WriteTimeout:   writeTimeout,
		MaxHeaderBytes: http.DefaultMaxHeaderBytes,
		Handler:        router,
	}

	log.Fatal(s.ListenAndServe())
}

Metrics Screenshots

[Screenshots: Dashboard, All Stats, Prometheus Graph]

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func PrometheusHandler

func PrometheusHandler() http.Handler

PrometheusHandler returns the HTTP handler that serves the Prometheus metrics.

func RegisterExporter

func RegisterExporter(e *ScrapePool)

RegisterExporter registers the exporter with Prometheus.
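
Together, RegisterExporter and PrometheusHandler are all that is needed to expose the scraped stats over HTTP. A minimal sketch using only net/http (the port, interval, and timeout values are illustrative, and pool construction is covered under ScrapePool below):

package main

import (
	"log"
	"net/http"
	"time"

	"github.com/arriqaaq/scraper"
)

func main() {
	// Build a pool; the config values here are placeholders.
	pool, err := scraper.NewScrapePool(&scraper.ScrapeConfig{
		ScrapeInterval: 5 * time.Second,
		ScrapeTimeout:  2 * time.Second,
		StoreSize:      10,
	})
	if err != nil {
		log.Fatal(err)
	}

	// RegisterExporter wires the pool's exporter into the Prometheus registry.
	scraper.RegisterExporter(pool)

	// PrometheusHandler serves the registered metrics; the README example
	// mounts it on gorilla/mux, but plain net/http works as well.
	http.Handle("/metrics", scraper.PrometheusHandler())
	log.Fatal(http.ListenAndServe(":8080", nil))
}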

Types

type Exporter

type Exporter struct {
	// contains filtered or unexported fields
}

Exporter exports stats in Prometheus format.

func NewExporter

func NewExporter(options Metrics, chSize int) *Exporter

NewExporter creates a new exporter

func (*Exporter) Collect

func (e *Exporter) Collect(ch chan<- prometheus.Metric)

Collect collects data to be consumed by Prometheus.

func (*Exporter) Describe

func (e *Exporter) Describe(ch chan<- *prometheus.Desc)

Describe describes the metrics for Prometheus.

type Metrics

type Metrics struct {
	TargetURLStatus       *prometheus.GaugeVec
	TargetURLResponseTime *prometheus.HistogramVec
}

Metrics is a collection of the URL metrics.

func NewMetrics

func NewMetrics() Metrics

NewMetrics builds a new set of metric options.
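
Because Collect and Describe have the prometheus.Collector method set, an Exporter built from NewMetrics can presumably also be registered with a Prometheus registry directly. A sketch under that assumption (the usual path is RegisterExporter above; the chSize value is illustrative):

package main

import (
	"log"
	"net/http"

	"github.com/arriqaaq/scraper"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
	// NewMetrics builds the status gauge and response-time histogram vectors.
	m := scraper.NewMetrics()

	// NewExporter wraps them in a collector; chSize is assumed to size the
	// exporter's internal channel, mirroring StoreSize in ScrapeConfig.
	e := scraper.NewExporter(m, 10)

	// Collect and Describe satisfy prometheus.Collector, so the exporter can
	// be registered with a dedicated registry and served via promhttp.
	reg := prometheus.NewRegistry()
	reg.MustRegister(e)

	http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))
	log.Fatal(http.ListenAndServe(":8080", nil))
}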

type ScrapeConfig

type ScrapeConfig struct {
	// How frequently to scrape the targets of this scrape config.
	ScrapeInterval time.Duration
	// The timeout for scraping targets of this config.
	ScrapeTimeout time.Duration
	// The channel size for the storage.
	StoreSize int
	// Jitter seed
	JitterSeed uint64
}

ScrapeConfig describes the config for the scraper pool.

type ScrapePool

type ScrapePool struct {
	*Exporter
	// contains filtered or unexported fields
}

ScrapePool manages scrapes for sets of targets.

func NewScrapePool

func NewScrapePool(
	cfg *ScrapeConfig,
) (*ScrapePool, error)

func (*ScrapePool) Start

func (sp *ScrapePool) Start(targets []*Target)

Start starts scrape loops for new targets.

func (*ScrapePool) Stop

func (sp *ScrapePool) Stop()

Stop terminates all scrape loops and returns once they have all terminated.
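
A sketch of the full pool lifecycle, including the Stop call that the README example omits. The URL, timing values, and jitter seed are illustrative; JitterSeed is assumed to offset scrape start times, as in Prometheus:

package main

import (
	"log"
	"net/url"
	"time"

	"github.com/arriqaaq/scraper"
)

func main() {
	u, err := url.ParseRequestURI("http://example.com")
	if err != nil {
		log.Fatal(err)
	}

	pool, err := scraper.NewScrapePool(&scraper.ScrapeConfig{
		ScrapeInterval: 10 * time.Second,
		ScrapeTimeout:  3 * time.Second,
		StoreSize:      50,
		JitterSeed:     42,
	})
	if err != nil {
		log.Fatal(err)
	}

	// Start launches a scrape loop per target and returns immediately.
	pool.Start([]*scraper.Target{scraper.NewTarget(u)})

	// Let the loops run for a while, then shut them down cleanly.
	time.Sleep(time.Minute)
	pool.Stop()
}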

type Storage

type Storage struct {
	// contains filtered or unexported fields
}

Storage stores the target responses collected by the scrape loops.

func (*Storage) Add

func (t *Storage) Add(url *url.URL, health TargetHealth, duration time.Duration) error

Add implements Store.

func (*Storage) Commit

func (t *Storage) Commit() []TargetResponse

Commit implements Store.

type Store

type Store interface {
	// Add adds a target response for the given target.
	Add(url *url.URL, health TargetHealth, duration time.Duration) error
	// Commit commits the entries and clears the store. This should be called when all the entries are committed/reported.
	Commit() []TargetResponse
}

Store provides append operations against a storage.

func NewStorage

func NewStorage(chSize int) Store

NewStorage creates a storage for storing target responses.
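
The scrape pool uses a Store internally, but NewStorage can presumably also be used on its own to accumulate and drain responses. A sketch under that assumption (the URL, health value, and duration are illustrative):

package main

import (
	"fmt"
	"log"
	"net/url"
	"time"

	"github.com/arriqaaq/scraper"
)

func main() {
	// chSize is assumed to bound how many responses are buffered before Commit.
	store := scraper.NewStorage(16)

	u, err := url.ParseRequestURI("http://example.com")
	if err != nil {
		log.Fatal(err)
	}

	// Record one successful probe that took 120ms.
	if err := store.Add(u, scraper.HealthGood, 120*time.Millisecond); err != nil {
		log.Fatal(err)
	}

	// Commit drains the store and returns everything recorded so far.
	for _, resp := range store.Commit() {
		fmt.Printf("%s status=%v took=%s\n", resp.URL, resp.Status, resp.ResponseTime)
	}
}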

type Target

type Target struct {
	// contains filtered or unexported fields
}

Target refers to a singular HTTP or HTTPS endpoint.

func NewTarget

func NewTarget(url *url.URL) *Target

NewTarget creates a target for querying.

func (*Target) URL

func (t *Target) URL() *url.URL

URL returns the target's URL.

type TargetHealth

type TargetHealth float64

TargetHealth describes the health state of a target.

const (
	HealthUnknown TargetHealth = -1
	HealthGood    TargetHealth = 1
	HealthBad     TargetHealth = 0
)

The possible health states of a target based on the last performed scrape.

type TargetResponse

type TargetResponse struct {
	URL          *url.URL      `json:"url"`
	Status       TargetHealth  `json:"status"`
	ResponseTime time.Duration `json:"response_time"`
}

TargetResponse refers to the query response from the target
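
Taken together, a TargetResponse pairs a Target's URL with its last observed health and response time. A small sketch that builds one by hand and prints it (the URL and latency are illustrative):

package main

import (
	"fmt"
	"log"
	"net/url"
	"time"

	"github.com/arriqaaq/scraper"
)

func main() {
	u, err := url.ParseRequestURI("https://example.com")
	if err != nil {
		log.Fatal(err)
	}

	// NewTarget wraps the parsed URL; URL() returns it back.
	t := scraper.NewTarget(u)

	resp := scraper.TargetResponse{
		URL:          t.URL(),
		Status:       scraper.HealthGood, // 1; HealthBad is 0, HealthUnknown is -1
		ResponseTime: 87 * time.Millisecond,
	}

	fmt.Printf("%s health=%v latency=%s\n", resp.URL, resp.Status, resp.ResponseTime)
}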
