dockerhub

package
v0.0.0-...-b175f30 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 23, 2022 License: Apache-2.0 Imports: 17 Imported by: 0

README

Dockerhub Datasource

The Dockerhub datasource is a package that fetches data from the Dockerhub API, saves it into Elasticsearch, and enriches the saved data.

Docker Running instructions

To run dockerhub datasource from dads you must set proper environment variables to select dockerhub as an engine and other parameters that determine the intended behavior.

These are the needed environment variables to run dockerhub:

  • DA_DOCKERHUB_ENRICH={1,0}
    • Determines whether the enrichment step is run.
  • DA_DOCKERHUB_ES_URL=http://{ES_USERNAME}:{ES_PASSWORD}@{URL}:{PORT}
    • Elasticsearch URL, including username, password, host and port.
  • DA_DOCKERHUB_NO_INCREMENTAL={1,0}
    • If set to 1, starts from the beginning and does not use the last date to continue enriching.
  • DA_DOCKERHUB_USERNAME=''
    • Optional, for dockerhub repository credentials
  • DA_DOCKERHUB_PASSWORD=''
    • Optional, for dockerhub repository credentials
  • DA_DOCKERHUB_PROJECT_SLUG='{SLUG}'
    • Slug name e.g. yocto
  • DA_DOCKERHUB_REPOSITORIES_JSON='[{"Owner":"{OWNER}","Repository":"{REPOSITORY}","Project":"{PROJECT}","ESIndex":"{INDEX_NAME}"}]'
    • JSON e.g. '[{"Owner":"crops","Repository":"yocto-eol","Project":"yocto","ESIndex":"sds-yocto-dockerhub"}]'
  • DA_DS='{DATASOURCE}'
    • Datasource name should be 'dockerhub'
  • DA_DOCKERHUB_HTTP_TIMEOUT=60s
    • HTTP timeout duration.

See ./scripts/dockerhub.sh for an example of running dads.

Documentation

Index

Constants

View Source
const (
	// APIURL is the dockerhub base URL.
	APIURL = "https://hub.docker.com"
	// APIVersion is the dockerhub API version.
	APIVersion = "v2"
	// APILogin is the login endpoint path
	// (presumably joined with APIURL and APIVersion; confirm in Fetcher.Login).
	APILogin = "users/login"
	// APIRepositories is the dockerhub repositories API path.
	APIRepositories = "repositories"
	// Category is the category label for dockerhub items
	// (presumably the value stored in RepositoryRaw.Category; confirm in the fetcher).
	Category = "dockerhub-data"
	// Dockerhub is the datasource (DS) name.
	Dockerhub string = "dockerhub"
)

Variables

View Source
var (
	// DockerhubSearchFields lists the extra search fields: each key
	// ("name", "namespace") maps to a one-element slice holding the same name.
	DockerhubSearchFields = map[string][]string{
		"name":      {"name"},
		"namespace": {"namespace"},
	}

	// DockerhubRawMapping is the Dockerhub raw index mapping as a JSON document.
	// It enables dynamic mapping, types metadata__updated_on as a date, and
	// declares data.description and data.full_description as indexed text.
	DockerhubRawMapping = []byte(`{"mappings": {"dynamic":true,"properties":{"metadata__updated_on":{"type":"date"},"data":{"properties":{"description":{"type":"text","index":true},"full_description":{"type":"text","index":true}}}}}}`)

	// DockerhubRichMapping is the Dockerhub rich (enriched) index mapping as a
	// JSON document. It types metadata__updated_on as a date, the description
	// fields (description, description_analyzed, full_description_analyzed) as
	// indexed text, and every other listed field as keyword.
	DockerhubRichMapping = []byte(`{"mappings": {"properties":{"metadata__updated_on":{"type":"date"},"description":{"type":"text","index":true},"description_analyzed":{"type":"text","index":true},"full_description_analyzed":{"type":"text","index":true},"origin":{"type":"keyword"},"repository_type":{"type":"keyword"},"tag":{"type":"keyword"},"id":{"type":"keyword"},"metadata__backend_name":{"type":"keyword"},"user":{"type":"keyword"},"uuid":{"type":"keyword"},"project":{"type":"keyword"},"meta_title":{"type":"keyword"},"meta_type":{"type":"keyword"},"meta_state":{"type":"keyword"},"meta_program":{"type":"keyword"},"status":{"type":"keyword"}}}}`)
)

Functions

This section is empty.

Types

type Aggregations

// Aggregations is the "aggregations" object of a TopHits result.
// Only the "last_date" aggregation is decoded.
type Aggregations struct {
	LastDate LastDate `json:"last_date"`
}

Aggregations result

type Auth0Client

// Auth0Client is a source of tokens: GetToken returns a token string or an
// error. Presumably this is an Auth0 access token; confirm against the
// implementation, which is not part of this package's declarations.
type Auth0Client interface {
	GetToken() (string, error)
}

Auth0Client provides a token through GetToken.

type ESClientProvider

// ESClientProvider is the set of Elasticsearch client operations this package
// depends on: adding a document, creating an index (optionally through
// DelayOfCreateIndex, which receives a create function, a uint, a duration,
// an index name and a body), bulk requests, bulk insert/update of
// elastic.BulkData, running a query into a caller-supplied result, and
// fetching a time-valued stat for a field.
//
// The exact semantics of each method (retry policy, meaning of aggType, the
// must/must-not conditions) are defined by the implementation and are not
// visible here.
type ESClientProvider interface {
	Add(index string, documentID string, body []byte) ([]byte, error)
	CreateIndex(index string, body []byte) ([]byte, error)
	Bulk(body []byte) ([]byte, error)
	Get(index string, query map[string]interface{}, result interface{}) (err error)
	GetStat(index string, field string, aggType string, mustConditions []map[string]interface{}, mustNotConditions []map[string]interface{}) (result time.Time, err error)
	BulkInsert(data []elastic.BulkData) ([]byte, error)
	DelayOfCreateIndex(ex func(str string, b []byte) ([]byte, error), uin uint, du time.Duration, index string, data []byte) error
	BulkUpdate(data []elastic.BulkData) ([]byte, error)
}

ESClientProvider used in connecting to ES Client server

type Enricher

// Enricher contains the dockerhub datasource enrich logic. It carries the
// datasource name, the Elasticsearch provider used by its methods, and the
// backend version string passed to NewEnricher.
type Enricher struct {
	DSName                string // Datasource will be used as key for ES
	ElasticSearchProvider ESClientProvider
	BackendVersion        string
}

Enricher contains dockerhub datasource enrich logic

func NewEnricher

func NewEnricher(backendVersion string, esClientProvider ESClientProvider) *Enricher

NewEnricher initiates a new Enricher

func (*Enricher) EnrichItem

func (e *Enricher) EnrichItem(rawItem RepositoryRaw, project string, now time.Time) (*RepositoryEnrich, error)

EnrichItem enriches raw item

func (*Enricher) GetFetchedDataItem

func (e *Enricher) GetFetchedDataItem(repo *Repository, cmdLastDate *time.Time, lastDate *time.Time, noIncremental bool) (result *TopHits, err error)

GetFetchedDataItem gets fetched data items starting from lastDate

func (*Enricher) HandleMapping

func (e *Enricher) HandleMapping(index string) error

HandleMapping creates rich mapping

type Fetcher

// Fetcher contains the dockerhub datasource fetch logic. It holds the
// datasource name, the HTTP and Elasticsearch providers, the dockerhub
// credentials (Username, Password), a Token, and the backend version string.
//
// NOTE(review): Token is presumably the access token returned by Login, and
// IncludeArchived is not explained by anything visible here; confirm both in
// the implementation.
type Fetcher struct {
	DSName                string // Datasource will be used as key for ES
	IncludeArchived       bool
	MultiOrigin           bool // can we store multiple endpoints in a single index?
	HTTPClientProvider    HTTPClientProvider
	ElasticSearchProvider ESClientProvider
	Username              string
	Password              string
	Token                 string
	BackendVersion        string
}

Fetcher contains dockerhub datasource fetch logic

func NewFetcher

func NewFetcher(params *Params, httpClientProvider HTTPClientProvider, esClientProvider ESClientProvider) *Fetcher

NewFetcher initiates a new dockerhub fetcher

func (*Fetcher) FetchItem

func (f *Fetcher) FetchItem(owner string, repository string, now time.Time) (*RepositoryRaw, error)

FetchItem pulls image data

func (*Fetcher) GetLastDate

func (f *Fetcher) GetLastDate(repo *Repository, now time.Time) (time.Time, error)

GetLastDate gets fetching lastDate

func (*Fetcher) Login

func (f *Fetcher) Login(username string, password string) (string, error)

Login logs in to dockerhub in order to obtain an access token for fetching private repositories

type HTTPClientProvider

// HTTPClientProvider is used for connecting to a remote HTTP server.
// Request takes the target URL, the HTTP method, request headers, a request
// body and a params map, and returns the response status code, the response
// body and an error. How params are applied (presumably as query parameters)
// is defined by the implementation; confirm there.
type HTTPClientProvider interface {
	Request(url string, method string, header map[string]string, body []byte, params map[string]string) (statusCode int, resBody []byte, err error)
}

HTTPClientProvider used in connecting to remote http server

type Hits

// Hits is the "hits" object of a TopHits result: the total count, the maximum
// score, and the list of individual hits.
type Hits struct {
	Total    Total        `json:"total"`
	MaxScore float32      `json:"max_score"`
	Hits     []NestedHits `json:"hits"`
}

Hits result

type LastDate

// LastDate is the "last_date" aggregation result, carried both as a number
// ("value") and as a string ("value_as_string"). The unit of the numeric
// value is not visible here (presumably epoch milliseconds; confirm).
type LastDate struct {
	Value         float64 `json:"value"`
	ValueAsString string  `json:"value_as_string"`
}

LastDate result

type LoginResponse

// LoginResponse is the response body of the dockerhub login web API; it
// carries the "token" field.
type LoginResponse struct {
	Token string `json:"token"`
}

LoginResponse from login dockerhub web API

type Manager

// Manager describes the dockerhub manager that Sync runs. Its fields fall
// into three groups separated by blank lines:
//
//   - dockerhub credentials, fetcher/enricher backend versions, the
//     Enrich/EnrichOnly switches, Elasticsearch connection settings, the HTTP
//     timeout, the repositories to process, an optional FromDate and the
//     NoIncremental switch;
//   - affiliation API, ES cache and Auth0-related settings plus
//     Environment/ProjectSlug/Slug;
//   - Retries and Delay, and the Gap and web hook URLs.
//
// NOTE(review): both ProjectSlug and Slug exist; the difference between them
// is not visible here. Retries/Delay presumably govern index-creation
// retries (compare ESClientProvider.DelayOfCreateIndex); confirm in Sync.
type Manager struct {
	Username               string
	Password               string
	FetcherBackendVersion  string
	EnricherBackendVersion string
	EnrichOnly             bool
	Enrich                 bool
	ESUrl                  string
	ESUsername             string
	ESPassword             string
	HTTPTimeout            time.Duration
	Repositories           []*Repository
	FromDate               *time.Time
	NoIncremental          bool

	AffAPI           string
	ProjectSlug      string
	AffBaseURL       string
	ESCacheURL       string
	ESCacheUsername  string
	ESCachePassword  string
	AuthGrantType    string
	AuthClientID     string
	AuthClientSecret string
	AuthAudience     string
	Auth0URL         string
	Environment      string
	Slug             string

	Retries    uint
	Delay      time.Duration
	GapURL     string
	WebHookURL string
}

Manager describes dockerhub manager

func NewManager

func NewManager(param Param) *Manager

NewManager initiates dockerhub manager instance

func (*Manager) Sync

func (m *Manager) Sync() error

Sync runs dockerhub fetch and enrich according to passed parameters

type NestedHits

// NestedHits is one element of Hits.Hits. It carries the hit's "_index",
// "_type", "_id" and "_score" fields, and decodes "_source" into a
// RepositoryRaw.
type NestedHits struct {
	Index  string         `json:"_index"`
	Type   string         `json:"_type"`
	ID     string         `json:"_id"`
	Score  float64        `json:"_score"`
	Source *RepositoryRaw `json:"_source"`
}

NestedHits result

type Param

// Param holds the parameters passed to NewManager to create a dockerhub
// manager. Most fields have a same-named counterpart on Manager; EsUser,
// EsPassword and SlackWebHookURL presumably map to Manager.ESUsername,
// Manager.ESPassword and Manager.WebHookURL (confirm in NewManager).
//
// NOTE(review): EndPoint, Fetch, EsIndex and Project have no visible
// counterpart on Manager; whether NewManager uses them cannot be told from
// here.
type Param struct {
	Username               string
	Password               string
	EndPoint               string
	FetcherBackendVersion  string
	EnricherBackendVersion string
	Fetch                  bool
	Enrich                 bool
	ESUrl                  string
	EsUser                 string
	EsPassword             string
	EsIndex                string
	FromDate               *time.Time
	Project                string
	Retries                uint
	Delay                  time.Duration
	GapURL                 string
	AffAPI                 string
	ProjectSlug            string
	AffBaseURL             string
	ESCacheURL             string
	ESCacheUsername        string
	ESCachePassword        string
	AuthGrantType          string
	AuthClientID           string
	AuthClientSecret       string
	AuthAudience           string
	Auth0URL               string
	Environment            string
	Slug                   string
	EnrichOnly             bool
	HTTPTimeout            time.Duration
	Repositories           []*Repository
	NoIncremental          bool
	SlackWebHookURL        string
}

Param holds the parameters required for creating a new instance of the dockerhub manager

type Params

// Params holds the parameters NewFetcher takes to build a dockerhub Fetcher:
// the dockerhub credentials and the backend version string.
type Params struct {
	Username       string
	Password       string
	BackendVersion string
}

Params holds the required parameters for the dockerhub fetcher

type Permissions

// Permissions is the "permissions" object of a RepositoryResponse: the read,
// write and admin flags.
type Permissions struct {
	Read  bool `json:"read"`
	Write bool `json:"write"`
	Admin bool `json:"admin"`
}

Permissions response

type Repository

// Repository represents one dockerhub repository to process: its owner and
// repository name, the project it belongs to, and the Elasticsearch index
// name. The field names match the keys of the DA_DOCKERHUB_REPOSITORIES_JSON
// entries, e.g.
// {"Owner":"crops","Repository":"yocto-eol","Project":"yocto","ESIndex":"sds-yocto-dockerhub"}.
type Repository struct {
	Owner      string
	Repository string
	Project    string
	ESIndex    string
}

Repository represents dockerhub repository data

type RepositoryEnrich

// RepositoryEnrich represents the dockerhub repository enriched model, as
// returned by Enricher.EnrichItem. It combines repository attributes
// (description, privacy, pull/star counts, type, user, status), analyzed
// description fields, integer marker fields (is_event, is_docker_image,
// is_dockerhub_dockerhub), and metadata (backend version, tag, uuid, origin
// and the metadata__* timestamps).
//
// Pointer-typed fields (RepositoryLabels, MetadataFilterRaw, Offset,
// BuildOnCloud) are encoded as JSON null when nil, since no omitempty tag is
// set.
type RepositoryEnrich struct {
	ID             string `json:"id"`
	Project        string `json:"project"`
	Affiliation    string `json:"affiliation"`
	Description    string `json:"description"`
	IsPrivate      bool   `json:"is_private"`
	IsAutomated    bool   `json:"is_automated"`
	PullCount      int    `json:"pull_count"`
	RepositoryType string `json:"repository_type"`
	User           string `json:"user"`
	Status         int    `json:"status"`
	StarCount      int    `json:"star_count"`

	IsEvent                 int    `json:"is_event"`
	IsDockerImage           int    `json:"is_docker_image"`
	DescriptionAnalyzed     string `json:"description_analyzed"`
	FullDescriptionAnalyzed string `json:"full_description_analyzed"`

	CreationDate         time.Time `json:"creation_date"`
	IsDockerhubDockerhub int       `json:"is_dockerhub_dockerhub"`
	RepositoryLabels     *[]string `json:"repository_labels"`
	MetadataFilterRaw    *string   `json:"metadata__filter_raw"`

	LastUpdated        time.Time `json:"last_updated"`
	Offset             *string   `json:"offset"`
	MetadataEnrichedOn time.Time `json:"metadata__enriched_on"`

	BackendVersion      string    `json:"backend_version"`
	Tag                 string    `json:"tag"`
	UUID                string    `json:"uuid"`
	Origin              string    `json:"origin"`
	MetadataUpdatedOn   time.Time `json:"metadata__updated_on"`
	MetadataBackendName string    `json:"metadata__backend_name"`
	MetadataTimestamp   time.Time `json:"metadata__timestamp"`
	BuildOnCloud        *string   `json:"build_on_cloud"`
	ProjectTS           int64     `json:"project_ts"`
}

RepositoryEnrich represents dockerhub repository enriched model

type RepositoryRaw

// RepositoryRaw represents the dockerhub repository raw model, as returned by
// Fetcher.FetchItem and decoded from the "_source" of a NestedHits. It wraps
// the dockerhub API payload (Data) with search fields and metadata: backend
// name and version, tag, uuid, origin, category, and update/timestamp values
// carried both as float64 (updated_on, timestamp) and as time.Time
// (metadata__updated_on, metadata__timestamp).
type RepositoryRaw struct {
	BackendVersion           string                  `json:"backend_version"`
	Data                     *RepositoryResponse     `json:"data"`
	Tag                      string                  `json:"tag"`
	UUID                     string                  `json:"uuid"`
	SearchFields             *RepositorySearchFields `json:"search_fields"`
	Origin                   string                  `json:"origin"`
	UpdatedOn                float64                 `json:"updated_on"`
	MetadataUpdatedOn        time.Time               `json:"metadata__updated_on"`
	BackendName              string                  `json:"backend_name"`
	MetadataTimestamp        time.Time               `json:"metadata__timestamp"`
	Timestamp                float64                 `json:"timestamp"`
	Category                 string                  `json:"category"`
	ClassifiedFieldsFiltered *string                 `json:"classified_fields_filtered"`
}

RepositoryRaw represents dockerhub repository raw model

type RepositoryResponse

// RepositoryResponse is the data model for the dockerhub get-repository
// result; it is stored as the Data field of RepositoryRaw.
//
// Status, IsPrivate, StarCount and PullCount are pointers, so a missing or
// null JSON value decodes to nil rather than to a zero value.
//
// NOTE(review): Affiliation and FetchedOn are presumably filled in by this
// package rather than by the dockerhub API; confirm in Fetcher.FetchItem.
type RepositoryResponse struct {
	User            string      `json:"user"`
	Name            string      `json:"name"`
	Namespace       string      `json:"namespace"`
	RepositoryType  string      `json:"repository_type"`
	Status          *int        `json:"status"`
	Description     string      `json:"description"`
	IsPrivate       *bool       `json:"is_private"`
	IsAutomated     bool        `json:"is_automated"`
	CanEdit         bool        `json:"can_edit"`
	StarCount       *int        `json:"star_count"`
	PullCount       *int        `json:"pull_count"`
	LastUpdated     time.Time   `json:"last_updated"`
	IsMigrated      bool        `json:"is_migrated"`
	HasStarred      bool        `json:"has_starred"`
	FullDescription string      `json:"full_description"`
	Affiliation     string      `json:"affiliation"`
	Permissions     Permissions `json:"permissions"`
	FetchedOn       float64     `json:"fetched_on"`
}

RepositoryResponse data model represents dockerhub get repository results

type RepositorySearchFields

// RepositorySearchFields is the "search_fields" object of a RepositoryRaw:
// the repository name, an item ID and the namespace.
type RepositorySearchFields struct {
	Name      string `json:"name"`
	ItemID    string `json:"item_id"`
	Namespace string `json:"namespace"`
}

RepositorySearchFields holds the search fields (name, item ID and namespace) stored with a raw repository item

type TopHits

// TopHits is the search result returned by Enricher.GetFetchedDataItem: the
// "took" value, the hits, and the aggregations.
type TopHits struct {
	Took         int          `json:"took"`
	Hits         Hits         `json:"hits"`
	Aggregations Aggregations `json:"aggregations"`
}

TopHits result

type Total

// Total is the "total" object of Hits: a count ("value") and its "relation"
// string. The possible relation values are not visible here.
type Total struct {
	Value    int    `json:"value"`
	Relation string `json:"relation"`
}

Total result

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL