importutil

package
v0.8.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 26, 2024 License: Apache-2.0 Imports: 24 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func ImportExisting

func ImportExisting(db *ImportDb, client configV1.ConfigClient, kind configV1.Kind, keyNormalizer KeyNormalizer) error

func NewHttpClient added in v0.6.0

func NewHttpClient(timeout time.Duration, followRedirects bool) *http.Client

func NewRecordReader

func NewRecordReader(fileOrDir string, decoder RecordDecoder, filePattern string) (l *recordReader, err error)

Types

type DuplicateKindReporter

type DuplicateKindReporter struct {
	*ImportDb
}

func (DuplicateKindReporter) Report

func (d DuplicateKindReporter) Report(w io.Writer) error

type DuplicateReportRecord

type DuplicateReportRecord struct {
	Name    string
	Records []Record
}

type ErrAlreadyExists added in v0.6.0

type ErrAlreadyExists string

func (ErrAlreadyExists) Error added in v0.6.0

func (e ErrAlreadyExists) Error() string

type Executor

type Executor[P Payload] struct {
	Queue chan Job[P]
	// contains filtered or unexported fields
}

Executor is a work queue that executes jobs in concurrent workers.

func NewExecutor

func NewExecutor[P Payload](nrOfWorkers int, do func(P) error, onError func(Job[P])) *Executor[P]

NewExecutor creates a work queue with nrOfWorkers workers.

do is the function that will be called for each job. onError is the function that will be called for each job that fails.

To close the work queue, call Wait() after all jobs have been queued. Writing to the Queue channel after Wait() has been called will panic.

func (*Executor[P]) Wait

func (e *Executor[P]) Wait() (int, int, int)

Wait waits for all jobs to complete. It returns the number of jobs completed, the number of jobs that succeeded and the number of jobs that failed.

type ExistsCode

type ExistsCode int
const (
	Undefined ExistsCode = iota
	NewKey
	NewId
	Exists
)

func (ExistsCode) ExistsInVeidemann

func (e ExistsCode) ExistsInVeidemann() bool

func (ExistsCode) String

func (e ExistsCode) String() string

type ImportDb

type ImportDb struct {
	// contains filtered or unexported fields
}

func NewImportDb

func NewImportDb(dbDir string, truncate bool) (*ImportDb, error)

func (*ImportDb) Close

func (d *ImportDb) Close()

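Close closes the database and stops the GC ticker. (The original description ends mid-sentence with "and waits for"; what Close waits for is not stated in the documentation.)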

func (*ImportDb) Get

func (d *ImportDb) Get(key string) (ids []string, err error)

Get returns the ids for the key

func (*ImportDb) Iterate

func (d *ImportDb) Iterate(fn func([]byte, []byte)) error

Iterate iterates over all keys in the db and calls the function with the key and value. The function is not called in parallel.

func (*ImportDb) RunValueLogGC

func (d *ImportDb) RunValueLogGC(discardRatio float64)

func (*ImportDb) Set

func (d *ImportDb) Set(key string, id string) (code ExistsCode, ids []string, err error)

Set sets the id as a value for the key.

type Job

type Job[P Payload] struct {
	*State
	Val P
}

Job represents a piece of work in the work queue

type JsonYamlDecoder added in v0.6.0

type JsonYamlDecoder struct {
	// contains filtered or unexported fields
}

JsonYamlDecoder is a decoder that reads JSON or YAML from the input and decodes it into a struct.

func (*JsonYamlDecoder) Init added in v0.6.0

func (j *JsonYamlDecoder) Init(r io.Reader, suffix string)

func (*JsonYamlDecoder) Read added in v0.6.0

func (j *JsonYamlDecoder) Read(v interface{}) error

type KeyNormalizer

type KeyNormalizer interface {
	Normalize(key string) (string, error)
}

type LineAsStringDecoder added in v0.6.0

type LineAsStringDecoder struct {
	// contains filtered or unexported fields
}

LineAsStringDecoder is a decoder that reads a line from the input as a string.

func (*LineAsStringDecoder) Init added in v0.6.0

func (l *LineAsStringDecoder) Init(r io.Reader, suffix string)

func (*LineAsStringDecoder) Read added in v0.6.0

func (l *LineAsStringDecoder) Read(v interface{}) error

type Payload

type Payload interface {
	any
}

Payload is an interface for the payload of a job in a work queue

type Record

type Record struct {
	Id string
}

type RecordDecoder

type RecordDecoder interface {
	Init(r io.Reader, suffix string)
	Read(v interface{}) (err error)
}

type SeedDesc added in v0.6.0

type SeedDesc struct {
	EntityId          string            `json:"entityId,omitempty" yaml:"entityId,omitempty"`
	EntityName        string            `json:"entityName,omitempty" yaml:"entityName,omitempty"`
	EntityDescription string            `json:"entityDescription,omitempty" yaml:"entityDescription,omitempty"`
	EntityLabel       []*configV1.Label `json:"entityLabel,omitempty" yaml:"entityLabel,omitempty"`
	Uri               string            `json:"uri,omitempty" yaml:"uri,omitempty"`
	SeedDescription   string            `json:"seedDescription,omitempty" yaml:"seedDescription,omitempty"`
	SeedLabel         []*configV1.Label `json:"seedLabel,omitempty" yaml:"seedLabel,omitempty"`

	Description string `json:"description,omitempty" yaml:"description,omitempty"`
	CrawlJobRef []*configV1.ConfigRef
}

func (*SeedDesc) String added in v0.6.0

func (sd *SeedDesc) String() string

func (*SeedDesc) ToEntity added in v0.6.0

func (sd *SeedDesc) ToEntity() *configV1.ConfigObject

func (*SeedDesc) ToSeed added in v0.6.0

func (sd *SeedDesc) ToSeed() *configV1.ConfigObject

type SeedDuplicateReportRecord

type SeedDuplicateReportRecord struct {
	Host  string
	Seeds []SeedRecord
}

type SeedRecord

type SeedRecord struct {
	SeedId            string
	Uri               string
	SeedDescription   string
	EntityId          string
	EntityName        string
	EntityDescription string
}

type SeedReporter

type SeedReporter struct {
	*ImportDb
	Client configV1.ConfigClient
}

func (SeedReporter) Report

func (d SeedReporter) Report(w io.Writer) error

type State

type State struct {
	// contains filtered or unexported fields
}

func (*State) GetError

func (r *State) GetError() error

func (*State) GetFilename

func (r *State) GetFilename() string

func (*State) GetRecordNum

func (r *State) GetRecordNum() int

type UriChecker added in v0.6.0

type UriChecker struct {
	*http.Client
}

UriChecker checks if a uri is reachable

func (*UriChecker) Check added in v0.6.0

func (uc *UriChecker) Check(uri string) (string, error)

Check checks if a uri is reachable and returns the uri if it is reachable. If the uri is not reachable, it returns an error. If the uri is redirected with 301, it returns the redirected uri.

func (*UriChecker) GetTitle added in v0.6.0

func (uc *UriChecker) GetTitle(uri string) string

GetTitle returns the title of the uri

type UriKeyNormalizer added in v0.6.0

type UriKeyNormalizer struct {
	// Toplevel will normalize the uri to the top level domain
	Toplevel bool
	// IgnoreScheme will ignore the scheme when normalizing
	IgnoreScheme bool
}

func (*UriKeyNormalizer) Normalize added in v0.6.0

func (u *UriKeyNormalizer) Normalize(s string) (string, error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL