mimo

package
v0.8.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 4, 2023 License: GPL-3.0 Imports: 14 Imported by: 0

Documentation

Index

Constants

View Source
// NOTE(review): these look like labels for the kind of a value (number, bool,
// string, nil) — presumably the type names compared when a type disparity
// (ErrDisparityType) is detected; confirm against the implementation.
const (
	Number = "number"
	Bool   = "bool"
	String = "string"
	Nil    = "nil"
)

Variables

View Source
// Sentinel errors exposed by the package.
var (
	// ErrReadingDataRow is returned when an error occurs while reading a data row.
	ErrReadingDataRow = errors.New("error while reading datarow")

	// ErrOrphanRow is returned when an original row does not have a masked version, or the other way around.
	ErrOrphanRow = errors.New("error datarow is orphan")

	// ErrKeyNotFound is returned when a key is not in the multimap.
	ErrKeyNotFound = errors.New("key not found in multimap")

	// ErrDisparityType is returned when the real and masked values have different types.
	ErrDisparityType = errors.New("can't analyze value because type has changed")

	// ErrDisparityStruct is returned when the real and masked values have different structures.
	ErrDisparityStruct = errors.New("can't analyze row because structure has changed")

	// ErrAnalyze is returned when a fatal error occurs during analysis.
	ErrAnalyze = errors.New("fatal error")
)

Functions

This section is empty.

Types

type ColumnConfig added in v0.2.0

// ColumnConfig holds the settings applied to a single column
// (Config.ColumnConfigs maps a string key to one ColumnConfig).
type ColumnConfig struct {
	Exclude           []any        // values to exclude from the masking rate (default: exclude only nil values)
	ExcludeTemplate   *Template    // exclude a value if this template expression evaluates to True (default: False)
	CoherentWith      []string     // list of fields used for coherent rate computation (default: the current field)
	CoherentSource    *Template    // template to execute to create the coherence source
	Constraints       []Constraint // list of constraints to validate
	Alias             string       // alias to use in persisted data
	IgnoreDisparities bool         // NOTE(review): presumably suppresses type/structure disparity errors for this column — confirm
	// contains filtered or unexported fields
}

func NewDefaultColumnConfig added in v0.2.0

func NewDefaultColumnConfig() ColumnConfig

type Config added in v0.2.0

// Config is the analysis configuration accepted by Driver.Configure and NewReport.
type Config struct {
	// NOTE(review): presumably the names of the configured columns, in order — confirm.
	ColumnNames []string
	// Per-column settings; NOTE(review): presumably keyed by column name — confirm.
	ColumnConfigs map[string]ColumnConfig
	// Preprocess entries, each pairing a Path with a Value template (see PreprocessConfig).
	PreprocessConfigs []PreprocessConfig
	// NOTE(review): presumably the global counterpart of ColumnConfig.IgnoreDisparities — confirm.
	IgnoreDisparities bool
}

func NewConfig added in v0.2.0

func NewConfig() Config

type Constraint added in v0.2.0

// Constraint is a rule to validate against one of a column's rates.
type Constraint struct {
	Target ConstraintTarget // which rate the rule applies to (MaskingRate, CoherentRate or IdentifiantRate)
	Type   ConstraintType   // which comparison the rule performs (ShouldEqual, ShouldBeGreaterThan, ...)
	Value  float64          // NOTE(review): presumably the reference value the rate is compared against — confirm
}

type ConstraintTarget added in v0.2.0

// ConstraintTarget selects the rate a Constraint applies to.
type ConstraintTarget int
const (
	// MaskingRate targets the masking rate (value 0, the zero value of ConstraintTarget).
	MaskingRate ConstraintTarget = iota
	// CoherentRate targets the coherent rate.
	CoherentRate
	// IdentifiantRate targets the identifiant rate.
	IdentifiantRate
)

type ConstraintType added in v0.2.0

// ConstraintType selects the comparison a Constraint performs.
// NOTE(review): presumably the targeted rate is the left operand and
// Constraint.Value the right operand of each comparison — confirm.
type ConstraintType int
const (
	// ShouldEqual is value 0, the zero value of ConstraintType.
	ShouldEqual ConstraintType = iota
	ShouldBeGreaterThan
	ShouldBeGreaterThanOrEqualTo
	// ShouldBeLowerThan is the strict "less than" comparison; note the "Lower"
	// spelling differs from ShouldBeLessThanOrEqualTo below.
	ShouldBeLowerThan
	ShouldBeLessThanOrEqualTo
)

type CounterBackend added in v0.5.0

// CounterBackend is the storage interface for a field's counters; a
// CounterFactory returns one CounterBackend for a given field name.
// The counter meanings below follow the field comments on InMemoryCounterBackend.
type CounterBackend interface {
	// Total count: the number of values analyzed.
	IncreaseTotalCount()
	GetTotalCount() int64

	// Nil count: the number of null values in real data.
	IncreaseNilCount()
	GetNilCount() int64

	// Ignored count: the number of ignored values in real data.
	IncreaseIgnoredCount()
	GetIgnoredCount() int64

	// Masked count: the number of non-blank real values masked.
	IncreaseMaskedCount()
	GetMaskedCount() int64

	// NOTE(review): presumably releases any resource held by the backend — confirm.
	Close() error
}

type CounterFactory added in v0.5.0

type CounterFactory func(fieldname string) CounterBackend

type DataRow

type DataRow map[string]any

type DataRowReader

// DataRowReader is a source of DataRow values. NewDriver takes two of them:
// one for the real data and one for the masked data.
type DataRowReader interface {
	// ReadDataRow returns a DataRow or an error.
	// NOTE(review): how end of input is signalled is not visible here — confirm.
	ReadDataRow() (DataRow, error)
	Close() error
}

type Driver

type Driver struct {
	// contains filtered or unexported fields
}

func NewDriver

func NewDriver(
	realReader DataRowReader,
	maskedReader DataRowReader,
	multimapFactory MultimapFactory,
	counterFactory CounterFactory,
	subs ...EventSubscriber,
) Driver

func (*Driver) Analyze

func (d *Driver) Analyze() (r *Report, err error)

func (Driver) Close added in v0.3.0

func (d Driver) Close() error

func (*Driver) Configure added in v0.2.0

func (d *Driver) Configure(c Config)

type EventSubscriber

// EventSubscriber is the callback interface accepted by NewDriver and NewReport.
// NOTE(review): the descriptions below are inferred from the method names and
// parameters only; confirm against the implementation.
type EventSubscriber interface {
	// Presumably called when a field name is first encountered.
	NewField(fieldname string)
	// Presumably called the first time a value of the field is found not masked.
	FirstNonMaskedValue(fieldname string, value any)
	// Presumably called for a value of the field found not masked.
	NonMaskedValue(fieldname string, value any)
	// Presumably called for a value/pseudonym pair that breaks coherence.
	IncoherentValue(fieldname string, value any, pseudonym any)
	// Presumably called for a pseudonym attributed inconsistently to a value.
	InconsistentPseudonym(fieldname string, value any, pseudonym any)
}

type InMemoryCounterBackend added in v0.5.0

// InMemoryCounterBackend keeps the four counters as exported int64 fields.
// Its pointer-receiver methods have the same names and signatures as the
// CounterBackend interface.
type InMemoryCounterBackend struct {
	TotalCount   int64 // TotalCount is the number of values analyzed
	NilCount     int64 // NilCount is the number of null values in real data
	IgnoredCount int64 // IgnoredCount is the number of ignored values in real data
	MaskedCount  int64 // MaskedCount is the number of non-blank real values masked
}

func (*InMemoryCounterBackend) Close added in v0.5.0

func (b *InMemoryCounterBackend) Close() error

func (*InMemoryCounterBackend) GetIgnoredCount added in v0.5.0

func (b *InMemoryCounterBackend) GetIgnoredCount() int64

func (*InMemoryCounterBackend) GetMaskedCount added in v0.5.0

func (b *InMemoryCounterBackend) GetMaskedCount() int64

func (*InMemoryCounterBackend) GetNilCount added in v0.5.0

func (b *InMemoryCounterBackend) GetNilCount() int64

func (*InMemoryCounterBackend) GetTotalCount added in v0.5.0

func (b *InMemoryCounterBackend) GetTotalCount() int64

func (*InMemoryCounterBackend) IncreaseIgnoredCount added in v0.5.0

func (b *InMemoryCounterBackend) IncreaseIgnoredCount()

func (*InMemoryCounterBackend) IncreaseMaskedCount added in v0.5.0

func (b *InMemoryCounterBackend) IncreaseMaskedCount()

func (*InMemoryCounterBackend) IncreaseNilCount added in v0.5.0

func (b *InMemoryCounterBackend) IncreaseNilCount()

func (*InMemoryCounterBackend) IncreaseTotalCount added in v0.5.0

func (b *InMemoryCounterBackend) IncreaseTotalCount()

type InMemoryIterator added in v0.3.0

type InMemoryIterator struct {
	// contains filtered or unexported fields
}

func (*InMemoryIterator) Close added in v0.3.0

func (i *InMemoryIterator) Close() error

func (*InMemoryIterator) First added in v0.3.0

func (i *InMemoryIterator) First() bool

func (*InMemoryIterator) Next added in v0.3.0

func (i *InMemoryIterator) Next() bool

func (*InMemoryIterator) Valid added in v0.3.0

func (i *InMemoryIterator) Valid() bool

func (*InMemoryIterator) Value added in v0.3.0

func (i *InMemoryIterator) Value() int

type InMemoryMultimapBackend added in v0.3.0

type InMemoryMultimapBackend map[string]map[string]int

func (InMemoryMultimapBackend) Close added in v0.3.0

func (m InMemoryMultimapBackend) Close() error

Close the backend.

func (InMemoryMultimapBackend) GetKey added in v0.3.0

func (m InMemoryMultimapBackend) GetKey(key string) (map[string]int, error)

func (InMemoryMultimapBackend) GetSamplesMono added in v0.5.0

func (m InMemoryMultimapBackend) GetSamplesMono(maxlen int) []Sample

func (InMemoryMultimapBackend) GetSamplesMulti added in v0.5.0

func (m InMemoryMultimapBackend) GetSamplesMulti(maxlen int) []Sample

func (InMemoryMultimapBackend) GetSize added in v0.3.0

func (m InMemoryMultimapBackend) GetSize(key string) int

func (InMemoryMultimapBackend) NewSizeIterator added in v0.3.0

func (m InMemoryMultimapBackend) NewSizeIterator() SizeIterator

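NewSizeIterator returns a SizeIterator for the backend. (The published comment here reads "CountMin returns the minimum count of values associated to a key across the map", which describes Multimap.CountMin rather than this method.)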

func (InMemoryMultimapBackend) SetKey added in v0.3.0

func (m InMemoryMultimapBackend) SetKey(key string, value map[string]int) error

type Metrics

type Metrics struct {
	Fieldname   string       // Fieldname is name of column analyzed
	Coherence   Multimap     // Coherence is a multimap used to compute the coherence rate
	Identifiant Multimap     // Identifiant is a multimap used to compute the identifiable rate
	Constraints []Constraint // Constraints is the set of rules to validate
	// contains filtered or unexported fields
}

func NewMetrics

func NewMetrics(
	fieldname string, multimapFactory MultimapFactory, counterFactory CounterFactory, constraints ...Constraint,
) Metrics

func (Metrics) BlankCount

func (m Metrics) BlankCount() int64

BlankCount is the number of blank (null or ignored) values in real data.

func (Metrics) CoherenceRateValidate added in v0.2.0

func (m Metrics) CoherenceRateValidate() int

CoherenceRateValidate returns:

  • -1 if at least one constraint fails on the CoherenceRate,
  • 0 if no constraint exists on the CoherenceRate,
  • 1 if all constraints succeed on the CoherenceRate.

func (Metrics) GetInvalidSamplesForCoherentRate added in v0.5.0

func (m Metrics) GetInvalidSamplesForCoherentRate(maxlen int) []Sample

GetInvalidSamplesForCoherentRate returns at most maxlen invalid samples if a constraint on the coherent rate failed.

func (Metrics) GetInvalidSamplesForIdentifiantRate added in v0.5.0

func (m Metrics) GetInvalidSamplesForIdentifiantRate(maxlen int) []Sample

GetInvalidSamplesForIdentifiantRate returns at most maxlen invalid samples if a constraint on the identifiant rate failed.

func (Metrics) IdentifiantRateValidate added in v0.2.0

func (m Metrics) IdentifiantRateValidate() int

IdentifiantRateValidate returns:

  • -1 if at least one constraint fails on the IdentifiantRate,
  • 0 if no constraint exists on the IdentifiantRate,
  • 1 if all constraints succeed on the IdentifiantRate.

func (Metrics) IgnoredCount added in v0.4.0

func (m Metrics) IgnoredCount() int64

func (Metrics) K

func (m Metrics) K() int

K is the minimum number of values to which a pseudonym was attributed.

func (Metrics) MaskedCount

func (m Metrics) MaskedCount() int64

func (Metrics) MaskedRate

func (m Metrics) MaskedRate() float64

MaskedRate is equal to:

Number of non-blank real values masked
  / (Number of values analyzed - Number of blank (null or ignored) values in real data).

func (Metrics) MaskedRateValidate added in v0.2.0

func (m Metrics) MaskedRateValidate() int

MaskedRateValidate returns:

  • -1 if at least one constraint fails on the MaskedRate,
  • 0 if no constraint exists on the MaskedRate,
  • 1 if all constraints succeed on the MaskedRate.

func (Metrics) NilCount

func (m Metrics) NilCount() int64

func (Metrics) NonBlankCount

func (m Metrics) NonBlankCount() int64

NonBlankCount is the number of non-blank (non-null and non-ignored) values in real data.

func (Metrics) NonMaskedCount

func (m Metrics) NonMaskedCount() int64

NonMaskedCount is the number of non-blank (non-null and non-ignored) values in real data that were not masked.

func (*Metrics) Update

func (m *Metrics) Update(
	fieldname string,
	realValue any, maskedValue any, coherenceValue []any,
	subs Suscribers, config ColumnConfig,
) bool

func (Metrics) Validate added in v0.2.0

func (m Metrics) Validate() int

Validate returns:

  • -1 if at least one constraint fails,
  • 0 if no constraint exists,
  • 1 if all constraints succeed.

type Multimap

type Multimap struct {
	Backend MultimapBackend
}

func (Multimap) Add

func (m Multimap) Add(key string, value string)

Add a key/value pair to the multimap.

func (Multimap) Close added in v0.3.0

func (m Multimap) Close() error

Close the database.

func (Multimap) Count

func (m Multimap) Count(key string) int

Count the number of values associated to key.

func (Multimap) CountMin

func (m Multimap) CountMin() int

CountMin returns the minimum count of values associated to a key across the map.

func (Multimap) Rate

func (m Multimap) Rate() float64

Rate returns the percentage of keys that have a count of 1.

type MultimapBackend added in v0.3.0

// MultimapBackend is the storage interface held by Multimap (field Backend).
// InMemoryMultimapBackend, a map[string]map[string]int, provides methods with
// these names.
type MultimapBackend interface {
	Close() error
	// GetKey returns the map[string]int stored under key, or an error.
	// NOTE(review): presumably ErrKeyNotFound when the key is absent — confirm.
	GetKey(key string) (map[string]int, error)
	// SetKey stores value under key.
	SetKey(key string, value map[string]int) error
	// NOTE(review): presumably the number of entries stored under key — confirm.
	GetSize(key string) int
	NewSizeIterator() SizeIterator
	// GetSamplesMono and GetSamplesMulti return Sample values.
	// NOTE(review): presumably at most n each; the Mono/Multi distinction is
	// not visible here — confirm.
	GetSamplesMono(n int) []Sample
	GetSamplesMulti(n int) []Sample
}

type MultimapFactory added in v0.3.0

type MultimapFactory func(fieldname string) Multimap

type PreprocessConfig added in v0.4.0

type PreprocessConfig struct {
	Path  string
	Value *Template
}

type Report

type Report struct {
	Metrics map[string]Metrics
	// contains filtered or unexported fields
}

func NewReport

func NewReport(
	subs []EventSubscriber, config Config, multiMapFactory MultimapFactory, counterFactory CounterFactory,
) *Report

func (Report) ColumnMetric

func (r Report) ColumnMetric(colname string) Metrics

func (Report) Columns

func (r Report) Columns() []string

func (Report) Update

func (r Report) Update(realRow DataRow, maskedRow DataRow)

func (Report) UpdateArray added in v0.3.0

func (r Report) UpdateArray(root DataRow, realArray []any, maskedArray []any, stack []any, path ...string)

func (Report) UpdateDeep added in v0.3.0

func (r Report) UpdateDeep(root DataRow, realRow DataRow, maskedRow DataRow, stack []any, path ...string)

func (Report) UpdateValue added in v0.3.0

func (r Report) UpdateValue(root DataRow, realValue any, maskedValue any, stack []any, path ...string)

type Sample added in v0.5.0

// Sample is the element type returned by GetSamplesMono, GetSamplesMulti and
// the Metrics.GetInvalidSamplesFor* methods.
type Sample struct {
	OriginalValue  string   // NOTE(review): presumably the key (original value) the sample is about — confirm
	AssignedValues []string // NOTE(review): presumably the values associated with OriginalValue — confirm
}

type SizeIterator added in v0.3.0

// SizeIterator is the iterator type returned by MultimapBackend.NewSizeIterator.
// InMemoryIterator provides methods with these names and signatures.
// NOTE(review): the cursor semantics below are inferred from the method names — confirm.
type SizeIterator interface {
	First() bool  // presumably moves to the first element and reports whether one exists
	Next() bool   // presumably advances and reports whether an element is available
	Valid() bool  // presumably reports whether the iterator currently points at an element
	Value() int   // presumably the size at the current position
	Close() error // presumably releases the iterator
}

type Suscribers

type Suscribers []EventSubscriber

func (Suscribers) PostFirstNonMaskedValue

func (subs Suscribers) PostFirstNonMaskedValue(fieldname string, value any)

func (Suscribers) PostNewField

func (subs Suscribers) PostNewField(fieldname string)

func (Suscribers) PostNonMaskedValue added in v0.6.0

func (subs Suscribers) PostNonMaskedValue(fieldname string, value any)

type Template added in v0.8.0

type Template struct {
	// contains filtered or unexported fields
}

func NewTemplate added in v0.8.0

func NewTemplate(tmplstr string) (*Template, error)

func (*Template) Execute added in v0.8.0

func (t *Template) Execute(root DataRow, stack []any) (string, error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL