pachyderm: github.com/pachyderm/pachyderm/src/server/pkg/storage/chunk Index | Files

package chunk

import "github.com/pachyderm/pachyderm/src/server/pkg/storage/chunk"

Index

Package Files

chunk.pb.go option.go reader.go storage.go util.go writer.go

Constants

const (
    // WindowSize is the size of the rolling hash window.
    WindowSize = 64
)

Variables

var (
    ErrInvalidLengthChunk        = fmt.Errorf("proto: negative length found during unmarshaling")
    ErrIntOverflowChunk          = fmt.Errorf("proto: integer overflow")
    ErrUnexpectedEndOfGroupChunk = fmt.Errorf("proto: unexpected end of group")
)

func BeforeBound Uses

func BeforeBound(str string, strBound ...string) bool

BeforeBound checks if the passed-in string is before the string bound (exclusive). The string bound is optional, so if no string bound is passed then it returns true.

func RandSeq Uses

func RandSeq(n int) []byte

RandSeq generates a random sequence of data (n is the number of bytes).

func WithLocalStorage Uses

func WithLocalStorage(f func(obj.Client, *Storage) error, opts ...StorageOption) error

WithLocalStorage creates a local storage instance for testing during the lifetime of the callback.

type Annotation Uses

type Annotation struct {
    RefDataRefs []*DataRef
    NextDataRef *DataRef
    Data        interface{}
    // TODO Find a way around needing this field (for deletions).
    Empty bool
    // contains filtered or unexported fields
}

Annotation is used to associate information with data written into the chunk storage layer.

type Chunk Uses

type Chunk struct {
    Hash                 string   `protobuf:"bytes,1,opt,name=hash,proto3" json:"hash,omitempty"`
    XXX_NoUnkeyedLiteral struct{} `json:"-"`
    XXX_unrecognized     []byte   `json:"-"`
    XXX_sizecache        int32    `json:"-"`
}

func (*Chunk) Descriptor Uses

func (*Chunk) Descriptor() ([]byte, []int)

func (*Chunk) GetHash Uses

func (m *Chunk) GetHash() string

func (*Chunk) Marshal Uses

func (m *Chunk) Marshal() (dAtA []byte, err error)

func (*Chunk) MarshalTo Uses

func (m *Chunk) MarshalTo(dAtA []byte) (int, error)

func (*Chunk) MarshalToSizedBuffer Uses

func (m *Chunk) MarshalToSizedBuffer(dAtA []byte) (int, error)

func (*Chunk) ProtoMessage Uses

func (*Chunk) ProtoMessage()

func (*Chunk) Reset Uses

func (m *Chunk) Reset()

func (*Chunk) Size Uses

func (m *Chunk) Size() (n int)

func (*Chunk) String Uses

func (m *Chunk) String() string

func (*Chunk) Unmarshal Uses

func (m *Chunk) Unmarshal(dAtA []byte) error

func (*Chunk) XXX_DiscardUnknown Uses

func (m *Chunk) XXX_DiscardUnknown()

func (*Chunk) XXX_Marshal Uses

func (m *Chunk) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)

func (*Chunk) XXX_Merge Uses

func (m *Chunk) XXX_Merge(src proto.Message)

func (*Chunk) XXX_Size Uses

func (m *Chunk) XXX_Size() int

func (*Chunk) XXX_Unmarshal Uses

func (m *Chunk) XXX_Unmarshal(b []byte) error

type ChunkInfo Uses

type ChunkInfo struct {
    Chunk                *Chunk   `protobuf:"bytes,1,opt,name=chunk,proto3" json:"chunk,omitempty"`
    SizeBytes            int64    `protobuf:"varint,2,opt,name=size_bytes,json=sizeBytes,proto3" json:"size_bytes,omitempty"`
    Edge                 bool     `protobuf:"varint,3,opt,name=edge,proto3" json:"edge,omitempty"`
    XXX_NoUnkeyedLiteral struct{} `json:"-"`
    XXX_unrecognized     []byte   `json:"-"`
    XXX_sizecache        int32    `json:"-"`
}

func (*ChunkInfo) Descriptor Uses

func (*ChunkInfo) Descriptor() ([]byte, []int)

func (*ChunkInfo) GetChunk Uses

func (m *ChunkInfo) GetChunk() *Chunk

func (*ChunkInfo) GetEdge Uses

func (m *ChunkInfo) GetEdge() bool

func (*ChunkInfo) GetSizeBytes Uses

func (m *ChunkInfo) GetSizeBytes() int64

func (*ChunkInfo) Marshal Uses

func (m *ChunkInfo) Marshal() (dAtA []byte, err error)

func (*ChunkInfo) MarshalTo Uses

func (m *ChunkInfo) MarshalTo(dAtA []byte) (int, error)

func (*ChunkInfo) MarshalToSizedBuffer Uses

func (m *ChunkInfo) MarshalToSizedBuffer(dAtA []byte) (int, error)

func (*ChunkInfo) ProtoMessage Uses

func (*ChunkInfo) ProtoMessage()

func (*ChunkInfo) Reset Uses

func (m *ChunkInfo) Reset()

func (*ChunkInfo) Size Uses

func (m *ChunkInfo) Size() (n int)

func (*ChunkInfo) String Uses

func (m *ChunkInfo) String() string

func (*ChunkInfo) Unmarshal Uses

func (m *ChunkInfo) Unmarshal(dAtA []byte) error

func (*ChunkInfo) XXX_DiscardUnknown Uses

func (m *ChunkInfo) XXX_DiscardUnknown()

func (*ChunkInfo) XXX_Marshal Uses

func (m *ChunkInfo) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)

func (*ChunkInfo) XXX_Merge Uses

func (m *ChunkInfo) XXX_Merge(src proto.Message)

func (*ChunkInfo) XXX_Size Uses

func (m *ChunkInfo) XXX_Size() int

func (*ChunkInfo) XXX_Unmarshal Uses

func (m *ChunkInfo) XXX_Unmarshal(b []byte) error

type DataReader Uses

type DataReader struct {
    // contains filtered or unexported fields
}

DataReader is an abstraction that lazily reads data referenced by a data reference. The seed is set to avoid re-downloading a chunk that is shared between this data reference and the prior one in a chain of data references.

func (*DataReader) BoundReader Uses

func (dr *DataReader) BoundReader(tagUpperBound ...string) *DataReader

BoundReader creates a new data reader that reads a subset of the remaining tags in the current data reader. This tag subset is determined by the optional parameter tagUpperBound which specifies the upper bound (exclusive) of the new data reader. BoundReader will progress the current data reader past the tags in the tag subset. Data in the tag subset is fetched lazily by the new data reader.

func (*DataReader) DataRef Uses

func (dr *DataReader) DataRef() *DataRef

DataRef returns the data reference associated with this data reader.

func (*DataReader) Get Uses

func (dr *DataReader) Get(w io.Writer) error

Get writes the referenced data. This does not take into account the reading of tags.

func (*DataReader) Iterate Uses

func (dr *DataReader) Iterate(f func(*Tag, io.Reader) error, tagUpperBound ...string) error

Iterate iterates over the tags in the data reference and passes the tag and a reader for getting the tagged data to the callback function. tagUpperBound is an optional parameter for specifying the upper bound (exclusive) of the iteration.

func (*DataReader) Len Uses

func (dr *DataReader) Len() int64

Len returns the length of the remaining data to be read.

func (*DataReader) PeekTag Uses

func (dr *DataReader) PeekTag() (*Tag, error)

PeekTag returns the next tag without progressing the reader.

func (*DataReader) PeekTags Uses

func (dr *DataReader) PeekTags() ([]*Tag, error)

PeekTags returns the tags left in the reader without progressing the reader.

type DataRef Uses

type DataRef struct {
    // The chunk the referenced data is located in.
    ChunkInfo *ChunkInfo `protobuf:"bytes,1,opt,name=chunk_info,json=chunkInfo,proto3" json:"chunk_info,omitempty"`
    // The hash of the data being referenced.
    // This field is empty when it is equal to the chunk hash (the ref is the whole chunk).
    Hash string `protobuf:"bytes,2,opt,name=hash,proto3" json:"hash,omitempty"`
    // The offset and size used for accessing the data within the chunk.
    OffsetBytes          int64    `protobuf:"varint,3,opt,name=offset_bytes,json=offsetBytes,proto3" json:"offset_bytes,omitempty"`
    SizeBytes            int64    `protobuf:"varint,4,opt,name=size_bytes,json=sizeBytes,proto3" json:"size_bytes,omitempty"`
    Tags                 []*Tag   `protobuf:"bytes,5,rep,name=tags,proto3" json:"tags,omitempty"`
    XXX_NoUnkeyedLiteral struct{} `json:"-"`
    XXX_unrecognized     []byte   `json:"-"`
    XXX_sizecache        int32    `json:"-"`
}

DataRef is a reference to data within a chunk.

func Reference Uses

func Reference(dataRef *DataRef, tag string) *DataRef

Reference creates a data reference for the full chunk referenced by a data reference.

func (*DataRef) Descriptor Uses

func (*DataRef) Descriptor() ([]byte, []int)

func (*DataRef) GetChunkInfo Uses

func (m *DataRef) GetChunkInfo() *ChunkInfo

func (*DataRef) GetHash Uses

func (m *DataRef) GetHash() string

func (*DataRef) GetOffsetBytes Uses

func (m *DataRef) GetOffsetBytes() int64

func (*DataRef) GetSizeBytes Uses

func (m *DataRef) GetSizeBytes() int64

func (*DataRef) GetTags Uses

func (m *DataRef) GetTags() []*Tag

func (*DataRef) Marshal Uses

func (m *DataRef) Marshal() (dAtA []byte, err error)

func (*DataRef) MarshalTo Uses

func (m *DataRef) MarshalTo(dAtA []byte) (int, error)

func (*DataRef) MarshalToSizedBuffer Uses

func (m *DataRef) MarshalToSizedBuffer(dAtA []byte) (int, error)

func (*DataRef) ProtoMessage Uses

func (*DataRef) ProtoMessage()

func (*DataRef) Reset Uses

func (m *DataRef) Reset()

func (*DataRef) Size Uses

func (m *DataRef) Size() (n int)

func (*DataRef) String Uses

func (m *DataRef) String() string

func (*DataRef) Unmarshal Uses

func (m *DataRef) Unmarshal(dAtA []byte) error

func (*DataRef) XXX_DiscardUnknown Uses

func (m *DataRef) XXX_DiscardUnknown()

func (*DataRef) XXX_Marshal Uses

func (m *DataRef) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)

func (*DataRef) XXX_Merge Uses

func (m *DataRef) XXX_Merge(src proto.Message)

func (*DataRef) XXX_Size Uses

func (m *DataRef) XXX_Size() int

func (*DataRef) XXX_Unmarshal Uses

func (m *DataRef) XXX_Unmarshal(b []byte) error

type Reader Uses

type Reader struct {
    // contains filtered or unexported fields
}

Reader reads data from chunk storage.

func (*Reader) Get Uses

func (r *Reader) Get(w io.Writer) error

Get writes the concatenation of the data represented by the data references set in the reader.

func (*Reader) Iterate Uses

func (r *Reader) Iterate(f func(*DataReader) error, tagUpperBound ...string) error

Iterate iterates over the data readers for the current data references set in the reader. tagUpperBound is an optional parameter for specifying the upper bound (exclusive) of the iteration.

func (*Reader) Next Uses

func (r *Reader) Next() (*DataReader, error)

Next returns the next data reader and progresses the reader.

func (*Reader) NextDataRefs Uses

func (r *Reader) NextDataRefs(dataRefs []*DataRef)

NextDataRefs sets the next data references for the reader.

func (*Reader) NextTagReader Uses

func (r *Reader) NextTagReader() *TagReader

NextTagReader sets up a tag reader for the next tagged data.

func (*Reader) Peek Uses

func (r *Reader) Peek() (*DataReader, error)

Peek returns the next data reader without progressing the reader.

func (*Reader) PeekTag Uses

func (r *Reader) PeekTag() (*Tag, error)

PeekTag returns the next tag for the next data reader without progressing the reader.

type Storage Uses

type Storage struct {
    // contains filtered or unexported fields
}

Storage is the abstraction that manages chunk storage.

func NewStorage Uses

func NewStorage(objClient obj.Client, opts ...StorageOption) *Storage

NewStorage creates a new Storage.

func (*Storage) CreateSemanticReference Uses

func (s *Storage) CreateSemanticReference(ctx context.Context, name string, chunk *Chunk) error

CreateSemanticReference creates a semantic reference to a chunk.

func (*Storage) Delete Uses

func (s *Storage) Delete(ctx context.Context, hash string) error

Delete deletes a chunk in object storage.

func (*Storage) DeleteAll Uses

func (s *Storage) DeleteAll(ctx context.Context) error

DeleteAll deletes all of the chunks in object storage.

func (*Storage) DeleteSemanticReference Uses

func (s *Storage) DeleteSemanticReference(ctx context.Context, name string) error

DeleteSemanticReference deletes a semantic reference.

func (*Storage) List Uses

func (s *Storage) List(ctx context.Context, f func(string) error) error

List lists all of the chunks in object storage.

func (*Storage) NewReader Uses

func (s *Storage) NewReader(ctx context.Context, dataRefs ...*DataRef) *Reader

NewReader creates a new Reader.

func (*Storage) NewWriter Uses

func (s *Storage) NewWriter(ctx context.Context, tmpID string, f WriterFunc, opts ...WriterOption) *Writer

NewWriter creates a new Writer for a stream of bytes to be chunked. Chunks are created based on the content, then hashed and deduplicated/uploaded to object storage.

type StorageOption Uses

type StorageOption func(s *Storage)

StorageOption configures a storage.

func ServiceEnvToOptions Uses

func ServiceEnvToOptions(env *serviceenv.ServiceEnv) (options []StorageOption)

ServiceEnvToOptions converts a service environment configuration (specifically the storage configuration) to a set of storage options.

func WithGarbageCollection Uses

func WithGarbageCollection(gcClient gc.Client) StorageOption

WithGarbageCollection sets the garbage collection client for the storage. The storage will use a mock client otherwise.

func WithMaxConcurrentObjects Uses

func WithMaxConcurrentObjects(maxDownload, maxUpload int) StorageOption

WithMaxConcurrentObjects sets the maximum number of object readers (download) and writers (upload) that can be open at a time.

type Tag Uses

type Tag struct {
    Id                   string   `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"`
    SizeBytes            int64    `protobuf:"varint,2,opt,name=size_bytes,json=sizeBytes,proto3" json:"size_bytes,omitempty"`
    XXX_NoUnkeyedLiteral struct{} `json:"-"`
    XXX_unrecognized     []byte   `json:"-"`
    XXX_sizecache        int32    `json:"-"`
}

func (*Tag) Descriptor Uses

func (*Tag) Descriptor() ([]byte, []int)

func (*Tag) GetId Uses

func (m *Tag) GetId() string

func (*Tag) GetSizeBytes Uses

func (m *Tag) GetSizeBytes() int64

func (*Tag) Marshal Uses

func (m *Tag) Marshal() (dAtA []byte, err error)

func (*Tag) MarshalTo Uses

func (m *Tag) MarshalTo(dAtA []byte) (int, error)

func (*Tag) MarshalToSizedBuffer Uses

func (m *Tag) MarshalToSizedBuffer(dAtA []byte) (int, error)

func (*Tag) ProtoMessage Uses

func (*Tag) ProtoMessage()

func (*Tag) Reset Uses

func (m *Tag) Reset()

func (*Tag) Size Uses

func (m *Tag) Size() (n int)

func (*Tag) String Uses

func (m *Tag) String() string

func (*Tag) Unmarshal Uses

func (m *Tag) Unmarshal(dAtA []byte) error

func (*Tag) XXX_DiscardUnknown Uses

func (m *Tag) XXX_DiscardUnknown()

func (*Tag) XXX_Marshal Uses

func (m *Tag) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)

func (*Tag) XXX_Merge Uses

func (m *Tag) XXX_Merge(src proto.Message)

func (*Tag) XXX_Size Uses

func (m *Tag) XXX_Size() int

func (*Tag) XXX_Unmarshal Uses

func (m *Tag) XXX_Unmarshal(b []byte) error

type TagReader Uses

type TagReader struct {
    // contains filtered or unexported fields
}

TagReader is an abstraction for reading tagged data. A tag may span multiple data readers, so this abstraction will connect the data readers and bound them appropriately to retrieve the content for a specific tag.

func (*TagReader) Get Uses

func (tr *TagReader) Get(w io.Writer) error

Get writes the tagged data.

func (*TagReader) Iterate Uses

func (tr *TagReader) Iterate(f func(*DataReader) error) error

Iterate iterates over the data readers for the tagged data.

type Writer Uses

type Writer struct {
    // contains filtered or unexported fields
}

Writer splits a byte stream into content defined chunks that are hashed and deduplicated/uploaded to object storage. Chunk split points are determined by a bit pattern in a rolling hash function (buzhash64 at https://github.com/chmduquesne/rollinghash).

func (*Writer) Annotate Uses

func (w *Writer) Annotate(a *Annotation)

Annotate associates an annotation with the current data. TODO: Maybe add some validation for calling the Annotate / Tag functions in the incorrect order.

func (*Writer) AnnotationCount Uses

func (w *Writer) AnnotationCount() int64

AnnotationCount returns a count of the number of annotations created/referenced by the writer.

func (*Writer) ChunkCount Uses

func (w *Writer) ChunkCount() int64

ChunkCount returns a count of the number of chunks created/referenced by the writer.

func (*Writer) Close Uses

func (w *Writer) Close() error

Close closes the writer.

func (*Writer) Copy Uses

func (w *Writer) Copy(dr *DataReader) error

Copy copies data from a data reader to the writer. The copy will either be by reading the referenced data, or just copying the data reference (cheap copy).

func (*Writer) Tag Uses

func (w *Writer) Tag(id string)

Tag starts a tag in the current annotation with the passed-in id.

func (*Writer) Write Uses

func (w *Writer) Write(data []byte) (int, error)

type WriterFunc Uses

type WriterFunc func([]*Annotation) error

WriterFunc is a callback that returns the annotations within a chunk.

type WriterOption Uses

type WriterOption func(w *Writer)

WriterOption configures a chunk writer.

func WithMinMax Uses

func WithMinMax(min, max int) WriterOption

WithMinMax sets the minimum and maximum chunk size.

func WithNoUpload Uses

func WithNoUpload() WriterOption

WithNoUpload sets the writer to no upload (will not upload chunks).

func WithRollingHashConfig Uses

func WithRollingHashConfig(averageBits int, seed int64) WriterOption

WithRollingHashConfig sets up the rolling hash with the passed-in configuration.

Package chunk imports 20 packages (graph) and is imported by 4 packages. Updated 2020-08-05. Refresh now. Tools for package owners.