pachyderm: github.com/pachyderm/pachyderm/src/server/pkg/storage/fileset Index | Files | Directories

package fileset

import "github.com/pachyderm/pachyderm/src/server/pkg/storage/fileset"

Index

Package Files

api.go fileset.go index_resolver.go merge.go option.go priority_queue.go reader.go storage.go transforms.go util.go writer.go

Constants

const (

    // DefaultMemoryThreshold is the default for the memory threshold that must
    // be met before a file set part is serialized (excluding close).
    DefaultMemoryThreshold = 1024 * units.MB
    // DefaultShardThreshold is the default for the size threshold that must
    // be met before a shard is created by the shard function.
    DefaultShardThreshold = 1024 * units.MB
    // DefaultLevelZeroSize is the default size for level zero in the compacted
    // representation of a file set.
    DefaultLevelZeroSize = 1 * units.MB
    // DefaultLevelSizeBase is the default base of the exponential growth function
    // for level sizes in the compacted representation of a file set.
    DefaultLevelSizeBase = 10
    // Diff is the suffix of a path that points to the diff of the prefix.
    Diff = "diff"
    // Compacted is the suffix of a path that points to the compaction of the prefix.
    Compacted = "compacted"
)

func CopyFiles Uses

func CopyFiles(w *Writer, s FileSource) error

CopyFiles iterates over s and writes all the Files to w

func SubFileSetStr Uses

func SubFileSetStr(subFileSet int64) string

SubFileSetStr returns the string representation of a subfileset.

func WithLocalStorage Uses

func WithLocalStorage(f func(*Storage) error) error

WithLocalStorage constructs a local storage instance for testing during the lifetime of the callback.

func WriteTarEntry Uses

func WriteTarEntry(w io.Writer, f File) error

WriteTarEntry writes a tar entry for f to w.

func WriteTarStream Uses

func WriteTarStream(ctx context.Context, w io.Writer, fs FileSource) error

WriteTarStream writes an entire tar stream to w. It will contain an entry for each File in fs.

type CompactSpec Uses

type CompactSpec struct {
    Output string
    Input  []string
}

CompactSpec specifies the input and output for a compaction operation.

type CompactStats Uses

type CompactStats struct {
    OutputSize int64
}

CompactStats contains information about what was compacted.

type File Uses

type File interface {
    // Index returns the index for the file
    Index() *index.Index
    // Header returns a *tar.Header with metadata about the file, or an error
    Header() (*tar.Header, error)
    // Content writes the content of the file to w
    Content(w io.Writer) error
}

File represents a file in a fileset

type FileMergeReader Uses

type FileMergeReader struct {
    // contains filtered or unexported fields
}

FileMergeReader is an abstraction for reading a merged file.

func (*FileMergeReader) Content Uses

func (fmr *FileMergeReader) Content(w io.Writer) error

Content writes the content of the current file excluding the header to w

func (*FileMergeReader) Get Uses

func (fmr *FileMergeReader) Get(w io.Writer) error

Get writes the merged file. TODO It might be cleaner to check if w is of type *Writer then use WriteTo rather than Get.

func (*FileMergeReader) Header Uses

func (fmr *FileMergeReader) Header() (*tar.Header, error)

Header returns the tar header for the merged file.

func (*FileMergeReader) Index Uses

func (fmr *FileMergeReader) Index() *index.Index

Index returns the index for the merged file.

func (*FileMergeReader) TagSetMergeReader Uses

func (fmr *FileMergeReader) TagSetMergeReader() (*TagSetMergeReader, error)

TagSetMergeReader returns the tagset merge reader for the file. This is how you would get just the data in the file (excludes the tar header and padding).

func (*FileMergeReader) WriteTo Uses

func (fmr *FileMergeReader) WriteTo(w *Writer) error

WriteTo writes the merged file to the passed in fileset writer.

type FileReader Uses

type FileReader struct {
    // contains filtered or unexported fields
}

FileReader is an abstraction for reading a file.

func (*FileReader) Content Uses

func (fr *FileReader) Content(w io.Writer) error

Content writes the contents of the file excluding the header to w.

func (*FileReader) Get Uses

func (fr *FileReader) Get(w io.Writer) error

Get writes the file.

func (*FileReader) Header Uses

func (fr *FileReader) Header() (*tar.Header, error)

Header returns the tar header for the file.

func (*FileReader) Index Uses

func (fr *FileReader) Index() *index.Index

Index returns the index for the file.

func (*FileReader) Iterate Uses

func (fr *FileReader) Iterate(f func(*chunk.DataReader) error, tagUpperBound ...string) error

Iterate iterates over the data readers for the data in the file. tagUpperBound is an optional parameter for specifying the upper bound (exclusive) of the iteration.

func (*FileReader) NextTagReader Uses

func (fr *FileReader) NextTagReader() *chunk.TagReader

NextTagReader returns a tag reader for the next tagged data in the file.

func (*FileReader) PeekTag Uses

func (fr *FileReader) PeekTag() (*chunk.Tag, error)

PeekTag returns the next tag in the file without progressing the reader.

type FileSet Uses

type FileSet struct {
    // contains filtered or unexported fields
}

FileSet is a set of files. This may be a full filesystem or a subfilesystem (e.g. datum / datum set / shard).

func (*FileSet) Close Uses

func (f *FileSet) Close() error

Close closes the file set.

func (*FileSet) Delete Uses

func (f *FileSet) Delete(name string, customTag ...string)

Delete deletes a file from the file set.

func (*FileSet) Put Uses

func (f *FileSet) Put(r io.Reader, customTag ...string) error

Put reads files from a tar stream and adds them to the fileset.

type FileSource Uses

type FileSource interface {
    // Iterate calls cb for each File in the FileSource in lexicographical order.
    Iterate(ctx context.Context, cb func(File) error, stopBefore ...string) error
}

FileSource is a source of Files.

func NewHeaderFilter Uses

func NewHeaderFilter(x FileSource, pred func(th *tar.Header) bool) FileSource

NewHeaderFilter filters x using pred

func NewHeaderMapper Uses

func NewHeaderMapper(x FileSource, pred func(*tar.Header) *tar.Header) FileSource

NewHeaderMapper transforms the tar headers of the Files in x using pred.

func NewIndexFilter Uses

func NewIndexFilter(x FileSource, pred func(idx *index.Index) bool) FileSource

NewIndexFilter filters x using pred

func NewIndexResolver Uses

func NewIndexResolver(x FileSource) FileSource

NewIndexResolver ensures the indexes in the FileSource are correct based on the content

type MergeReader Uses

type MergeReader struct {
    // contains filtered or unexported fields
}

MergeReader merges a file's content that shows up across multiple fileset streams. A file's content is ordered based on the lexicographical order of the tagged content, so the output file content is produced by performing a merge of the tagged content.

func (*MergeReader) Get Uses

func (mr *MergeReader) Get(w io.Writer) error

Get writes the merged fileset.

func (*MergeReader) Next Uses

func (mr *MergeReader) Next() (*FileMergeReader, error)

Next gets the next file merge reader in the merge reader.

func (*MergeReader) WriteTo Uses

func (mr *MergeReader) WriteTo(w *Writer) error

WriteTo writes the merged fileset to the passed in fileset writer.

type Option Uses

type Option func(f *FileSet)

Option configures a file set.

func WithRoot Uses

func WithRoot(root string) Option

WithRoot sets the root path of the file set.

type Reader Uses

type Reader struct {
    // contains filtered or unexported fields
}

Reader reads the serialized format of a fileset.

func (*Reader) Get Uses

func (r *Reader) Get(w io.Writer) error

Get writes the fileset.

func (*Reader) Iterate Uses

func (r *Reader) Iterate(ctx context.Context, f func(File) error, pathBound ...string) error

Iterate iterates over the file readers in the fileset. pathBound is an optional parameter for specifying the upper bound (exclusive) of the iteration.

func (*Reader) Next Uses

func (r *Reader) Next() (*FileReader, error)

Next returns the next file reader and progresses the reader.

func (*Reader) Peek Uses

func (r *Reader) Peek() (*index.Index, error)

Peek returns the next file index without progressing the reader.

type ShardFunc Uses

type ShardFunc func(*index.PathRange) error

ShardFunc is a callback that is called with the PathRange of each shard.

type Storage Uses

type Storage struct {
    // contains filtered or unexported fields
}

Storage is the abstraction that manages fileset storage.

func NewStorage Uses

func NewStorage(objC obj.Client, chunks *chunk.Storage, opts ...StorageOption) *Storage

NewStorage creates a new Storage.

func (*Storage) ChunkStorage Uses

func (s *Storage) ChunkStorage() *chunk.Storage

ChunkStorage returns the underlying chunk storage instance for this storage instance.

func (*Storage) Compact Uses

func (s *Storage) Compact(ctx context.Context, outputFileSet string, inputFileSets []string, opts ...index.Option) (*CompactStats, error)

Compact compacts a set of filesets into an output fileset.

func (*Storage) CompactSpec Uses

func (s *Storage) CompactSpec(ctx context.Context, fileSet string, compactedFileSet ...string) (*CompactSpec, error)

CompactSpec returns a compaction specification that determines the input filesets (the diff file set and potentially compacted filesets) and output fileset.

func (*Storage) Delete Uses

func (s *Storage) Delete(ctx context.Context, fileSet string) error

Delete deletes a fileset.

func (*Storage) New Uses

func (s *Storage) New(ctx context.Context, fileSet, defaultTag string, opts ...Option) (*FileSet, error)

New creates a new in-memory fileset.

func (*Storage) NewMergeReader Uses

func (s *Storage) NewMergeReader(ctx context.Context, fileSets []string, opts ...index.Option) (*MergeReader, error)

NewMergeReader returns a merge reader for a set of filesets.

func (*Storage) NewReader Uses

func (s *Storage) NewReader(ctx context.Context, fileSet string, opts ...index.Option) *Reader

NewReader makes a Reader backed by the path `fileSet` in object storage.

func (*Storage) NewSource Uses

func (s *Storage) NewSource(ctx context.Context, fileSet string, opts ...index.Option) FileSource

NewSource makes a source which will iterate over the prefix fileSet

func (*Storage) NewWriter Uses

func (s *Storage) NewWriter(ctx context.Context, fileSet string, opts ...WriterOption) *Writer

NewWriter makes a Writer backed by the path `fileSet` in object storage.

func (*Storage) ResolveIndexes Uses

func (s *Storage) ResolveIndexes(ctx context.Context, fileSets []string, cb func(*index.Index) error, opts ...index.Option) error

ResolveIndexes resolves index entries that are spread across multiple filesets. Deprecated: use NewIndexResolver instead.

func (*Storage) Shard Uses

func (s *Storage) Shard(ctx context.Context, fileSets []string, shardFunc ShardFunc) error

Shard shards the merge of the file sets with the passed in prefix into file ranges. TODO This should be extended to be more configurable (different criteria for creating shards).

func (*Storage) WalkFileSet Uses

func (s *Storage) WalkFileSet(ctx context.Context, prefix string, f func(string) error) error

WalkFileSet calls f with the path of every primitive fileSet under prefix.

type StorageOption Uses

type StorageOption func(s *Storage)

StorageOption configures a storage.

func ServiceEnvToOptions Uses

func ServiceEnvToOptions(env *serviceenv.ServiceEnv) []StorageOption

ServiceEnvToOptions converts a service environment configuration (specifically the storage configuration) to a set of storage options.

func WithLevelSizeBase Uses

func WithLevelSizeBase(base int) StorageOption

WithLevelSizeBase sets the base of the exponential growth function for level sizes in the compacted representation of a file set.

func WithLevelZeroSize Uses

func WithLevelZeroSize(size int64) StorageOption

WithLevelZeroSize sets the size for level zero in the compacted representation of a file set.

func WithMaxOpenFileSets Uses

func WithMaxOpenFileSets(max int) StorageOption

WithMaxOpenFileSets sets the maximum number of filesets that will be open (potentially buffered in memory) at a time.

func WithMemoryThreshold Uses

func WithMemoryThreshold(threshold int64) StorageOption

WithMemoryThreshold sets the memory threshold that must be met before a file set part is serialized (excluding close).

func WithShardThreshold Uses

func WithShardThreshold(threshold int64) StorageOption

WithShardThreshold sets the size threshold that must be met before a shard is created by the shard function.

type TagMergeReader Uses

type TagMergeReader struct {
    // contains filtered or unexported fields
}

TagMergeReader is an abstraction for reading a merged tag. This abstraction is necessary because a tag in a file can appear across multiple filesets.

func (*TagMergeReader) Get Uses

func (tmr *TagMergeReader) Get(w io.Writer) error

Get writes the merged tagged data.

func (*TagMergeReader) Iterate Uses

func (tmr *TagMergeReader) Iterate(f func(*chunk.DataReader) error) error

Iterate iterates over the data readers for the tagged data being merged.

func (*TagMergeReader) WriteTo Uses

func (tmr *TagMergeReader) WriteTo(w *Writer) error

WriteTo writes the merged tagged data to the passed in fileset writer.

type TagSetMergeReader Uses

type TagSetMergeReader struct {
    // contains filtered or unexported fields
}

TagSetMergeReader is an abstraction for reading the merged tagged data in a merged file.

func (*TagSetMergeReader) Get Uses

func (tsmr *TagSetMergeReader) Get(w io.Writer) error

Get writes the merged tagset.

func (*TagSetMergeReader) Iterate Uses

func (tsmr *TagSetMergeReader) Iterate(f func(*TagMergeReader) error) error

Iterate iterates over the tag merge readers in the merged tagset.

func (*TagSetMergeReader) WriteTo Uses

func (tsmr *TagSetMergeReader) WriteTo(w *Writer) error

WriteTo writes the merged tagset to the passed in fileset writer.

type Writer Uses

type Writer struct {
    // contains filtered or unexported fields
}

Writer writes the serialized format of a fileset. The serialized format of a fileset consists of indexes and content.

func (*Writer) Close Uses

func (w *Writer) Close() error

Close closes the writer.

func (*Writer) CopyFile Uses

func (w *Writer) CopyFile(fr *FileReader) error

CopyFile copies a file (header and tags included).

func (*Writer) CopyTags Uses

func (w *Writer) CopyTags(dr *chunk.DataReader) error

CopyTags copies the tagged data from the passed in data reader.

func (*Writer) DeleteFile Uses

func (w *Writer) DeleteFile(name string, tags ...string) error

DeleteFile deletes a file. The optional tags parameter indicates specific tags in the file to delete.

func (*Writer) DeleteTag Uses

func (w *Writer) DeleteTag(id string)

DeleteTag deletes a tag in the current file.

func (*Writer) Tag Uses

func (w *Writer) Tag(id string)

Tag starts a tag for the next set of bytes (used for the reverse index, mapping file output to datums).

func (*Writer) Write Uses

func (w *Writer) Write(data []byte) (int, error)

Write writes to the current file in the tar stream.

func (*Writer) WriteHeader Uses

func (w *Writer) WriteHeader(hdr *tar.Header) error

WriteHeader writes a tar header and prepares to accept the file's contents.

type WriterOption Uses

type WriterOption func(w *Writer)

WriterOption configures a file set writer.

func WithIndexCallback Uses

func WithIndexCallback(cb func(*index.Index) error) WriterOption

WithIndexCallback sets a function to be called after each index is written. If WithNoUpload is set, the function is called after the index would have been written.

func WithNoUpload Uses

func WithNoUpload() WriterOption

WithNoUpload sets the writer to no upload (will not upload chunks).

Directories

Path    Synopsis
index
tar     Package tar implements access to tar archives.

Package fileset imports 19 packages (graph) and is imported by 2 packages. Updated 2020-08-09. Refresh now. Tools for package owners.