ParquetFile

package
v0.0.0-...-ead641c Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 23, 2019 License: Apache-2.0 Imports: 9 Imported by: 5

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func ConvertToThriftReader

func ConvertToThriftReader(file ParquetFile, offset int64, size int64) *thrift.TBufferedTransport

Convert a file reader to a Thrift reader.

func GetMemFileFs

func GetMemFileFs() afero.Fs

GetMemFileFs - returns the current memory file-system being used by ParquetFile

func SetInMemFileFs

func SetInMemFileFs(fs *afero.Fs)

SetInMemFileFs - overrides the local in-memory file system. NOTE: the file system is set by NewMemFileWriter when a writer is created and memFs is still nil.

Types

type BufferFile

type BufferFile struct {
	Reader *bytes.Reader
	// contains filtered or unexported fields
}

BufferFile allows reading parquet messages from a memory buffer.

func (BufferFile) Close

func (bf BufferFile) Close() error

Close is a no-op for a memory buffer.

func (BufferFile) Create

func (bf BufferFile) Create(name string) (ParquetFile, error)

func (BufferFile) Open

func (bf BufferFile) Open(name string) (ParquetFile, error)

func (BufferFile) Read

func (bf BufferFile) Read(p []byte) (cnt int, err error)

Read reads data from BufferFile into p.

func (BufferFile) Seek

func (bf BufferFile) Seek(offset int64, pos int) (int64, error)

Seek seeks in the underlying memory buffer.

func (BufferFile) Write

func (bf BufferFile) Write(p []byte) (int, error)

Write writes data from p into BufferFile.

type GcsFile

type GcsFile struct {
	ProjectId  string
	BucketName string
	Ctx        context.Context

	Client     *storage.Client
	Bucket     *storage.BucketHandle
	FilePath   string
	FileReader *storage.Reader
	FileWriter *storage.Writer
}

func (*GcsFile) Close

func (self *GcsFile) Close() error

func (*GcsFile) Create

func (self *GcsFile) Create(name string) (ParquetFile, error)

func (*GcsFile) Open

func (self *GcsFile) Open(name string) (ParquetFile, error)

func (*GcsFile) Read

func (self *GcsFile) Read(b []byte) (cnt int, err error)

func (*GcsFile) Seek

func (self *GcsFile) Seek(offset int64, pos int) (int64, error)

func (*GcsFile) Write

func (self *GcsFile) Write(b []byte) (n int, err error)

type HdfsFile

type HdfsFile struct {
	Hosts []string
	User  string

	Client     *hdfs.Client
	FilePath   string
	FileReader *hdfs.FileReader
	FileWriter *hdfs.FileWriter
}

func (*HdfsFile) Close

func (self *HdfsFile) Close() error

func (*HdfsFile) Create

func (self *HdfsFile) Create(name string) (ParquetFile, error)

func (*HdfsFile) Open

func (self *HdfsFile) Open(name string) (ParquetFile, error)

func (*HdfsFile) Read

func (self *HdfsFile) Read(b []byte) (cnt int, err error)

func (*HdfsFile) Seek

func (self *HdfsFile) Seek(offset int64, pos int) (int64, error)

func (*HdfsFile) Write

func (self *HdfsFile) Write(b []byte) (n int, err error)

type LocalFile

type LocalFile struct {
	FilePath string
	File     *os.File
}

func (*LocalFile) Close

func (self *LocalFile) Close() error

func (*LocalFile) Create

func (self *LocalFile) Create(name string) (ParquetFile, error)

func (*LocalFile) Open

func (self *LocalFile) Open(name string) (ParquetFile, error)

func (*LocalFile) Read

func (self *LocalFile) Read(b []byte) (cnt int, err error)

func (*LocalFile) Seek

func (self *LocalFile) Seek(offset int64, pos int) (int64, error)

func (*LocalFile) Write

func (self *LocalFile) Write(b []byte) (n int, err error)

type MemFile

type MemFile struct {
	FilePath string
	File     afero.File
	OnClose  OnCloseFunc
}

MemFile - ParquetFile type for in-memory file operations

func (*MemFile) Close

func (fs *MemFile) Close() error

Close - close file and execute OnCloseFunc

func (*MemFile) Create

func (fs *MemFile) Create(name string) (ParquetFile, error)

Create - create in-memory file

func (*MemFile) Open

func (fs *MemFile) Open(name string) (ParquetFile, error)

Open - open file in-memory

func (*MemFile) Read

func (fs *MemFile) Read(b []byte) (cnt int, err error)

Read - read file

func (*MemFile) Seek

func (fs *MemFile) Seek(offset int64, pos int) (int64, error)

Seek - seek function

func (*MemFile) Write

func (fs *MemFile) Write(b []byte) (n int, err error)

Write - write file in-memory

type OnCloseFunc

type OnCloseFunc func(string, io.Reader) error

OnCloseFunc is a function type that handles what to do after the converted file is closed in memory. Close() will pass the filename as a string and the data as an io.Reader.

type ParquetFile

type ParquetFile interface {
	io.Seeker
	io.Reader
	io.Writer
	io.Closer
	Open(name string) (ParquetFile, error)
	Create(name string) (ParquetFile, error)
}

func NewBufferFile

func NewBufferFile(b []byte) (ParquetFile, error)

NewBufferFile creates new in memory parquet buffer.

func NewGcsFileReader

func NewGcsFileReader(ctx context.Context, projectId string, bucketName string, name string) (ParquetFile, error)

func NewGcsFileWriter

func NewGcsFileWriter(ctx context.Context, projectId string, bucketName string, name string) (ParquetFile, error)

func NewHdfsFileReader

func NewHdfsFileReader(hosts []string, user string, name string) (ParquetFile, error)

func NewHdfsFileWriter

func NewHdfsFileWriter(hosts []string, user string, name string) (ParquetFile, error)

func NewLocalFileReader

func NewLocalFileReader(name string) (ParquetFile, error)

func NewLocalFileWriter

func NewLocalFileWriter(name string) (ParquetFile, error)

func NewMemFileWriter

func NewMemFileWriter(name string, f OnCloseFunc) (ParquetFile, error)

NewMemFileWriter - initiates and creates an instance of MemFile. NOTE: there is no NewMemFileReader, as this particular type was written to handle in-memory conversions and offloading. The results of a conversion can then be stored and read via HDFS, LocalFS, etc., without the need for loading the file back into memory directly.

func NewWriterFile

func NewWriterFile(writer io.Writer) ParquetFile

type WriterFile

type WriterFile struct {
	Writer io.Writer
}

func (*WriterFile) Close

func (self *WriterFile) Close() error

func (*WriterFile) Create

func (self *WriterFile) Create(name string) (ParquetFile, error)

func (*WriterFile) Open

func (self *WriterFile) Open(name string) (ParquetFile, error)

func (*WriterFile) Read

func (self *WriterFile) Read(b []byte) (int, error)

func (*WriterFile) Seek

func (self *WriterFile) Seek(offset int64, pos int) (int64, error)

func (*WriterFile) Write

func (self *WriterFile) Write(b []byte) (int, error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL