arrow: github.com/apache/arrow/go/arrow/csv

package csv

import "github.com/apache/arrow/go/arrow/csv"

Package csv reads CSV files and presents the extracted data as records. It also writes records out as CSV files.

Code:

f := bytes.NewBufferString(`## a simple set of data: int64;float64;string
0;0;str-0
1;1;str-1
2;2;str-2
3;3;str-3
4;4;str-4
5;5;str-5
6;6;str-6
7;7;str-7
8;8;str-8
9;9;str-9
`)

schema := arrow.NewSchema(
    []arrow.Field{
        {Name: "i64", Type: arrow.PrimitiveTypes.Int64},
        {Name: "f64", Type: arrow.PrimitiveTypes.Float64},
        {Name: "str", Type: arrow.BinaryTypes.String},
    },
    nil,
)
r := csv.NewReader(f, schema, csv.WithComment('#'), csv.WithComma(';'))
defer r.Release()

n := 0
for r.Next() {
    rec := r.Record()
    for i, col := range rec.Columns() {
        fmt.Printf("rec[%d][%q]: %v\n", n, rec.ColumnName(i), col)
    }
    n++
}

Output:

rec[0]["i64"]: [0]
rec[0]["f64"]: [0]
rec[0]["str"]: ["str-0"]
rec[1]["i64"]: [1]
rec[1]["f64"]: [1]
rec[1]["str"]: ["str-1"]
rec[2]["i64"]: [2]
rec[2]["f64"]: [2]
rec[2]["str"]: ["str-2"]
rec[3]["i64"]: [3]
rec[3]["f64"]: [3]
rec[3]["str"]: ["str-3"]
rec[4]["i64"]: [4]
rec[4]["f64"]: [4]
rec[4]["str"]: ["str-4"]
rec[5]["i64"]: [5]
rec[5]["f64"]: [5]
rec[5]["str"]: ["str-5"]
rec[6]["i64"]: [6]
rec[6]["f64"]: [6]
rec[6]["str"]: ["str-6"]
rec[7]["i64"]: [7]
rec[7]["f64"]: [7]
rec[7]["str"]: ["str-7"]
rec[8]["i64"]: [8]
rec[8]["f64"]: [8]
rec[8]["str"]: ["str-8"]
rec[9]["i64"]: [9]
rec[9]["f64"]: [9]
rec[9]["str"]: ["str-9"]

Code:

f := bytes.NewBufferString(`## a simple set of data: int64;float64;string
0;0;str-0
1;1;str-1
2;2;str-2
3;3;str-3
4;4;str-4
5;5;str-5
6;6;str-6
7;7;str-7
8;8;str-8
9;9;str-9
`)

schema := arrow.NewSchema(
    []arrow.Field{
        {Name: "i64", Type: arrow.PrimitiveTypes.Int64},
        {Name: "f64", Type: arrow.PrimitiveTypes.Float64},
        {Name: "str", Type: arrow.BinaryTypes.String},
    },
    nil,
)
r := csv.NewReader(
    f, schema,
    csv.WithComment('#'), csv.WithComma(';'),
    csv.WithChunk(3),
)
defer r.Release()

n := 0
for r.Next() {
    rec := r.Record()
    for i, col := range rec.Columns() {
        fmt.Printf("rec[%d][%q]: %v\n", n, rec.ColumnName(i), col)
    }
    n++
}

Output:

rec[0]["i64"]: [0 1 2]
rec[0]["f64"]: [0 1 2]
rec[0]["str"]: ["str-0" "str-1" "str-2"]
rec[1]["i64"]: [3 4 5]
rec[1]["f64"]: [3 4 5]
rec[1]["str"]: ["str-3" "str-4" "str-5"]
rec[2]["i64"]: [6 7 8]
rec[2]["f64"]: [6 7 8]
rec[2]["str"]: ["str-6" "str-7" "str-8"]
rec[3]["i64"]: [9]
rec[3]["f64"]: [9]
rec[3]["str"]: ["str-9"]

Code:

f := new(bytes.Buffer)

pool := memory.NewGoAllocator()
schema := arrow.NewSchema(
    []arrow.Field{
        {Name: "i64", Type: arrow.PrimitiveTypes.Int64},
        {Name: "f64", Type: arrow.PrimitiveTypes.Float64},
        {Name: "str", Type: arrow.BinaryTypes.String},
    },
    nil,
)

b := array.NewRecordBuilder(pool, schema)
defer b.Release()

b.Field(0).(*array.Int64Builder).AppendValues([]int64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, nil)
b.Field(1).(*array.Float64Builder).AppendValues([]float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, nil)
b.Field(2).(*array.StringBuilder).AppendValues([]string{"str-0", "str-1", "str-2", "str-3", "str-4", "str-5", "str-6", "str-7", "str-8", "str-9"}, nil)

rec := b.NewRecord()
defer rec.Release()

w := csv.NewWriter(f, schema, csv.WithComma(';'))
err := w.Write(rec)
if err != nil {
    log.Fatal(err)
}

err = w.Flush()
if err != nil {
    log.Fatal(err)
}

err = w.Error()
if err != nil {
    log.Fatal(err)
}

r := csv.NewReader(f, schema, csv.WithComment('#'), csv.WithComma(';'))
defer r.Release()

n := 0
for r.Next() {
    rec := r.Record()
    for i, col := range rec.Columns() {
        fmt.Printf("rec[%d][%q]: %v\n", n, rec.ColumnName(i), col)
    }
    n++
}

Output:

rec[0]["i64"]: [0]
rec[0]["f64"]: [0]
rec[0]["str"]: ["str-0"]
rec[1]["i64"]: [1]
rec[1]["f64"]: [1]
rec[1]["str"]: ["str-1"]
rec[2]["i64"]: [2]
rec[2]["f64"]: [2]
rec[2]["str"]: ["str-2"]
rec[3]["i64"]: [3]
rec[3]["f64"]: [3]
rec[3]["str"]: ["str-3"]
rec[4]["i64"]: [4]
rec[4]["f64"]: [4]
rec[4]["str"]: ["str-4"]
rec[5]["i64"]: [5]
rec[5]["f64"]: [5]
rec[5]["str"]: ["str-5"]
rec[6]["i64"]: [6]
rec[6]["f64"]: [6]
rec[6]["str"]: ["str-6"]
rec[7]["i64"]: [7]
rec[7]["f64"]: [7]
rec[7]["str"]: ["str-7"]
rec[8]["i64"]: [8]
rec[8]["f64"]: [8]
rec[8]["str"]: ["str-8"]
rec[9]["i64"]: [9]
rec[9]["f64"]: [9]
rec[9]["str"]: ["str-9"]

Package Files

common.go reader.go writer.go

Variables

var (
    ErrMismatchFields = errors.New("arrow/csv: number of records mismatch")
)

type Option

type Option func(config)

Option configures a CSV reader/writer.

func WithAllocator

func WithAllocator(mem memory.Allocator) Option

WithAllocator specifies the Arrow memory allocator used while building records.
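
For example, a reader can be handed an explicit allocator. The sketch below reuses the f and schema variables from the reader example above and passes the default Go allocator:

pool := memory.NewGoAllocator()

r := csv.NewReader(f, schema,
    csv.WithAllocator(pool), // records are built using this allocator
    csv.WithComment('#'), csv.WithComma(';'),
)
defer r.Release()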

func WithCRLF

func WithCRLF(useCRLF bool) Option

WithCRLF specifies the line terminator used while writing CSV files. If useCRLF is true, \r\n is used as the line terminator, otherwise \n is used. The default value is false.
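
As a sketch, reusing f and schema from the writer example above, a writer that terminates each row with \r\n would be built as:

w := csv.NewWriter(f, schema,
    csv.WithComma(';'),
    csv.WithCRLF(true), // write \r\n instead of \n at the end of each row
)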

func WithChunk

func WithChunk(n int) Option

WithChunk specifies the chunk size used while parsing CSV files.

If n is zero or 1, no chunking will take place and the reader will create one record per row. If n is greater than 1, chunks of n rows will be read. If n is negative, the reader will load the whole CSV file into memory and create one big record with all the rows.
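
For instance, with a negative chunk size the whole file ends up in a single record. The sketch below reuses f and schema from the reader example above:

r := csv.NewReader(f, schema,
    csv.WithComment('#'), csv.WithComma(';'),
    csv.WithChunk(-1), // read the entire CSV file into one record
)
defer r.Release()

for r.Next() {
    rec := r.Record()
    fmt.Println(rec.NumRows()) // 10: every row of the example data in one record
}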

func WithComma

func WithComma(c rune) Option

WithComma specifies the field separator character used while parsing CSV files.

func WithComment

func WithComment(c rune) Option

WithComment specifies the comment character used while parsing CSV files.

func WithHeader

func WithHeader() Option

type Reader

type Reader struct {
    // contains filtered or unexported fields
}

Reader wraps encoding/csv.Reader and creates array.Records from a schema.

func NewReader

func NewReader(r io.Reader, schema *arrow.Schema, opts ...Option) *Reader

NewReader returns a reader that reads from the CSV file and creates array.Records from the given schema.

NewReader panics if the given schema contains fields that have types that are not primitive types.
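
As an illustration (a sketch, assuming a nested type such as arrow.ListOf is used to trigger the panic), constructing a reader with a non-primitive field would look like this:

schema := arrow.NewSchema(
    []arrow.Field{
        {Name: "vals", Type: arrow.ListOf(arrow.PrimitiveTypes.Int64)}, // not a primitive type
    },
    nil,
)

// panics: list-typed fields are not supported by the CSV reader
r := csv.NewReader(f, schema)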

func (*Reader) Err

func (r *Reader) Err() error

Err returns the last error encountered during the iteration over the underlying CSV file.
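
The usual pattern is to check Err once Next returns false, for example:

for r.Next() {
    // process r.Record()
}
if err := r.Err(); err != nil {
    log.Fatal(err)
}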

func (*Reader) Next

func (r *Reader) Next() bool

Next returns whether a Record could be extracted from the underlying CSV file.

Next panics if the number of records extracted from a CSV row does not match the number of fields of the associated schema.

func (*Reader) Record

func (r *Reader) Record() array.Record

Record returns the current record that has been extracted from the underlying CSV file. It is valid until the next call to Next.
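
A record that must outlive the iteration therefore has to be retained explicitly and released once it is no longer needed, as in this sketch:

var recs []array.Record
for r.Next() {
    rec := r.Record()
    rec.Retain() // keep the record alive past the next call to Next
    recs = append(recs, rec)
}

for _, rec := range recs {
    // ... use rec ...
    rec.Release() // free the record once done
}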

func (*Reader) Release

func (r *Reader) Release()

Release decreases the reference count by 1. When the reference count goes to zero, the memory is freed. Release may be called simultaneously from multiple goroutines.

func (*Reader) Retain

func (r *Reader) Retain()

Retain increases the reference count by 1. Retain may be called simultaneously from multiple goroutines.

func (*Reader) Schema

func (r *Reader) Schema() *arrow.Schema

type Writer

type Writer struct {
    // contains filtered or unexported fields
}

Writer wraps encoding/csv.Writer and writes array.Record based on a schema.

func NewWriter

func NewWriter(w io.Writer, schema *arrow.Schema, opts ...Option) *Writer

NewWriter returns a writer that writes array.Records to the CSV file with the given schema.

NewWriter panics if the given schema contains fields that have types that are not primitive types.

func (*Writer) Error

func (w *Writer) Error() error

Error reports any error that has occurred during a previous Write or Flush.

func (*Writer) Flush

func (w *Writer) Flush() error

Flush writes any buffered data to the underlying CSV writer and returns any error that occurred during the flush.

func (*Writer) Schema

func (w *Writer) Schema() *arrow.Schema

func (*Writer) Write

func (w *Writer) Write(record array.Record) error

Write writes a single Record as one row to the CSV file.

Package csv imports 12 packages. Updated 2019-04-25.