yacr

package module
v0.0.0-...-6edd46e Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 2, 2015 License: BSD-3-Clause Imports: 15 Imported by: 0

README

The changes from the parent repo are the following:

> diff -ru ../../gwenn/yacr/reader.go reader.go
--- ../../gwenn/yacr/reader.go  2014-10-27 21:40:04.000000000 -0700
+++ reader.go   2014-11-01 14:41:48.000000000 -0700
@@ -6,10 +6,10 @@
 package yacr

 import (
-   "bufio"
    "bytes"
    "encoding"
    "fmt"
+   "github.com/harikb/bufio"
    "io"
    "reflect"
    "strconv"
@@ -63,7 +63,7 @@
        if err := s.value(value, true); err != nil {
            return i, err
        } else if s.EndOfRecord() != (i == len(values)-1) {
-           return i, fmt.Errorf("unexpected number of fields: want %d, got %d", len(values), i+1)
+           return i, fmt.Errorf("unexpected number of fields: want %d, got %d (or more)", len(values), i+2)
        }
    }
    return len(values), nil

Yet another CSV reader (and writer) with small memory usage.

All credit goes to:

  • Rob Pike, creator of Scanner interface,
  • D. Richard Hipp, for his CSV parser implementation.

Build Status

GoDoc

There is a standard package named encoding/csv.

BenchmarkParsing	    5000	    381518 ns/op	 256.87 MB/s	    4288 B/op	       5 allocs/op
BenchmarkQuotedParsing	    5000	    487599 ns/op	 209.19 MB/s	    4288 B/op	       5 allocs/op
BenchmarkEmbeddedNL	    5000	    594618 ns/op	 201.81 MB/s	    4288 B/op	       5 allocs/op
BenchmarkStdParser	     500	   5026100 ns/op	  23.88 MB/s	  625499 B/op	   16037 allocs/op
BenchmarkYacrParser	    5000	    593165 ns/op	 202.30 MB/s	    4288 B/op	       5 allocs/op
BenchmarkYacrWriter	  200000	      9433 ns/op	  98.05 MB/s	    2755 B/op	       0 allocs/op
BenchmarkStdWriter	  100000	     27804 ns/op	  33.27 MB/s	    2755 B/op	       0 allocs/op

USAGES

Documentation

Overview

Package yacr is yet another CSV reader (and writer) with small memory usage.

Example
package main

import (
	"fmt"
	"os"

	yacr "github.com/gwenn/yacr"
)

// main streams CSV fields from standard input to standard output using a tab
// separator, with the quoted and guess options of the reader turned off and
// the quoted option of the writer turned off.
func main() {
	in := yacr.NewReader(os.Stdin, '\t', false, false)
	out := yacr.NewWriter(os.Stdout, '\t', false)

	// report prints a non-nil error on standard error.
	report := func(err error) {
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
		}
	}

	for in.Scan() {
		// Stop copying as soon as the writer refuses a field.
		if !out.Write(in.Bytes()) {
			break
		}
		// Mirror record boundaries of the input on the output.
		if in.EndOfRecord() {
			out.EndOfRecord()
		}
	}
	out.Flush()

	report(in.Err())
	report(out.Err())
}
Output:

Example (Reader)
package main

import (
	"fmt"
	"strings"

	yacr "github.com/gwenn/yacr"
)

// main scans a single CSV record field by field and prints the fields
// separated by spaces, opening the line with "[" and closing it with "]" once
// the end of the record is reached.
func main() {
	const input = "c1,\"c\"\"2\",\"c\n3\",\"c,4\""
	rd := yacr.DefaultReader(strings.NewReader(input))

	fmt.Print("[")
	for rd.Scan() {
		fmt.Print(rd.Text())

		// A field ending a record closes the bracket; any other field is
		// followed by a single space.
		suffix := " "
		if rd.EndOfRecord() {
			suffix = "]\n"
		}
		fmt.Print(suffix)
	}

	err := rd.Err()
	if err != nil {
		fmt.Println(err)
	}
}
Output:

[c1 c"2 c
3 c,4]
Example (Writer)
package main

import (
	"fmt"
	"os"

	yacr "github.com/gwenn/yacr"
)

// main writes four string fields to standard output through the default CSV
// writer, stopping at the first field the writer rejects, then flushes and
// reports any writer error on standard error.
func main() {
	fields := []string{"c1", "c\"2", "c\n3", "c,4"}
	out := yacr.DefaultWriter(os.Stdout)

	for i := 0; i < len(fields); i++ {
		if ok := out.WriteString(fields[i]); !ok {
			break
		}
	}
	out.Flush()

	err := out.Err()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}
Output:

c1,"c""2","c
3","c,4"

Index

Examples

Constants

This section is empty.

Variables

View Source
var (
	// ErrNewLine is the error returned when a value contains a newline
	// character while the writer is in unquoted mode.
	ErrNewLine = errors.New("yacr.Writer: newline character in value")
	// ErrSeparator is the error returned when a value contains the separator
	// while the writer is in unquoted mode.
	ErrSeparator = errors.New("yacr.Writer: separator in value")
)

Functions

func IsNumber

func IsNumber(s []byte) (isNum bool, isReal bool)

IsNumber determines if the string is a number or not.

func Zopen

func Zopen(filepath string) (io.ReadCloser, error)

Zopen transparently opens gzip/bzip files (based on their extension).

Types

type Reader

// Reader provides an interface for reading CSV data. It embeds a
// *bufio.Scanner, so successive calls to Scan step through the fields.
type Reader struct {
	*bufio.Scanner

	Trim    bool // Trim spaces (only on unquoted values). Breaks the RFC 4180 rule: "Spaces are considered part of a field and should not be ignored."
	Comment byte // Character marking the start of a line comment. When specified (not 0), a line comment appears as an empty line.
	Lazy    bool // Specifies whether quoted values may contain an unescaped quote that is not followed by a separator or a newline.

	Headers map[string]int // Index (first is 1) by header.
	// contains filtered or unexported fields
}

Reader provides an interface for reading CSV data (compatible with rfc4180 and extended with the option of having a separator other than ","). Successive calls to the Scan method will step through the 'fields', skipping the separator/newline between the fields. The EndOfRecord method tells when a field is terminated by a line break.

func DefaultReader

func DefaultReader(rd io.Reader) *Reader

DefaultReader creates a "standard" CSV reader (separator is comma and quoted mode active)

func NewReader

func NewReader(r io.Reader, sep byte, quoted, guess bool) *Reader

NewReader returns a new CSV scanner to read from r. When quoted is false, values must not contain a separator or newline.

func (*Reader) EndOfRecord

func (s *Reader) EndOfRecord() bool

EndOfRecord returns true when the most recent field has been terminated by a newline (not a separator).

func (*Reader) IsNumber

func (s *Reader) IsNumber() (isNum bool, isReal bool)

IsNumber determines if the current token is a number or not.

func (*Reader) LineNumber

func (s *Reader) LineNumber() int

LineNumber returns current line number (not record number)

func (*Reader) ScanField

func (s *Reader) ScanField(data []byte, atEOF bool) (advance int, token []byte, err error)

ScanField implements bufio.SplitFunc for CSV. Lexing is adapted from csv_read_one_field function in SQLite3 shell sources.

func (*Reader) ScanHeaders

func (s *Reader) ScanHeaders() error

ScanHeaders loads current line as the header line.

func (*Reader) ScanRecord

func (s *Reader) ScanRecord(values ...interface{}) (int, error)

ScanRecord decodes one line's fields into values. Empty lines are ignored/skipped. It's like fmt.Scan or database/sql.Rows.Scan. Returns (0, nil) on EOF, (*, err) on error and (n >= 1, nil) on success (n may be less or greater than len(values)).

  // Usage sketch: N is the number of destination values and s is the Reader;
  // both are declared outside this fragment.
  var n int
  var err error
  for {
    values := make([]string, N)
    // ScanRecord returns (0, nil) on EOF and a non-nil err on failure.
    if n, err = s.ScanRecord(&values[0]/*, &values[1], ...*/); err != nil || n == 0 {
      break // or error handling
    } else if (n > N) {
      // n may exceed the number of destinations; clamp it before slicing.
      n = N
  	 }
    for _, value := range values[0:n] {
      // ...
    }
	 }
  // err is still set here when the loop stopped because of a failure.
  if err != nil {
    // error handling
  }
Example
package main

import (
	"fmt"
	"strings"

	yacr "github.com/gwenn/yacr"
)

// main decodes four-column records into integers with ScanRecord and prints
// each record on its own line, wrapped in "[" and "]". The loop ends on the
// first error (which is printed) or on the first record that does not yield
// exactly four fields.
func main() {
	const input = "11,12,13,14\n21,22,23,24\n31,32,33,34\n41,42,43,44"
	rd := yacr.DefaultReader(strings.NewReader(input))

	var a, b, c, d int
	fmt.Print("[")
	for {
		count, err := rd.ScanRecord(&a, &b, &c, &d)
		if err != nil {
			fmt.Println(err)
			break
		}
		if count != 4 {
			break
		}
		fmt.Println(a, b, c, d)
	}
	fmt.Print("]")
}
Output:

[11 12 13 14
21 22 23 24
31 32 33 34
41 42 43 44
]

func (*Reader) ScanRecordByName

func (s *Reader) ScanRecordByName(args ...interface{}) (int, error)

ScanRecordByName decodes one line fields by name (name1, value1, ...).

func (*Reader) ScanValue

func (s *Reader) ScanValue(value interface{}) error

ScanValue advances to the next token and decodes field's content to value. The value may point to data that will be overwritten by a subsequent call to Scan.

func (*Reader) Sep

func (s *Reader) Sep() byte

Sep returns the values separator used/guessed

func (*Reader) SkipRecords

func (s *Reader) SkipRecords(n int) error

SkipRecords skips n records/headers

func (*Reader) Value

func (s *Reader) Value(value interface{}) error

Value decodes field's content to value. The value may point to data that will be overwritten by a subsequent call to Scan.

Example
package main

import (
	"fmt"
	"strings"

	yacr "github.com/gwenn/yacr"
)

// main scans a single CSV record and decodes every field into an int with
// Value, printing the numbers separated by spaces between "[" and "]". A
// decoding error is printed and stops the scan.
func main() {
	const input = "1,\"2\",3,4"
	rd := yacr.DefaultReader(strings.NewReader(input))

	var num int
	fmt.Print("[")
	for rd.Scan() {
		err := rd.Value(&num)
		if err != nil {
			fmt.Println(err)
			break
		}
		fmt.Print(num)

		// Close the bracket at the end of the record, otherwise separate the
		// numbers with a single space.
		suffix := " "
		if rd.EndOfRecord() {
			suffix = "]\n"
		}
		fmt.Print(suffix)
	}

	scanErr := rd.Err()
	if scanErr != nil {
		fmt.Println(scanErr)
	}
}
Output:

[1 2 3 4]

type Writer

// Writer provides an interface for writing CSV data. Successive calls to the
// Write method automatically insert the separator.
type Writer struct {
	UseCRLF bool // When true, \r\n is used as the line terminator.
	// contains filtered or unexported fields
}

Writer provides an interface for writing CSV data (compatible with rfc4180 and extended with the option of having a separator other than ","). Successive calls to the Write method will automatically insert the separator. The EndOfRecord method tells when a line break is inserted.

func DefaultWriter

func DefaultWriter(wr io.Writer) *Writer

DefaultWriter creates a "standard" CSV writer (separator is comma and quoted mode active)

func NewWriter

func NewWriter(w io.Writer, sep byte, quoted bool) *Writer

NewWriter returns a new CSV writer.

func (*Writer) EndOfRecord

func (w *Writer) EndOfRecord()

EndOfRecord tells when a line break must be inserted.

func (*Writer) Err

func (w *Writer) Err() error

Err returns the first error that was encountered by the Writer.

func (*Writer) Flush

func (w *Writer) Flush()

Flush ensures the writer's buffer is flushed.

func (*Writer) Write

func (w *Writer) Write(value []byte) bool

Write ensures that value is quoted when needed.

func (*Writer) WriteHeader

func (w *Writer) WriteHeader(headers []string) bool

WriteHeader writes a header. Headers are always strings

func (*Writer) WriteRecord

func (w *Writer) WriteRecord(values ...interface{}) bool

WriteRecord ensures that values are quoted when needed. It's like fmt.Println.

func (*Writer) WriteString

func (w *Writer) WriteString(value string) bool

WriteString ensures that value is quoted when needed.

func (*Writer) WriteValue

func (w *Writer) WriteValue(value interface{}) bool

WriteValue ensures that value is quoted when needed. Value's type/kind is used to encode value to text.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL