Documentation ¶
Overview ¶
Package byline implements the io.Reader interface for processing an io.Reader line by line. You can apply UNIX text-processing principles to its Reader (as with awk, grep, sed, ...).
Install
go get -u github.com/msoap/byline
Usage
import "github.com/msoap/byline" // Create a new line-by-line Reader from an io.Reader: lr := byline.NewReader(reader) // Add a stack of filter functions to the Reader: lr.MapString(func(line string) string {return "prefix_" + line}).GrepByRegexp(regexp.MustCompile("only this")) // Read all content: result, err := lr.ReadAll() // Or use it anywhere in place of an io.Reader: _, err := io.Copy(os.Stdout, lr) // Or do everything in one expression: result, err := byline.NewReader(reader).MapString(func(line string) string {return "prefix_" + line}).ReadAll()
Example ¶
package main import ( "bytes" "fmt" "io" "regexp" "strings" "github.com/msoap/byline" ) func main() { reader := strings.NewReader(`CSV Title CSV description ID,NAME,PRICE A001,name one,12.3 A002,second row;7.1 A003,three row;15.51 Total: .... Some text `) lr := byline.NewReader(reader). GrepString(func(line string) bool { // skip empty lines return line != "" && line != "\n" }). Grep(func(line []byte) bool { return !bytes.HasPrefix(line, []byte("CSV")) }). SetFS(regexp.MustCompile(`[,;]`)). AWKMode(func(line string, fields []string, vars byline.AWKVars) (string, error) { // skip header if strings.HasPrefix(fields[0], "ID") { return "", byline.ErrOmitLine } // skip footer if strings.HasPrefix(fields[0], "Total:") { return "", io.EOF } return line, nil }). MapString(func(line string) string { return "Z" + line }). AWKMode(func(line string, fields []string, vars byline.AWKVars) (string, error) { if vars.NF < 3 { return "", fmt.Errorf("csv parse failed for %q", line) } return fmt.Sprintf("%s - %s (line:%d)", fields[0], fields[1], vars.NR), nil }) result, err := lr.ReadAllString() fmt.Print("\n", result, err) }
Output: ZA001 - name one (line:4) ZA002 - second row (line:6) ZA003 - three row (line:7) <nil>
Index ¶
- Variables
- type AWKVars
- type Reader
- func (lr *Reader) AWKMode(filterFn func(line string, fields []string, vars AWKVars) (string, error)) *Reader
- func (lr *Reader) Discard() error
- func (lr *Reader) Each(filterFn func([]byte)) *Reader
- func (lr *Reader) EachString(filterFn func(string)) *Reader
- func (lr *Reader) Grep(filterFn func([]byte) bool) *Reader
- func (lr *Reader) GrepByRegexp(re *regexp.Regexp) *Reader
- func (lr *Reader) GrepString(filterFn func(string) bool) *Reader
- func (lr *Reader) Map(filterFn func([]byte) []byte) *Reader
- func (lr *Reader) MapErr(filterFn func([]byte) ([]byte, error)) *Reader
- func (lr *Reader) MapString(filterFn func(string) string) *Reader
- func (lr *Reader) MapStringErr(filterFn func(string) (string, error)) *Reader
- func (lr *Reader) Read(p []byte) (n int, err error)
- func (lr *Reader) ReadAll() ([]byte, error)
- func (lr *Reader) ReadAllSlice() ([][]byte, error)
- func (lr *Reader) ReadAllSliceString() ([]string, error)
- func (lr *Reader) ReadAllString() (string, error)
- func (lr *Reader) SetFS(fs *regexp.Regexp) *Reader
- func (lr *Reader) SetRS(rs byte) *Reader
Examples ¶
Constants ¶
This section is empty.
Variables ¶
var ( // ErrOmitLine - error for Map*Err/AWKMode, for omitting current line ErrOmitLine = errors.New("ErrOmitLine") // ErrNilReader - error for provided reader being nil ErrNilReader = errors.New("nil reader") )
Functions ¶
This section is empty.
Types ¶
type AWKVars ¶
type AWKVars struct { NR int // number of the current line (begin from 1) NF int // number of fields in the current line RS byte // record separator, default is '\n' FS *regexp.Regexp // field separator, default is `\s+` }
AWKVars - variables and settings for AWK mode (see man awk)
type Reader ¶
type Reader struct {
// contains filtered or unexported fields
}
Reader - line by line Reader
func (*Reader) AWKMode ¶
func (lr *Reader) AWKMode(filterFn func(line string, fields []string, vars AWKVars) (string, error)) *Reader
AWKMode - process lines in an AWK-like mode: the filter function receives the line, its fields, and the AWK variables
Example ¶
package main import ( "fmt" "io" "regexp" "strconv" "strings" "github.com/msoap/byline" ) func main() { reader := strings.NewReader(`ID,NAME,PRICE A001,name one,12.3 A002,second row;7.1 A003,three row;15.51 Total: .... Some text `) sum := 0.0 lr := byline.NewReader(reader). SetFS(regexp.MustCompile(`[,;]`)). AWKMode(func(line string, fields []string, vars byline.AWKVars) (string, error) { if vars.NR == 1 { // skip first line return "", byline.ErrOmitLine } if vars.NF > 0 && strings.HasPrefix(fields[0], "Total:") { // skip rest of file return "", io.EOF } if vars.NF < 3 { return "", fmt.Errorf("csv parse failed for %q", line) } if price, err := strconv.ParseFloat(fields[2], 10); err != nil { return "", err } else if price < 10 { return "", byline.ErrOmitLine } else { sum += price } return fmt.Sprintf("line:%d. %s - %s", vars.NR, fields[0], fields[1]), nil }) result, err := lr.ReadAllString() if err != nil { fmt.Println(err) return } fmt.Print(result) fmt.Printf("Sum: %.2f", sum) }
Output: line:2. A001 - name one line:4. A003 - three row Sum: 27.81
func (*Reader) Discard ¶
Discard - read all content from the Reader and throw it away; useful when only the side effects of the filter functions are needed
func (*Reader) Each ¶
Each - process each line. Do not retain the byte slice, since it can change in subsequent filter steps.
Example ¶
package main import ( "fmt" "strings" "github.com/msoap/byline" ) func main() { reader := strings.NewReader(`1 1 1 2 2 2 3 3 3 `) spacesCount, bytesCount, linesCount := 0, 0, 0 err := byline.NewReader(reader). Each(func(line []byte) { linesCount++ bytesCount += len(line) for _, b := range line { if b == ' ' { spacesCount++ } } }).Discard() if err == nil { fmt.Printf("spaces: %d, bytes: %d, lines: %d\n", spacesCount, bytesCount, linesCount) } }
Output: spaces: 6, bytes: 18, lines: 3
func (*Reader) EachString ¶
EachString - process each line as a string
Example ¶
package main import ( "fmt" "strings" "github.com/msoap/byline" ) func main() { reader := strings.NewReader(`111 222 333 `) result := []string{} err := byline.NewReader(reader). EachString(func(line string) { result = append(result, line) }).Discard() if err == nil { fmt.Printf("%q\n", result) } }
Output: ["111\n" "222\n" "333\n"]
func (*Reader) Grep ¶
Grep - keep only the lines for which the filter function returns true
Example ¶
package main import ( "fmt" "os" "regexp" "github.com/msoap/byline" ) type StateMachine struct { beginRe *regexp.Regexp endRe *regexp.Regexp inBlock bool } func (sm *StateMachine) SMFilter(line []byte) bool { switch { case sm.beginRe.Match(line): sm.inBlock = true return true case sm.inBlock && sm.endRe.Match(line): sm.inBlock = false return true default: return sm.inBlock } } func main() { file, err := os.Open("byline.go") if err != nil { fmt.Println(err) return } // get all lines between "^type..." and "^}" sm := StateMachine{ beginRe: regexp.MustCompile(`^type `), endRe: regexp.MustCompile(`^}\s+$`), } lr := byline.NewReader(file).Grep(sm.SMFilter).Map(func(line []byte) []byte { // and remove comments return regexp.MustCompile(`\s+//.+`).ReplaceAll(line, []byte{}) }) result, err := lr.ReadAllString() if err != nil { fmt.Println(err) return } fmt.Print("\n" + result) }
Output: type Reader struct { scanner *bufio.Scanner buffer bytes.Buffer existsData bool filterFuncs []func(line []byte) ([]byte, error) awkVars AWKVars } type AWKVars struct { NR int NF int RS byte FS *regexp.Regexp }
func (*Reader) GrepByRegexp ¶
GrepByRegexp - keep only the lines that match the regexp
Example ¶
package main import ( "fmt" "regexp" "strings" "github.com/msoap/byline" ) func main() { reader := strings.NewReader(`ID,NAME,PRICE A001,name one,12.3 A002,second row;7.1 A003,three row;15.51 Total: .... Some text `) result, err := byline.NewReader(reader).GrepByRegexp(regexp.MustCompile(`^A\d+,`)).ReadAllString() fmt.Print("\n"+result, err) }
Output: A001,name one,12.3 A002,second row;7.1 A003,three row;15.51 <nil>
func (*Reader) GrepString ¶
GrepString - keep only the lines (passed as strings) for which the filter function returns true
func (*Reader) MapErr ¶
MapErr - set a filter function for processing each line; the function may return an error if needed (io.EOF, for example)
func (*Reader) MapStringErr ¶
MapStringErr - set a filter function for processing each line as a string; the function may return an error if needed (io.EOF, for example)
Example ¶
package main import ( "fmt" "io" "strings" "github.com/msoap/byline" ) func main() { reader := strings.NewReader(` 100000 200000 300000 end ... Some text `) result, err := byline.NewReader(reader). MapStringErr(func(line string) (string, error) { switch { case line == "" || line == "\n": return "", byline.ErrOmitLine case strings.HasPrefix(line, "end "): return "", io.EOF default: return "<" + line, nil } }). ReadAllString() fmt.Print("\n"+result, err) }
Output: <100000 <200000 <300000 <nil>
func (*Reader) ReadAllSlice ¶
ReadAllSlice - read all content from the Reader line by line into a slice of []byte
func (*Reader) ReadAllSliceString ¶
ReadAllSliceString - read all content from the Reader line by line into a slice of strings
func (*Reader) ReadAllString ¶
ReadAllString - read all content from the Reader into a single string