linear

package
v1.0.4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 29, 2021 License: BSD-3-Clause Imports: 5 Imported by: 106

Documentation

Overview

Package linear handles single sequences.

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type QSeq

type QSeq struct {
	seq.Annotation
	Seq       alphabet.QLetters
	Threshold alphabet.Qphred // Threshold for returning valid letter.
	QFilter   seq.QFilter     // How to represent below threshold letter.
	Encode    alphabet.Encoding
}

A QSeq is a basic linear sequence with Phred quality scores.

func NewQSeq

func NewQSeq(id string, ql []alphabet.QLetter, alpha alphabet.Alphabet, enc alphabet.Encoding) *QSeq

NewQSeq creates a new QSeq with the given id, letter sequence, alphabet and quality encoding.

Example
d := NewQSeq("example DNA", []alphabet.QLetter{{'A', 40}, {'C', 39}, {'G', 40}, {'C', 38}, {'T', 35}, {'G', 20}}, alphabet.DNA, alphabet.Sanger)
fmt.Printf("%-s %v\n", d, d.Moltype())
Output:

ACGCTG DNA

func (*QSeq) AppendLetters

func (s *QSeq) AppendLetters(a ...alphabet.Letter) error

AppendLetters appends Letters to the sequence; the DefaultQphred value is used for quality scores.

func (*QSeq) AppendQLetters

func (s *QSeq) AppendQLetters(a ...alphabet.QLetter) error

AppendQLetters appends QLetters to the sequence.

Example
q := []alphabet.Qphred{
	1, 13, 19, 22, 19, 18, 20, 23, 23, 20, 16, 21, 24, 22, 22, 18, 17, 18, 22, 23, 22, 24, 22, 24, 20, 15,
	18, 18, 19, 19, 20, 12, 18, 17, 20, 20, 20, 18, 15, 18, 24, 21, 13, 8, 15, 20, 20, 19, 20, 20, 20, 18,
	16, 16, 16, 10, 15, 18, 18, 18, 11, 1, 11, 20, 19, 18, 18, 16, 10, 12, 22, 0, 0, 0, 0}
l := []alphabet.Letter("NTTTCTTCTATATCCTTTTCATCTTTTAATCCATTCACCATTTTTTTCCCTCCACCTACCTNTCCTTCTCTTTCT")
s := NewQSeq("example DNA", nil, alphabet.DNA, alphabet.Sanger)

for i := range l {
	s.AppendQLetters(alphabet.QLetter{L: l[i], Q: q[i]})
}
fmt.Println("Forward:")
fmt.Printf("%-s\n", s)
s.RevComp()
fmt.Println("Reverse:")
fmt.Printf("%-s\n", s)
Output:

Forward:
nTTTCTTCTATATCCTTTTCATCTTTTAATCCATTCACCATTTTTTTCCCTCCACCTACCTnTCCTTCTCTnnnn
Reverse:
nnnnAGAGAAGGAnAGGTAGGTGGAGGGAAAAAAATGGTGAATGGATTAAAAGATGAAAAGGATATAGAAGAAAn

func (*QSeq) At

func (s *QSeq) At(i int) alphabet.QLetter

At returns the letter at position i.

func (*QSeq) Clone

func (s *QSeq) Clone() seq.Sequence

Clone returns a copy of the sequence.

func (*QSeq) EAt

func (s *QSeq) EAt(i int) float64

EAt returns the probability of a sequence error at position i.

func (*QSeq) Encoding

func (s *QSeq) Encoding() alphabet.Encoding

Encoding returns the quality encoding scheme.

func (*QSeq) End

func (s *QSeq) End() int

End returns the end position of the sequence in coordinates relative to the sequence location.

func (*QSeq) Format

func (s *QSeq) Format(fs fmt.State, c rune)

Format is a support routine for fmt.Formatter. It accepts the formats 'v' and 's' (string), 'a' (fasta) and 'q' (fastq). String, fasta and fastq formats support truncated output via the verb's precision. Fasta format supports sequence line specification via the verb's width field. Fastq format supports optional inclusion of the '+' line descriptor line with the '+' flag. The 'v' verb supports the '#' flag for Go syntax output. The 's' and 'v' formats support the '-' flag for omission of the sequence name.

func (*QSeq) Len

func (s *QSeq) Len() int

Len returns the length of the sequence.

func (*QSeq) New

func (s *QSeq) New() seq.Sequence

New returns an empty *QSeq sequence with the same alphabet.

func (*QSeq) QEncode

func (s *QSeq) QEncode(i int) byte

QEncode encodes the quality at position i to a letter based on the sequence encoding setting.

func (*QSeq) RevComp

func (s *QSeq) RevComp()

RevComp reverse complements the sequence. RevComp will panic if the alphabet used by the receiver is not a Complementor.

func (*QSeq) Reverse

func (s *QSeq) Reverse()

Reverse reverses the order of letters in the sequence without complementing them.

func (*QSeq) Set

func (s *QSeq) Set(i int, l alphabet.QLetter) error

Set sets the letter at position i to l.

func (*QSeq) SetE

func (s *QSeq) SetE(i int, e float64) error

SetE sets the quality at position i to reflect the given error probability e, p(Error).

func (*QSeq) SetEncoding

func (s *QSeq) SetEncoding(e alphabet.Encoding) error

SetEncoding sets the quality encoding scheme to e.

func (*QSeq) SetSlice

func (s *QSeq) SetSlice(sl alphabet.Slice)

SetSlice sets the sequence data represented by the sequence. SetSlice will panic if sl is not an alphabet.QLetters.

func (*QSeq) Slice

func (s *QSeq) Slice() alphabet.Slice

Slice returns the sequence data as an alphabet.Slice.

func (*QSeq) Start

func (s *QSeq) Start() int

Start returns the start position of the sequence in coordinates relative to the sequence location.

func (*QSeq) String

func (s *QSeq) String() string

String returns a string representation of the sequence data only.

func (*QSeq) Validate

func (s *QSeq) Validate() (bool, int)

Validate validates the letters of the sequence according to the sequence alphabet.

Example
r := NewQSeq("example RNA", []alphabet.QLetter{{'A', 40}, {'C', 39}, {'G', 40}, {'C', 38}, {'T', 35}, {'G', 20}}, alphabet.RNA, alphabet.Sanger)
fmt.Printf("%-s %v\n", r, r.Moltype())
if ok, pos := r.Validate(); ok {
	fmt.Println("valid RNA")
} else {
	fmt.Println(strings.Repeat(" ", pos-1), "^ first invalid RNA position")
}
Output:

ACGCTG RNA
    ^ first invalid RNA position

type Seq

type Seq struct {
	seq.Annotation
	Seq alphabet.Letters
}

A Seq is a basic linear sequence.

Example (Compose)
s := NewSeq("example DNA", []alphabet.Letter("aAGTATAAgtcagtgcagtgtctggcag<TS>gtagtgaagtagggttagttta"), alphabet.DNA)
f := fs{
	fe{s: 0, e: 32},
	fe{s: 1, e: 8, st: -1},
	fe{s: 28, e: s.Len() - 1},
}
fmt.Printf("%-s\n", s)
if err := sequtils.Compose(s, s, f); err == nil {
	fmt.Printf("%-s\n", s)
}
Output:

aAGTATAAgtcagtgcagtgtctggcag<TS>gtagtgaagtagggttagttta
aAGTATAAgtcagtgcagtgtctggcag<TS>TTATACT<TS>gtagtgaagtagggttagttt
Example (Join)
var s1, s2 *Seq

s1 = NewSeq("a", []alphabet.Letter("agctgtgctga"), alphabet.DNA)
s2 = NewSeq("b", []alphabet.Letter("CGTGCAGTCATGAGTGA"), alphabet.DNA)
fmt.Printf("%-s %-s\n", s1, s2)
if err := sequtils.Join(s1, s2, seq.Start); err == nil {
	fmt.Printf("%-s\n", s1)
}

s1 = NewSeq("a", []alphabet.Letter("agctgtgctga"), alphabet.DNA)
s2 = NewSeq("b", []alphabet.Letter("CGTGCAGTCATGAGTGA"), alphabet.DNA)
if err := sequtils.Join(s1, s2, seq.End); err == nil {
	fmt.Printf("%-s\n", s1)
}
Output:

agctgtgctga CGTGCAGTCATGAGTGA
CGTGCAGTCATGAGTGAagctgtgctga
agctgtgctgaCGTGCAGTCATGAGTGA
Example (Stitch)
s := NewSeq("example DNA", []alphabet.Letter("aAGTATAAgtcagtgcagtgtctggcagTGCTCGTGCgtagtgaagtagGGTTAGTTTa"), alphabet.DNA)
f := fs{
	fe{s: 1, e: 8},
	fe{s: 28, e: 37},
	fe{s: 49, e: s.Len() - 1},
}
fmt.Printf("%-s\n", s)
if err := sequtils.Stitch(s, s, f); err == nil {
	fmt.Printf("%-s\n", s)
}
Output:

aAGTATAAgtcagtgcagtgtctggcagTGCTCGTGCgtagtgaagtagGGTTAGTTTa
AGTATAATGCTCGTGCGGTTAGTTT
Example (Truncate_a)
s := NewSeq("example DNA", []alphabet.Letter("ACGCTGACTTGGTGCACGT"), alphabet.DNA)
fmt.Printf("%-s\n", s)
if err := sequtils.Truncate(s, s, 5, 12); err == nil {
	fmt.Printf("%-s\n", s)
}
Output:

ACGCTGACTTGGTGCACGT
GACTTGG
Example (Truncate_b)
var s *Seq

s = NewSeq("example DNA", []alphabet.Letter("ACGCTGACTTGGTGCACGT"), alphabet.DNA)
s.Conform = feat.Circular
fmt.Printf("%-s Conformation = %v\n", s, s.Conformation())
if err := sequtils.Truncate(s, s, 12, 5); err == nil {
	fmt.Printf("%-s Conformation = %v\n", s, s.Conformation())
} else {
	fmt.Println("Error:", err)
}

s = NewSeq("example DNA", []alphabet.Letter("ACGCTGACTTGGTGCACGT"), alphabet.DNA)
fmt.Printf("%-s Conformation = %v\n", s, s.Conformation())
if err := sequtils.Truncate(s, s, 12, 5); err == nil {
	fmt.Printf("%-s Conformation = %v\n", s, s.Conformation())
} else {
	fmt.Println("Error:", err)
}
Output:

ACGCTGACTTGGTGCACGT Conformation = circular
TGCACGTACGCT Conformation = linear
ACGCTGACTTGGTGCACGT Conformation = linear
Error: sequtils: start position greater than end position for linear sequence

func NewSeq

func NewSeq(id string, b []alphabet.Letter, alpha alphabet.Alphabet) *Seq

NewSeq creates a new Seq with the given id, letter sequence and alphabet.

Example
d := NewSeq("example DNA", []alphabet.Letter("ACGCTGACTTGGTGCACGT"), alphabet.DNA)
fmt.Printf("%-s %v\n", d, d.Moltype())
Output:

ACGCTGACTTGGTGCACGT DNA

func (*Seq) AppendLetters

func (s *Seq) AppendLetters(a ...alphabet.Letter) error

AppendLetters appends Letters to the sequence.

func (*Seq) AppendQLetters

func (s *Seq) AppendQLetters(a ...alphabet.QLetter) error

AppendQLetters appends QLetters to the sequence, ignoring the Q component.

func (*Seq) At

func (s *Seq) At(i int) alphabet.QLetter

At returns the letter at position i.

func (*Seq) Clone

func (s *Seq) Clone() seq.Sequence

Clone returns a copy of the sequence.

func (*Seq) End

func (s *Seq) End() int

End returns the end position of the sequence in coordinates relative to the sequence location.

func (*Seq) Format

func (s *Seq) Format(fs fmt.State, c rune)

Format is a support routine for fmt.Formatter. It accepts the formats 'v' and 's' (string), 'a' (fasta) and 'q' (fastq). String, fasta and fastq formats support truncated output via the verb's precision. Fasta format supports sequence line specification via the verb's width field. Fastq format supports optional inclusion of the '+' line descriptor line with the '+' flag. The 'v' verb supports the '#' flag for Go syntax output. The 's' and 'v' formats support the '-' flag for omission of the sequence name.

func (*Seq) Len

func (s *Seq) Len() int

Len returns the length of the sequence.

func (*Seq) New

func (s *Seq) New() seq.Sequence

New returns an empty *Seq sequence with the same alphabet.

func (*Seq) RevComp

func (s *Seq) RevComp()

RevComp reverse complements the sequence. RevComp will panic if the alphabet used by the receiver is not a Complementor.

Example
s := NewSeq("example DNA", []alphabet.Letter("ATGCtGACTTGGTGCACGT"), alphabet.DNA)
fmt.Printf("%-s\n", s)
s.RevComp()
fmt.Printf("%-s\n", s)
Output:

ATGCtGACTTGGTGCACGT
ACGTGCACCAAGTCaGCAT

func (*Seq) Reverse

func (s *Seq) Reverse()

Reverse reverses the order of letters in the sequence without complementing them.

func (*Seq) Set

func (s *Seq) Set(i int, l alphabet.QLetter) error

Set sets the letter at position i to l.

func (*Seq) SetSlice

func (s *Seq) SetSlice(sl alphabet.Slice)

SetSlice sets the sequence data represented by the sequence. SetSlice will panic if sl is not an alphabet.Letters.

func (*Seq) Slice

func (s *Seq) Slice() alphabet.Slice

Slice returns the sequence data as an alphabet.Slice.

func (*Seq) Start

func (s *Seq) Start() int

Start returns the start position of the sequence in coordinates relative to the sequence location.

func (*Seq) String

func (s *Seq) String() string

String returns a string representation of the sequence data only.

func (*Seq) Validate

func (s *Seq) Validate() (bool, int)

Validate validates the letters of the sequence according to the sequence alphabet.

Example
r := NewSeq("example RNA", []alphabet.Letter("ACGCTGACTTGGTGCACGT"), alphabet.RNA)
fmt.Printf("%-s %v\n", r, r.Moltype())
if ok, pos := r.Validate(); ok {
	fmt.Println("valid RNA")
} else {
	fmt.Println(strings.Repeat(" ", pos-1), "^ first invalid RNA position")
}
Output:

ACGCTGACTTGGTGCACGT RNA
    ^ first invalid RNA position

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL