sequences

package

v0.0.0-...-040724e Latest Latest Go to latest Published: Jun 6, 2019 License: BSD-3-Clause, GPL-2.0, BSD-3-Clause, + 1 more Imports: 8 Imported by: 7

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/antha-lang/antha

Documentation ¶

Overview ¶

Package sequences is for interacting with and manipulating biological sequences; in extension to methods available in wtype

synthesisvalidation.go

Package sequences is for interacting with and manipulating biological sequences; in extension to methods available in wtype

Index ¶

Constants
Variables
func AAPosition(dnaposition int) (aaposition int)
func AddFeature(annotated *wtype.DNASequence, newFeature wtype.Feature)
func AddFeatures(dnaSeq *wtype.DNASequence, features []wtype.Feature)
func CodonOptions(codon string) (replacementoptions []string)
func DoublestrandedNonOverlappingORFS(seq string) (features features)
func DoublestrandedORFS(seq string) (features features)
func EqualFold(a, b *wtype.DNASequence) bool
func FindDirectionalPositionInSequence(largeSequence wtype.DNASequence, smallSequence wtype.DNASequence) (start int, end int, err error)
func FindPositionInSequence(largeSequence wtype.DNASequence, smallSequence wtype.DNASequence) (start int, end int, err error)
func FindSeqsinSeqs(bigseq string, smallseqs []string) (seqsfound []search.Result)
func FindStarts(seq string) (atgs int)
func GCcontent(fwdsequence string) (Percentage float64)
func GtoMolarConc(conc wunit.Concentration, mw float64) (molesperL float64)
func Illegalnucleotides(fwdsequence wtype.DNASequence) (pass bool, illegalfound []search.Result, wobblefound []search.Result)
func InSequences(seqs []*wtype.DNASequence, query *wtype.DNASequence) bool
func LookforSpecificORF(seq string, targetAASeq string) (present bool)
func MakeAnnotatedSeq(name string, seq string, circular bool, features []wtype.Feature) (annotated wtype.DNASequence, err error)
func MakeFeature(name string, seq string, start int, end int, sequencetype string, class string, ...) (feature wtype.Feature)
func MassDNA(fwdsequence string, phosphate5prime bool, doublestranded bool) (mw float64)
func Molecularweight(orf ORF) (kDa float64)
func Moles(conc wunit.Concentration, mw float64, vol wunit.Volume) (moles float64)
func MolesDNA(mass wunit.Mass, mw float64) (moles float64)
func MoletoGConc(molarconc float64, mw float64) (gperL wunit.Concentration)
func ORFs2Features(orfs []ORF) (features []wtype.Feature)
func Prefix(seq string, lengthofprefix int) (prefix string)
func RemoveFeatures(dnaSeq *wtype.DNASequence)
func RemoveSite(sequence wtype.DNASequence, enzyme wtype.RestrictionEnzyme, ...) (newseq wtype.DNASequence, err error)
func RemoveSitesOutsideofFeatures(dnaseq wtype.DNASequence, site string, algorithm ReplacementAlgorithm, ...) (newseq wtype.DNASequence, err error)
func Replace(sequence wtype.DNASequence, position PositionPair, ...) (newSeq wtype.DNASequence, err error)
func ReplaceAll(sequence, seqToReplace, replaceWith wtype.DNASequence) (newSeq wtype.DNASequence, err error)
func ReplaceAvoidingPositionPairs(seq string, positionpairs []StartEndPair, original string, replacement string) (newseq string)
func ReplaceBycomplement(sequence, thingtoreplace string, otherseqstoavoid []string) (replacement string, err error)
func ReplaceCodoninORF(sequence wtype.DNASequence, startandendoforf StartEndPair, position int, ...) (newseq wtype.DNASequence, codontochange string, option string, err error)
func ReplacePosition(sequence string, position StartEndPair, replacement string) (newseq string)
func RevArrayOrder(array []string) (reversedOrder []string)
func RevComp(seq string) string
func RevTranslate(aaSeq wtype.ProteinSequence, codonUsageTable CodonUsageTable) (dnaSeq wtype.DNASequence, err error)
func RevTranslatetoNstring(aaSeq string) (NNN string)
func Rotate(seq wtype.DNASequence, rotateBy int, reverse bool) (rotatedSeq wtype.DNASequence)
func SetFeatures(dnaSeq *wtype.DNASequence, features []wtype.Feature)
func Suffix(seq string, lengthofsuffix int) (suffix string)
func Translate(dna wtype.DNASequence) (aa wtype.ProteinSequence, err error)
func ValidateSynthesis(parts []wtype.DNASequence, vector string, manufacturer string) (string, bool)
func Wobble(seq string) (alloptions []string)
type ByPositionPairStartPosition
- func (p ByPositionPairStartPosition) Len() int
- func (p ByPositionPairStartPosition) Less(i, j int) bool
- func (p ByPositionPairStartPosition) Swap(i, j int)
type CodonUsageTable
type FrequencyTable
- func (table FrequencyTable) ChooseCodon(aa wtype.AminoAcid) (codon wtype.Codon, err error)
type NTable
- func (table NTable) ChooseCodon(aa wtype.AminoAcid) (codon wtype.Codon, err error)
type ORF
- func FindBiggestORF(seq string) (finalorf ORF, orftrue bool)
- func FindDirectionalORF(seq string, reverse bool) (orf ORF, orftrue bool)
- func FindNonOverlappingORFsinstrand(seq string) (orfs []ORF)
- func FindORF(seq string) (orf ORF, orftrue bool)
- func FindallNonOverlappingORFS(seq string) []ORF
- func FindallORFs(seq string) []ORF
- func Findorfsinstrand(seq string) (orfs []ORF)
- func MergeORFs(feats features) []ORF
type PositionPair
- func (p PositionPair) CodeFriendly(ignoredirection ...bool) (start, end int)
- func (p *PositionPair) Coordinates(options ...string) (start, end int)
- func (p PositionPair) End(options ...string) int
- func (p PositionPair) HumanFriendly(ignoredirection ...bool) (start, end int)
- func (p PositionPair) Start(options ...string) int
type ReplacementAlgorithm
type SearchResult
- func FindAll(bigSequence, smallSequence *wtype.DNASequence) (seqsFound SearchResult)
type SimpleUsageTable
- func (table SimpleUsageTable) ChooseCodon(aa wtype.AminoAcid) (codon wtype.Codon, err error)
type StartEndPair
- func Codonfromposition(sequence string, dnaposition int) (codontoreturn string, position StartEndPair, err error)
- func MakeStartendPair(start, end int) (pair StartEndPair)

Constants ¶

View Source

// IGNOREDIRECTION is a boolean constant used to specify that the direction of
// a feature position should be ignored when returning the start and end
// positions of a feature. If selected, the start position will be the first
// position at which the feature is encountered regardless of orientation.
const IGNOREDIRECTION bool = true

IGNOREDIRECTION is a boolean constant to specify direction of a feature position should be ignored when returning start and end positions of a feature. If selected, the start position will be the first position at which the feature is encountered regardless of orientation.

Variables ¶

View Source

// Some example CodonUsageTables.
var (

	// ConvertToNNN converts all amino acids to NNN; all stop codons to ***
	ConvertToNNN NTable = NTable{}

	// UseAnyCodon returns the first Codon value in the RevCodonTable for any amino acid.
	UseAnyCodon = SimpleUsageTable{Table: RevCodonTable}

	// EColiTable is an example of a frequency table for E.Coli.
	// A codon for a specific amino acid will be returned with the probability set by the CodonSet.
	//
	// CodonByAA is keyed by the single letter amino acid code ("*" for stop);
	// each CodonSet maps a codon to its relative frequency for that amino acid.
	// AAByCodon reuses the package level Codontable.
	//
	// NOTE(review): the frequencies are rounded to two decimal places, so the
	// sets for "I" (0.99) and for "S", "R" and "T" (1.01) do not sum to exactly
	// 1 — confirm that FrequencyTable.ChooseCodon tolerates this.
	EColiTable = FrequencyTable{
		TaxID: "E.Coli",
		CodonByAA: map[string]wtype.CodonSet{
			"F": {
				"TTT": 0.58,
				"TTC": 0.42,
			},
			"L": {
				"TTA": 0.14,
				"TTG": 0.13,
				"CTT": 0.12,
				"CTC": 0.1,
				"CTA": 0.04,
				"CTG": 0.47,
			},
			"Y": {
				"TAT": 0.59,
				"TAC": 0.41,
			},
			"*": {
				"TAA": 0.61,
				"TAG": 0.09,
				"TGA": 0.3,
			},
			"H": {
				"CAT": 0.57,
				"CAC": 0.43,
			},
			"Q": {
				"CAA": 0.34,
				"CAG": 0.66,
			},
			"I": {
				"ATT": 0.49,
				"ATC": 0.39,
				"ATA": 0.11,
			},
			"M": {
				"ATG": 1.0,
			},
			"N": {
				"AAT": 0.49,
				"AAC": 0.51,
			},
			"K": {
				"AAA": 0.74,
				"AAG": 0.26,
			},
			"V": {
				"GTT": 0.28,
				"GTC": 0.2,
				"GTA": 0.17,
				"GTG": 0.35,
			},
			"D": {
				"GAT": 0.63,
				"GAC": 0.37,
			},
			"E": {
				"GAA": 0.68,
				"GAG": 0.32,
			},
			"S": {
				"TCT": 0.17,
				"TCC": 0.15,
				"TCA": 0.14,
				"TCG": 0.14,
				"AGT": 0.16,
				"AGC": 0.25,
			},
			"C": {
				"TGT": 0.46,
				"TGC": 0.54,
			},
			"W": {
				"TGG": 1,
			},
			"P": {
				"CCT": 0.18,
				"CCC": 0.13,
				"CCA": 0.2,
				"CCG": 0.49,
			},
			"R": {
				"CGT": 0.36,
				"CGC": 0.36,
				"CGA": 0.07,
				"CGG": 0.11,
				"AGA": 0.07,
				"AGG": 0.04,
			},
			"T": {
				"ACT": 0.19,
				"ACC": 0.4,
				"ACA": 0.17,
				"ACG": 0.25,
			},
			"A": {
				"GCT": 0.18,
				"GCC": 0.26,
				"GCA": 0.23,
				"GCG": 0.33,
			},
			"G": {
				"GGT": 0.35,
				"GGC": 0.37,
				"GGA": 0.13,
				"GGG": 0.15,
			},
		},
		AAByCodon: Codontable,
	}
)

Some example CodonUsageTables.

View Source

// Algorithmlookuptable maps the name of a replacement algorithm to the
// ReplacementAlgorithm function registered under that name.
// Note that the only key, "ReplacebyComplement", is capitalised differently
// from the function it refers to (ReplaceBycomplement).
var Algorithmlookuptable = map[string]ReplacementAlgorithm{
	"ReplacebyComplement": ReplaceBycomplement,
}

View Source

// Codontable describes the mapping between a Codon and the amino acid which it
// encodes. Keys are upper case DNA codons; values are single letter amino acid
// codes, with "*" marking a stop codon.
//
// The entries are laid out as the conventional codon grid: one row per
// first/second base pair, with the third base running T, C, A, G.
var Codontable = map[string]string{
	// First base T.
	"TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
	"TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
	"TAT": "Y", "TAC": "Y", "TAA": "*", "TAG": "*",
	"TGT": "C", "TGC": "C", "TGA": "*", "TGG": "W",

	// First base C.
	"CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
	"CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
	"CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
	"CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",

	// First base A.
	"ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
	"ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
	"AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
	"AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",

	// First base G.
	"GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
	"GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
	"GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
	"GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
}

Codontable describes the mapping between a Codon and the amino acid which it encodes.

View Source

// Nucleotidegpermol holds a numeric value per nucleotide species, keyed by its
// symbol. Judging by the name these are molar masses in g/mol — TODO confirm
// against the functions which consume the table.
var Nucleotidegpermol = map[string]float64{
	// Single letter bases; "N" stands for an unspecified base.
	"A": 313.2, "C": 289.2, "G": 329.2, "T": 304.2,
	"N": 303.7,

	// Triphosphate forms; the "dNTP" value is approximately the mean of the
	// four specific dNTP entries.
	"dATP": 491.2, "dCTP": 467.2, "dGTP": 507.2, "dTTP": 482.2,
	"dNTP": 487.0,
}

View Source

// RevCodonTable describes the mapping between an amino acid in single letter
// format and the codons which encode it ("*" lists the stop codons).
//
// Keys are listed alphabetically. The order of the codons within each entry is
// significant to any caller which picks a codon by position (UseAnyCodon is
// built from this table), so it must not be changed casually.
var RevCodonTable = map[string][]string{
	"*": {"TAA", "TAG", "TGA"},
	"A": {"GCA", "GCC", "GCG", "GCT"},
	"C": {"TGC", "TGT"},
	"D": {"GAC", "GAT"},
	"E": {"GAA", "GAG"},
	"F": {"TTC", "TTT"},
	"G": {"GGC", "GGT", "GGA", "GGG"},
	"H": {"CAC", "CAT"},
	"I": {"ATC", "ATT", "ATA"},
	"K": {"AAA", "AAG"},
	"L": {"TTA", "TTG", "CTC", "CTT", "CTA", "CTG"},
	"M": {"ATG"},
	"N": {"AAC", "AAT"},
	"P": {"CCC", "CCT", "CCA", "CCG"},
	"Q": {"CAA", "CAG"},
	"R": {"AGA", "AGG", "CGC", "CGT", "CGA", "CGG"},
	"S": {"AGC", "AGT", "TCC", "TCT", "TCA", "TCG"},
	"T": {"ACC", "ACT", "ACA", "ACG"},
	"V": {"GTC", "GTT", "GTA", "GTG"},
	"W": {"TGG"},
	"Y": {"TAC", "TAT"},
}

RevCodonTable describes the mapping between an amino acid in single letter format and the codons which encode it.

View Source

// SynthesisStandards lists, per DNA synthesis manufacturer, the constraints
// used when validating whether a part can be ordered from that manufacturer.
//
// Each manufacturer entry holds:
//
//	"Vector"    []string  names of the vectors accepted for that manufacturer
//	"MaxLength" int       upper length limit (presumably in base pairs — TODO confirm)
//	"MinLength" int       lower length limit (presumably in base pairs — TODO confirm)
//	"RepeatMax" int       repeat limit (exact meaning is defined by the consumer — TODO confirm)
//	"MinOrder"  int       minimum order size (units not visible here — TODO confirm)
//
// Values are stored as interface{}, so callers must type assert them.
var SynthesisStandards = map[string]map[string]interface{}{
	"Gen9": {
		"Vector":    []string{"pG9m-2"},
		"MaxLength": 10000,
		"MinLength": 400,
		"RepeatMax": 70,
		"MinOrder":  20000,
	},
	"DNA20": {
		// The pJ vectors follow a regular pJ3x/pJ2y/pJ3y/pJ2z series. The list
		// previously contained "J354" (missing "p" prefix), a repeated
		// "pJ234", "pJ334" pair where the series requires "pJ237", "pJ337",
		// and a duplicated "pM273"; these are corrected below so that every
		// vector of the series is accepted exactly once.
		"Vector": []string{
			"pJ341", "pJ221", "pJ321", "pJ201",
			"pJ344", "pJ224", "pJ324", "pJ204",
			"pJ347", "pJ227", "pJ327", "pJ207",
			"pJ348", "pJ228", "pJ328", "pJ208",
			"pJ349", "pJ229", "pJ329", "pJ209",
			"pJ351", "pJ231", "pJ331", "pJ211",
			"pJ354", "pJ234", "pJ334", "pJ214",
			"pJ357", "pJ237", "pJ337", "pJ217",
			"pJ358", "pJ238", "pJ338", "pJ218",
			"pJ359", "pJ239", "pJ339", "pJ219",
			"pM265", "pM268", "pM269",
			"pM275", "pM278", "pM279",
			"pM269E-19C", "pM269Y-19C",
			"pM262", "pM263", "pM264",
			"pM272", "pM273", "pM274",
		},
		"MaxLength": 3000,
		"MinLength": 400,
		"RepeatMax": 70,
		"MinOrder":  0,
	},
	"GenScript": {
		"Vector":    []string{"pUC57", "pUC57-Kan", "pUC57-Simple", "pUC57-mini", "pUC18", "pUC19"},
		"MaxLength": 8000,
		"MinLength": 400,
		"RepeatMax": 70,
		"MinOrder":  455,
	},
	"GeneWiz": {
		"Vector":    []string{"pUC57"},
		"MaxLength": 10000,
		"MinLength": 200,
		"RepeatMax": 70,
		"MinOrder":  455,
	},
	"OriGene": {
		"Vector":    []string{"pUCAmp", "pUCKan", "pUCAmpMinusMCS", "pUCKanMinusMCS"},
		"MaxLength": 10000,
		"MinLength": 200,
		"RepeatMax": 70,
		"MinOrder":  455,
	},
	"GeneArt": {
		"Vector":    []string{"pUCAmp", "pUCKan", "pUCAmpMinusMCS", "pUCKanMinusMCS"},
		"MaxLength": 10000,
		"MinLength": 200,
		"RepeatMax": 70,
		"MinOrder":  455,
	},
	"EuroFins": {
		"Vector":    []string{"pEX-A2", "pEX-K4"},
		"MaxLength": 10000,
		"MinLength": 200,
		"RepeatMax": 20,
		"MinOrder":  455,
	},
}

View Source

// WobbleMap represents a mapping of each IUPAC nucleotide to all valid
// alternative nucleotides for that nucleotide.
//
// The order of the alternatives within each entry is preserved because
// callers which expand a sequence from this table produce their options in
// that order.
var WobbleMap = map[string][]string{
	// Unambiguous bases, upper and lower case, map to the upper case base.
	"A": {"A"}, "T": {"T"}, "U": {"U"}, "C": {"C"}, "G": {"G"},
	"a": {"A"}, "t": {"T"}, "u": {"U"}, "c": {"C"}, "g": {"G"},

	// Codes standing for two bases.
	"Y": {"C", "T"}, "R": {"A", "G"}, "W": {"A", "T"},
	"S": {"G", "C"}, "K": {"G", "T"}, "M": {"A", "C"},

	// Codes standing for three bases.
	"D": {"A", "G", "T"}, "V": {"A", "C", "G"},
	"H": {"A", "C", "T"}, "B": {"C", "G", "T"},

	// Codes standing for any base.
	"N": {"A", "T", "C", "G"},
	"X": {"A", "T", "C", "G"},

	// Gap characters are interchangeable.
	"-": {"-", "."},
	".": {"-", "."},
}

WobbleMap represents a mapping of each IUPAC nucleotide to all valid alternative IUPAC nucleotides for that nucleotide. This may be useful for protein engineering applications where mutations may wish to be introduced.

For example N can be substituted for any primary nucleotide (A, C, T or G). R may be substituted for any purine base (A, G). gaps are represented by - or .

Functions ¶

func AAPosition ¶

func AAPosition(dnaposition int) (aaposition int)

func AddFeature ¶

func AddFeature(annotated *wtype.DNASequence, newFeature wtype.Feature)

AddFeature adds a feature to a DNASequence. The positions will be added automatically; if more than one matching sequence is found, multiple features will be added.

func AddFeatures ¶

func AddFeatures(dnaSeq *wtype.DNASequence, features []wtype.Feature)

AddFeatures adds features to the existing features of the DNASequence.

func CodonOptions ¶

func CodonOptions(codon string) (replacementoptions []string)

func DoublestrandedNonOverlappingORFS ¶

func DoublestrandedNonOverlappingORFS(seq string) (features features)

func DoublestrandedORFS ¶

func DoublestrandedORFS(seq string) (features features)

func EqualFold ¶

func EqualFold(a, b *wtype.DNASequence) bool

EqualFold compares whether two sequences are equivalent to each other.

The comparison will be performed in a case insensitive manner with respect to the actual sequence. The orientation is not important; i.e. a sequence and its reverse complement will be classified as equal. The two sequences must have the same circularisation status (i.e. both plasmid or both linear). If the sequences are plasmids then the rotation of the sequences is not important. Feature annotations, double or single stranded status and overhang information are not taken into consideration.

func FindDirectionalPositionInSequence ¶

func FindDirectionalPositionInSequence(largeSequence wtype.DNASequence, smallSequence wtype.DNASequence) (start int, end int, err error)

FindDirectionalPositionInSequence returns the directional Positions of the feature. If more than one matching feature is found an error will be returned.

func FindPositionInSequence ¶

func FindPositionInSequence(largeSequence wtype.DNASequence, smallSequence wtype.DNASequence) (start int, end int, err error)

FindPositionInSequence returns directionless Positions; if a feature is found in the reverse orientation the first position found in the sequence will be returned rather than the start of the feature. If more than one matching feature is found an error will be returned.

func FindSeqsinSeqs ¶

func FindSeqsinSeqs(bigseq string, smallseqs []string) (seqsfound []search.Result)

FindSeqsinSeqs searches for small sequences (as strings) in a big sequence. The sequence is considered to be linear and matches will not be found if the sequence is circular and the sequence overlaps the end of the sequence. In this case, FindSeqs should be used.

func FindStarts ¶

func FindStarts(seq string) (atgs int)

// Promoter describes a promoter element located within a DNA sequence.
//
// StartPosition is the zero-based index of the first base of the element in
// the searched sequence and EndPosition is the index one past its last base,
// so that upperCasedSeq[StartPosition:EndPosition] == DNASeq.
// DNASeq is the upper case sequence of the element.
type Promoter struct {
	StartPosition int
	EndPosition   int
	DNASeq        string
}

// FindPromoter searches seq, case insensitively, for the first occurrence of
// the hexamer "TTGACA" (the motif the original draft looked for; it matches
// the consensus of the bacterial -35 promoter box) and returns its location.
//
// If the hexamer is not present the zero Promoter is returned, which callers
// can detect by an empty DNASeq.
//
// Only the hexamer itself is reported: the inspection of the sequence
// downstream of the match which the original draft started was never
// completed and is deliberately not guessed at here.
//
// Positions refer to the upper-cased sequence; for plain ASCII nucleotide
// sequences these are identical to positions in seq.
func FindPromoter(seq string) (promoter Promoter) {
	const consensusBox = "TTGACA"

	index := strings.Index(strings.ToUpper(seq), consensusBox)
	if index < 0 {
		return Promoter{}
	}

	return Promoter{
		StartPosition: index,
		EndPosition:   index + len(consensusBox),
		DNASeq:        consensusBox,
	}
}

func GCcontent ¶

func GCcontent(fwdsequence string) (Percentage float64)

Calculate global GC content

func GtoMolarConc ¶

func GtoMolarConc(conc wunit.Concentration, mw float64) (molesperL float64)

calculate molar concentration of DNA sample

func Illegalnucleotides ¶

func Illegalnucleotides(fwdsequence wtype.DNASequence) (pass bool, illegalfound []search.Result, wobblefound []search.Result)

Check for illegal nucleotides

func InSequences ¶

func InSequences(seqs []*wtype.DNASequence, query *wtype.DNASequence) bool

InSequences evaluates whether a query is present in a set of DNASequences using the same criteria as the EqualFold function.

The comparison will be performed in a case insensitive manner with respect to the actual sequence. The orientation is not important; i.e. a sequence and its reverse complement will be classified as equal. The two sequences must have the same circularisation status (i.e. both plasmid or both linear). If the sequences are plasmids then the rotation of the sequences is not important. Feature annotations, double or single stranded status and overhang information are not taken into consideration.

func LookforSpecificORF ¶

func LookforSpecificORF(seq string, targetAASeq string) (present bool)

func MakeAnnotatedSeq ¶

func MakeAnnotatedSeq(name string, seq string, circular bool, features []wtype.Feature) (annotated wtype.DNASequence, err error)

MakeAnnotatedSeq makes a DNA sequence adding the specified features with their correct positions in the sequence specified in human friendly format.

func MakeFeature ¶

func MakeFeature(name string, seq string, start int, end int, sequencetype string, class string, reverse string) (feature wtype.Feature)

MakeFeature constructs an annotated feature to be added to a sequence. The feature will be defined by its class and its position in the sequence once added to a sequence using AddFeature. A protein sequence can be specified if appropriate. Valid class fields are:

ORF = "ORF"
CDS = "CDS"
GENE = "gene"
MISC_FEATURE = "misc_feature"
PROMOTER = "promoter"
TRNA = "tRNA"
RRNA = "rRNA"
NCRNA = "ncRNA"
REGULATORY = "regulatory"
REPEAT_REGION = "repeat_region"

Valid sequence type entries are: "aa" = amino acid/protein sequence; "dna" = DNA sequence; "rna" = RNA sequence. Use the AddFeature function to add the feature to a DNASequence such that the positions are added correctly.

func MassDNA ¶

func MassDNA(fwdsequence string, phosphate5prime bool, doublestranded bool) (mw float64)

Calculate Molecular weight of DNA

func Molecularweight ¶

func Molecularweight(orf ORF) (kDa float64)

Molecularweight estimates molecular weight of a protein product.

func Moles ¶

func Moles(conc wunit.Concentration, mw float64, vol wunit.Volume) (moles float64)

func MolesDNA ¶

func MolesDNA(mass wunit.Mass, mw float64) (moles float64)

Calculate number of moles of a mass of DNA

func MoletoGConc ¶

func MoletoGConc(molarconc float64, mw float64) (gperL wunit.Concentration)

func ORFs2Features ¶

func ORFs2Features(orfs []ORF) (features []wtype.Feature)

ORFs2Features converts a set of ORFs into a set of features

func Prefix ¶

func Prefix(seq string, lengthofprefix int) (prefix string)

func RemoveFeatures ¶

func RemoveFeatures(dnaSeq *wtype.DNASequence)

RemoveFeatures clears all existing feature annotations from a sequence.

func RemoveSite ¶

func RemoveSite(sequence wtype.DNASequence, enzyme wtype.RestrictionEnzyme, otherseqstoavoid []string) (newseq wtype.DNASequence, err error)

todo: fix this func

func RemoveSitesOutsideofFeatures ¶

func RemoveSitesOutsideofFeatures(dnaseq wtype.DNASequence, site string, algorithm ReplacementAlgorithm, featurelisttoavoid []wtype.Feature) (newseq wtype.DNASequence, err error)

func Replace ¶

func Replace(sequence wtype.DNASequence, position PositionPair, replaceWith wtype.DNASequence) (newSeq wtype.DNASequence, err error)

Replace takes a PositionPair and replaces the sequence between the pair with the replaceWith sequence. Features will be deleted if part of the feature is replaced. Note, if used to delete sections from a plasmid, the sequence returned will be in plasmid form and an attempt will be made to maintain the original orientation. In this case it may be necessary to rotate the sequence if looking to generate a linear sequence of interest.

func ReplaceAll ¶

func ReplaceAll(sequence, seqToReplace, replaceWith wtype.DNASequence) (newSeq wtype.DNASequence, err error)

ReplaceAll searches for a sequence within a sequence and replaces all instances with the replaceWith sequence. Features will be deleted if part of the feature is replaced. Note, if used to delete sections from a plasmid, the sequence returned will be in plasmid form and an attempt will be made to maintain the original orientation. In this case it may be necessary to rotate the sequence if looking to generate a linear sequence of interest.

func ReplaceAvoidingPositionPairs ¶

func ReplaceAvoidingPositionPairs(seq string, positionpairs []StartEndPair, original string, replacement string) (newseq string)

func ReplaceBycomplement ¶

func ReplaceBycomplement(sequence, thingtoreplace string, otherseqstoavoid []string) (replacement string, err error)

func ReplaceCodoninORF ¶

func ReplaceCodoninORF(sequence wtype.DNASequence, startandendoforf StartEndPair, position int, seqstoavoid []string) (newseq wtype.DNASequence, codontochange string, option string, err error)

func ReplacePosition ¶

func ReplacePosition(sequence string, position StartEndPair, replacement string) (newseq string)

func RevArrayOrder ¶

func RevArrayOrder(array []string) (reversedOrder []string)

func RevComp ¶

func RevComp(seq string) string

func RevTranslate ¶

func RevTranslate(aaSeq wtype.ProteinSequence, codonUsageTable CodonUsageTable) (dnaSeq wtype.DNASequence, err error)

RevTranslate converts an amino acid sequence into a dna sequence according the codon usage table specified. A CodonUsageTable is an interface for any type which has a ChooseCodon method. Examples of these are SimpleUsageTable, FrequencyTable and NTable

func RevTranslatetoNstring ¶

func RevTranslatetoNstring(aaSeq string) (NNN string)

RevTranslatetoNstring converts a string amino acid sequence to a sequence of NNN codons.

func Rotate ¶

func Rotate(seq wtype.DNASequence, rotateBy int, reverse bool) (rotatedSeq wtype.DNASequence)

Rotate will rotate the sequence by the number of characters specified by rotateBy. If reverse is true the sequence will be rotated in the reverse direction.

func SetFeatures ¶

func SetFeatures(dnaSeq *wtype.DNASequence, features []wtype.Feature)

SetFeatures replaces any existing feature annotations of the DNASequence with the features specified.

func Suffix ¶

func Suffix(seq string, lengthofsuffix int) (suffix string)

func Translate ¶

func Translate(dna wtype.DNASequence) (aa wtype.ProteinSequence, err error)

func ValidateSynthesis ¶

func ValidateSynthesis(parts []wtype.DNASequence, vector string, manufacturer string) (string, bool)

This simulates the sequence assembly reaction to validate if parts will synthesise with intended manufacturer. Does not validate construct assembly so should be used in conjunction with enzymes.Assemblysimulator()

func Wobble ¶

func Wobble(seq string) (alloptions []string)

Wobble returns an array of sequence options, as strings. Options are calculated by cross referencing each nucleotide with the WobbleMap to find each alternative option, if any, for that nucleotide. For example: ACT would return one sequence: ACT; RCT would return ACT and GCT; NCT would return ACT, GCT, TCT and CCT; RYT would return ACT, GCT, ATT and GTT.

Types ¶

type ByPositionPairStartPosition ¶

// ByPositionPairStartPosition obeys the sort interface (Len, Less, Swap) so
// that position pairs can be sorted in ascending start position.
// Direction is ignored during sorting.
type ByPositionPairStartPosition []PositionPair

ByPositionPairStartPosition obeys the sort interface making the position pairs to be sorted in ascending start position. Direction is ignored during sorting.

func (ByPositionPairStartPosition) Len ¶

func (p ByPositionPairStartPosition) Len() int

Len returns the number of PositionPairs in PositionPairSet

func (ByPositionPairStartPosition) Less ¶

func (p ByPositionPairStartPosition) Less(i, j int) bool

Less evaluates whether the entry of PositionPairSet with index i is less than the entry with index j. The directionless start position is used to assess this. If the start positions are the same the end position is used.

func (ByPositionPairStartPosition) Swap ¶

func (p ByPositionPairStartPosition) Swap(i, j int)

Swap changes positions of two entries in a PositionPairSet

type CodonUsageTable ¶

// CodonUsageTable is an interface for any type which can convert an amino acid
// into a codon and error.
// SimpleUsageTable, FrequencyTable and NTable in this package each provide a
// ChooseCodon method with this signature.
type CodonUsageTable interface {
	// ChooseCodon converts an amino acid into a codon.
	// A nil error is returned if this is done successfully.
	ChooseCodon(aminoAcid wtype.AminoAcid) (wtype.Codon, error)
}

CodonUsageTable is an interface for any type which can convert an amino acid into a codon and error.

type FrequencyTable ¶

// FrequencyTable chooses the next codon based on the frequency of the codon
// for that amino acid in the specified organism.
// For example, in the EColiTable F is encoded by TTT (0.58) and TTC (0.42);
// the ChooseCodon method run on F is therefore documented to return TTT 58%
// of the time and TTC 42% of the time.
type FrequencyTable wtype.CodonTable

type FrequencyTable chooses the next codon based on the frequency of the codon for that amino acid in the specified organism. for example: in Ecoli, F is encoded by TTT and TTC. The relative frequency of each is: TTT 0.58 TTC 0.42 The ChooseCodon method run on F would therefore return TTT 58% of the time and TTC 42%.

func (FrequencyTable) ChooseCodon ¶

func (table FrequencyTable) ChooseCodon(aa wtype.AminoAcid) (codon wtype.Codon, err error)

ChooseCodon converts an amino acid into a codon. A nil error is returned if this is done successfully.

type NTable ¶

// NTable converts each amino acid to NNN. This may be useful when a sequence
// is left to a DNA synthesis provider to codon optimise.
// The type carries no state.
type NTable struct {
}

type NTable converts each amino acid to NNN. This may be useful when a sequence is left to a DNA synthesis provider to codon optimise.

func (NTable) ChooseCodon ¶

func (table NTable) ChooseCodon(aa wtype.AminoAcid) (codon wtype.Codon, err error)

ChooseCodon converts an amino acid into a codon. All amino acids will be converted to NNN; all stop codons to ***

type ORF ¶

// ORF is an open reading frame.
//
// StartPosition and EndPosition locate the ORF in the searched sequence
// (NOTE(review): whether these are zero- or one-based is not visible here —
// confirm against the Find*ORF functions).
// DNASeq presumably holds the nucleotide sequence of the ORF and ProtSeq its
// translation — TODO confirm.
// Direction records the orientation of the ORF; the set of valid values is
// not visible here — TODO confirm.
type ORF struct {
	StartPosition int
	EndPosition   int
	DNASeq        string
	ProtSeq       string
	Direction     string
}

type ORF is an open reading frame

func FindBiggestORF ¶

func FindBiggestORF(seq string) (finalorf ORF, orftrue bool)

func FindDirectionalORF ¶

func FindDirectionalORF(seq string, reverse bool) (orf ORF, orftrue bool)

func FindNonOverlappingORFsinstrand ¶

func FindNonOverlappingORFsinstrand(seq string) (orfs []ORF)

func FindORF ¶

func FindORF(seq string) (orf ORF, orftrue bool)

func FindallNonOverlappingORFS ¶

func FindallNonOverlappingORFS(seq string) []ORF

func FindallORFs ¶

func FindallORFs(seq string) []ORF

all orfs above 20 amino acids

func Findorfsinstrand ¶

func Findorfsinstrand(seq string) (orfs []ORF)

finds all orfs and if they're greater than 20 amino acids (the smallest known protein) in length adds them to an array of orfs to be returned

func MergeORFs ¶

func MergeORFs(feats features) []ORF

type PositionPair ¶

// PositionPair stores the start and end position of a feature in a sequence
// in human friendly format rather than code format, i.e. in a sequence
// "ATGTGTTG" position 1 is A and there is no position zero.
// The methods HumanFriendly() and CodeFriendly() return the positions in the
// corresponding formats.
//
// Reverse presumably marks a feature found in the reverse orientation —
// TODO confirm against the methods which read it.
type PositionPair struct {
	StartPosition int
	EndPosition   int
	Reverse       bool
}

PositionPair stores the Start and Endposition of feature in a sequence in human friendly format rather than code format. i.e. in a Sequence "ATGTGTTG" position 1 is A and there is no position zero. To convert the format, the methods HumanFriendly() and CodeFriendly() return the positions in the corresponding formats.

func (PositionPair) CodeFriendly ¶

func (p PositionPair) CodeFriendly(ignoredirection ...bool) (start, end int)

CodeFriendly returns a sequence PositionPair's start and end positions in a code friendly format i.e. in a Sequence "ATGTGTTG" position 0 is A. If ignoredirection is used as an argument and set to true, the start position will be the first position at which the feature is encountered regardless of orientation.

func (*PositionPair) Coordinates ¶

func (p *PositionPair) Coordinates(options ...string) (start, end int)

Coordinates returns the start and end positions of the feature. By default this will return the start position followed by the end position in human friendly format. Available options are: HUMANFRIENDLY, which returns a sequence PositionPair's start and end positions in a human friendly format, i.e. in a Sequence "ATGTGTTG" position 1 is A, 2 is T; CODEFRIENDLY, which returns a sequence PositionPair's start and end positions in a code friendly format, i.e. in a Sequence "ATGTGTTG" position 0 is A, 1 is T; IGNOREDIRECTION, a constant to specify that direction of a feature position should be ignored when returning start and end positions of a feature. If selected, the start position will be the first position at which the feature is encountered regardless of orientation.

func (PositionPair) End ¶

func (p PositionPair) End(options ...string) int

End returns the end position of the PositionPair. By default this will return a directional human friendly position.

func (PositionPair) HumanFriendly ¶

func (p PositionPair) HumanFriendly(ignoredirection ...bool) (start, end int)

HumanFriendly returns a sequence PositionPair's start and end positions in a human friendly format i.e. in a Sequence "ATGTGTTG" position 1 is A and there is no position zero. If ignoredirection is used as an argument and set to true, the start position will be the first position at which the feature is encountered regardless of orientation.

func (PositionPair) Start ¶

func (p PositionPair) Start(options ...string) int

Start returns the start position of the PositionPair. By default this will return a directional human friendly position.

type ReplacementAlgorithm ¶

// ReplacementAlgorithm is the signature of a function which, given a
// sequence, the thing to replace within it and a list of other sequences to
// avoid, returns a replacement string or an error.
// ReplaceBycomplement has this signature and is registered in
// Algorithmlookuptable.
// The type will potentially be generalisable for codon optimisation.
type ReplacementAlgorithm func(sequence, thingtoreplace string, otherseqstoavoid []string) (replacement string, err error)

will potentially be generalisable for codon optimisation

type SearchResult ¶

// SearchResult stores the results of a search query against a template
// sequence.
//
// Template is the sequence which was searched and Query the sequence which
// was searched for. Positions presumably holds one PositionPair per match
// found — TODO confirm against FindAll.
type SearchResult struct {
	Template  wtype.BioSequence
	Query     wtype.BioSequence
	Positions []PositionPair
}

SearchResult stores the results of a search query against a template sequence.

func FindAll ¶

func FindAll(bigSequence, smallSequence *wtype.DNASequence) (seqsFound SearchResult)

FindAll searches for a DNA sequence within a larger DNA sequence and returns all matches on both coding and complementary strands.

type SimpleUsageTable ¶

// SimpleUsageTable contains a reverse translation table mapping of amino acid
// to all codon options. The first codon option for a specified amino acid is
// always chosen, so the order of the codons within each entry matters.
type SimpleUsageTable struct {
	// Table is a mapping between the amino acid and all codon options for that amino acid.
	Table map[string][]string
}

SimpleUsageTable contains a reverse translation table mapping of amino acid to all codon options. The first codon option for a specified Amino Acid is always chosen.

func (SimpleUsageTable) ChooseCodon ¶

func (table SimpleUsageTable) ChooseCodon(aa wtype.AminoAcid) (codon wtype.Codon, err error)

ChooseCodon converts an amino acid into a codon. An error is returned if no value for the amino acid is found.

type StartEndPair ¶

// StartEndPair is a fixed size pair of integer positions.
// Presumably index 0 holds the start and index 1 the end, matching the
// argument order of MakeStartendPair(start, end) — TODO confirm.
type StartEndPair [2]int

func Codonfromposition ¶

func Codonfromposition(sequence string, dnaposition int) (codontoreturn string, position StartEndPair, err error)

func MakeStartendPair ¶

func MakeStartendPair(start, end int) (pair StartEndPair)

Source Files ¶

View all Source files

Directories ¶

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

Path	Synopsis
Seqtools Package for processing sequencing results	Package for processing sequencing results
align Package align allows aligning Antha sequences using the biogo implementation of the Needleman-Wunsch and Smith-Waterman alignment algorithms	Package align allows aligning Antha sequences using the biogo implementation of the Needleman-Wunsch and Smith-Waterman alignment algorithms
biogo
biogo/feat Package feat provides the base for storage and manipulation of biological interval information.	Package feat provides the base for storage and manipulation of biological interval information.
biogo/io/seqio Package seqio provides interfaces for sequence I/O functions.	Package seqio provides interfaces for sequence I/O functions.
biogo/io/seqio/fasta Package fasta provides types to read and write FASTA format files.	Package fasta provides types to read and write FASTA format files.
biogo/seq Package seq provides the base for storage and manipulation of biological sequence information.	Package seq provides the base for storage and manipulation of biological sequence information.
biogo/seq/linear Package linear handles single sequences.	Package linear handles single sequences.
ncbi/blast Package blast provides support for interaction with the NCBI BLAST service.	Package blast provides support for interaction with the NCBI BLAST service.
ncbi/entrez Package entrez provides support for interaction with the NCBI Entrez Utility Programs (E-utilities).	Package entrez provides support for interaction with the NCBI Entrez Utility Programs (E-utilities).
ncbi/entrez/global
ncbi/entrez/info
ncbi/entrez/link
ncbi/entrez/search
ncbi/entrez/spell
ncbi/entrez/summary
ncbi/ncbi Package ncbi provides support for interaction with the NCBI services, Entrez and Blast.	Package ncbi provides support for interaction with the NCBI services, Entrez and Blast.
blast Package for performing blast queries	Package for performing blast queries
entrez package for querying all of NCBI databases	package for querying all of NCBI databases
kmer
oligos Package for designing oligos	Package for designing oligos
parse package parse converts DNA sequence files into a set of DNA sequences.	package parse converts DNA sequence files into a set of DNA sequences.
fasta package fasta converts DNA sequence files in FASTA format into a set of DNA sequences.	package fasta converts DNA sequence files in FASTA format into a set of DNA sequences.
gdx package gdx converts DNA sequence files in .gdx format into a set of DNA sequences.	package gdx converts DNA sequence files in .gdx format into a set of DNA sequences.
genbank package genbank converts DNA sequence files in genbank format into a set of DNA sequences.	package genbank converts DNA sequence files in genbank format into a set of DNA sequences.
plasmid Package plasmid checks for common plasmid features in a test DNA sequence.	Package plasmid checks for common plasmid features in a test DNA sequence.