bktrim

package module
v0.0.0-...-b5eef3e Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 9, 2023 License: MPL-2.0 Imports: 2 Imported by: 0

README

bktrim

Fast and accurate adapter trimming for processing next-generation sequencing (NGS) paired-end sequences.

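Go implementation of the bit-masked k-difference matching algorithm, translated from skewer (see Jiang et al., "Skewer: a fast and accurate adapter trimmer for next-generation sequencing paired-end reads", BMC Bioinformatics 15:182, 2014).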

Installation

$ go get git.sr.ht/~vejnar/bktrim

Example

package main

import (
	"fmt"
	"strings"

	"git.sr.ht/~vejnar/bktrim"
)

// main runs the paired-end example: it registers one adapter per read,
// searches both reads for them and, when a solution is found and the pair can
// be combined, prints each adapter shifted to its reported position above the
// corresponding read.
func main() {
	// Example read pair and the quality string that accompanies each read.
	read1 := []byte("GGGCCTCGAGAGCCACCATTCTGTAAAATTGAAGCACATTTTTCATTGTGTTTGGATCCGTCAGATCGGAAGAGCA")
	read2 := []byte("GACGGATCCAAACACAATGAAAAATGTGCTTCAATTTTACAGAATGGTGGCTCTCGAGGCCCAGATCGGAAGAGCA")
	qual1 := []byte("IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII")
	qual2 := []byte("IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII")

	// Adapters: the upstream one is registered with order 0, the downstream
	// one with order 1.
	upstream := []byte("AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC")
	downstream := []byte("AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA")

	// Alignment parameters, passed positionally to NewMatrix.
	const (
		epsilon    = 0.1
		delta      = 0.15
		minOverlap = 3
		baseQual   = 33
	)

	m := bktrim.NewMatrix(epsilon, delta, minOverlap, baseQual)
	m.AddAdapter(upstream, bktrim.TRIM_TAIL, 0)
	m.AddAdapter(downstream, bktrim.TRIM_TAIL, 1)

	// Align; nothing is printed unless a solution is determined and the pair
	// can be combined.
	found, sol1, sol2 := m.FindAdapterWithPE(read1, qual1, read2, qual2)
	if !found {
		return
	}
	if !m.CombinePairSeqs(read1, qual1, read2, qual2, sol1.Pos, sol2.Pos) {
		return
	}

	// For each read: the adapter padded with one dash per position, then the read.
	rows := []struct {
		offset  int
		adapter []byte
		read    []byte
	}{
		{sol1.Pos, upstream, read1},
		{sol2.Pos, downstream, read2},
	}
	for _, row := range rows {
		fmt.Println(strings.Repeat("-", row.offset) + string(row.adapter))
		fmt.Println(string(row.read))
	}
}

License

bktrim is distributed under the Mozilla Public License Version 2.0 (see /LICENSE).

Copyright © 2020-2023 Charles E. Vejnar

Documentation

Index

Constants

View Source
// Trim modes. The README example passes TRIM_TAIL as the trimMode argument of
// Matrix.AddAdapter; NewAdapter takes a trimMode argument as well.
const (
	TRIM_DEFAULT = 0
	TRIM_HEAD    = 1
	TRIM_TAIL    = 2
)
View Source
// Nucleotide codes, numbered consecutively from zero. CD_CNT is the number of
// codes and is used as the length of Adapter.MatchBits.
const (
	CD_NONE = 0
	// Adenosine
	CD_A = 1
	// Cytidine
	CD_C = 2
	// Guanosine
	CD_G = 3
	// Thymidine (or Uridine)
	CD_T = 4
	// puRine, A or G
	CD_R = 5
	// pYrimidine, T or C
	CD_Y = 6
	// Strong, G or C
	CD_S = 7
	// Weak, A or T
	CD_W = 8
	// Keto, G or T
	CD_K = 9
	// aMino, A or C
	CD_M = 10
	// not A
	CD_B = 11
	// not C
	CD_D = 12
	// not G
	CD_H = 13
	// not T
	CD_V = 14
	// any base
	CD_N = 15
	// Number of codes above.
	CD_CNT = 16
)
View Source
// MAX_PENALTY is the largest of the three penalty constants: MIN_PENALTY + 4.
// NOTE(review): 0.477121255 is log10(3) to nine decimals, so this value is
// log10(30000); how it is applied during scoring is not visible here — confirm.
const MAX_PENALTY = 4.477121255
View Source
// MEAN_PENALTY lies midway between MIN_PENALTY and MAX_PENALTY: MIN_PENALTY + 2.
// NOTE(review): 0.477121255 is log10(3) to nine decimals, so this value is
// log10(300); how it is applied during scoring is not visible here — confirm.
const MEAN_PENALTY = 2.477121255
View Source
// MIN_PENALTY is the smallest of the three penalty constants.
// NOTE(review): the value is log10(3) to nine decimals; how it is applied
// during scoring is not visible here — confirm.
const MIN_PENALTY = 0.477121255

Variables

This section is empty.

Functions

This section is empty.

Types

type Adapter

type Adapter struct {
	Seq       []byte
	Length    int
	MatchBits [CD_CNT]uint64
	// contains filtered or unexported fields
}

func NewAdapter

func NewAdapter(seq []byte, trimMode int) *Adapter

func (*Adapter) Align

func (a *Adapter) Align(read []byte, qual []byte, result *SolutionSet, bestAlign bool, matrix *Matrix) bool

func (*Adapter) UpdateColumn

func (a *Adapter) UpdateColumn(queue *deque.Deque[*Solution], d0bits uint64, lbits uint64, unbits uint64, dnbits uint64, penal float64, maxPenalty float64, maxIndel int, matrix *Matrix) (uint64, uint64, uint64, uint64, float64, float64, int)

type Matrix

// Matrix holds the alignment parameters and up to two adapters. It is created
// with NewMatrix and is the receiver of AddAdapter, FindAdapter,
// FindAdapterWithPE, CombinePairSeqs and CalcRevCompScore.
type Matrix struct {
	// NOTE(review): Epsilon, EpsilonIndel and MinOverlap presumably store the
	// like-named NewMatrix arguments — confirm against NewMatrix.
	Epsilon       float64
	EpsilonIndel  float64
	PenaltyPerErr float64
	Delta         float64
	Mu            float64
	// Table of 256 penalty values.
	// NOTE(review): presumably indexed by quality byte — confirm.
	Penalty       [256]float64
	Sensitive     bool
	MinOverlap    int
	// NOTE(review): presumably filled by AddAdapter with order 0 (Adapter1)
	// and order 1 (Adapter2) — confirm.
	Adapter1      *Adapter
	Adapter2      *Adapter
}

func NewMatrix

func NewMatrix(epsilon float64, epsilonIndel float64, minOverlap int, baseQual int) *Matrix

func (*Matrix) AddAdapter

func (m *Matrix) AddAdapter(seq []byte, trimMode int, order int)

func (*Matrix) CalcRevCompScore

func (m *Matrix) CalcRevCompScore(seq1 []byte, qual1 []byte, seq2 []byte, qual2 []byte, rIdx int, qLen int) (bool, float64)

func (*Matrix) CombinePairSeqs

func (m *Matrix) CombinePairSeqs(seq1 []byte, qual1 []byte, seq2 []byte, qual2 []byte, idx1 int, idx2 int) bool

func (*Matrix) FindAdapter

func (m *Matrix) FindAdapter(seq []byte, qual []byte) (bool, Solution)

func (*Matrix) FindAdapterWithPE

func (m *Matrix) FindAdapterWithPE(seq1 []byte, qual1 []byte, seq2 []byte, qual2 []byte) (bool, Solution, Solution)

type Solution

type Solution struct {
	Score  float64
	NIndel int
	Pos    int
	// contains filtered or unexported fields
}

type SolutionSet

// SolutionSet is a collection of Solution values with Insert, Get, Length and
// IsEmpty methods.
type SolutionSet struct {
	// NOTE(review): any ordering that Insert maintains is not visible here —
	// confirm before relying on element order.
	Solutions []Solution
}

func (*SolutionSet) Get

func (e *SolutionSet) Get(i int) Solution

func (*SolutionSet) Insert

func (e *SolutionSet) Insert(elem Solution)

func (*SolutionSet) IsEmpty

func (e *SolutionSet) IsEmpty() bool

func (*SolutionSet) Length

func (e *SolutionSet) Length() int

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL