matrix

package module
v0.0.0-...-1020ebc Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 21, 2024 License: GPL-3.0 Imports: 10 Imported by: 4

README

matrix

import "gitlab.com/Grouumf/enhlinktools/matrix"

Package matrix is a library to load sparse matrices from single-cell data and/or mtx and COO format and create efficient indexing.

Index

Constants

const (
    // Matrix format
    coo        Format = "coo"
    mtx        Format = "mtx"
    cellRanger Format = "cellRanger"
    buffsize   int    = 120000
    nbSteps    int    = 100
)

Variables

MATRIXFORMATS possible options for matrix format

var MATRIXFORMATS = [...]Format{coo, mtx, cellRanger}

func GetRandomBootstrapIndex

func GetRandomBootstrapIndex(arr []uint, downsample int) (index []uint)

GetRandomBootstrapIndex get a random index with repetition

func LoadIndexFileToIndex

func LoadIndexFileToIndex(fname utils.Filename, downcase bool, refMapping map[string]uint) (celliddict map[string]uint, maxIndex int)

LoadIndexFileToIndex create cell ID index dict. Return also max Index

func LoadPeakDictsToIndex

func LoadPeakDictsToIndex(fname utils.Filename, sepIn, sepOut string) (featiddict map[string]uint, maxIndex int)

LoadPeakDictsToIndex create feature (peak) ID index dict. Return also max Index

func Mean

func Mean(arr []float64, size int) (mean float64)

Mean return mean of arr given total size

func Std

func Std(arr []float64, mean float64, size int) (std float64)

Std return std of arr given total size

func TestStringToPeak

func TestStringToPeak(str string) error

TestStringToPeak test if string is a valid peak

func TtestPval

func TtestPval(mean, std float64, size int) (pval float64)

TtestPval return Student test pval using T CDF

func diffMap

func diffMap(m1, m2 map[string]uint) (missing []string)

func maxUintMap

func maxUintMap(imap map[string]uint) (maxMap int)

func minIndexSliceInt

func minIndexSliceInt(slice []int, validIndexes []int) int

func minInt

func minInt(a, b int) int

func processMtxHeader

func processMtxHeader(ismtx, transpose bool, reader *bufio.Scanner, maxLengthX, maxLengthY int, xgi, ygi utils.Filename) (splitChar string)

func reverseIndex

func reverseIndex(index map[string]uint, lenIndex int) (indexC []string)

reverseIndex Internal function to reverse a map index

func reverseIndexC

func reverseIndexC(indexC []string) (index map[string]uint)

reverseIndexC Internal function to reverse an index

type Attributes

Attributes matrix attributes parsed during init

type Attributes struct {
    Xgi          utils.Filename
    Ygi          utils.Filename
    MatFile      utils.Filename
    XgiSubset    utils.Filename
    YgiSubset    utils.Filename
    ClustersFile utils.Filename
    MatrixFormat string
    NbThreads    int
}

type Format

Format matrix format type

type Format string
func (Format) isValid
func (t Format) isValid() Format

isValid is the matrix format valid

type MatColFloatHash

MatColFloatHash matrix column class for sparse float matrix

type MatColFloatHash struct {
    // Column matrix mat[ygi][xgi]
    matCol []map[uint]float64
    // Dense column matrix mat[ygi][xgi]
    // Index
    subIndexHash map[int]uint
    xDim, yDim   uint
}
func (*MatColFloatHash) GetRow
func (mc *MatColFloatHash) GetRow(ygi uint) map[uint]float64

GetRow get row from matCol using a sparse map[uint]float64

func (*MatColFloatHash) Init
func (mc *MatColFloatHash) Init(matCol []map[uint]float64, xDim uint)

Init init MatColFloatHash

type MatColHash

MatColHash matrix column class that can allocate dense submatrices

type MatColHash struct {
    // Column matrix mat[ygi][xgi]
    matCol *[]map[uint]bool
    // Dense column matrix mat[ygi][xgi]
    matColDense [][]bool
    // Index
    subIndexHash map[int]uint
    xDim         uint
    isDense      bool
}
func (*MatColHash) Get
func (mc *MatColHash) Get(ygi, xgi uint) bool

Get get matrix value

func (*MatColHash) GetCol
func (mc *MatColHash) GetCol(ygi uint) []bool

GetCol get matrix column in dense bool vector

func (*MatColHash) GetDim
func (mc *MatColHash) GetDim() (xDim, yDim int)

GetDim return dimensions

func (*MatColHash) GetIndex
func (mc *MatColHash) GetIndex(ygi uint) uint

GetIndex get index from hashed ygi

func (*MatColHash) GetRow
func (mc *MatColHash) GetRow(ygi uint) map[uint]bool

GetRow get row from matCol using a sparse map[uint]bool

func (*MatColHash) GetRowDense
func (mc *MatColHash) GetRowDense(ygi uint) (vect []bool)

GetRowDense return row vector as a dense bool array. If matrix is not sparse, construct the vector

func (*MatColHash) Init
func (mc *MatColHash) Init(matCol *[]map[uint]bool, xDim uint)

Init init MatColHash

func (*MatColHash) InitDense
func (mc *MatColHash) InitDense(matColDense [][]bool)

InitDense init MatColHash with a dense matrix

func (*MatColHash) IsDense
func (mc *MatColHash) IsDense() bool

IsDense return if struct dense is initiated

func (*MatColHash) Len
func (mc *MatColHash) Len(ygiIndex uint) int

Len Return the number of non-zero elements of a column

func (*MatColHash) RmDense
func (mc *MatColHash) RmDense()

RmDense remove dense matrix if any

func (*MatColHash) ToDense
func (mc *MatColHash) ToDense()

ToDense sparse to dense

func (*MatColHash) ToDenseFromSubset
func (mc *MatColHash) ToDenseFromSubset(ygis []uint) (newYgis []uint)

ToDenseFromSubset sparse to dense

func (*MatColHash) ToDenseFromSubsetAlreadyLoaded
func (mc *MatColHash) ToDenseFromSubsetAlreadyLoaded(ygis []uint) (newYgis []uint)

ToDenseFromSubsetAlreadyLoaded sparse to dense but does not recreate matColDense because already loaded (used when neighborhood == 0)

type SparseBoolMatrix

SparseBoolMatrix class

type SparseBoolMatrix struct {
    // Input files
    xgi, ygi, matFile, clustersFile utils.Filename
    xgiSubset, ygiSubset            utils.Filename
    matrixFormat                    Format

    XgiIndex, YgiIndex   []string
    XgiIndexC, YgiIndexC map[string]uint
    Clusters             map[string][]uint // cluster key -> list of cell IDs
    Xdim, Ydim           int               // Dimensions of the matrix

    MatCol     MatColHash // mat.Get(posy, posx)
    RandMatCol MatColHash // mat[posy][posx] with random posx from

    matCol, matColT []map[uint]bool // Original matCol value and passed as reference to MatCol. MatcolT is the transpose

    ////////  Sync utils ////////
    nbThreads int
    waiting   sync.WaitGroup
    guard     chan int
    mutex     sync.Mutex
}
func (*SparseBoolMatrix) CreateRandMat
func (sbm *SparseBoolMatrix) CreateRandMat(nbFeat int, refFeats []uint)

CreateRandMat Create a random matrix of size nbFeat x len(XgiIndex)

func (*SparseBoolMatrix) GetMatColT
func (sbm *SparseBoolMatrix) GetMatColT() []map[uint]bool

GetMatColT Get MatColT

func (*SparseBoolMatrix) GetUniformSampling
func (sbm *SparseBoolMatrix) GetUniformSampling(downsample, totXgi int, matColBucket [][]uint) (xgiIndex []uint)

GetUniformSampling get a uniform sampling of the xgi indexes according to the ygi

func (*SparseBoolMatrix) Init
func (sbm *SparseBoolMatrix) Init(attributes Attributes)

Init Init dedicated to the gene matrix without loading the cluster file. The ygi index is regarded as peak region and the Clusters file is loaded

func (*SparseBoolMatrix) Init2
func (sbm *SparseBoolMatrix) Init2(attributes Attributes)

Init2 Init dedicated to the gene matrix without loading the cluster file. The ygi index is not regarded as peak region and is loaded with LoadIndexFileToIndex

func (*SparseBoolMatrix) InitMeta
func (sbm *SparseBoolMatrix) InitMeta(xgiMap map[string]uint, attributes Attributes, skipFirst bool)

InitMeta init Metadata matrix, drop first binary attributes

func (*SparseBoolMatrix) InitTranspose
func (sbm *SparseBoolMatrix) InitTranspose()

InitTranspose create a transpose matrix of matCol and instantiate matColBucket

func (*SparseBoolMatrix) LoadClustersFile
func (sbm *SparseBoolMatrix) LoadClustersFile()

LoadClustersFile load cluster file for sparse matrix

func (*SparseBoolMatrix) LoadMatrix
func (sbm *SparseBoolMatrix) LoadMatrix()

LoadMatrix load matrix

func (*SparseBoolMatrix) LoadMatrix2
func (sbm *SparseBoolMatrix) LoadMatrix2(xgiMap, ygiMap map[string]uint)

LoadMatrix2 load matrix with xgi and ygi Index. If ygiMap is empty, use the default ygi index

func (*SparseBoolMatrix) initThreading
func (sbm *SparseBoolMatrix) initThreading(attributes Attributes)
func (*SparseBoolMatrix) loadMatrixCoo
func (sbm *SparseBoolMatrix) loadMatrixCoo(matFormat Format, xgiSubset, ygiSubset map[string]uint)

loadMatrixCoo load function with either MTX header or not. if xgiSubset is provided, replace xgi index by the index present in xgiSubset

func (*SparseBoolMatrix) loadMatrixCooOneTh
func (sbm *SparseBoolMatrix) loadMatrixCooOneTh(count, nblines, thID int, lines *[buffsize]string, xgiSubset, ygiSubset map[string]uint, matColMain *[]map[uint]bool, splitChar string, transpose, delOne bool)
func (*SparseBoolMatrix) loadMetaMatrix
func (sbm *SparseBoolMatrix) loadMetaMatrix(xgiMap map[string]uint, skipFirst bool)

Load meta file with a header and in dense tsv format. If skipFirst, the first value of each field is skipped to avoid singular matrix

type SparseFloatMatrix

SparseFloatMatrix class

type SparseFloatMatrix struct {
    // Input files
    xgi, ygi, matFile    utils.Filename
    xgiSubset, ygiSubset utils.Filename
    matrixFormat         Format

    XgiIndex, YgiIndex   []string
    XgiIndexC, YgiIndexC map[string]uint
    Xdim, Ydim           int // Dimensions of the matrix

    MatCol MatColFloatHash // mat.Get(posy, posx)

    ////////  Sync utils ////////
    nbThreads int
    waiting   sync.WaitGroup
    guard     chan int
    mutex     sync.Mutex
}
func (*SparseFloatMatrix) Init
func (sfm *SparseFloatMatrix) Init(attributes Attributes)

Init Init dedicated to the gene matrix without loading the cluster file. The ygi index is regarded as peak region and the Clusters file is loaded

func (*SparseFloatMatrix) LoadMatrix
func (sfm *SparseFloatMatrix) LoadMatrix(xgiMap, ygiMap map[string]uint)

LoadMatrix load float matrix with xgi and ygi Index. If ygiMap is empty, use the default ygi index

func (*SparseFloatMatrix) initThreading
func (sfm *SparseFloatMatrix) initThreading(attributes Attributes)
func (*SparseFloatMatrix) loadMatrixFloat
func (sfm *SparseFloatMatrix) loadMatrixFloat(xgiSubset, ygiSubset map[string]uint)

loadMatrixFloat load function with either MTX header or not. if xgiSubset is provided, replace xgi index by the index present in xgiSubset

func (*SparseFloatMatrix) loadMatrixFloatOneTh
func (sfm *SparseFloatMatrix) loadMatrixFloatOneTh(count, nblines, thID int, lines *[buffsize]string, xgiSubset, ygiSubset map[string]uint, matColMain *[]map[uint]float64, splitChar string, transpose, delOne bool)

Generated by gomarkdoc

Documentation

Overview

Package matrix is a library to load sparse matrices from single-cell data and/or mtx and COO format and create efficient indexing.

Index

Constants

This section is empty.

Variables

View Source
var MATRIXFORMATS = [...]Format{coo, mtx, cellRanger}

MATRIXFORMATS possible options for matrix format

Functions

func GetRandomBootstrapIndex

func GetRandomBootstrapIndex(arr []uint, downsample int) (index []uint)

GetRandomBootstrapIndex get a random index with repetition

func LoadIndexFileToIndex

func LoadIndexFileToIndex(fname utils.Filename, downcase bool, refMapping map[string]uint) (celliddict map[string]uint, maxIndex int)

LoadIndexFileToIndex create cell ID index dict. Return also max Index

func LoadPeakDictsToIndex

func LoadPeakDictsToIndex(fname utils.Filename, sepIn, sepOut string) (featiddict map[string]uint, maxIndex int)

LoadPeakDictsToIndex create feature (peak) ID index dict. Return also max Index

func Mean

func Mean(arr []float64, size int) (mean float64)

Mean return mean of arr given total size

func Std

func Std(arr []float64, mean float64, size int) (std float64)

Std return std of arr given total size

func TestStringToPeak

func TestStringToPeak(str string) error

TestStringToPeak test if string is a valid peak

func TtestPval

func TtestPval(mean, std float64, size int) (pval float64)

TtestPval return Student test pval using T CDF

Types

type Attributes

type Attributes struct {
	Xgi          utils.Filename
	Ygi          utils.Filename
	MatFile      utils.Filename
	XgiSubset    utils.Filename
	YgiSubset    utils.Filename
	ClustersFile utils.Filename
	MatrixFormat string
	NbThreads    int
}

Attributes matrix attributes parsed during init

type Format

type Format string

Format matrix format type

type MatColFloatHash

type MatColFloatHash struct {
	// contains filtered or unexported fields
}

MatColFloatHash matrix column class for sparse float matrix

func (*MatColFloatHash) GetRow

func (mc *MatColFloatHash) GetRow(ygi uint) map[uint]float64

GetRow get row from matCol using a sparse map[uint]float64

func (*MatColFloatHash) Init

func (mc *MatColFloatHash) Init(matCol []map[uint]float64, xDim uint)

Init init MatColFloatHash

type MatColHash

type MatColHash struct {
	// contains filtered or unexported fields
}

MatColHash matrix column class that can allocate dense submatrices

func (*MatColHash) Get

func (mc *MatColHash) Get(ygi, xgi uint) bool

Get get matrix value

func (*MatColHash) GetCol

func (mc *MatColHash) GetCol(ygi uint) []bool

GetCol get matrix column in dense bool vector

func (*MatColHash) GetDim

func (mc *MatColHash) GetDim() (xDim, yDim int)

GetDim return dimensions

func (*MatColHash) GetIndex

func (mc *MatColHash) GetIndex(ygi uint) uint

GetIndex get index from hashed ygi

func (*MatColHash) GetRow

func (mc *MatColHash) GetRow(ygi uint) map[uint]bool

GetRow get row from matCol using a sparse map[uint]bool

func (*MatColHash) GetRowDense

func (mc *MatColHash) GetRowDense(ygi uint) (vect []bool)

GetRowDense return row vector as a dense bool array. If matrix is not sparse, construct the vector

func (*MatColHash) Init

func (mc *MatColHash) Init(matCol *[]map[uint]bool, xDim uint)

Init init MatColHash

func (*MatColHash) InitDense

func (mc *MatColHash) InitDense(matColDense [][]bool)

InitDense init MatColHash with a dense matrix

func (*MatColHash) IsDense

func (mc *MatColHash) IsDense() bool

IsDense return if struct dense is initiated

func (*MatColHash) Len

func (mc *MatColHash) Len(ygiIndex uint) int

Len Return the number of non-zero elements of a column

func (*MatColHash) RmDense

func (mc *MatColHash) RmDense()

RmDense remove dense matrix if any

func (*MatColHash) ToDense

func (mc *MatColHash) ToDense()

ToDense sparse to dense

func (*MatColHash) ToDenseFromSubset

func (mc *MatColHash) ToDenseFromSubset(ygis []uint) (newYgis []uint)

ToDenseFromSubset sparse to dense

func (*MatColHash) ToDenseFromSubsetAlreadyLoaded

func (mc *MatColHash) ToDenseFromSubsetAlreadyLoaded(ygis []uint) (newYgis []uint)

ToDenseFromSubsetAlreadyLoaded sparse to dense but does not recreate matColDense because already loaded (used when neighborhood == 0)

type SparseBoolMatrix

type SparseBoolMatrix struct {
	XgiIndex, YgiIndex   []string
	XgiIndexC, YgiIndexC map[string]uint
	Clusters             map[string][]uint // cluster key -> list of cell IDs
	Xdim, Ydim           int               // Dimensions of the matrix

	MatCol     MatColHash // mat.Get(posy, posx)
	RandMatCol MatColHash // mat[posy][posx] with random posx from
	// contains filtered or unexported fields
}

SparseBoolMatrix class

func (*SparseBoolMatrix) CreateRandMat

func (sbm *SparseBoolMatrix) CreateRandMat(nbFeat int, refFeats []uint)

CreateRandMat Create a random matrix of size nbFeat x len(XgiIndex)

func (*SparseBoolMatrix) GetMatColT

func (sbm *SparseBoolMatrix) GetMatColT() []map[uint]bool

GetMatColT Get MatColT

func (*SparseBoolMatrix) GetUniformSampling

func (sbm *SparseBoolMatrix) GetUniformSampling(downsample, totXgi int, matColBucket [][]uint) (xgiIndex []uint)

GetUniformSampling get a uniform sampling of the xgi indexes according to the ygi

func (*SparseBoolMatrix) Init

func (sbm *SparseBoolMatrix) Init(attributes Attributes)

Init Init dedicated to the gene matrix without loading the cluster file. The ygi index is regarded as peak region and the Clusters file is loaded

func (*SparseBoolMatrix) Init2

func (sbm *SparseBoolMatrix) Init2(attributes Attributes)

Init2 Init dedicated to the gene matrix without loading the cluster file. The ygi index is not regarded as peak region and is loaded with LoadIndexFileToIndex

func (*SparseBoolMatrix) InitMeta

func (sbm *SparseBoolMatrix) InitMeta(xgiMap map[string]uint, attributes Attributes, skipFirst bool)

InitMeta init Metadata matrix, drop first binary attributes

func (*SparseBoolMatrix) InitTranspose

func (sbm *SparseBoolMatrix) InitTranspose()

InitTranspose create a transpose matrix of matCol and instantiate matColBucket

func (*SparseBoolMatrix) LoadClustersFile

func (sbm *SparseBoolMatrix) LoadClustersFile()

LoadClustersFile load cluster file for sparse matrix

func (*SparseBoolMatrix) LoadMatrix

func (sbm *SparseBoolMatrix) LoadMatrix()

LoadMatrix load matrix

func (*SparseBoolMatrix) LoadMatrix2

func (sbm *SparseBoolMatrix) LoadMatrix2(xgiMap, ygiMap map[string]uint)

LoadMatrix2 load matrix with xgi and ygi Index. If ygiMap is empty, use the default ygi index

type SparseFloatMatrix

type SparseFloatMatrix struct {
	XgiIndex, YgiIndex   []string
	XgiIndexC, YgiIndexC map[string]uint
	Xdim, Ydim           int // Dimensions of the matrix

	MatCol MatColFloatHash // mat.Get(posy, posx)
	// contains filtered or unexported fields
}

SparseFloatMatrix class

func (*SparseFloatMatrix) Init

func (sfm *SparseFloatMatrix) Init(attributes Attributes)

Init Init dedicated to the gene matrix without loading the cluster file. The ygi index is regarded as peak region and the Clusters file is loaded

func (*SparseFloatMatrix) LoadMatrix

func (sfm *SparseFloatMatrix) LoadMatrix(xgiMap, ygiMap map[string]uint)

LoadMatrix load float matrix with xgi and ygi Index. If ygiMap is empty, use the default ygi index

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL