enhlinkobject

package module
v0.0.0-...-1020ebc Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 21, 2024 License: GPL-3.0 Imports: 16 Imported by: 3

README

enhlinkobject

import "gitlab.com/Grouumf/enhlinktools/enhlinkobject"

Package enhlinkobject is a library for creating an Enhlink object and performing Enhlink analysis.

Index

Variables

VERSION version of the current software

var VERSION = "0.21.3"

linkTypes lists the valid link types (all, positive, negative)

var linkTypes = [...]LinkType{allLink, posLink, negLink}

func AssertIfFileExists

func AssertIfFileExists(filename, tag string)

AssertIfFileExists panics if os.Stat returns a nil error for filename

func MergeClosePromoterRegions

func MergeClosePromoterRegions(mergingCutoff int, plist *PromoterList)

MergeClosePromoterRegions merge close promoters according to cutoff

func pickNGenesAtRandom

func pickNGenesAtRandom(nbGenes int, geneSet map[string]uint) (newGeneSet map[string]bool)

type EnhlinkObject

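EnhlinkObject is the main enhlink object, containing the input matrices, internal variables, simulated variables, tree attributes, sync objects, and output file objects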

type EnhlinkObject struct {

    //////////////// files and matrices ////////////
    // promoter file
    promoterFile utils.Filename
    // sparse matrix
    SparseMatrix matrix.SparseBoolMatrix
    // sparse matrix for gene activity
    SparseMatrixGene *matrix.SparseBoolMatrix
    // sparse float matrix for gene expression (substitute SparseMatrixGene )
    SparseMatrixFloat *matrix.SparseFloatMatrix
    // sparse matrix for covariates
    SparseMatrixCovar *matrix.SparseBoolMatrix

    //////////////// Internal variables //////////
    // current gene under study
    currentGene string
    // internal promoter map that defines all the current promoter regions
    // If matrix is constructed from peakMat, it is only 1 region
    currentPeaks map[utils.Peak]bool
    // peaks banned from being in the neighborhood matrix
    // because they are in a current promoter region
    blacklistedPeaks map[uint]bool

    // features on which to perform the analysis
    relevantFeatures []int
    // endog response binary vector
    ygiVector    []int //map[xgiID]value
    ygiCovVector []int //map[xgiID]value
    // endog response float vector
    ygiVectorFloat    []float64 //map[xgiID]value
    ygiCovVectorFloat []float64 //map[xgiID]value
    // Sum of ygi for all cluster
    ygiClusterSum map[string]float64
    // Remove peaks within promoter boundaries
    rmPeaksInPromoters bool

    // surrounding matrix
    surroundingPeaks []uint
    // Number of additional random features
    nbRandFeat int
    // Number of features used for the model
    nbFeatUsed int
    // is gene matrix provided
    isGeneMat bool
    // is gene expression matrix provided
    isFloatMat bool
    // is cov matrix provided
    isCovMat bool
    //starting time
    tStart time.Time

    bucketCovariates map[string][][]uint

    // valid peak and covariates before
    validYgi, validCovar map[string][]uint

    xgiCovMap []map[int]bool

    // Internal variable to indicate whether the 2nd order inference mode is activated
    isInferring2nd bool

    //verbose status
    verbose bool

    //////////////// Simulated variables ////////
    simColMat         matrix.MatColHash
    simYgiVector      []int
    simYgiVectorFloat []float64
    nbSimFeat         int
    isSim             bool
    lambda1           float64 // poisson param for dropout level
    lambda2           float64 // poisson param for false positive level

    //////// Float matrix attributes ///////////
    nonNullMean float64

    //////////////// TREE attributes //////////
    //treeAttributes object passed during the init
    attributes TreeAttributes
    // Number of internal threads to perform the multiple tasks
    nbThreads int
    // region in number of base pairs to define the surrounding enhancers
    surroundingSize int
    //Min matrix size
    minMatSize int
    // Max depth
    maxDepth int
    //Number of classes for ygi vector
    nbClass int
    // min leaf size of the tree
    minLeafSize int
    // number of bootstraps
    nbBoot int
    // P-value threshold
    threshold float64
    // downsample the number of samples
    downsample int
    // Maximum number of explanatory features per bootstrap model.
    maxNbFeatures int
    // Maximum number of explanatory features per bootstrap model for second order models.
    secondOrderMaxFeat int
    //Ignore Enhancers weight (the ratio of accessibility) in the computation of the modified Information Gain
    ignoreEnhancerWeight bool
    // Keep the main ColMat matrix sparse. Useful for memory reasons if the background is very large
    keepSparse bool
    // Identify the covariates associated with each inferred enhancer-promoter links
    secondOrder bool
    // Maximum number of features to be considered for a given tree: {"all", "sqrt", "log"}* or an int/float
    maxFeatType MaxFeaturesType
    // Only perform simulation
    onlySim bool
    // type of links to keep: all, positive, or negative
    LinkType LinkType
    // uniform covariate sampling for each tree
    uniformSampling bool

    ////////// Sync objects /////////////////
    guard         chan bool
    mutex, mutex2 sync.Mutex
    waiting       sync.WaitGroup

    //promoter list map[gene]list<peak>
    Promoters *PromoterList
    // Reduced Intervals for ygi index map[chrID]interval
    YgiIntervalReduced utils.PeakIntervalTreeObject
    // Intervals for ygi index map[chrID]interval
    YgiInterval utils.PeakIntervalTreeObject
    // refined index of ygis not in promoters
    ygisNotInPromoters map[string]uint

    //////////////// Files objects //////////
    outDir, outTag string
    // map[cluster] -> file
    writers, writersCov, writers2ndOrder map[string]*io.WriteCloser
    // map[cluster] file name
    files, filesCov, files2ndOrder map[string]string
    // writer of simulated features results
    writerSim *io.WriteCloser
    fileSim   string
}
func (*EnhlinkObject) AnalyseAllGenesFromGeneMat
func (eo *EnhlinkObject) AnalyseAllGenesFromGeneMat()

AnalyseAllGenesFromGeneMat analyse all genes from GeneMat

func (*EnhlinkObject) AnalyseAllPromoters
func (eo *EnhlinkObject) AnalyseAllPromoters(geneSubset utils.Filename)

AnalyseAllPromoters analyses all promoters; geneSubset is a file name, presumably used to restrict the analysis to a subset of genes

func (*EnhlinkObject) AnalyseNGenes
func (eo *EnhlinkObject) AnalyseNGenes(geneMap map[string]bool, verbose bool)

AnalyseNGenes analyses the genes in geneMap and closes the output files

func (*EnhlinkObject) AnalyseOneGene
func (eo *EnhlinkObject) AnalyseOneGene(gene string)

AnalyseOneGene analyses one gene and closes the output files

func (*EnhlinkObject) AnalyseRandomSubsetFromGeneMat
func (eo *EnhlinkObject) AnalyseRandomSubsetFromGeneMat(nSamples int)

AnalyseRandomSubsetFromGeneMat pick n genes at random from gene mat and analyse them

func (*EnhlinkObject) AnalyseRandomSubsetOfPromoters
func (eo *EnhlinkObject) AnalyseRandomSubsetOfPromoters(geneSubsetFile utils.Filename, nSamples int)

AnalyseRandomSubsetOfPromoters picks nSamples genes at random from the promoters (presumably restricted by geneSubsetFile) and analyses them

func (*EnhlinkObject) Init
func (eo *EnhlinkObject) Init(mat matrix.SparseBoolMatrix, geneMat, covMat *matrix.SparseBoolMatrix, floatMat *matrix.SparseFloatMatrix, plist *PromoterList, attributes TreeAttributes)

Init init enhlinkObject with a sparse matrix and a promoter list

func (*EnhlinkObject) analyseOneGene
func (eo *EnhlinkObject) analyseOneGene(gene string)

analyseOneGene analyses one gene and closes the output files

func (*EnhlinkObject) blacklistAllPeaksInPromoter
func (eo *EnhlinkObject) blacklistAllPeaksInPromoter(targetPeaks []utils.Peak)

blacklistAllPeaksInPromoter init blacklistedPeaks with all peaks within any current prom region

func (*EnhlinkObject) computeOnePvalue
func (eo *EnhlinkObject) computeOnePvalue(arr []float64, ygi uint, pvals *[]pvalPoint)
func (*EnhlinkObject) computePvalues
func (eo *EnhlinkObject) computePvalues(scoreArr map[uint][]float64) (pvals []pvalPoint)
func (*EnhlinkObject) computeRecursiveIGFloat
func (eo *EnhlinkObject) computeRecursiveIGFloat(xgiArr []uint, ygiMap map[uint]bool, bestScoreMap map[uint]float64, depth, lenXgi int)
func (*EnhlinkObject) computeRecursiveInformationGain
func (eo *EnhlinkObject) computeRecursiveInformationGain(xgiArr []uint, ygiMap map[uint]bool, bestScoreMap map[uint]float64, depth, lenXgi int)
func (*EnhlinkObject) computeTrees
func (eo *EnhlinkObject) computeTrees()

computeTrees Compute tree

func (*EnhlinkObject) computeTreesCovar
func (eo *EnhlinkObject) computeTreesCovar()
func (*EnhlinkObject) computeTreesOneThreads
func (eo *EnhlinkObject) computeTreesOneThreads(cluster string, ygiMap map[uint]bool, scoreArr map[uint][]float64)

computeTreesOneThreads Compute tree for one bootstrap index

func (*EnhlinkObject) computeTreesSim
func (eo *EnhlinkObject) computeTreesSim(ygiMap map[uint]bool)

computeTreesSim Compute tree using simulated variables

func (*EnhlinkObject) createYgiMapForCovar
func (eo *EnhlinkObject) createYgiMapForCovar(ygiToFocus uint, validYgi, validCovar []uint) (ygiMap map[uint]bool)
func (*EnhlinkObject) deferCloseFiles
func (eo *EnhlinkObject) deferCloseFiles()
func (*EnhlinkObject) defineBoolYgiVectorFromPeakMat
func (eo *EnhlinkObject) defineBoolYgiVectorFromPeakMat(intervals []interval.IntInterface)
func (*EnhlinkObject) defineClusterFloatYgiSum
func (eo *EnhlinkObject) defineClusterFloatYgiSum()

defineClusterFloatYgiSum define the nb of xgi

func (*EnhlinkObject) defineClusterYgiSum
func (eo *EnhlinkObject) defineClusterYgiSum()

defineClusterYgiSum define the nb of xgi

func (*EnhlinkObject) defineYgiVectorFromFloatMat
func (eo *EnhlinkObject) defineYgiVectorFromFloatMat(gene string) (isValid bool)

defineYgiVectorFromFloatMat defines the endog ygi vectors using the gene float mat and returns whether the vector is valid

func (*EnhlinkObject) defineYgiVectorFromGeneMat
func (eo *EnhlinkObject) defineYgiVectorFromGeneMat(gene string) (isValid bool)

defineYgiVectorFromGeneMat define the endog ygi vectors using the gene mat. return if the vector is valid

func (*EnhlinkObject) defineYgiVectorFromPeakMat
func (eo *EnhlinkObject) defineYgiVectorFromPeakMat(targetPeak utils.Peak) (isValid bool)

defineYgiVectorFromPeakMat define the endog ygi vectors using the peak mat. return if the vector is valid

func (*EnhlinkObject) getIGFloat
func (eo *EnhlinkObject) getIGFloat(xgiArr *[]uint, ygi uint) (infGainScore float64)

getIGFloat return weighted Information gain for float ygi vector. Dichotomize ygi using nonNullMean and compute IG. The final score is IG x non-null ygi ratio x non-null feature ratio

func (*EnhlinkObject) getInformationGain
func (eo *EnhlinkObject) getInformationGain(xgiArr *[]uint, ygi uint) (infGainScore float64)

getInformationGain returns the weighted information gain for an integer ygi vector. The final score is IG x non-null ygi ratio x non-null feature ratio

func (*EnhlinkObject) initIntervals
func (eo *EnhlinkObject) initIntervals()

initIntervals init (*eo).YgiInterval. If (*eo).rmPeaksInPromoters is true, remove from index ygis intersecting promoters

func (*EnhlinkObject) initRandomYgiFor2ndOrder
func (eo *EnhlinkObject) initRandomYgiFor2ndOrder(totnbRealFeat int)
func (*EnhlinkObject) initSimFloatMat
func (eo *EnhlinkObject) initSimFloatMat()
func (*EnhlinkObject) initSimMat
func (eo *EnhlinkObject) initSimMat()
func (*EnhlinkObject) initSimWriter
func (eo *EnhlinkObject) initSimWriter()
func (*EnhlinkObject) initSurroundingEnhancersMat
func (eo *EnhlinkObject) initSurroundingEnhancersMat(peak utils.Peak)

initSurroundingEnhancersMat

func (*EnhlinkObject) initWriters
func (eo *EnhlinkObject) initWriters()
func (*EnhlinkObject) initWritersWithHeader
func (eo *EnhlinkObject) initWritersWithHeader()
func (*EnhlinkObject) initYgiVectCovar
func (eo *EnhlinkObject) initYgiVectCovar(ygi uint)
func (*EnhlinkObject) initbucketCovariates
func (eo *EnhlinkObject) initbucketCovariates()
func (*EnhlinkObject) writePvals
func (eo *EnhlinkObject) writePvals(pvals []pvalPoint, cluster string)
func (*EnhlinkObject) writePvals2ndOrder
func (eo *EnhlinkObject) writePvals2ndOrder(pvals []pvalPoint, cluster string, currentYgi uint)
func (*EnhlinkObject) writePvalsSim
func (eo *EnhlinkObject) writePvalsSim(pvals []pvalPoint, cluster string)

type LinkType

LinkType type of link to keep from {"all", "positive", "negative"}

type LinkType string
const (
    allLink LinkType = "all"
    posLink LinkType = "positive"
    negLink LinkType = "negative"
)
func (LinkType) IsValid
func (t LinkType) IsValid() LinkType

IsValid checks whether the link type is valid (one of "all", "positive", "negative")

type MaxFeaturesType

MaxFeaturesType max features type

type MaxFeaturesType struct {
    mfString string
    fracFeat float64
    nbFeat   int
}
func (*MaxFeaturesType) SelectFeatures
func (mf *MaxFeaturesType) SelectFeatures(ygiMap map[uint]bool) map[uint]bool

SelectFeatures create feature map according to the strategy chosen

func (*MaxFeaturesType) Set
func (mf *MaxFeaturesType) Set(v string) error

Set set value

func (*MaxFeaturesType) String
func (mf *MaxFeaturesType) String() string
func (*MaxFeaturesType) check
func (mf *MaxFeaturesType) check()

type PromoterList

PromoterList map[geneID] -> list of peaks

type PromoterList map[string][]utils.Peak
func LoadPromotersFile
func LoadPromotersFile(fname utils.Filename) (plist PromoterList)

LoadPromotersFile load the promoter file

func (*PromoterList) Len
func (pl *PromoterList) Len() int

Len return length

type TreeAttributes

TreeAttributes attributes for enhlink

type TreeAttributes struct {
    // Number of internal threads to perform the multiple tasks
    NbThreads int
    // Remove peaks within promoter boundaries
    RmPeaksInPromoters bool
    // region in number of base pairs to define the surrounding enhancers
    SurroundingSize int
    //Min matrix size
    MinMatSize int
    // Max depth
    MaxDepth int
    // min leaf size of the tree
    MinLeafSize int
    // Number of bootstraps
    NBboot int
    // P-value threshold
    Threshold float64
    // Downsample the number of samples
    Downsample int
    // output directory and files tag
    OutDir, OutTag string
    // Maximum number of explanatory features per bootstrap model.
    MaxNbFeatures int
    // Maximum number of explanatory features per bootstrap model for second order models.
    SecondOrderMaxFeat int
    // Number of simulated features to use
    NbSimFeat int
    // Poisson parameter to control the amount of dropouts of the simulated variables
    Lambda1 float64
    // Poisson parameter to control the amount of false positive of the simulated variables
    Lambda2 float64
    // Keep the main ColMat matrix sparse. Useful for memory reasons if the background is very large
    KeepSparse bool
    // MaxFeatType: maximum number of features to be considered for a given tree: {"all", "sqrt", "log"}* or an int/float
    // LinkType: which links to keep {all, positive, negative}
    LinkType    LinkType
    MaxFeatType MaxFeaturesType
    // only perform simulation
    OnlySim bool
    //Identify the covariates associated with each inferred enhancer-promoter links
    SecondOrder bool
    //Ignore Enhancers weight (the ratio of accessibility) in the computation of the modified Information Gain
    IgnoreEnhancerWeight bool
    // For each tree, Randomly sample the cells to have an uniform covariate distribution
    UniformSampling bool
    //////////////// Arguments used only for header writing ////////////
    Version           string
    MatAttr, GmatAttr matrix.Attributes
    // mergingCutoff only used for header writing
    MergingCutoff int
    IsGeneExpr    bool
    //// Files ////
    PromoterFile, Metadata utils.Filename

    // verbose
    Verbose bool
}

type pvalPoint

type pvalPoint struct {
    pval, fdr, score float64
    index            uint
    isValid          bool
}

Generated by gomarkdoc

Documentation

Overview

Package enhlinkobject is a library for creating an Enhlink object and performing Enhlink analysis.

Index

Constants

This section is empty.

Variables

View Source
var VERSION = "0.21.3"

VERSION version of the current software

Functions

func AssertIfFileExists

func AssertIfFileExists(filename, tag string)

AssertIfFileExists panics if os.Stat returns a nil error for filename

func MergeClosePromoterRegions

func MergeClosePromoterRegions(mergingCutoff int, plist *PromoterList)

MergeClosePromoterRegions merge close promoters according to cutoff

Types

type EnhlinkObject

type EnhlinkObject struct {

	// sparse matrix
	SparseMatrix matrix.SparseBoolMatrix
	// sparse matrix for gene activity
	SparseMatrixGene *matrix.SparseBoolMatrix
	// sparse float matrix for gene expression (substitute SparseMatrixGene )
	SparseMatrixFloat *matrix.SparseFloatMatrix
	// sparse matrix for covariates
	SparseMatrixCovar *matrix.SparseBoolMatrix

	// type of links to keep: all, positive, or negative
	LinkType LinkType

	//promoter list map[gene]list<peak>
	Promoters *PromoterList
	// Reduced Intervals for ygi index map[chrID]interval
	YgiIntervalReduced utils.PeakIntervalTreeObject
	// Intervals for ygi index map[chrID]interval
	YgiInterval utils.PeakIntervalTreeObject
	// contains filtered or unexported fields
}

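EnhlinkObject is the main enhlink object, containing the input matrices, the promoter list, and the ygi interval indexes (plus unexported fields)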

func (*EnhlinkObject) AnalyseAllGenesFromGeneMat

func (eo *EnhlinkObject) AnalyseAllGenesFromGeneMat()

AnalyseAllGenesFromGeneMat analyse all genes from GeneMat

func (*EnhlinkObject) AnalyseAllPromoters

func (eo *EnhlinkObject) AnalyseAllPromoters(geneSubset utils.Filename)

AnalyseAllPromoters analyses all promoters; geneSubset is a file name, presumably used to restrict the analysis to a subset of genes

func (*EnhlinkObject) AnalyseNGenes

func (eo *EnhlinkObject) AnalyseNGenes(geneMap map[string]bool, verbose bool)

AnalyseNGenes analyses the genes in geneMap and closes the output files

func (*EnhlinkObject) AnalyseOneGene

func (eo *EnhlinkObject) AnalyseOneGene(gene string)

AnalyseOneGene analyses one gene and closes the output files

func (*EnhlinkObject) AnalyseRandomSubsetFromGeneMat

func (eo *EnhlinkObject) AnalyseRandomSubsetFromGeneMat(nSamples int)

AnalyseRandomSubsetFromGeneMat pick n genes at random from gene mat and analyse them

func (*EnhlinkObject) AnalyseRandomSubsetOfPromoters

func (eo *EnhlinkObject) AnalyseRandomSubsetOfPromoters(geneSubsetFile utils.Filename, nSamples int)

AnalyseRandomSubsetOfPromoters picks nSamples genes at random from the promoters (presumably restricted by geneSubsetFile) and analyses them

func (*EnhlinkObject) Init

func (eo *EnhlinkObject) Init(mat matrix.SparseBoolMatrix, geneMat, covMat *matrix.SparseBoolMatrix, floatMat *matrix.SparseFloatMatrix, plist *PromoterList, attributes TreeAttributes)

Init init enhlinkObject with a sparse matrix and a promoter list

type LinkType

type LinkType string

LinkType type of link to keep from {"all", "positive", "negative"}

func (LinkType) IsValid

func (t LinkType) IsValid() LinkType

IsValid checks whether the link type is valid (one of "all", "positive", "negative")

type MaxFeaturesType

type MaxFeaturesType struct {
	// contains filtered or unexported fields
}

MaxFeaturesType max features type

func (*MaxFeaturesType) SelectFeatures

func (mf *MaxFeaturesType) SelectFeatures(ygiMap map[uint]bool) map[uint]bool

SelectFeatures create feature map according to the strategy chosen

func (*MaxFeaturesType) Set

func (mf *MaxFeaturesType) Set(v string) error

Set set value

func (*MaxFeaturesType) String

func (mf *MaxFeaturesType) String() string

type PromoterList

type PromoterList map[string][]utils.Peak

PromoterList map[geneID] -> list of peaks

func LoadPromotersFile

func LoadPromotersFile(fname utils.Filename) (plist PromoterList)

LoadPromotersFile load the promoter file

func (*PromoterList) Len

func (pl *PromoterList) Len() int

Len return length

type TreeAttributes

type TreeAttributes struct {
	// Number of internal threads to perform the multiple tasks
	NbThreads int
	// Remove peaks within promoter boundaries
	RmPeaksInPromoters bool
	// region in number of base pairs to define the surrounding enhancers
	SurroundingSize int
	//Min matrix size
	MinMatSize int
	// Max depth
	MaxDepth int
	// min leaf size of the tree
	MinLeafSize int
	// Number of bootstraps
	NBboot int
	// P-value threshold
	Threshold float64
	// Downsample the number of samples
	Downsample int
	// output directory and files tag
	OutDir, OutTag string
	// Maximum number of explanatory features per bootstrap model.
	MaxNbFeatures int
	// Maximum number of explanatory features per bootstrap model for second order models.
	SecondOrderMaxFeat int
	// Number of simulated features to use
	NbSimFeat int
	// Poisson parameter to control the amount of dropouts of the simulated variables
	Lambda1 float64
	// Poisson parameter to control the amount of false positive of the simulated variables
	Lambda2 float64
	// Keep the main ColMat matrix sparse. Useful for memory reasons if the background is very large
	KeepSparse bool
	// MaxFeatType: maximum number of features to be considered for a given tree: {"all", "sqrt", "log"}* or an int/float
	// LinkType: which links to keep {all, positive, negative}
	LinkType    LinkType
	MaxFeatType MaxFeaturesType
	// only perform simulation
	OnlySim bool
	//Identify the covariates associated with each inferred enhancer-promoter links
	SecondOrder bool
	//Ignore Enhancers weight (the ratio of accessibility) in the computation of the modified Information Gain
	IgnoreEnhancerWeight bool
	// For each tree, Randomly sample the cells to have an uniform covariate distribution
	UniformSampling bool
	//////////////// Arguments used only for header writing ////////////
	Version           string
	MatAttr, GmatAttr matrix.Attributes
	// mergingCutoff only used for header writing
	MergingCutoff int
	IsGeneExpr    bool
	//// Files ////
	PromoterFile, Metadata utils.Filename

	// verbose
	Verbose bool
}

TreeAttributes attributes for enhlink

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL