enhlinkobject
import "gitlab.com/Grouumf/enhlinktools/enhlinkobject"
Package enhlinkobject is a library for creating an Enhlink object and performing Enhlink analysis.
Index
- Variables
- func AssertIfFileExists(filename, tag string)
- func MergeClosePromoterRegions(mergingCutoff int, plist *PromoterList)
- func pickNGenesAtRandom(nbGenes int, geneSet map[string]uint) (newGeneSet map[string]bool)
- type EnhlinkObject
- func (eo *EnhlinkObject) AnalyseAllGenesFromGeneMat()
- func (eo *EnhlinkObject) AnalyseAllPromoters(geneSubset utils.Filename)
- func (eo *EnhlinkObject) AnalyseNGenes(geneMap map[string]bool, verbose bool)
- func (eo *EnhlinkObject) AnalyseOneGene(gene string)
- func (eo *EnhlinkObject) AnalyseRandomSubsetFromGeneMat(nSamples int)
- func (eo *EnhlinkObject) AnalyseRandomSubsetOfPromoters(geneSubsetFile utils.Filename, nSamples int)
- func (eo *EnhlinkObject) Init(mat matrix.SparseBoolMatrix, geneMat, covMat *matrix.SparseBoolMatrix, floatMat *matrix.SparseFloatMatrix, plist *PromoterList, attributes TreeAttributes)
- func (eo *EnhlinkObject) analyseOneGene(gene string)
- func (eo *EnhlinkObject) blacklistAllPeaksInPromoter(targetPeaks []utils.Peak)
- func (eo *EnhlinkObject) computeOnePvalue(arr []float64, ygi uint, pvals *[]pvalPoint)
- func (eo *EnhlinkObject) computePvalues(scoreArr map[uint][]float64) (pvals []pvalPoint)
- func (eo *EnhlinkObject) computeRecursiveIGFloat(xgiArr []uint, ygiMap map[uint]bool, bestScoreMap map[uint]float64, depth, lenXgi int)
- func (eo *EnhlinkObject) computeRecursiveInformationGain(xgiArr []uint, ygiMap map[uint]bool, bestScoreMap map[uint]float64, depth, lenXgi int)
- func (eo *EnhlinkObject) computeTrees()
- func (eo *EnhlinkObject) computeTreesCovar()
- func (eo *EnhlinkObject) computeTreesOneThreads(cluster string, ygiMap map[uint]bool, scoreArr map[uint][]float64)
- func (eo *EnhlinkObject) computeTreesSim(ygiMap map[uint]bool)
- func (eo *EnhlinkObject) createYgiMapForCovar(ygiToFocus uint, validYgi, validCovar []uint) (ygiMap map[uint]bool)
- func (eo *EnhlinkObject) deferCloseFiles()
- func (eo *EnhlinkObject) defineBoolYgiVectorFromPeakMat(intervals []interval.IntInterface)
- func (eo *EnhlinkObject) defineClusterFloatYgiSum()
- func (eo *EnhlinkObject) defineClusterYgiSum()
- func (eo *EnhlinkObject) defineYgiVectorFromFloatMat(gene string) (isValid bool)
- func (eo *EnhlinkObject) defineYgiVectorFromGeneMat(gene string) (isValid bool)
- func (eo *EnhlinkObject) defineYgiVectorFromPeakMat(targetPeak utils.Peak) (isValid bool)
- func (eo *EnhlinkObject) getIGFloat(xgiArr *[]uint, ygi uint) (infGainScore float64)
- func (eo *EnhlinkObject) getInformationGain(xgiArr *[]uint, ygi uint) (infGainScore float64)
- func (eo *EnhlinkObject) initIntervals()
- func (eo *EnhlinkObject) initRandomYgiFor2ndOrder(totnbRealFeat int)
- func (eo *EnhlinkObject) initSimFloatMat()
- func (eo *EnhlinkObject) initSimMat()
- func (eo *EnhlinkObject) initSimWriter()
- func (eo *EnhlinkObject) initSurroundingEnhancersMat(peak utils.Peak)
- func (eo *EnhlinkObject) initWriters()
- func (eo *EnhlinkObject) initWritersWithHeader()
- func (eo *EnhlinkObject) initYgiVectCovar(ygi uint)
- func (eo *EnhlinkObject) initbucketCovariates()
- func (eo *EnhlinkObject) writePvals(pvals []pvalPoint, cluster string)
- func (eo *EnhlinkObject) writePvals2ndOrder(pvals []pvalPoint, cluster string, currentYgi uint)
- func (eo *EnhlinkObject) writePvalsSim(pvals []pvalPoint, cluster string)
- type LinkType
- type MaxFeaturesType
- type PromoterList
- type TreeAttributes
- type pvalPoint
Variables
VERSION version of the current software
// VERSION is the version of the current software.
var VERSION = "0.21.3"
linkTypes lists the possible options for the link type
// linkTypes lists the LinkType values allLink, posLink and negLink.
var linkTypes = [...]LinkType{allLink, posLink, negLink}
func AssertIfFileExists(filename, tag string)
AssertIfFileExists panics if err is nil from os.Stat
func MergeClosePromoterRegions(mergingCutoff int, plist *PromoterList)
MergeClosePromoterRegions merge close promoters according to cutoff
func pickNGenesAtRandom(nbGenes int, geneSet map[string]uint) (newGeneSet map[string]bool)
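EnhlinkObject is the main enhlink object. It contains the files and matrices, the internal and simulated variables, the tree attributes, the sync objects and the output file objects.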
// EnhlinkObject is the main enhlink object. Its fields are grouped into
// files and matrices, internal variables, simulated variables, float matrix
// attributes, tree attributes, sync objects and output file objects.
type EnhlinkObject struct {
//////////////// Files and matrices ////////////
// promoter file
promoterFile utils.Filename
// sparse boolean matrix
SparseMatrix matrix.SparseBoolMatrix
// sparse boolean matrix for gene activity
SparseMatrixGene *matrix.SparseBoolMatrix
// sparse float matrix for gene expression (substitutes SparseMatrixGene)
SparseMatrixFloat *matrix.SparseFloatMatrix
// sparse boolean matrix for covariates
SparseMatrixCovar *matrix.SparseBoolMatrix
//////////////// Internal variables //////////
// current gene under study
currentGene string
// internal promoter map that defines all the current promoter regions.
// If the matrix is constructed from peakMat, it contains only 1 region
currentPeaks map[utils.Peak]bool
// peaks banned from being in the neighborhood matrix
// because they are in a current promoter region
blacklistedPeaks map[uint]bool
// features on which to perform the analysis
relevantFeatures []int
// endogenous response binary vectors
ygiVector []int // xgiID -> value
ygiCovVector []int // xgiID -> value
// endogenous response float vectors
ygiVectorFloat []float64 // xgiID -> value
ygiCovVectorFloat []float64 // xgiID -> value
// sum of ygi for each cluster
ygiClusterSum map[string]float64
// remove peaks within promoter boundaries
rmPeaksInPromoters bool
// surrounding matrix
surroundingPeaks []uint
// number of additional random features
nbRandFeat int
// number of features used for the model
nbFeatUsed int
// is a gene matrix provided
isGeneMat bool
// is a gene expression matrix provided
isFloatMat bool
// is a covariate matrix provided
isCovMat bool
// starting time
tStart time.Time
// NOTE(review): undocumented in the source; presumably keyed by cluster
// like the other map[string] fields — confirm
bucketCovariates map[string][][]uint
// valid peaks and covariates
validYgi, validCovar map[string][]uint
xgiCovMap []map[int]bool
// internal variable indicating whether the 2nd order inference mode is activated
isInferring2nd bool
// verbose status
verbose bool
//////////////// Simulated variables ////////
simColMat matrix.MatColHash
simYgiVector []int
simYgiVectorFloat []float64
nbSimFeat int
isSim bool
lambda1 float64 // Poisson parameter for the dropout level
lambda2 float64 // Poisson parameter for the false positive level
//////// Float matrix attributes ///////////
nonNullMean float64
//////////////// Tree attributes //////////
// TreeAttributes object passed during the init
attributes TreeAttributes
// number of internal threads to perform the multiple tasks
nbThreads int
// region, in number of base pairs, that defines the surrounding enhancers
surroundingSize int
// min matrix size
minMatSize int
// max depth
maxDepth int
// number of classes for the ygi vector
nbClass int
// min leaf size of the tree
minLeafSize int
// number of bootstraps
nbBoot int
// p-value threshold
threshold float64
// downsample the number of samples
downsample int
// maximum number of explanatory features per bootstrap model
maxNbFeatures int
// maximum number of explanatory features per bootstrap model for second order models
secondOrderMaxFeat int
// ignore the enhancers weight (the ratio of accessibility) in the computation of the modified Information Gain
ignoreEnhancerWeight bool
// keep the main ColMat matrix sparse. Useful for memory reasons if the background is very large
keepSparse bool
// identify the covariates associated with each inferred enhancer-promoter link
secondOrder bool
// maximum number of features to be considered for a given tree:
// "all", "sqrt", "log", or an int / float value
maxFeatType MaxFeaturesType
// only perform the simulation
onlySim bool
// which links to keep: one of {"all", "positive", "negative"}
LinkType LinkType
// uniform covariate sampling for each tree
uniformSampling bool
////////// Sync objects /////////////////
guard chan bool
mutex, mutex2 sync.Mutex
waiting sync.WaitGroup
// promoter list: gene -> list of peaks
Promoters *PromoterList
// reduced intervals for the ygi index: chrID -> interval
YgiIntervalReduced utils.PeakIntervalTreeObject
// intervals for the ygi index: chrID -> interval
YgiInterval utils.PeakIntervalTreeObject
// refined index of the ygis that are not in promoters
ygisNotInPromoters map[string]uint
//////////////// File objects //////////
outDir, outTag string
// cluster -> writer
writers, writersCov, writers2ndOrder map[string]*io.WriteCloser
// cluster -> file name
files, filesCov, files2ndOrder map[string]string
// writer of the simulated features results
writerSim *io.WriteCloser
fileSim string
}
func (eo *EnhlinkObject) AnalyseAllGenesFromGeneMat()
AnalyseAllGenesFromGeneMat analyse all genes from GeneMat
func (eo *EnhlinkObject) AnalyseAllPromoters(geneSubset utils.Filename)
AnalyseAllPromoters analyse all genes from GeneMat
func (eo *EnhlinkObject) AnalyseNGenes(geneMap map[string]bool, verbose bool)
AnalyseNGenes analyses the genes of geneMap and closes the output files
func (eo *EnhlinkObject) AnalyseOneGene(gene string)
AnalyseOneGene analyses one gene and closes the output files
func (eo *EnhlinkObject) AnalyseRandomSubsetFromGeneMat(nSamples int)
AnalyseRandomSubsetFromGeneMat pick n genes at random from gene mat and analyse them
func (eo *EnhlinkObject) AnalyseRandomSubsetOfPromoters(geneSubsetFile utils.Filename, nSamples int)
AnalyseRandomSubsetOfPromoters analyse all genes from GeneMat
func (*EnhlinkObject) Init
func (eo *EnhlinkObject) Init(mat matrix.SparseBoolMatrix, geneMat, covMat *matrix.SparseBoolMatrix, floatMat *matrix.SparseFloatMatrix, plist *PromoterList, attributes TreeAttributes)
Init init enhlinkObject with a sparse matrix and a promoter list
func (eo *EnhlinkObject) analyseOneGene(gene string)
analyseOneGene analyses one gene and closes the output files
func (eo *EnhlinkObject) blacklistAllPeaksInPromoter(targetPeaks []utils.Peak)
blacklistAllPeaksInPromoter init blacklistedPeaks with all peaks within any current prom region
func (eo *EnhlinkObject) computeOnePvalue(arr []float64, ygi uint, pvals *[]pvalPoint)
func (eo *EnhlinkObject) computePvalues(scoreArr map[uint][]float64) (pvals []pvalPoint)
func (eo *EnhlinkObject) computeRecursiveIGFloat(xgiArr []uint, ygiMap map[uint]bool, bestScoreMap map[uint]float64, depth, lenXgi int)
func (eo *EnhlinkObject) computeRecursiveInformationGain(xgiArr []uint, ygiMap map[uint]bool, bestScoreMap map[uint]float64, depth, lenXgi int)
func (eo *EnhlinkObject) computeTrees()
computeTrees Compute tree
func (eo *EnhlinkObject) computeTreesCovar()
func (eo *EnhlinkObject) computeTreesOneThreads(cluster string, ygiMap map[uint]bool, scoreArr map[uint][]float64)
computeTreesOneThreads Compute tree for one bootstrap index
func (eo *EnhlinkObject) computeTreesSim(ygiMap map[uint]bool)
computeTreesSim Compute tree using simulated variables
func (eo *EnhlinkObject) createYgiMapForCovar(ygiToFocus uint, validYgi, validCovar []uint) (ygiMap map[uint]bool)
func (eo *EnhlinkObject) deferCloseFiles()
func (eo *EnhlinkObject) defineBoolYgiVectorFromPeakMat(intervals []interval.IntInterface)
func (eo *EnhlinkObject) defineClusterFloatYgiSum()
defineClusterFloatYgiSum define the nb of xgi
func (eo *EnhlinkObject) defineClusterYgiSum()
defineClusterYgiSum define the nb of xgi
func (eo *EnhlinkObject) defineYgiVectorFromFloatMat(gene string) (isValid bool)
defineYgiVectorFromFloatMat defines the endog ygi vectors using the gene float mat. Returns whether the vector is valid
func (eo *EnhlinkObject) defineYgiVectorFromGeneMat(gene string) (isValid bool)
defineYgiVectorFromGeneMat define the endog ygi vectors using the gene mat. return if the vector is valid
func (eo *EnhlinkObject) defineYgiVectorFromPeakMat(targetPeak utils.Peak) (isValid bool)
defineYgiVectorFromPeakMat define the endog ygi vectors using the peak mat. return if the vector is valid
func (*EnhlinkObject) getIGFloat
func (eo *EnhlinkObject) getIGFloat(xgiArr *[]uint, ygi uint) (infGainScore float64)
getIGFloat return weighted Information gain for float ygi vector. Dichotomize ygi using nonNullMean and compute IG. The final score is IG x non-null ygi ratio x non-null feature ratio
func (eo *EnhlinkObject) getInformationGain(xgiArr *[]uint, ygi uint) (infGainScore float64)
getInformationGain returns the weighted Information gain for integer ygi vector. The final score is IG x non-null ygi ratio x non-null feature ratio
func (eo *EnhlinkObject) initIntervals()
initIntervals init (*eo).YgiInterval. If (*eo).rmPeaksInPromoters is true, remove from index ygis intersecting promoters
func (eo *EnhlinkObject) initRandomYgiFor2ndOrder(totnbRealFeat int)
func (eo *EnhlinkObject) initSimFloatMat()
func (*EnhlinkObject) initSimMat
func (eo *EnhlinkObject) initSimMat()
func (eo *EnhlinkObject) initSimWriter()
func (eo *EnhlinkObject) initSurroundingEnhancersMat(peak utils.Peak)
initSurroundingEnhancersMat
func (eo *EnhlinkObject) initWriters()
func (eo *EnhlinkObject) initWritersWithHeader()
func (eo *EnhlinkObject) initYgiVectCovar(ygi uint)
func (eo *EnhlinkObject) initbucketCovariates()
func (*EnhlinkObject) writePvals
func (eo *EnhlinkObject) writePvals(pvals []pvalPoint, cluster string)
func (eo *EnhlinkObject) writePvals2ndOrder(pvals []pvalPoint, cluster string, currentYgi uint)
func (eo *EnhlinkObject) writePvalsSim(pvals []pvalPoint, cluster string)
LinkType type of link to keep from {"all", "positive", "negative"}
// LinkType is the type of link to keep, from {"all", "positive", "negative"}.
type LinkType string
// The LinkType values declared by this package.
const (
allLink LinkType = "all"
posLink LinkType = "positive"
negLink LinkType = "negative"
)
func (LinkType) IsValid
func (t LinkType) IsValid() LinkType
IsValid checks whether the link type is valid
MaxFeaturesType max features type
// MaxFeaturesType describes the max features type.
type MaxFeaturesType struct {
// NOTE(review): presumably the textual form of the setting handled by
// Set and String — confirm against the source
mfString string
// NOTE(review): presumably the fraction of features to keep when the
// setting is a float — confirm
fracFeat float64
// NOTE(review): presumably the number of features to keep when the
// setting is an int — confirm
nbFeat int
}
func (mf *MaxFeaturesType) SelectFeatures(ygiMap map[uint]bool) map[uint]bool
SelectFeatures create feature map according to the strategy chosen
func (*MaxFeaturesType) Set
func (mf *MaxFeaturesType) Set(v string) error
Set set value
func (*MaxFeaturesType) String
func (mf *MaxFeaturesType) String() string
func (*MaxFeaturesType) check
func (mf *MaxFeaturesType) check()
PromoterList map[geneID] -> list of peaks
// PromoterList maps a gene ID to its list of peaks.
type PromoterList map[string][]utils.Peak
func LoadPromotersFile(fname utils.Filename) (plist PromoterList)
LoadPromotersFile load the promoter file
func (pl *PromoterList) Len() int
Len return length
TreeAttributes attributes for enhlink
// TreeAttributes holds the attributes for enhlink. It is the attributes
// argument of (*EnhlinkObject).Init.
type TreeAttributes struct {
// Number of internal threads to perform the multiple tasks
NbThreads int
// Remove peaks within promoter boundaries
RmPeaksInPromoters bool
// Region, in number of base pairs, that defines the surrounding enhancers
SurroundingSize int
// Min matrix size
MinMatSize int
// Max depth
MaxDepth int
// Min leaf size of the tree
MinLeafSize int
// Number of bootstraps
NBboot int
// P-value threshold
Threshold float64
// Downsample the number of samples
Downsample int
// Output directory and files tag
OutDir, OutTag string
// Maximum number of explanatory features per bootstrap model.
MaxNbFeatures int
// Maximum number of explanatory features per bootstrap model for second order models.
SecondOrderMaxFeat int
// Number of simulated features to use
NbSimFeat int
// Poisson parameter to control the amount of dropouts of the simulated variables
Lambda1 float64
// Poisson parameter to control the amount of false positives of the simulated variables
Lambda2 float64
// Keep the main ColMat matrix sparse. Useful for memory reasons if the background is very large
KeepSparse bool
// Which links to keep: one of {"all", "positive", "negative"}
LinkType LinkType
// Maximum number of features to be considered for a given tree:
// "all", "sqrt", "log", or an int / float value
MaxFeatType MaxFeaturesType
// Only perform the simulation
OnlySim bool
// Identify the covariates associated with each inferred enhancer-promoter link
SecondOrder bool
// Ignore the enhancers weight (the ratio of accessibility) in the computation of the modified Information Gain
IgnoreEnhancerWeight bool
// For each tree, randomly sample the cells to have a uniform covariate distribution
UniformSampling bool
//////////////// Arguments used only for header writing ////////////
Version string
MatAttr, GmatAttr matrix.Attributes
// Merging cutoff, only used for header writing
MergingCutoff int
IsGeneExpr bool
//// Files ////
PromoterFile, Metadata utils.Filename
// Verbose
Verbose bool
}
// pvalPoint bundles the pval, fdr and score values with an index and an
// isValid flag.
type pvalPoint struct {
pval, fdr, score float64
// NOTE(review): presumably the ygi index the values refer to — confirm
index uint
isValid bool
}
Generated by gomarkdoc