Documentation ¶
Overview ¶
Package formulation provides a library for automatically formulating queries.
Index ¶
- Constants
- Variables
- func EntityExpansion(query cqr.CommonQueryRepresentation, expander EntityExpander) (cqr.CommonQueryRepresentation, error)
- func Entropy(positive, negative float64) float64
- func FilterQueryTerms(conditions, treatments, studyTypes []string, field string, ...) ([]string, []string, []string, error)
- func ID3(training map[string][]string, labels map[string]bool, attrs []string) *tree
- func InformationGain(attr string, training map[string][]string, labels map[string]bool) (float64, []string, []string)
- func MakeQrels(docs []guru.MedlineDocument, topic string) trecresults.Qrels
- func MapKeywords(r cqr.CommonQueryRepresentation, mapper KeywordMapper) (cqr.CommonQueryRepresentation, error)
- func RelevanceFeedback(query cqr.CommonQueryRepresentation, docs guru.MedlineDocuments, ...) (cqr.CommonQueryRepresentation, error)
- type BackgroundCollection
- type ConceptualFormulator
- type Cui2VecEntityExpander
- type Cui2VecRPCEntityExpander
- type DecisionTreeFormulator
- type EntityExpander
- type EntityExtractor
- type Formulator
- type KeywordMapper
- type LogicComposer
- type MetaMapEntityExtractor
- type MetaMapKeywordMapper
- type MetaMapMapper
- type NLPLogicComposer
- type ObjectiveFormulator
- func (o ObjectiveFormulator) Derive() (cqr.CommonQueryRepresentation, cqr.CommonQueryRepresentation, ...)
- func (o ObjectiveFormulator) Formulate(query pipeline.Query) ([]cqr.CommonQueryRepresentation, []pipeline.SupplementalData, error)
- func (o ObjectiveFormulator) Method() string
- func (o ObjectiveFormulator) Topic() string
- type ObjectiveOption
- func ObjectiveAnalyser(analyser TermAnalyser, name string) ObjectiveOption
- func ObjectiveGrid(devK, popK []float64, meshK []int) ObjectiveOption
- func ObjectiveMinDocs(docs int) ObjectiveOption
- func ObjectiveOptimisation(optimisation eval.Evaluator) ObjectiveOption
- func ObjectivePopulation(population BackgroundCollection) ObjectiveOption
- func ObjectivePostProcessing(processes ...PostProcess) ObjectiveOption
- func ObjectiveQrels(rels trecresults.QrelsFile) ObjectiveOption
- func ObjectiveQuery(query pipeline.Query) ObjectiveOption
- func ObjectiveSeed(seed int) ObjectiveOption
- func ObjectiveSplitter(spitter Splitter) ObjectiveOption
- type PopulationSet
- type PostProcess
- type PubMedSet
- type QueryCategory
- type RAKELogicComposer
- type RandomSplitter
- type Splitter
- type TermAnalyser
- type TermStatistics
Constants ¶
const ( NA leaf = 0 TRUE = 1 FALSE = -1 )
const Entity = "entity"
Variables ¶
var ( /* #1 randomized controlled trial [pt] #2 controlled clinical trial [pt] #3 randomized [tiab] #4 placebo [tiab] #5 drug therapy [sh] #6 randomly [tiab] #7 trial [tiab] #8 groups [tiab] #9 #1 OR #2 OR #3 OR #4 OR #5 OR #6 OR #7 OR #8 #10 animals [mh] NOT humans [mh] #11 #9 NOT #10 */ SensitivityFilter = cqr.NewBooleanQuery(cqr.NOT, []cqr.CommonQueryRepresentation{ cqr.NewBooleanQuery(cqr.OR, []cqr.CommonQueryRepresentation{ cqr.NewKeyword("randomized controlled trial", fields.PublicationType), cqr.NewKeyword("controlled clinical trial", fields.PublicationType), cqr.NewKeyword("randomized", fields.TitleAbstract), cqr.NewKeyword("placebo", fields.TitleAbstract), cqr.NewKeyword("drug therapy", fields.FloatingMeshHeadings), cqr.NewKeyword("randomly", fields.TitleAbstract), cqr.NewKeyword("trial", fields.TitleAbstract), cqr.NewKeyword("groups", fields.TitleAbstract), }), cqr.NewBooleanQuery(cqr.NOT, []cqr.CommonQueryRepresentation{ cqr.NewKeyword("animals", fields.MeshHeadings), cqr.NewKeyword("humans", fields.MeshHeadings), }), }) /* #1 randomized controlled trial [pt] #2 controlled clinical trial [pt] #3 randomized [tiab] #4 placebo [tiab] #5 clinical trials as topic [mesh: noexp] #6 randomly [tiab] #7 trial [ti] #8 #1 OR #2 OR #3 OR #4 OR #5 OR #6 OR #7 #9 animals [mh] NOT humans [mh] #10 #8 NOT #9 */ PrecisionSensitivityFilter = cqr.NewBooleanQuery(cqr.NOT, []cqr.CommonQueryRepresentation{ cqr.NewBooleanQuery(cqr.OR, []cqr.CommonQueryRepresentation{ cqr.NewKeyword("randomized controlled trial", fields.PublicationType), cqr.NewKeyword("controlled clinical trial", fields.PublicationType), cqr.NewKeyword("randomized", fields.TitleAbstract), cqr.NewKeyword("placebo", fields.TitleAbstract), cqr.NewKeyword("clinical trials as topic", fields.MeshHeadings).SetOption(cqr.ExplodedString, false), cqr.NewKeyword("randomly", fields.TitleAbstract), cqr.NewKeyword("trial", fields.Title), }), cqr.NewBooleanQuery(cqr.NOT, []cqr.CommonQueryRepresentation{ 
cqr.NewKeyword("animals", fields.MeshHeadings), cqr.NewKeyword("humans", fields.MeshHeadings), }), }) )
Functions ¶
func EntityExpansion ¶
func EntityExpansion(query cqr.CommonQueryRepresentation, expander EntityExpander) (cqr.CommonQueryRepresentation, error)
EntityExpansion performs entity expansion on a query using a specified expander.
func FilterQueryTerms ¶
func FilterQueryTerms(conditions, treatments, studyTypes []string, field string, development trecresults.Qrels, e stats.EntrezStatisticsSource) ([]string, []string, []string, error)
FilterQueryTerms further reduces the query terms by identifying the best combination of terms based on how many relevant documents they retrieve from the development set.
func InformationGain ¶
func MakeQrels ¶
func MakeQrels(docs []guru.MedlineDocument, topic string) trecresults.Qrels
MakeQrels creates a set of relevance assessments from some medline documents.
func MapKeywords ¶
func MapKeywords(r cqr.CommonQueryRepresentation, mapper KeywordMapper) (cqr.CommonQueryRepresentation, error)
MapKeywords takes as input a proto-query from a newly logically composed query and maps concepts in it to keywords using the specified mapper.
func RelevanceFeedback ¶
func RelevanceFeedback(query cqr.CommonQueryRepresentation, docs guru.MedlineDocuments, mm metawrap.HTTPClient) (cqr.CommonQueryRepresentation, error)
Types ¶
type BackgroundCollection ¶
type BackgroundCollection interface { Statistic(term string) (float64, error) Size() (float64, error) }
func GetPopulationSet ¶
func GetPopulationSet(e stats.EntrezStatisticsSource, analyser TermAnalyser) (BackgroundCollection, error)
GetPopulationSet retrieves a set of publications to form a population set.
type ConceptualFormulator ¶
type ConceptualFormulator struct { LogicComposer EntityExtractor EntityExpander KeywordMapper FeedbackDocs []int // contains filtered or unexported fields }
ConceptualFormulator formulates queries using the title or string of a systematic review.
func NewConceptualFormulator ¶
func NewConceptualFormulator(logicComposer LogicComposer, entityExtractor EntityExtractor, entityExpander EntityExpander, keywordMapper KeywordMapper, rf []int, e stats.EntrezStatisticsSource, postProcessing ...PostProcess) *ConceptualFormulator
func (ConceptualFormulator) Formulate ¶
func (t ConceptualFormulator) Formulate(query pipeline.Query) ([]cqr.CommonQueryRepresentation, []pipeline.SupplementalData, error)
func (ConceptualFormulator) Method ¶
func (t ConceptualFormulator) Method() string
type Cui2VecEntityExpander ¶
type Cui2VecEntityExpander struct {
// contains filtered or unexported fields
}
Cui2VecEntityExpander expands entities using cui2vec embeddings.
func NewCui2VecEntityExpander ¶
func NewCui2VecEntityExpander(embeddings cui2vec.PrecomputedEmbeddings) *Cui2VecEntityExpander
func (Cui2VecEntityExpander) Expand ¶
func (c Cui2VecEntityExpander) Expand(keyword cqr.Keyword) ([]cqr.CommonQueryRepresentation, error)
type Cui2VecRPCEntityExpander ¶
type Cui2VecRPCEntityExpander struct {
// contains filtered or unexported fields
}
Cui2VecRPCEntityExpander expands entities using cui2vec embeddings.
func NewCui2VecRPCEntityExpander ¶
func NewCui2VecRPCEntityExpander(client *cui2vec.VecClient) *Cui2VecRPCEntityExpander
func (*Cui2VecRPCEntityExpander) Expand ¶
func (c *Cui2VecRPCEntityExpander) Expand(keyword cqr.Keyword) ([]cqr.CommonQueryRepresentation, error)
type DecisionTreeFormulator ¶
type DecisionTreeFormulator struct { // Terms identified as candidate query terms. N [][]string // Attributes. // contains filtered or unexported fields }
func NewDecisionTreeFormulator ¶
func NewDecisionTreeFormulator(topic string, positive, negative guru.MedlineDocuments) (*DecisionTreeFormulator, error)
func (DecisionTreeFormulator) Formulate ¶
func (dt DecisionTreeFormulator) Formulate(query pipeline.Query) ([]cqr.CommonQueryRepresentation, []pipeline.SupplementalData, error)
func (DecisionTreeFormulator) Method ¶
func (dt DecisionTreeFormulator) Method() string
type EntityExpander ¶
type EntityExpander interface {
Expand(q cqr.Keyword) ([]cqr.CommonQueryRepresentation, error)
}
EntityExpander takes as input a keyword that has been annotated with entities in the entity extraction step and expands it.
type EntityExtractor ¶
type EntityExtractor interface {
Extract(query cqr.CommonQueryRepresentation) (cqr.CommonQueryRepresentation, error)
}
EntityExtractor extracts entities from queries. These could be, for example, CUIs. Each Entity Extractor implementation adds the Entity option on queries which is used later in the keyword mapping step.
type Formulator ¶
type Formulator interface { Formulate(query pipeline.Query) ([]cqr.CommonQueryRepresentation, []pipeline.SupplementalData, error) Method() string }
Formulator formulates queries to some specification.
type KeywordMapper ¶
type KeywordMapper interface {
Map(keyword cqr.Keyword) ([]cqr.CommonQueryRepresentation, error)
}
KeywordMapper transforms entities (e.g., CUIs) into keywords.
type LogicComposer ¶
type LogicComposer interface {
Compose(query pipeline.Query) (cqr.CommonQueryRepresentation, error)
}
type MetaMapEntityExtractor ¶
type MetaMapEntityExtractor struct {
// contains filtered or unexported fields
}
MetaMapEntityExtractor extracts CUI entities from queries.
func NewMetaMapEntityExtractor ¶
func NewMetaMapEntityExtractor(client metawrap.HTTPClient) MetaMapEntityExtractor
func (MetaMapEntityExtractor) Extract ¶
func (m MetaMapEntityExtractor) Extract(query cqr.CommonQueryRepresentation) (cqr.CommonQueryRepresentation, error)
type MetaMapKeywordMapper ¶
type MetaMapKeywordMapper struct {
// contains filtered or unexported fields
}
MetaMapKeywordMapper uses MetaMap to map entities (CUIs) to keywords.
func NewMetaMapKeywordMapper ¶
func NewMetaMapKeywordMapper(client metawrap.HTTPClient, mapper MetaMapMapper) MetaMapKeywordMapper
NewMetaMapKeywordMapper creates a new keyword mapper that uses MetaMap.
func (MetaMapKeywordMapper) Map ¶
func (m MetaMapKeywordMapper) Map(keyword cqr.Keyword) (keywords []cqr.CommonQueryRepresentation, err error)
Map maps text to several concepts.
type MetaMapMapper ¶
type MetaMapMapper func(keyword cqr.Keyword) ([]cqr.CommonQueryRepresentation, error)
MetaMapMapper maps candidates from MetaMap to one or more keywords.
func Alias ¶
func Alias(mapping cui2vec.AliasMapping) MetaMapMapper
Alias identifies all of the terms for the concept in the UMLS meta-thesaurus.
func ElasticUMLS ¶
func ElasticUMLS(c *elastic.Client) MetaMapMapper
func Frequent ¶
func Frequent(mapping cui2vec.Mapping) MetaMapMapper
Frequent identifies the most frequently used term for the concept in the UMLS meta-thesaurus.
func MeSHMapper ¶
func MeSHMapper(mapper MetaMapMapper) MetaMapMapper
MeSHMapper uses the output of another MetaMap mapper to assign MeSH terms.
func Preferred ¶
func Preferred(client guru.UMLSClient) MetaMapMapper
Preferred uses the Preferred entity from UMLS.
type NLPLogicComposer ¶
type NLPLogicComposer struct {
// contains filtered or unexported fields
}
NLPLogicComposer composes queries logically using the Stanford English parser.
func NewNLPLogicComposer ¶
func NewNLPLogicComposer(javaClassPath string) *NLPLogicComposer
func (NLPLogicComposer) Compose ¶
func (n NLPLogicComposer) Compose(query pipeline.Query) (cqr.CommonQueryRepresentation, error)
type ObjectiveFormulator ¶
type ObjectiveFormulator struct {
Folder, Pubdates, SemTypes, MetaMapURL string
MeSHK []int
DevK, PopK []float64
// contains filtered or unexported fields
}
ObjectiveFormulator formulates queries according to the objective approach. This implementation writes files to disk as a side effect, which can later be used for analysis.
func NewObjectiveFormulator ¶
func NewObjectiveFormulator(s stats.EntrezStatisticsSource, esClient *elastic.Client, qrels trecresults.QrelsFile, population BackgroundCollection, folder, pubdates, semTypes, metamapURL string, optimisation eval.Evaluator, options ...ObjectiveOption) *ObjectiveFormulator
func (ObjectiveFormulator) Derive ¶
func (o ObjectiveFormulator) Derive() (cqr.CommonQueryRepresentation, cqr.CommonQueryRepresentation, []guru.MedlineDocument, []guru.MedlineDocument, []guru.MedlineDocument, error)
func (ObjectiveFormulator) Formulate ¶
func (o ObjectiveFormulator) Formulate(query pipeline.Query) ([]cqr.CommonQueryRepresentation, []pipeline.SupplementalData, error)
Formulate returns two queries: one with MeSH terms and one without. It also returns the set of unseen documents for evaluation later.
func (ObjectiveFormulator) Method ¶
func (o ObjectiveFormulator) Method() string
func (ObjectiveFormulator) Topic ¶
func (o ObjectiveFormulator) Topic() string
type ObjectiveOption ¶
type ObjectiveOption func(o *ObjectiveFormulator)
func ObjectiveAnalyser ¶
func ObjectiveAnalyser(analyser TermAnalyser, name string) ObjectiveOption
func ObjectiveGrid ¶
func ObjectiveGrid(devK, popK []float64, meshK []int) ObjectiveOption
func ObjectiveMinDocs ¶
func ObjectiveMinDocs(docs int) ObjectiveOption
func ObjectiveOptimisation ¶
func ObjectiveOptimisation(optimisation eval.Evaluator) ObjectiveOption
func ObjectivePopulation ¶
func ObjectivePopulation(population BackgroundCollection) ObjectiveOption
func ObjectivePostProcessing ¶
func ObjectivePostProcessing(processes ...PostProcess) ObjectiveOption
func ObjectiveQrels ¶
func ObjectiveQrels(rels trecresults.QrelsFile) ObjectiveOption
func ObjectiveQuery ¶
func ObjectiveQuery(query pipeline.Query) ObjectiveOption
func ObjectiveSeed ¶
func ObjectiveSeed(seed int) ObjectiveOption
func ObjectiveSplitter ¶
func ObjectiveSplitter(spitter Splitter) ObjectiveOption
type PopulationSet ¶
type PopulationSet TermStatistics
func (PopulationSet) Size ¶
func (p PopulationSet) Size() (float64, error)
type PostProcess ¶
type PostProcess func(query cqr.CommonQueryRepresentation) (cqr.CommonQueryRepresentation, error)
PostProcess applies any post-formatting to a query.
func Stem ¶
func Stem(original cqr.CommonQueryRepresentation) PostProcess
Stem uses already stemmed terms from the original query to replace terms from the query that requires post-processing.
type PubMedSet ¶
type PubMedSet struct {
// contains filtered or unexported fields
}
func NewPubMedSet ¶
func NewPubMedSet(e stats.EntrezStatisticsSource) PubMedSet
type QueryCategory ¶
type QueryCategory int
type RAKELogicComposer ¶
type RAKELogicComposer struct {
// contains filtered or unexported fields
}
func NewRAKELogicComposer ¶
func NewRAKELogicComposer(semtypes, metamap string, titles map[string]string, seedPMIDs trecresults.QrelsFile, esClient *elastic.Client, vecClient *cui2vec.VecClient) RAKELogicComposer
func (RAKELogicComposer) Compose ¶
func (r RAKELogicComposer) Compose(query pipeline.Query) (cqr.CommonQueryRepresentation, error)
type RandomSplitter ¶
type RandomSplitter int64
func (RandomSplitter) Split ¶
func (r RandomSplitter) Split(docs []guru.MedlineDocument) ([]guru.MedlineDocument, []guru.MedlineDocument, []guru.MedlineDocument)
Split creates three slices of documents: 2:4 development, 1:4 validation, 1:4 unseen.
type Splitter ¶
type Splitter interface {
Split(docs []guru.MedlineDocument) ([]guru.MedlineDocument, []guru.MedlineDocument, []guru.MedlineDocument)
}
Splitter splits a test set into development, validation, and unseen.
type TermAnalyser ¶
type TermAnalyser func(docs []guru.MedlineDocument) (TermStatistics, error)
TermAnalyser records term/phrase statistics about a set of documents.
type TermStatistics ¶
func RAKEAnalyser ¶
func RAKEAnalyser(docs []guru.MedlineDocument) (TermStatistics, error)
func TermFrequencyAnalyser ¶
func TermFrequencyAnalyser(docs []guru.MedlineDocument) (TermStatistics, error)
TermFrequencyAnalyser computes the document frequency for the input documents.