formulation

package
v0.0.0-...-b7c488f Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 2, 2021 License: MIT Imports: 46 Imported by: 2

Documentation

Overview

Package formulation provides a library for automatically formulating queries.

Index

Constants

View Source
// Values used with the unexported leaf type.
//
// NOTE(review): presumably these are the outcomes stored at the leaves of the
// decision tree built by ID3 — confirm against the tree implementation.
//
// NOTE(review): only NA is declared with the leaf type. TRUE and FALSE have
// an explicit value but no type, so they are untyped integer constants rather
// than leaf values — confirm that this is intended.
const (
	NA    leaf = 0 // zero value of leaf
	TRUE       = 1
	FALSE      = -1
)
View Source
// Entity is the name of the option that EntityExtractor implementations add to
// queries; the option is used later in the keyword mapping step.
const Entity = "entity"

Variables

View Source
var (
	/*
		SensitivityFilter is the CQR form of the search strategy listed below:
		the OR of eight trial-related keywords (#9), with the nested
		"animals NOT humans" query (#10) excluded from it (#11).

		The [sh] restriction on line #5 is expressed with
		fields.FloatingMeshHeadings.

		NOTE(review): this looks like a sensitivity-maximising randomised
		controlled trial filter for PubMed — confirm the exact source.

		#1 randomized controlled trial [pt]
		#2 controlled clinical trial [pt]
		#3 randomized [tiab]
		#4 placebo [tiab]
		#5 drug therapy [sh]
		#6 randomly [tiab]
		#7 trial [tiab]
		#8 groups [tiab]
		#9 #1 OR #2 OR #3 OR #4 OR #5 OR #6 OR #7 OR #8
		#10 animals [mh] NOT humans [mh]
		#11 #9 NOT #10
	*/
	SensitivityFilter = cqr.NewBooleanQuery(cqr.NOT, []cqr.CommonQueryRepresentation{
		// #9: any of the trial-related keywords.
		cqr.NewBooleanQuery(cqr.OR, []cqr.CommonQueryRepresentation{
			cqr.NewKeyword("randomized controlled trial", fields.PublicationType),
			cqr.NewKeyword("controlled clinical trial", fields.PublicationType),
			cqr.NewKeyword("randomized", fields.TitleAbstract),
			cqr.NewKeyword("placebo", fields.TitleAbstract),
			cqr.NewKeyword("drug therapy", fields.FloatingMeshHeadings),
			cqr.NewKeyword("randomly", fields.TitleAbstract),
			cqr.NewKeyword("trial", fields.TitleAbstract),
			cqr.NewKeyword("groups", fields.TitleAbstract),
		}),
		// #10: animals [mh] NOT humans [mh].
		cqr.NewBooleanQuery(cqr.NOT, []cqr.CommonQueryRepresentation{
			cqr.NewKeyword("animals", fields.MeshHeadings),
			cqr.NewKeyword("humans", fields.MeshHeadings),
		}),
	})

	/*
		PrecisionSensitivityFilter is the CQR form of the search strategy
		listed below: the OR of seven trial-related keywords (#8), with the
		nested "animals NOT humans" query (#9) excluded from it (#10).

		Compared with SensitivityFilter it replaces "drug therapy [sh]" with
		the non-exploded MeSH heading "clinical trials as topic", drops
		"groups [tiab]", and searches "trial" in the title only. The
		[mesh: noexp] restriction on line #5 is expressed by setting the
		cqr.ExplodedString option to false.

		#1 randomized controlled trial [pt]
		#2 controlled clinical trial [pt]
		#3 randomized [tiab]
		#4 placebo [tiab]
		#5 clinical trials as topic [mesh: noexp]
		#6 randomly [tiab]
		#7 trial [ti]
		#8 #1 OR #2 OR #3 OR #4 OR #5 OR #6 OR #7
		#9 animals [mh] NOT humans [mh]
		#10 #8 NOT #9
	*/
	PrecisionSensitivityFilter = cqr.NewBooleanQuery(cqr.NOT, []cqr.CommonQueryRepresentation{
		// #8: any of the trial-related keywords.
		cqr.NewBooleanQuery(cqr.OR, []cqr.CommonQueryRepresentation{
			cqr.NewKeyword("randomized controlled trial", fields.PublicationType),
			cqr.NewKeyword("controlled clinical trial", fields.PublicationType),
			cqr.NewKeyword("randomized", fields.TitleAbstract),
			cqr.NewKeyword("placebo", fields.TitleAbstract),
			cqr.NewKeyword("clinical trials as topic", fields.MeshHeadings).SetOption(cqr.ExplodedString, false),
			cqr.NewKeyword("randomly", fields.TitleAbstract),
			cqr.NewKeyword("trial", fields.Title),
		}),
		// #9: animals [mh] NOT humans [mh].
		cqr.NewBooleanQuery(cqr.NOT, []cqr.CommonQueryRepresentation{
			cqr.NewKeyword("animals", fields.MeshHeadings),
			cqr.NewKeyword("humans", fields.MeshHeadings),
		}),
	})
)

Functions

func EntityExpansion

EntityExpansion performs entity expansion on a query using a specified expander.

func Entropy

func Entropy(positive, negative float64) float64

func FilterQueryTerms

func FilterQueryTerms(conditions, treatments, studyTypes []string, field string, development trecresults.Qrels, e stats.EntrezStatisticsSource) ([]string, []string, []string, error)

FilterQueryTerms further reduces the candidate query terms by identifying the best combination of terms based on how many relevant documents they retrieve from the development set.

func ID3

func ID3(training map[string][]string, labels map[string]bool, attrs []string) *tree

func InformationGain

func InformationGain(attr string, training map[string][]string, labels map[string]bool) (float64, []string, []string)

func MakeQrels

func MakeQrels(docs []guru.MedlineDocument, topic string) trecresults.Qrels

MakeQrels creates a set of relevance assessments from some medline documents.

func MapKeywords

MapKeywords takes as input a proto-query from a newly logically composed query and maps concepts in it to keywords using the specified mapper.

Types

type BackgroundCollection

// BackgroundCollection is a source of per-term statistics together with an
// overall size. PopulationSet and PubMedSet in this package both declare the
// two methods below.
type BackgroundCollection interface {
	// Statistic returns a numeric value for term, or an error.
	// NOTE(review): which statistic is returned (e.g. a document frequency)
	// is not visible here — confirm against the implementations.
	Statistic(term string) (float64, error)
	// Size returns the size of the collection, or an error.
	// NOTE(review): presumably a document count — confirm.
	Size() (float64, error)
}

func GetPopulationSet

func GetPopulationSet(e stats.EntrezStatisticsSource, analyser TermAnalyser) (BackgroundCollection, error)

GetPopulationSet retrieves a set of publications to form a population set.

type ConceptualFormulator

type ConceptualFormulator struct {
	LogicComposer
	EntityExtractor
	EntityExpander
	KeywordMapper

	FeedbackDocs []int
	// contains filtered or unexported fields
}

ConceptualFormulator formulates queries using the title or string of a systematic review.

func NewConceptualFormulator

func NewConceptualFormulator(logicComposer LogicComposer, entityExtractor EntityExtractor, entityExpander EntityExpander, keywordMapper KeywordMapper, rf []int, e stats.EntrezStatisticsSource, postProcessing ...PostProcess) *ConceptualFormulator

func (ConceptualFormulator) Formulate

func (ConceptualFormulator) Method

func (t ConceptualFormulator) Method() string

type Cui2VecEntityExpander

type Cui2VecEntityExpander struct {
	// contains filtered or unexported fields
}

Cui2VecEntityExpander expands entities using cui2vec embeddings.

func NewCui2VecEntityExpander

func NewCui2VecEntityExpander(embeddings cui2vec.PrecomputedEmbeddings) *Cui2VecEntityExpander

func (Cui2VecEntityExpander) Expand

type Cui2VecRPCEntityExpander

type Cui2VecRPCEntityExpander struct {
	// contains filtered or unexported fields
}

Cui2VecRPCEntityExpander expands entities using cui2vec embeddings accessed through a cui2vec.VecClient.

func NewCui2VecRPCEntityExpander

func NewCui2VecRPCEntityExpander(client *cui2vec.VecClient) *Cui2VecRPCEntityExpander

func (*Cui2VecRPCEntityExpander) Expand

type DecisionTreeFormulator

type DecisionTreeFormulator struct {

	// Terms identified as candidate query terms.
	N [][]string // Attributes.
	// contains filtered or unexported fields
}

func NewDecisionTreeFormulator

func NewDecisionTreeFormulator(topic string, positive, negative guru.MedlineDocuments) (*DecisionTreeFormulator, error)

func (DecisionTreeFormulator) Formulate

func (DecisionTreeFormulator) Method

func (dt DecisionTreeFormulator) Method() string

type EntityExpander

type EntityExpander interface {
	Expand(q cqr.Keyword) ([]cqr.CommonQueryRepresentation, error)
}

EntityExpander takes as input a keyword that has been annotated with entities in the entity extraction step and expands it.

type EntityExtractor

type EntityExtractor interface {
	Extract(query cqr.CommonQueryRepresentation) (cqr.CommonQueryRepresentation, error)
}

EntityExtractor extracts entities from queries. These could be, for example, CUIs. Each Entity Extractor implementation adds the Entity option on queries which is used later in the keyword mapping step.

type Formulator

type Formulator interface {
	Formulate(query pipeline.Query) ([]cqr.CommonQueryRepresentation, []pipeline.SupplementalData, error)
	Method() string
}

Formulator formulates queries to some specification.

type KeywordMapper

type KeywordMapper interface {
	Map(keyword cqr.Keyword) ([]cqr.CommonQueryRepresentation, error)
}

KeywordMapper transforms entities (e.g., CUIs) into keywords.

type LogicComposer

// LogicComposer builds a single query representation from a pipeline query.
// NLPLogicComposer and RAKELogicComposer in this package each declare a
// Compose method.
type LogicComposer interface {
	// Compose returns the composed query for the given pipeline query, or an
	// error.
	Compose(query pipeline.Query) (cqr.CommonQueryRepresentation, error)
}

type MetaMapEntityExtractor

type MetaMapEntityExtractor struct {
	// contains filtered or unexported fields
}

MetaMapEntityExtractor extracts CUI entities from queries.

func NewMetaMapEntityExtractor

func NewMetaMapEntityExtractor(client metawrap.HTTPClient) MetaMapEntityExtractor

func (MetaMapEntityExtractor) Extract

type MetaMapKeywordMapper

type MetaMapKeywordMapper struct {
	// contains filtered or unexported fields
}

MetaMapKeywordMapper uses MetaMap to map entities (CUIs) to keywords.

func NewMetaMapKeywordMapper

func NewMetaMapKeywordMapper(client metawrap.HTTPClient, mapper MetaMapMapper) MetaMapKeywordMapper

NewMetaMapKeywordMapper creates a new keyword mapper that uses MetaMap.

func (MetaMapKeywordMapper) Map

func (m MetaMapKeywordMapper) Map(keyword cqr.Keyword) (keywords []cqr.CommonQueryRepresentation, err error)

Map maps text to several concepts.

type MetaMapMapper

type MetaMapMapper func(keyword cqr.Keyword) ([]cqr.CommonQueryRepresentation, error)

MetaMapMapper maps candidates from MetaMap to one or more keywords.

func Alias

func Alias(mapping cui2vec.AliasMapping) MetaMapMapper

Alias identifies all of the terms for the concept in the UMLS meta-thesaurus.

func ElasticUMLS

func ElasticUMLS(c *elastic.Client) MetaMapMapper

func Frequent

func Frequent(mapping cui2vec.Mapping) MetaMapMapper

Frequent identifies the most frequently used term for the concept in the UMLS meta-thesaurus.

func Matched

func Matched() MetaMapMapper

Matched uses the Matched entity from MetaMap.

func MeSHMapper

func MeSHMapper(mapper MetaMapMapper) MetaMapMapper

MeSHMapper uses the output of another MetaMap mapper to assign MeSH terms.

func Preferred

func Preferred(client guru.UMLSClient) MetaMapMapper

Preferred uses the Preferred entity from UMLS.

type NLPLogicComposer

type NLPLogicComposer struct {
	// contains filtered or unexported fields
}

NLPLogicComposer composes queries logically using the stanford English parser.

func NewNLPLogicComposer

func NewNLPLogicComposer(javaClassPath string) *NLPLogicComposer

func (NLPLogicComposer) Compose

type ObjectiveFormulator

type ObjectiveFormulator struct {
	Folder, Pubdates, SemTypes, MetaMapURL string

	MeSHK      []int
	DevK, PopK []float64
	// contains filtered or unexported fields
}

ObjectiveFormulator formulates queries according to the objective approach. This implementation writes files to disk as a side effect; these files can later be used for analysis.

func NewObjectiveFormulator

func NewObjectiveFormulator(s stats.EntrezStatisticsSource, esClient *elastic.Client, qrels trecresults.QrelsFile, population BackgroundCollection, folder, pubdates, semTypes, metamapURL string, optimisation eval.Evaluator, options ...ObjectiveOption) *ObjectiveFormulator

func (ObjectiveFormulator) Formulate

Formulate returns two queries: one with MeSH terms and one without. It also returns the set of unseen documents for evaluation later.

func (ObjectiveFormulator) Method

func (o ObjectiveFormulator) Method() string

func (ObjectiveFormulator) Topic

func (o ObjectiveFormulator) Topic() string

type ObjectiveOption

type ObjectiveOption func(o *ObjectiveFormulator)

func ObjectiveAnalyser

func ObjectiveAnalyser(analyser TermAnalyser, name string) ObjectiveOption

func ObjectiveGrid

func ObjectiveGrid(devK, popK []float64, meshK []int) ObjectiveOption

func ObjectiveMinDocs

func ObjectiveMinDocs(docs int) ObjectiveOption

func ObjectiveOptimisation

func ObjectiveOptimisation(optimisation eval.Evaluator) ObjectiveOption

func ObjectivePopulation

func ObjectivePopulation(population BackgroundCollection) ObjectiveOption

func ObjectivePostProcessing

func ObjectivePostProcessing(processes ...PostProcess) ObjectiveOption

func ObjectiveQrels

func ObjectiveQrels(rels trecresults.QrelsFile) ObjectiveOption

func ObjectiveQuery

func ObjectiveQuery(query pipeline.Query) ObjectiveOption

func ObjectiveSeed

func ObjectiveSeed(seed int) ObjectiveOption

func ObjectiveSplitter

func ObjectiveSplitter(spitter Splitter) ObjectiveOption

type PopulationSet

type PopulationSet TermStatistics

func (PopulationSet) Size

func (p PopulationSet) Size() (float64, error)

func (PopulationSet) Statistic

func (p PopulationSet) Statistic(term string) (float64, error)

type PostProcess

PostProcess applies any post-formatting to a query.

func Stem

Stem uses already stemmed terms from the original query to replace terms from the query that requires post-processing.

type PubMedSet

type PubMedSet struct {
	// contains filtered or unexported fields
}

func (PubMedSet) Size

func (p PubMedSet) Size() (float64, error)

func (PubMedSet) Statistic

func (p PubMedSet) Statistic(term string) (float64, error)

type QueryCategory

type QueryCategory int

type RAKELogicComposer

type RAKELogicComposer struct {
	// contains filtered or unexported fields
}

func NewRAKELogicComposer

func NewRAKELogicComposer(semtypes, metamap string, titles map[string]string, seedPMIDs trecresults.QrelsFile, esClient *elastic.Client, vecClient *cui2vec.VecClient) RAKELogicComposer

func (RAKELogicComposer) Compose

type RandomSplitter

type RandomSplitter int64

func (RandomSplitter) Split

Split creates three slices of documents: 2:4 development, 1:4 validation, 1:4 unseen.

type Splitter

type Splitter interface {
	Split(docs []guru.MedlineDocument) ([]guru.MedlineDocument, []guru.MedlineDocument, []guru.MedlineDocument)
}

Splitter splits a test set into development, validation, and unseen.

type TermAnalyser

type TermAnalyser func(docs []guru.MedlineDocument) (TermStatistics, error)

TermAnalyser records term/phrase statistics about a set of documents.

type TermStatistics

type TermStatistics map[string]float64

func RAKEAnalyser

func RAKEAnalyser(docs []guru.MedlineDocument) (TermStatistics, error)

func TermFrequencyAnalyser

func TermFrequencyAnalyser(docs []guru.MedlineDocument) (TermStatistics, error)

TermFrequencyAnalyser computes the document frequency for the input documents.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL