gcse: github.com/subosito/gcse Index | Files | Directories

package gcse

import "github.com/subosito/gcse"

Package gcse is the core supporting library for go-code-search-engine (GCSE). Its exported types and functions are mainly for sub packages. If you want to use some of the functions, copy the code into your own project.

Sub-projects

crawler crawling packages

indexer creating index data for web-server

server providing web services, including home/top/search services.

Data-flows

project    Read           Write
-------    ----           -----
crawler    fnCrawlerDB    fnCrawlerDB
           fnDocDB        fnDocDB
                          DBOutSegments

indexer    DBOutSegments  IndexSegments

server     IndexSegments

Index

Package Files

config.go crawler.go crawlerdb.go data.go db.go index.go ranking.go segment.go text.go utils.go

Constants

const (
    KindIndex = "index"
    IndexFn   = KindIndex + ".gob"

    KindDocDB = "docdb"

    FnCrawlerDB = "crawler"
    KindPackage = "package"
    KindPerson  = "person"
    KindToCheck = "tocheck"

    FnToCrawl = "tocrawl"
    FnPackage = "package"
    FnPerson  = "person"
    // key: RawString, value: DocInfo
    FnDocs    = "docs"
    FnNewDocs = "newdocs"
)
const (
    NDA_UPDATE = iota
    NDA_STARS
    NDA_DEL
)
const (
    IndexTextField = "text"
    IndexNameField = "name"
    IndexPkgField  = "pkg"
)
const (
    DOCS_PARTS = 128
)

Variables

var (
    ServerAddr = ":8080"
    ServerRoot = villa.Path("./server/")

    LoadTemplatePass = ""
    AutoLoadTemplate = false

    DataRoot      = villa.Path("./data/")
    CrawlerDBPath = DataRoot.Join(FnCrawlerDB)
    DocsDBPath    = DataRoot.Join(FnDocs)

    // producer: server, consumer: crawler
    ImportPath     villa.Path
    ImportSegments Segments

    // producer: crawler, consumer: indexer
    DBOutPath     villa.Path
    DBOutSegments Segments

    // producer: indexer, consumer: server.
    // server never delete index segments, indexer clear updated segments.
    IndexPath     villa.Path
    IndexSegments Segments

    // configures of crawler
    CrawlByGodocApi   = true
    CrawlGithubUpdate = true
    CrawlerDuePerRun  = 1 * time.Hour

    /*
    	Increase this to ignore etag of last versions to crawl and parse all
    	packages.

    	ChangeLog:
    	    0    First version
    	    1    Add TestImports/XTestImports to Imports
    	    2    Parse markdown readme to text before selecting synopsis
    		     from it
    		3    Add exported tokens to indexes
    		4    Move TestImports/XTestImports out of Imports, to TestImports
    		5    A bug in checking CrawlerVersion is fixed
    */
    CrawlerVersion = 5
)
var (
    ErrPackageNotModifed = errors.New("package not modified")
    ErrInvalidPackage    = errors.New("invalid package")
)

func AppendPackages Uses

func AppendPackages(pkgs []string) bool

AppendPackages appends a list of packages to the imports folder for the crawler backend to read.

func AppendTokens Uses

func AppendTokens(tokens villa.StrSet, text []byte) villa.StrSet

func AuthorOfPackage Uses

func AuthorOfPackage(pkg string) string

func CalcMatchScore Uses

func CalcMatchScore(doc *HitInfo, tokenList []string,
    textIdfs, nameIdfs []float64) float64

func CalcPackagePartition Uses

func CalcPackagePartition(pkg string, totalParts int) int

func CalcStaticScore Uses

func CalcStaticScore(doc *HitInfo) float64

func CalcTestStaticScore Uses

func CalcTestStaticScore(doc *HitInfo, realImported []string) float64

func CheckCamel Uses

func CheckCamel(last, current rune) index.RuneType

func CheckRuneType Uses

func CheckRuneType(last, current rune) index.RuneType

func ChooseImportantSentenses Uses

func ChooseImportantSentenses(text string, name, pkg string) []string

func ClearWatcherEvents Uses

func ClearWatcherEvents(watcher *fsnotify.Watcher)

func DumpMemStats Uses

func DumpMemStats()

func FetchAllPackagesInGodoc Uses

func FetchAllPackagesInGodoc(httpClient doc.HttpClient) ([]string, error)

FetchAllPackagesInGodoc fetches the list of all packages on godoc.org

func FullProjectOfPackage Uses

func FullProjectOfPackage(pkg string) string

func GenHttpClient Uses

func GenHttpClient(proxy string) doc.HttpClient

func GithubUpdates Uses

func GithubUpdates() (map[string]time.Time, error)

func HostOfPackage Uses

func HostOfPackage(pkg string) string

func IdOfPerson Uses

func IdOfPerson(site, username string) string

func Index Uses

func Index(docDB mr.Input) (*index.TokenSetSearcher, error)

func IsBadPackage Uses

func IsBadPackage(err error) bool

func LikeButton Uses

func LikeButton(httpClient doc.HttpClient, Url string) (int, error)

func NewDocInfo Uses

func NewDocInfo() sophie.Sophier

Returns a new instance of DocInfo as a sophie.Sophier

func NewNewDocAction Uses

func NewNewDocAction() sophie.Sophier

Returns a new instance of *NewDocAction as a Sophier

func NormWord Uses

func NormWord(word string) string

func ParsePersonId Uses

func ParsePersonId(id string) (site, username string)

func Plusone Uses

func Plusone(httpClient doc.HttpClient, url string) (int, error)

func ProjectOfPackage Uses

func ProjectOfPackage(pkg string) string

core project of a package

func ReadJsonFile Uses

func ReadJsonFile(fn villa.Path, data interface{}) error

func ReadPackages Uses

func ReadPackages(segm Segment) (pkgs []string, err error)

func ReadmeToText Uses

func ReadmeToText(fn, data string) string

func SegmentLess Uses

func SegmentLess(a, b Segment) bool

func SplitSentences Uses

func SplitSentences(text string) []string

func TrimPackageName Uses

func TrimPackageName(pkg string) string

func WaitForWatcherEvents Uses

func WaitForWatcherEvents(watcher *fsnotify.Watcher)

func WriteJsonFile Uses

func WriteJsonFile(fn villa.Path, data interface{}) error

type BlackRequest Uses

type BlackRequest struct {
    sync.RWMutex
    // contains filtered or unexported fields
}

func (*BlackRequest) Do Uses

func (br *BlackRequest) Do(req *http.Request) (*http.Response, error)

type CrawlerDB Uses

type CrawlerDB struct {
    PackageDB *MemDB
    PersonDB  *MemDB
}

* CrawlerDB contains all crawler entry databases.

func LoadCrawlerDB Uses

func LoadCrawlerDB() *CrawlerDB

LoadCrawlerDB loads PackageDB and PersonDB and returns a new *CrawlerDB

func (*CrawlerDB) AppendPackage Uses

func (cdb *CrawlerDB) AppendPackage(pkg string,
    inDocs func(pkg string) bool)

AppendPackage appends a package. If the package does not exist in either PackageDB or Docs, it is scheduled to be crawled immediately.

func (*CrawlerDB) AppendPerson Uses

func (cdb *CrawlerDB) AppendPerson(site, username string) bool

AppendPerson appends a person to the PersonDB and schedules an immediate crawl for a new person.

func (*CrawlerDB) SchedulePackage Uses

func (cdb *CrawlerDB) SchedulePackage(pkg string, sTime time.Time,
    etag string) error

SchedulePackage schedules a package to be crawled at a specific time.

func (*CrawlerDB) SchedulePerson Uses

func (cdb *CrawlerDB) SchedulePerson(id string, sTime time.Time) error

SchedulePerson schedules a person to be crawled at a specific time.

func (*CrawlerDB) Sync Uses

func (cdb *CrawlerDB) Sync() error

Sync syncs both PackageDB and PersonDB. Returns an error if either sync failed.

type CrawlingEntry Uses

type CrawlingEntry struct {
    ScheduleTime time.Time
    // if gcse.CrawlerVersion is different from this value, etag is ignored
    Version int
    Etag    string
}

func (*CrawlingEntry) ReadFrom Uses

func (c *CrawlingEntry) ReadFrom(r sophie.Reader, l int) error

func (*CrawlingEntry) WriteTo Uses

func (c *CrawlingEntry) WriteTo(w sophie.Writer) error

type DocDB Uses

type DocDB interface {
    Sync() error
    Export(root villa.Path, kind string) error

    Get(key string, data interface{}) bool
    Put(key string, data interface{})
    Delete(key string)
    Iterate(output func(key string, val interface{}) error) error
}

type DocInfo Uses

type DocInfo struct {
    Name        string
    Package     string
    Author      string
    LastUpdated time.Time
    StarCount   int
    Synopsis    string
    Description string
    ProjectURL  string
    ReadmeFn    string
    ReadmeData  string
    Imports     []string
    TestImports []string
    Exported    []string // exported tokens(funcs/types)
}

DocInfo is the information stored in backend docDB

func (*DocInfo) ReadFrom Uses

func (d *DocInfo) ReadFrom(r sophie.Reader, l int) error

func (*DocInfo) WriteTo Uses

func (d *DocInfo) WriteTo(w sophie.Writer) error

type HitInfo Uses

type HitInfo struct {
    DocInfo

    Imported           []string
    TestImported       []string
    ImportantSentences []string

    AssignedStarCount float64
    StaticScore       float64
    TestStaticScore   float64
    StaticRank        int // zero-based
}

HitInfo is the information provided to frontend

type MemDB Uses

type MemDB struct {
    sync.RWMutex
    // contains filtered or unexported fields
}

func NewMemDB Uses

func NewMemDB(root villa.Path, kind string) *MemDB

func (*MemDB) Count Uses

func (mdb *MemDB) Count() int

Count returns the number of entries in the DB

func (*MemDB) Delete Uses

func (mdb *MemDB) Delete(key string)

func (*MemDB) Export Uses

func (mdb *MemDB) Export(root villa.Path, kind string) error

Export saves the data to some space without affecting the modified property.

func (*MemDB) Get Uses

func (mdb *MemDB) Get(key string, data interface{}) bool

Get fetches the entry with the specified key. data is a pointer. Returns false if the key does not exist.

func (*MemDB) Iterate Uses

func (mdb *MemDB) Iterate(output func(key string, val interface{}) error) error

func (*MemDB) LastModified Uses

func (mdb *MemDB) LastModified() time.Time

func (*MemDB) Load Uses

func (mdb *MemDB) Load() error

func (*MemDB) Modified Uses

func (mdb *MemDB) Modified() bool

func (*MemDB) Put Uses

func (mdb *MemDB) Put(key string, data interface{})

func (*MemDB) Sync Uses

func (mdb *MemDB) Sync() error

type NewDocAction Uses

type NewDocAction struct {
    Action sophie.VInt
    DocInfo
}

* If Action equals NDA_DEL, DocInfo is undefined.

func (*NewDocAction) ReadFrom Uses

func (nda *NewDocAction) ReadFrom(r sophie.Reader, l int) error

func (*NewDocAction) WriteTo Uses

func (nda *NewDocAction) WriteTo(w sophie.Writer) error

type Package Uses

type Package struct {
    Package     string
    Name        string
    Synopsis    string
    Doc         string
    ProjectURL  string
    StarCount   int
    ReadmeFn    string
    ReadmeData  string
    Imports     []string
    TestImports []string
    Exported    []string // exported tokens(funcs/types)

    References []string
    Etag       string
}

Package stores information from crawler

func CrawlPackage Uses

func CrawlPackage(httpClient doc.HttpClient, pkg string,
    etag string) (p *Package, err error)

type PackedDocDB Uses

type PackedDocDB struct {
    *MemDB
}

func (PackedDocDB) Get Uses

func (db PackedDocDB) Get(key string, data interface{}) bool

func (PackedDocDB) Iterate Uses

func (db PackedDocDB) Iterate(
    output func(key string, val interface{}) error) error

func (PackedDocDB) Put Uses

func (db PackedDocDB) Put(key string, data interface{})

type Person Uses

type Person struct {
    Id       string
    Packages []string
}

func CrawlPerson Uses

func CrawlPerson(httpClient doc.HttpClient, id string) (*Person, error)

type Segment Uses

type Segment interface {
    Name() string
    Join(name string) villa.Path
    IsDone() bool
    Done() error
    ListFiles() ([]villa.Path, error)
    Remove() error
}

type Segments Uses

type Segments interface {
    Watch(watcher *fsnotify.Watcher) error
    ListAll() ([]Segment, error)
    // all done
    ListDones() ([]Segment, error)
    // max done
    FindMaxDone() (Segment, error)
    // generates an arbitrary new segment
    GenNewSegment() (Segment, error)
    // generates a segment greater than all existing ones
    GenMaxSegment() (Segment, error)
    // clear
    ClearUndones() error
}

type Size Uses

type Size int64

func (Size) String Uses

func (s Size) String() string

type TokenIndexer Uses

type TokenIndexer struct {
    index.TokenIndexer

    sync.RWMutex
    // contains filtered or unexported fields
}

TokenIndexer is thread-safe.

func NewTokenIndexer Uses

func NewTokenIndexer(root villa.Path, kind string) *TokenIndexer

func (*TokenIndexer) Export Uses

func (ti *TokenIndexer) Export(root villa.Path, kind string) error

func (*TokenIndexer) IdsOfToken Uses

func (ti *TokenIndexer) IdsOfToken(token string) []string

func (*TokenIndexer) LastModified Uses

func (ti *TokenIndexer) LastModified() time.Time

func (*TokenIndexer) Load Uses

func (ti *TokenIndexer) Load() error

func (*TokenIndexer) Modified Uses

func (ti *TokenIndexer) Modified() bool

func (*TokenIndexer) Put Uses

func (ti *TokenIndexer) Put(id string, tokens villa.StrSet)

func (*TokenIndexer) Sync Uses

func (ti *TokenIndexer) Sync() error

func (*TokenIndexer) TokensOfId Uses

func (ti *TokenIndexer) TokensOfId(id string) []string

Directories

PathSynopsis
crawlerGCSE Crawler background program.
exps
indexer
mergedocs
serverGCSE HTTP server.
tocrawl
tools

Package gcse imports 33 packages (graph). Updated 2017-03-22. Refresh now. Tools for package owners.