gcse: github.com/daviddengcn/gcse Index | Files | Directories

package gcse

import "github.com/daviddengcn/gcse"

Package gcse is the core supporting library for go-code-search-engine (GCSE). Its exported types and functions are mainly intended for its sub-packages. If you need some of these functions, copy the code into your own project.

Index

Package Files

bi.go crawler.go crawlerdb.go data.go db.go gcse.go index.go ranking.go text.go utils.go

Constants

const (
    // whole document updated
    NDA_UPDATE = iota
    // only stars updated
    NDA_STARS
    // deleted
    NDA_DEL
    // Original document
    NDA_ORIGINAL
)
const (
    KindIndex   = "index"
    KindDocDB   = "docdb"
    KindPackage = "package"
    KindPerson  = "person"
    KindToCheck = "tocheck"
    IndexFn     = KindIndex + ".gob"
)
const (
    HitsArrFn = "hits"

    IndexTextField = "text"
    IndexNameField = "name"
    IndexPkgField  = "pkg"
)
const (

    /*
    	Increase this to ignore etag of last versions to crawl and parse all
    	packages.

    	ChangeLog:
    	    0    First version
    	    1    Add TestImports/XTestImports to Imports
    	    2    Parse markdown readme to text before selecting synopsis
    		     from it
    		3    Add exported tokens to indexes
    		4    Move TestImports/XTestImports out of Imports, to TestImports
    		5    A bug of checking CrawlerVersion is fixed
    */
    CrawlerVersion = 5
)
const (
    DOCS_PARTS = 128
)

Variables

var (
    ErrPackageNotModifed = errors.New("package not modified")
    ErrInvalidPackage    = errors.New("invalid package")
)
var GithubSpider *github.Spider

func AddBiValueAndProcess Uses

func AddBiValueAndProcess(aggr bi.AggregateMethod, name string, value int)

func AppendPackages Uses

func AppendPackages(pkgs []string) bool

AppendPackages appends a list of packages to the imports folder for the crawler backend to read.

func AppendTokens Uses

func AppendTokens(tokens stringsp.Set, text []byte) stringsp.Set

AppendTokens tokenizes text into the current token set.

func AuthorOfPackage Uses

func AuthorOfPackage(pkg string) string

func CalcMatchScore Uses

func CalcMatchScore(doc *HitInfo, tokenList []string, textIdfs, nameIdfs []float64) float64

func CalcPackagePartition Uses

func CalcPackagePartition(pkg string, totalParts int) int

func CalcStaticScore Uses

func CalcStaticScore(doc *HitInfo) float64

func CalcTestStaticScore Uses

func CalcTestStaticScore(doc *HitInfo, realImported []string) float64

func CheckCamel Uses

func CheckCamel(last, current rune) index.RuneType

func CheckRuneType Uses

func CheckRuneType(last, current rune) index.RuneType

func ChooseImportantSentenses Uses

func ChooseImportantSentenses(text string, name, pkg string) []string

func ClearWatcherEvents Uses

func ClearWatcherEvents(watcher *fsnotify.Watcher)

func CrawlRepoInfo Uses

func CrawlRepoInfo(site, user, name string) *sppb.RepoInfo

func FullProjectOfPackage Uses

func FullProjectOfPackage(pkg string) string

func GenHttpClient Uses

func GenHttpClient(proxy string) doc.HttpClient

func HostOfPackage Uses

func HostOfPackage(pkg string) string

func IdOfPerson Uses

func IdOfPerson(site, username string) string

func Index Uses

func Index(docDB mr.Input, outDir string) (*index.TokenSetSearcher, error)

func IsBadPackage Uses

func IsBadPackage(err error) bool

func LikeButton Uses

func LikeButton(httpClient doc.HttpClient, Url string) (int, error)

func NewDocInfo Uses

func NewDocInfo() sophie.Sophier

NewDocInfo returns a new instance of DocInfo as a sophie.Sophier.

func NewNewDocAction Uses

func NewNewDocAction() sophie.Sophier

NewNewDocAction returns a new instance of *NewDocAction as a sophie.Sophier.

func NormWord Uses

func NormWord(word string) string

func ParsePersonId Uses

func ParsePersonId(id string) (site, username string)

func Plusone Uses

func Plusone(httpClient doc.HttpClient, url string) (int, error)

func ProjectOfPackage Uses

func ProjectOfPackage(pkg string) string

ProjectOfPackage returns the core project of a package.

func ReadPackages Uses

func ReadPackages(segm utils.Segment) ([]string, error)

func ReadmeToText Uses

func ReadmeToText(fn, data string) string

func SplitSentences Uses

func SplitSentences(text string) []string

func TrimPackageName Uses

func TrimPackageName(pkg string) string

func WaitForWatcherEvents Uses

func WaitForWatcherEvents(watcher *fsnotify.Watcher)

type BlackRequest Uses

type BlackRequest struct {
    sync.RWMutex
    // contains filtered or unexported fields
}

func (*BlackRequest) Do Uses

func (br *BlackRequest) Do(req *http.Request) (*http.Response, error)

type CrawlerDB Uses

type CrawlerDB struct {
    PackageDB *MemDB
    PersonDB  *MemDB
}

CrawlerDB includes the databases of all crawler entries.

func LoadCrawlerDB Uses

func LoadCrawlerDB() *CrawlerDB

LoadCrawlerDB loads PackageDB and PersonDB and returns a new *CrawlerDB

func (*CrawlerDB) AppendPackage Uses

func (cdb *CrawlerDB) AppendPackage(pkg string, inDocs func(pkg string) bool)

AppendPackage appends a package. If the package does not exist in either PackageDB or Docs, it is scheduled (immediately).

func (*CrawlerDB) AppendPerson Uses

func (cdb *CrawlerDB) AppendPerson(site, username string) bool

AppendPerson appends a person to the PersonDB and schedules an immediate crawl for a new person.

func (*CrawlerDB) PushToCrawlPackage Uses

func (cdb *CrawlerDB) PushToCrawlPackage(pkg string)

PushToCrawlPackage schedules a package to be crawled at a specific time if not specified earlier.

func (*CrawlerDB) SchedulePackage Uses

func (cdb *CrawlerDB) SchedulePackage(pkg string, sTime time.Time, etag string) error

SchedulePackage schedules a package to be crawled at a specific time.

func (*CrawlerDB) SchedulePerson Uses

func (cdb *CrawlerDB) SchedulePerson(id string, sTime time.Time) error

SchedulePerson schedules a person to be crawled at a specific time.

func (*CrawlerDB) Sync Uses

func (cdb *CrawlerDB) Sync() error

Sync syncs both PackageDB and PersonDB. Returns an error if either sync fails.

type CrawlingEntry Uses

type CrawlingEntry struct {
    ScheduleTime time.Time
    // if gcse.CrawlerVersion is different from this value, etag is ignored
    Version int
    Etag    string
}

func (*CrawlingEntry) ReadFrom Uses

func (c *CrawlingEntry) ReadFrom(r sophie.Reader, l int) error

func (*CrawlingEntry) WriteTo Uses

func (c *CrawlingEntry) WriteTo(w sophie.Writer) error

type DocDB Uses

type DocDB interface {
    Sync() error
    Export(root villa.Path, kind string) error

    Get(key string, data interface{}) bool
    Put(key string, data interface{})
    Delete(key string)
    Iterate(output func(key string, val interface{}) error) error
}

type DocInfo Uses

type DocInfo struct {
    Name        string // Package name
    Package     string // Package path
    Author      string
    LastUpdated time.Time
    StarCount   int
    Synopsis    string
    Description string
    ProjectURL  string
    ReadmeFn    string
    ReadmeData  string
    Imports     []string
    TestImports []string
    Exported    []string // exported tokens(funcs/types)
}

DocInfo is the information stored in backend docDB

func (*DocInfo) ReadFrom Uses

func (d *DocInfo) ReadFrom(r sophie.Reader, l int) error

func (*DocInfo) WriteTo Uses

func (d *DocInfo) WriteTo(w sophie.Writer) error

type HitInfo Uses

type HitInfo struct {
    DocInfo

    Imported    []string
    ImportedLen int

    TestImported    []string
    TestImportedLen int

    ImportantSentences []string

    AssignedStarCount float64
    StaticScore       float64
    TestStaticScore   float64
    StaticRank        int // zero-based
}

HitInfo is the information provided to frontend

type MemDB Uses

type MemDB struct {
    sync.RWMutex
    // contains filtered or unexported fields
}

func NewMemDB Uses

func NewMemDB(root villa.Path, kind string) *MemDB

func (*MemDB) Count Uses

func (mdb *MemDB) Count() int

Count returns the number of entries in the DB

func (*MemDB) Delete Uses

func (mdb *MemDB) Delete(key string)

func (*MemDB) Export Uses

func (mdb *MemDB) Export(root villa.Path, kind string) error

Export saves the data to some space without affecting the modified property.

func (*MemDB) Get Uses

func (mdb *MemDB) Get(key string, data interface{}) bool

Get fetches the entry for the specified key. data is a pointer. Returns false if the entry does not exist.

func (*MemDB) Iterate Uses

func (mdb *MemDB) Iterate(output func(key string, val interface{}) error) error

func (*MemDB) LastModified Uses

func (mdb *MemDB) LastModified() time.Time

func (*MemDB) Load Uses

func (mdb *MemDB) Load() error

func (*MemDB) Modified Uses

func (mdb *MemDB) Modified() bool

func (*MemDB) Put Uses

func (mdb *MemDB) Put(key string, data interface{})

func (*MemDB) Sync Uses

func (mdb *MemDB) Sync() error

type NewDocAction Uses

type NewDocAction struct {
    Action sophie.VInt
    DocInfo
}

If Action equals NDA_DEL, DocInfo is undefined.

func (*NewDocAction) ReadFrom Uses

func (nda *NewDocAction) ReadFrom(r sophie.Reader, l int) error

func (*NewDocAction) WriteTo Uses

func (nda *NewDocAction) WriteTo(w sophie.Writer) error

type Package Uses

type Package struct {
    Package     string
    Name        string
    Synopsis    string
    Doc         string
    ProjectURL  string
    StarCount   int
    ReadmeFn    string
    ReadmeData  string
    Imports     []string
    TestImports []string
    Exported    []string // exported tokens(funcs/types)

    References []string
    Etag       string
}

Package stores information from crawler

func CrawlPackage Uses

func CrawlPackage(httpClient doc.HttpClient, pkg string, etag string) (p *Package, folders []*sppb.FolderInfo, err error)

type PackedDocDB Uses

type PackedDocDB struct {
    *MemDB
}

func (PackedDocDB) Get Uses

func (db PackedDocDB) Get(key string, data interface{}) bool

func (PackedDocDB) Iterate Uses

func (db PackedDocDB) Iterate(
    output func(key string, val interface{}) error) error

func (PackedDocDB) Put Uses

func (db PackedDocDB) Put(key string, data interface{})

type Person Uses

type Person struct {
    Id       string
    Packages []string
}

func CrawlPerson Uses

func CrawlPerson(httpClient doc.HttpClient, id string) (*Person, error)

type TokenIndexer Uses

type TokenIndexer struct {
    index.TokenIndexer

    sync.RWMutex
    // contains filtered or unexported fields
}

TokenIndexer is thread-safe.

func NewTokenIndexer Uses

func NewTokenIndexer(root villa.Path, kind string) *TokenIndexer

func (*TokenIndexer) Export Uses

func (ti *TokenIndexer) Export(root villa.Path, kind string) error

func (*TokenIndexer) IdsOfToken Uses

func (ti *TokenIndexer) IdsOfToken(token string) []string

func (*TokenIndexer) LastModified Uses

func (ti *TokenIndexer) LastModified() time.Time

func (*TokenIndexer) Load Uses

func (ti *TokenIndexer) Load() error

func (*TokenIndexer) Modified Uses

func (ti *TokenIndexer) Modified() bool

func (*TokenIndexer) Put Uses

func (ti *TokenIndexer) Put(id string, tokens stringsp.Set)

func (*TokenIndexer) Sync Uses

func (ti *TokenIndexer) Sync() error

func (*TokenIndexer) TokensOfId Uses

func (ti *TokenIndexer) TokensOfId(id string) []string

Directories

Path             Synopsis
configs          Package configs defines and loads all configurations.
proto/spider     Package sppb is a generated protocol buffer package.
proto/store      Package stpb is a generated protocol buffer package.
server           GCSE HTTP server.
spider
spider/github
spider/godocorg
store            Package store handles all the storage in GCSE backend.
utils

Package gcse imports 44 packages (graph) and is imported by 58 packages. Updated 2016-11-03. Refresh now. Tools for package owners.