types

package
v0.1.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 3, 2024 License: Apache-2.0 Imports: 6 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type ByKey added in v0.1.1

// ByKey is a slice of *KeyValue used for sorting by key.
type ByKey []*KeyValue

ByKey is a slice of *KeyValue used for sorting by key.

func (ByKey) Len added in v0.1.1

func (a ByKey) Len() int

Len is part of ByKey's support for sorting by key.

func (ByKey) Less added in v0.1.1

func (a ByKey) Less(i, j int) bool

func (ByKey) Swap added in v0.1.1

func (a ByKey) Swap(i, j int)

type Data2Starrocks added in v0.1.1

// Data2Starrocks holds a document's ID, URL, title, description and score,
// each with a JSON tag.
// NOTE(review): presumably the record shape exported to StarRocks — confirm
// against callers.
type Data2Starrocks struct {
	DocId int64   `json:"doc_id"`
	Url   string  `json:"url"`
	Title string  `json:"title"`
	Desc  string  `json:"desc"`
	Score float64 `json:"score"` // quality score
}

type DictTireTree added in v0.1.1

// DictTireTree pairs a string value with an integer score.
// NOTE(review): "Tire" is presumably a misspelling of "Trie"; the name is
// exported, so confirm with callers before renaming.
type DictTireTree struct {
	Value string `json:"value"`
	Score int64  `json:"score"`
}

type Document added in v0.1.1

// Document is the document format: a document ID, a title and a body.
type Document struct {
	DocId int64  `json:"doc_id"`
	Title string `json:"title"`
	Body  string `json:"body"`
}

Document is the document format.

func (Document) MarshalEasyJSON added in v0.1.1

func (v Document) MarshalEasyJSON(w *jwriter.Writer)

MarshalEasyJSON supports easyjson.Marshaler interface

func (Document) MarshalJSON added in v0.1.1

func (v Document) MarshalJSON() ([]byte, error)

MarshalJSON supports json.Marshaler interface

func (*Document) UnmarshalEasyJSON added in v0.1.1

func (v *Document) UnmarshalEasyJSON(l *jlexer.Lexer)

UnmarshalEasyJSON supports easyjson.Unmarshaler interface

func (*Document) UnmarshalJSON added in v0.1.1

func (v *Document) UnmarshalJSON(data []byte) error

UnmarshalJSON supports json.Unmarshaler interface

type InputDataList added in v0.1.1

// InputDataList is a single input record carrying a document ID, title, URL,
// body and score, each with a JSON tag.
type InputDataList struct {
	DocId int64   `json:"doc_id"`
	Title string  `json:"title"`
	Url   string  `json:"url"`
	Body  string  `json:"body"`
	Score float64 `json:"score"`
}

type InvertedIndexValue added in v0.1.1

// InvertedIndexValue is an inverted-index entry for a token. It references
// the token's postings list and term values and carries document and
// position counts.
type InvertedIndexValue struct {
	Token         string        `json:"token"`
	PostingsList  *PostingsList `json:"postings_list"`
	DocCount      int64         `json:"doc_count"`
	PositionCount int64         `json:"position_count"` // used by queries; not used when writing for now
	TermValues    *TermValue    `json:"term_values"`
}

InvertedIndexValue is an inverted-index entry.

type InvertedInfo added in v0.1.1

// InvertedInfo associates a token with a roaring bitmap of document IDs.
type InvertedInfo struct {
	Token  string          `json:"token"`
	DocIds *roaring.Bitmap `json:"doc_ids"`
}

type KeyValue added in v0.1.1

// KeyValue is a string key paired with a string value. It is the element
// type of ByKey.
type KeyValue struct {
	Key   string `json:"key"`
	Value string `json:"value"`
}

type MapReduceTask added in v0.1.1

// MapReduceTask describes one MapReduce task: its input, current state,
// reducer and task counts, intermediate file locations and output location.
type MapReduceTask struct {
	Input         string   `json:"input"`         // input file
	TaskState     State    `json:"task_state"`    // task state
	NReducer      int      `json:"n_reducer"`     // number of reducers
	TaskNumber    int      `json:"task_number"`   // number of tasks
	Intermediates []string `json:"intermediates"` // storage locations of the files produced by map
	Output        string   `json:"output"`        // location the output is written to
}

type MasterTask added in v0.1.1

// MasterTask pairs a MapReduceTask with a status and a start time.
// NOTE(review): presumably the master's bookkeeping record for a task —
// confirm against callers.
type MasterTask struct {
	TaskStatus    MasterTaskStatus
	StartTime     time.Time
	TaskReference *MapReduceTask
}

type MasterTaskStatus added in v0.1.1

// MasterTaskStatus is the status of a MasterTask.
type MasterTaskStatus int

// MasterTaskStatus values. They start at 1 (iota + 1), so the zero value of
// MasterTaskStatus matches none of the named constants.
const (
	Idle       MasterTaskStatus = iota + 1 // not started
	InProgress                             // in progress
	Completed                              // completed
)

type PostingsList added in v0.1.1

// PostingsList holds a term together with its positions, its occurrence
// count and a roaring bitmap of document IDs.
type PostingsList struct {
	Term      string          `json:"term"`
	Position  []int64         `json:"position"`   // positions; used for highlighting matches
	TermCount int64           `json:"term_count"` // occurrence count, used for ranking: the more often a term occurs in a document, the more important it may be
	DocIds    *roaring.Bitmap `json:"doc_ids"`
}

type SearchItem added in v0.1.1

// SearchItem is a single search result.
type SearchItem struct {
	DocId        int64   `json:"doc_id"`
	Content      string  `json:"content"`
	Title        string  `json:"title"`
	Score        float64 `json:"score"`         // score of the term for this document, i.e. how important the term is
	DocCount     int64   `json:"doc_count"`     // how many times the term occurs in the document
	ContentScore float64 `json:"content_score"` // score of the document itself
}

SearchItem is a search result.

type SearchItemList added in v0.1.1

// SearchItemList is a slice of *SearchItem.
type SearchItemList []*SearchItem

func (SearchItemList) Len added in v0.1.1

func (ds SearchItemList) Len() int

func (SearchItemList) Less added in v0.1.1

func (ds SearchItemList) Less(i, j int) bool

func (SearchItemList) Swap added in v0.1.1

func (ds SearchItemList) Swap(i, j int)

type State added in v0.1.1

// State is the state of a MapReduceTask (see MapReduceTask.TaskState).
type State int

// State values. They start at 1 (iota + 1), so the zero value of State
// matches none of the named constants.
const (
	Map State = iota + 1
	Reduce
	Exit
	Wait
)

type Task added in v0.1.1

// Task carries a list of column names, a table name and a source type.
type Task struct {
	Columns    []string `json:"columns"`
	BiTable    string   `json:"bi_table"`
	SourceType int      `json:"source_type"` // source: 1 = crawler, 2 = CSV import
}

type TermValue added in v0.1.1

// TermValue holds a document count, an offset and a size for a term.
// NOTE(review): Offset and Size presumably locate the term's data in a
// stored file — confirm against the reader/writer code.
type TermValue struct {
	DocCount int64 `json:"doc_count"`
	Offset   int64 `json:"offset"`
	Size     int64 `json:"size"`
}

type Tokenization added in v0.1.1

// Tokenization is the structure returned by tokenization: a token and the ID
// of the document it belongs to.
type Tokenization struct {
	Token string // the token (term)
	// Position int64  // position of the token in the text // TODO: add later
	// Offset   int64  // offset
	DocId int64
}

Tokenization is the structure returned by tokenization.

type UserTokenData

// UserTokenData bundles a user value with an access token and a refresh
// token. User is untyped (interface{}), so its concrete shape is decided by
// the caller.
type UserTokenData struct {
	User         interface{} `json:"user"`
	AccessToken  string      `json:"access_token"`
	RefreshToken string      `json:"refresh_token"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL