magictext

package module
v0.1.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 8, 2023 License: MIT Imports: 16 Imported by: 0

README

magictext

Generate a high-quality summary of a given text input.

TODO

  • Support goroutines to improve performance
  • Generate separate levels of summary and write to different files
  • Support explicitly setting the OpenAI API key
  • Generate parent summary with fixed number of chunks
  • Calculate tokens with tiktoken-go
  • Add retry logic for goroutines
  • Fix token regex performance issues
  • Remove video header and tail from rst file
  • Return the error instance to the caller instead of calling log's fatal function directly
  • Write project documentation

Documentation

Index

Constants

View Source
// Size limits for the text sent in a single request.
//
// NOTE(review): the code that consumes these values is not visible here. The
// GenerateTitle and ExtractNouns docs mention maximum input lengths of 512 and
// 2048 respectively, so these constants presumably back those limits — confirm.
const (
	MaxReqTokens512  = 512
	MaxReqTokens2048 = 2048
)
View Source
// Retry tuning values.
//
// NOTE(review): presumably MaxRetryTimes caps the number of retry attempts and
// SleepSeconds is the pause between attempts, in seconds; the code that uses
// them is not visible here — confirm.
const (
	MaxRetryTimes = 3
	SleepSeconds  = 3
)
View Source
// Prompt templates sent to the language model. Every template starts with
// DefaultPrompt and contains one or more %s placeholders that the caller fills
// in before sending the request.
const (
	// DefaultPrompt is the common instruction prefix shared by all templates
	// below. It ends with a trailing space.
	DefaultPrompt = "`reset` `no quotes` `no explanations` `no prompt` `no self-reference` `no apologies` `no filler` `just answer` "

	// GenerateSummaryPrompt asks for a Chinese summary of at most 150 words.
	// It has a single %s placeholder: the text to summarize.
	GenerateSummaryPrompt = DefaultPrompt + `
I will give you text content, you will rewrite it and output that in a short summarized version of my text. Keep the meaning the same. Ensure that the revised content has fewer characters than the original text and no more than 150 Chinese words.

Only give me the output and nothing else. Now, using the concepts above, summarize the following text. Respond in Chinese language.

[text]

%s

[output]
`

	// GenerateSummaryPromptWithTopic is GenerateSummaryPrompt with an extra
	// instruction to focus on given topics. It has two %s placeholders, in
	// order: the topics, then the text to summarize.
	GenerateSummaryPromptWithTopic = DefaultPrompt + `
I will give you text content, you will rewrite it and output that in a short summarized version of my text. Keep the meaning the same. Ensure that the revised content has fewer characters than the original text and no more than 150 Chinese words.

When generating text summaries, expand around the following topics as much as possible: %s` + `

Only give me the output and nothing else. Now, using the concepts above, summarize the following text. Respond in Chinese language.

[text]

%s

[output]
`

	// GenerateTitlePrompt asks for a Chinese title of at most 15 characters.
	// It has a single %s placeholder: the paragraph to title.
	GenerateTitlePrompt = DefaultPrompt + `
Create a title for the paragraph below. The title should be concise and to the point. The number of characters should not exceed 15 Chinese characters. This title will be used as the title of the video. Respond in Chinese language.

[text]

%s
	
[output]
`

	// ExtractNounsPrompt asks for the named entities in a text as a JSON
	// object. It has a single %s placeholder: the text to scan.
	//
	// The [output format] sample must itself be valid JSON (no trailing comma
	// after the last member), because the model is asked to mirror it.
	ExtractNounsPrompt = DefaultPrompt + `
Find all user names, company names, product names, course names, and book names from the following text, and output them in the json format. Respond in Chinese language.

[output format]
{
	"usernames": [],
	"company_names": [],
	"product_names": [],
	"course_names": [],
	"book_names": []
}

[text]

%s

[output]
`
)
View Source
// Grouping and concurrency limits.
//
// NOTE(review): presumably MaxChunksPerGroup is the number of chunks combined
// into one group when building a higher-level summary, and MaxConcurrent bounds
// the number of concurrent requests; the callers are not visible here — confirm.
const (
	MaxChunksPerGroup = 3 // TODO: this variable should be calculated dynamically
	MaxConcurrent     = 5
)

Variables

View Source
// Package-level switches and shared handles. Debug and MockOpenAI default to
// false; OpenAIClient and TikToken have no initializer here and are therefore
// nil until assigned.
//
// NOTE(review): presumably Debug enables extra diagnostic output and MockOpenAI
// makes the package avoid real OpenAI calls; where OpenAIClient and TikToken
// get initialized is not visible here — confirm before relying on them.
var (
	Debug        = false
	MockOpenAI   = false
	OpenAIClient *openai.Client
	TikToken     *tiktoken.Tiktoken
)

Functions

func CountTokens

func CountTokens(text string) int

func DumpChunksToJSON

func DumpChunksToJSON(filename string, chunks interface{}) error

DumpChunksToJSON writes the chunk slice to the given file.

func DumpSummary

func DumpSummary(filename, summary string, captionSummaries []*CaptionSummary) error

func ExtractNouns

func ExtractNouns(text string) (string, error)

ExtractNouns extracts nouns from a string. The maximum length of the input text is 2048, and the output is a JSON string; see the following example for more information.

Output string:

{
   "usernames": ["吴三桂", "皇太极", "弘历"],
   "company_names": ["得到"],
   "product_names": [],
   "course_names": ["硅谷来信"],
   "book_names": ["万历十五年", "湘行散记", "货币未来"]
}

func GenerateTitle

func GenerateTitle(text string) (string, error)

GenerateTitle generates a title for the given text. The maximum length of the input text is 512.

func SplitText

func SplitText(text string, chunkSize, chunkOverlap int) ([]string, error)

Types

type CaptionChunk

// CaptionChunk is a TextChunk paired with a From/To time range. From and To
// are encoded under the JSON keys "from" and "to"; the embedded TextChunk
// contributes its own fields.
//
// NOTE(review): presumably From and To are the start and end times of the
// caption the text came from — confirm.
type CaptionChunk struct {
	From time.Time `json:"from"`
	To   time.Time `json:"to"`
	TextChunk
}

func NewCaptionChunk

func NewCaptionChunk(seq int, text string, from, to time.Time) *CaptionChunk

func SplitSubtitle

func SplitSubtitle(subtitle subtitles.Subtitle) ([]*CaptionChunk, error)

func (CaptionChunk) String

func (c CaptionChunk) String() string

type CaptionSummary added in v0.1.1

// CaptionSummary is a Summary paired with a From/To time range. Unlike
// CaptionChunk, its fields carry no JSON tags.
//
// NOTE(review): presumably From and To delimit the portion of the subtitle the
// summary covers — confirm.
type CaptionSummary struct {
	From time.Time
	To   time.Time
	Summary
}

func GenerateSummaryBySubtitle

func GenerateSummaryBySubtitle(topic string, subtitle subtitles.Subtitle) ([]*CaptionSummary, string, error)

GenerateSummaryBySubtitle generates a summary for the given subtitles.

func (CaptionSummary) FromInSeconds added in v0.1.3

func (cs CaptionSummary) FromInSeconds() int

func (CaptionSummary) FromInString added in v0.1.3

func (cs CaptionSummary) FromInString() string

func (CaptionSummary) ToInSeconds added in v0.1.3

func (cs CaptionSummary) ToInSeconds() int

func (CaptionSummary) ToInString added in v0.1.3

func (cs CaptionSummary) ToInString() string

type Chunk

// Chunk is a piece of text that can hold other chunks in Children, so chunks
// can be nested. All fields are exported and carry JSON tags.
//
// NOTE(review): presumably Height is the chunk's level in the nesting, Seq its
// position among siblings, and Tokens the token count of Text — confirm
// against NewChunk, whose body is not visible here.
type Chunk struct {
	ID       string     `json:"id"`
	Height   int        `json:"height"`
	Seq      int        `json:"seq"`
	Text     string     `json:"text"`
	Tokens   int        `json:"tokens"`
	Children ChunkSlice `json:"children"`
}

func NewChunk

func NewChunk(seq int, text string) *Chunk

func (*Chunk) String

func (c *Chunk) String() string

type ChunkSlice

// ChunkSlice is a slice of pointers to Chunk.
type ChunkSlice []*Chunk

func (ChunkSlice) String

func (cs ChunkSlice) String() string

func (ChunkSlice) Text added in v0.1.1

func (cs ChunkSlice) Text() string

func (ChunkSlice) Tokens

func (cs ChunkSlice) Tokens() int

type Summary

// Summary holds a piece of summary text together with an ID and a sequence
// number. Its fields carry no JSON tags.
//
// NOTE(review): presumably Seq is the summary's position in the output order —
// confirm.
type Summary struct {
	ID   string
	Seq  int
	Text string
}

type TextChunk

// TextChunk is a flat piece of text with an ID, a sequence number, and a token
// count. All fields are exported and carry JSON tags.
//
// NOTE(review): presumably Tokens is the token count of Text — confirm.
type TextChunk struct {
	ID     string `json:"id"`
	Seq    int    `json:"seq"`
	Text   string `json:"text"`
	Tokens int    `json:"tokens"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL