sub_timeline_fixer

package
v0.55.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 1, 2023 License: MIT Imports: 27 Imported by: 0

Documentation

Index

Constants

View Source
// BackUpExt is the ".csf-bk" file extension.
// NOTE(review): presumably the suffix of the backup copies that Restore reads back — confirm against the implementation.
const BackUpExt = ".csf-bk"
View Source
// DefaultMaxOffsetSeconds is 120.
// NOTE(review): presumably the default for the maxOffsetSeconds argument taken by NewPipeline and NewFFTAligner — confirm.
const DefaultMaxOffsetSeconds = 120
View Source
// MaxFramerateRatio is 1.1.
// NOTE(review): presumably the upper bound accepted for a frame-rate scale factor — confirm.
const MaxFramerateRatio = 1.1
View Source
// MinFramerateRatio is 0.9.
// NOTE(review): presumably the lower bound accepted for a frame-rate scale factor — confirm.
const MinFramerateRatio = 0.9
View Source
// SampleRate is 100.
// NOTE(review): presumably samples per second, which would match the "1 step is 10 ms" example in the Fit documentation — confirm.
const SampleRate = 100
View Source
// TmpExt is the ".csf-tmp" file extension.
// NOTE(review): presumably used for temporary files — confirm against the implementation.
const TmpExt = ".csf-tmp"

Variables

View Source
// CnStopWords is a list of stop words (842 entries, elided in this listing).
// NOTE(review): presumably the Chinese stop-word list, judging by the "Cn" prefix — confirm.
var CnStopWords = []string{}/* 842 elements not displayed */
View Source
// EnStopWords is a list of stop words (318 entries, elided in this listing).
// NOTE(review): presumably the English stop-word list, judging by the "En" prefix — confirm.
var EnStopWords = []string{} /* 318 elements not displayed */
View Source
// FramerateRatios holds three frame-rate ratios: 24/23.976, 25/23.976 and 25/24.
var FramerateRatios = []float64{24. / 23.976, 25. / 23.976, 25. / 24.}

Functions

func NewTFIDF

func NewTFIDF(testCorpus []string) (*nlp.Pipeline, mat.Matrix, error)

NewTFIDF initializes TF-IDF.

func Restore

func Restore(log *logrus.Logger, movieDirs, seriesDirs []string) (int, error)

Restore restores the automatically corrected subtitle files from their backups.

func SaveStaticLineV1

func SaveStaticLineV1(saveFPath string, infoBaseName, infoSrcName string,
	per, oldMean, OldSd, NewMean, NewSd float64, xAxis []string,
	startDiffTimeLineData, endDiffTimeLineData []opts.LineData) error

func SaveStaticLineV2

func SaveStaticLineV2(name, saveFPath string, xAxis []string, timeLineOrgData []opts.LineData) error

func SaveStaticLineV3

func SaveStaticLineV3(name, saveFPath string, xAxis []string, timeLineOrgData, fftData []opts.LineData) error

Types

type CalcOffsetTimeData added in v0.55.3

// CalcOffsetTimeData bundles a PerValue with a shared WaitGroup.
// NOTE(review): presumably the per-task input of the concurrent CalcOffsetTimeEx path — confirm against the implementation.
type CalcOffsetTimeData struct {
	PerValue float64
	Wg       *sync.WaitGroup // concurrency lock (a WaitGroup used for synchronization)
}

type FFTAligner

// FFTAligner reproduces the FFTAligner algorithm of https://github.com/smacke/ffsubsync.
type FFTAligner struct {
	// contains filtered or unexported fields
}

FFTAligner reproduces the FFTAligner algorithm of https://github.com/smacke/ffsubsync.

func NewFFTAligner

func NewFFTAligner(maxOffsetSeconds, sampleRate int) *FFTAligner

func (FFTAligner) Fit

func (f FFTAligner) Fit(refFloats, subFloats []float64) (int, float64)

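Fit returns the best offset, which still has to be converted according to the actual setup (for example, 1 step is 10 ms). The input arrays may only contain values such as 1 and -1; normalization must be done by the caller beforehand.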

type FixResult

// FixResult groups a VAD index range with the old and new Mean/SD values
// and the per-window match details.
// NOTE(review): SD presumably stands for standard deviation — confirm.
type FixResult struct {
	StartVADIndex    int
	EndVADIndex      int
	OldMean          float64
	OldSD            float64
	NewMean          float64
	NewSD            float64
	Per              float64           // proportion (ratio)
	IsOverParts      bool              // whether an over-part (junction point) is present
	MatchWindowInfos []MatchWindowInfo // must be extracted in order from MatchInfo's IndexMatchWindowInfoMap
}

func (FixResult) InRange

func (f FixResult) InRange(baseTimeDouble, timeStartDouble float64) (bool, float64)

type InputData

// InputData is the data structure passed to the fix function when it runs on multiple threads.
type InputData struct {
	Index            int                // lets concurrently processed data be put back in order afterwards
	BaseUnit         sub_helper.SubUnit // base (reference) VAD
	BaseAudioVADList []float64          // base (reference) VAD
	SrcUnit          sub_helper.SubUnit // VAD that needs to be matched
	OffsetIndex      int                // offset index of the sliding window's movement
	Wg               *sync.WaitGroup    // concurrency lock (a WaitGroup used for synchronization)
}

InputData is the data structure passed to the fix function when it runs on multiple threads.

type MatchIndex

// MatchIndex holds a base index, a source index and a similarity value.
// NOTE(review): presumably one candidate pairing between a base item and a source item — confirm.
type MatchIndex struct {
	BaseNowIndex int
	SrcNowIndex  int
	Similarity   float64
}

type MatchInfo

// MatchInfo holds the matching information.
type MatchInfo struct {
	IndexMatchWindowInfoMap map[int]MatchWindowInfo // ordered list of the matches, keyed by index
	StartDiffTimeList       []float64
	StartDiffTimeMap        *treemap.Map
	StartDiffTimeListEx     stat.Float64Slice
}

MatchInfo holds the matching information.

type MatchWindowInfo

// MatchWindowInfo describes one matched window: its start-time offset,
// its VAD index range and its over-part information.
type MatchWindowInfo struct {
	TimeDiffStartCorrelation float64 // time offset of the dialogue start
	StartVADIndex            int
	EndVADIndex              int
	OP                       OverParts // over-part (junction point) information
}

type OverParts

// OverParts describes two segments X and Y; the total length is D = XLen + YLen.
type OverParts struct {
	XLen  float64 // length at the segment point (X part)
	YLen  float64 // length at the segment point (Y part)
	XMean float64 // Mean value of segment X
	YMean float64 // Mean value of segment Y
}

OverParts: the total length is D = XLen + YLen.

type PipeResult

// PipeResult holds a best offset, a scale factor, the scaled subtitle FileInfo
// and a SkipPerBase value.
// NOTE(review): presumably one candidate result of the Pipeline offset calculation, ranked via Score — confirm.
type PipeResult struct {
	BestOffset     int
	ScaleFactor    float64
	ScaledFileInfo *subparser.FileInfo
	SkipPerBase    float64
	// contains filtered or unexported fields
}

func (PipeResult) GetOffsetTime

func (p PipeResult) GetOffsetTime() float64

GetOffsetTime derives the offset time from the offset.

func (PipeResult) Score

func (p PipeResult) Score() float64

type PipeResults

// PipeResults is a slice of PipeResult. Its Len, Less and Swap methods have
// the shape required by sort.Interface.
type PipeResults []PipeResult

func (PipeResults) Len

func (d PipeResults) Len() int

func (PipeResults) Less

func (d PipeResults) Less(i, j int) bool

func (PipeResults) Swap

func (d PipeResults) Swap(i, j int)

type Pipeline

// Pipeline exposes CalcOffsetTime, CalcOffsetTimeEx and FixSubFileTimeline.
// NOTE(review): MaxOffsetSeconds is presumably the value passed to NewPipeline — confirm.
type Pipeline struct {
	MaxOffsetSeconds int
	// contains filtered or unexported fields
}

func NewPipeline

func NewPipeline(maxOffsetSeconds int) *Pipeline

func (*Pipeline) CalcOffsetTime

func (p *Pipeline) CalcOffsetTime(infoBase, infoSrc *subparser.FileInfo, audioVadList []vad.VADInfo, useGSS bool, skipFrontAndEndPerBase float64) (PipeResult, error)

func (*Pipeline) CalcOffsetTimeEx added in v0.55.3

func (p *Pipeline) CalcOffsetTimeEx(infoBase, infoSrc *subparser.FileInfo, audioVadList []vad.VADInfo, useGSS bool, threadCount int) (PipeResult, error)

CalcOffsetTimeEx computes the best offset position concurrently.

func (*Pipeline) FixSubFileTimeline

func (p *Pipeline) FixSubFileTimeline(infoSrc, scaledInfoSrc *subparser.FileInfo, inOffsetTime float64, desSaveSubFileFullPath string) (string, error)

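FixSubFileTimeline: the scaledInfoSrc passed in here is the FileInfo with the highest score selected from pipeResults, while infoSrc is the one read from the source file; only this way can the timestamps in Content be matched correctly.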

type StopWordsPair

// StopWordsPair pairs a Name with a Count.
// NOTE(review): presumably a stop word and its number of occurrences — confirm.
type StopWordsPair struct {
	Name  string
	Count int
}

type StopWordsPairList

// StopWordsPairList is a slice of StopWordsPair. Its Len, Less and Swap
// methods have the shape required by sort.Interface.
type StopWordsPairList []StopWordsPair

func (StopWordsPairList) Len

func (a StopWordsPairList) Len() int

func (StopWordsPairList) Less

func (a StopWordsPairList) Less(i, j int) bool

func (StopWordsPairList) Swap

func (a StopWordsPairList) Swap(i, j int)

type SubCompare

// SubCompare collects (baseNowIndex, srcNowIndex) pairs through Add; Check
// reports whether enough pairs were added to satisfy the maxCompareDialogue
// count given to NewSubCompare.
type SubCompare struct {
	// contains filtered or unexported fields
}

func NewSubCompare

func NewSubCompare(maxCompareDialogue int) *SubCompare

func (*SubCompare) Add

func (s *SubCompare) Add(baseNowIndex, srcNowIndex int) bool

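Add adds an element for comparison. One detail: if, in theory, OffsetIndex values 1-5 have to be checked, then when adding 1 succeeds but adding 2 fails, the caller should clear and then add 2 again. There is another case: when going through 1-5 and the add at 4 returns false, the caller should fall back to 2 and add from there, rather than starting from 4.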

func (*SubCompare) Check

func (s *SubCompare) Check() bool

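Check reports whether enough elements have been added to satisfy the maxCompareDialogue count requirement. One detail: if, in theory, OffsetIndex values 1-5 have to be checked and Check returns false after adding 5, the caller should clear, fall back to 2 and add from there, rather than starting from 6.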

func (*SubCompare) Clear

func (s *SubCompare) Clear()

func (*SubCompare) GetStartIndex

func (s *SubCompare) GetStartIndex() (int, int)

type SubFixInfo

// SubFixInfo holds the fixed content of a subtitle together with its file name.
type SubFixInfo struct {
	FixContent string // content after the fix
	FileName   string // name of the subtitle, including the file extension
}

func NewSubFixInfo

func NewSubFixInfo(fileName, fixContent string) *SubFixInfo

type SubVADBlockInfo

// SubVADBlockInfo holds the information of one subtitle block: its Index and
// its StartIndex/EndIndex range.
type SubVADBlockInfo struct {
	Index      int
	StartIndex int
	EndIndex   int
}

SubVADBlockInfo holds subtitle block information.

type WindowInfo

// WindowInfo holds the sliding-window information.
type WindowInfo struct {
	BaseAudioFloatList []float64           // base (reference) VAD
	BaseUnit           *sub_helper.SubUnit // base (reference) VAD
	SrcUnit            *sub_helper.SubUnit // VAD that needs to be matched
	MatchedTimes       int                 // number of times a match was found
	SrcWindowLen       int                 // length of the sliding window
	SrcSlideStartIndex int                 // index at which sliding starts
	SrcSlideLen        int                 // sliding distance
	OneStep            int                 // length of each slide step
}

WindowInfo holds the sliding-window information.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL