dm

package

v0.0.0-...-3b7a742 Latest Latest Go to latest Published: May 25, 2015 License: MIT Imports: 14 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/jamessynge/diffmerge

Links

Open Source Insights

Documentation ¶

Index ¶

Variables
func BlockPairsAreNeighbors(p, o *BlockPair) bool
func BlockPairsAreSameType(p, o *BlockPair) bool
func BlockPairsLess(p, o *BlockPair) bool
func ComputeIsProbablyCommon(normalizedLine []byte) bool
func DigitCount(i int) int
func FileRangeIsEmpty(p FileRange) bool
func FileRangeLength(p FileRange) int
func FindGapsInRangePair(frp FileRangePair, blockPairs BlockPairs) (aRanges []FileRange, bRanges []FileRange)
func FindMiddleAndSharedEnds(frp FileRangePair, config DifferencerConfig) *middleAndSharedEnds
func FormatInterleaved(pairs []*BlockPair, aIsPrimary bool, aFile, bFile *File, w io.Writer, ...) error
func FormatLineNum(i, maxDigits int) string
func FormatSideBySide(aFile, bFile *File, pairs []*BlockPair, aIsPrimary bool, w io.Writer, ...)
func FormatSideBySideToString(aFile, bFile *File, pairs []*BlockPair, aIsPrimary bool, ...) string
func GetLPHash(lp LinePos) uint32
func GetLPNormalizedHash(lp LinePos) uint32
func IsSentinal(p *BlockPair) bool
func MakeBlockPairAdjacencies(blockPairs BlockPairs) (adjacencies map[*BlockPair]*BlockPairAdjacency)
func MatchCommonEnds(aRange, bRange FileRange, prefix, suffix, normalized bool) (aRest, bRest FileRange, pairs []*BlockPair)
func MatchCommonPrefix(aRange, bRange FileRange, normalized bool) (aRemaining, bRemaining FileRange, commonPrefix *BlockPair)
func MatchCommonSuffix(aRange, bRange FileRange, normalized bool) (aRemaining, bRemaining FileRange, commonSuffix *BlockPair)
func MaxFloat32(u float32, vs ...float32) float32
func MaxInt(i, j int) int
func MinFloat32(u float32, vs ...float32) float32
func MinInt(i, j int) int
func PerformLCS(fileRangePair FileRangePair, config DifferencerConfig, sf SimilarityFactors) *lcsOfFileRangePair
func SelectAllBlockPairs(pair *BlockPair) bool
func SelectHashGetter(normalized bool) func(lp LinePos) uint32
func SortBlockMatchesByAIndex(a []BlockMatch)
func SortBlockMatchesByBIndex(a []BlockMatch)
func SortBlockPairsByAIndex(a []*BlockPair)
func SortBlockPairsByBIndex(a []*BlockPair)
func SortIndexPairsByIndex1(a []IndexPair)
func SortIndexPairsByIndex2(a []IndexPair)
type BlockMatch
- func BasicTichyMaximalBlockMoves(aLines, bLines []LinePos, getHash func(lp LinePos) uint32) []BlockMatch
type BlockMatchByAIndex
- func (a BlockMatchByAIndex) Len() int
- func (a BlockMatchByAIndex) Less(i, j int) bool
- func (a BlockMatchByAIndex) Swap(i, j int)
type BlockMatchByBIndex
- func (a BlockMatchByBIndex) Len() int
- func (a BlockMatchByBIndex) Less(i, j int) bool
- func (a BlockMatchByBIndex) Swap(i, j int)
type BlockPair
- func CombineBlockPairs(sortedInput []*BlockPair) (output []*BlockPair)
- func FillGapsWithEasyMatches(frp FileRangePair, blockPairs []*BlockPair) (filledGaps []*BlockPair)
- func MatchingRangePairOffsetsToBlockPairs(frp FileRangePair, matchingOffsets []IndexPair, matchedNormalizedLines bool, ...) (blockPairs []*BlockPair)
- func PerformDiff(aFile, bFile *File, config DifferencerConfig) (pairs []*BlockPair)
- func PerformDiff2(aFile, bFile *File, config DifferencerConfig) (pairs []*BlockPair)
- func WeightedLCSBlockPairsOfRangePair(pair FileRangePair, sf SimilarityFactors) (blockPairs []*BlockPair, score float32)
- func (p *BlockPair) ABeyond() int
- func (p *BlockPair) BBeyond() int
- func (p *BlockPair) IsSentinal() bool
type BlockPairAdjacency
type BlockPairByAIndex
- func (a BlockPairByAIndex) Len() int
- func (a BlockPairByAIndex) Less(i, j int) bool
- func (a BlockPairByAIndex) Swap(i, j int)
type BlockPairByBIndex
- func (a BlockPairByBIndex) Len() int
- func (a BlockPairByBIndex) Less(i, j int) bool
- func (a BlockPairByBIndex) Swap(i, j int)
type BlockPairs
- func ExtendMatchesBackward(filePair FilePair, inputPairs BlockPairs) (outputPairs BlockPairs)
- func ExtendMatchesForward(filePair FilePair, inputPairs BlockPairs) (outputPairs BlockPairs)
- func FillRemainingBGapsWithMismatches(filePair FilePair, inputPairs BlockPairs) (outputPairs BlockPairs)
- func PerformMoveDetectionInGaps(frp FileRangePair, blockPairs BlockPairs, config DifferencerConfig, ...) (outputBlockPairs BlockPairs)
- func PerformSmallEditDetectionInGaps(frp FileRangePair, blockPairs BlockPairs, config DifferencerConfig) (outputBlockPairs BlockPairs)
- func (s BlockPairs) AssignMoveId()
- func (s BlockPairs) CountLinesInPairs() (numALines, numBLines int)
- func (s BlockPairs) IsInStrictOrder() bool
- func (s BlockPairs) Len() int
- func (s BlockPairs) LimitIndexPairs() (limitsInA, limitsInB IndexPair)
- func (s BlockPairs) MakeReverseIndex() (pair2Index map[*BlockPair]int)
- func (s BlockPairs) Swap(i, j int)
type Diff3Triple
type Diff3TripleType
type Diff3Triples
- func PerformDiff3(yours, base, theirs *File, b2yPairs, b2tPairs BlockPairs, ...) (triples Diff3Triples, conflictsExist bool)
type DifferencerConfig
- func (p *DifferencerConfig) CreateFlags(f *flag.FlagSet)
type File
- func ReadFile(name string) (*File, error)
- func (p *File) BriefDebugString() string
- func (p *File) GetFullRange() FileRange
- func (p *File) GetHashOfLine(n int) uint32
- func (p *File) GetLineBytes(n int) []byte
- func (p *File) GetNormalizedHashOfLine(n int) uint32
- func (p *File) GetUnindentedLineBytes(n int) []byte
- func (p *File) LineCount() int
- func (p *File) MakeSubRange(start, length int) FileRange
- func (p *File) Select(fn func(lp LinePos) bool) []LinePos
type FilePair
- func MakeFilePair(aFile, bFile *File) FilePair
type FileRange
- func CreateFileRange(file *File, start, length int) FileRange
type FileRangePair
type Float32UnaryFunction
type FourIndices
type GeneralizedLogisticFunction
- func MakeSymmetricLogisticFunction(inputLo, inputHi, outputLo, outputHi float64) *GeneralizedLogisticFunction
- func (p *GeneralizedLogisticFunction) Compute(input float64) float64
type GetIntervalFn
- func MakeGetAInterval(selector SelectBlockPairFn) GetIntervalFn
- func MakeGetBInterval(selector SelectBlockPairFn) GetIntervalFn
type HashPositions
- func (m HashPositions) CopyMap() HashPositions
type IndexPair
- func WeightedLCS(aLength, bLength int, getSimilarity func(aIndex, bIndex int) float32) (result []IndexPair, score float32)
- func WeightedLCSOffsetsOfRangePair(pair FileRangePair, sf SimilarityFactors) (lcsOffsetPairs []IndexPair, score float32)
type IndexPairByIndex1
- func (a IndexPairByIndex1) Len() int
- func (a IndexPairByIndex1) Less(i, j int) bool
- func (a IndexPairByIndex1) Swap(i, j int)
type IndexPairByIndex2
- func (a IndexPairByIndex2) Len() int
- func (a IndexPairByIndex2) Less(i, j int) bool
- func (a IndexPairByIndex2) Swap(i, j int)
type IntervalSet
- func AIndexBlockPairsToIntervalSet(blockPairs BlockPairs, selector func(pair *BlockPair) bool) IntervalSet
- func BIndexBlockPairsToIntervalSet(blockPairs BlockPairs, selector func(pair *BlockPair) bool) IntervalSet
- func BlockPairsToIntervalSet(blockPairs BlockPairs, getInterval GetIntervalFn) IntervalSet
- func MakeIntervalSet() IntervalSet
type LeadingWhitespaceStatistics
- func MeasureLeadingWhitespace(files ...*File) (stats LeadingWhitespaceStatistics)
- func (stats *LeadingWhitespaceStatistics) AddFile(file *File)
- func (stats *LeadingWhitespaceStatistics) ComputeFractions()
- func (stats *LeadingWhitespaceStatistics) Totals() (totalLeadingTabs, totalLeadingSpaces, totalLeadingSpacesAfterTab uint64)
type LineHasher
- func GetLineHasher() LineHasher
type LinePos
- func FindRareLinesInRanges(aRange, bRange FileRange, normalizedMatch, sameCount, omitProbablyCommon bool, ...) (aRareLines, bRareLines []LinePos)
- func (p *LinePos) ValidLeadingWhiteSpace() bool
type MatchCommonXFunc
type MoveCandidate
- func MakeMoveCandidate(aRange, bRange FileRange, pairs []*BlockPair) *MoveCandidate
- func (p *MoveCandidate) AExtent() int
- func (p *MoveCandidate) ExtendPairs(state *diffState)
type MoveCandidate2
- func (p *MoveCandidate2) AExtent() int
- func (p *MoveCandidate2) BExtent() int
- func (p *MoveCandidate2) SetScore()
type MoveCandidate2s
- func (v MoveCandidate2s) Len() int
- func (v MoveCandidate2s) Less(i, j int) bool
- func (v MoveCandidate2s) SetScores()
- func (v MoveCandidate2s) Swap(i, j int)
type MoveCandidates
- func (v MoveCandidates) Len() int
- func (v MoveCandidates) Less(i, j int) bool
- func (v MoveCandidates) Swap(i, j int)
type SelectBlockPairFn
type SharedEndsData
- func (p *SharedEndsData) GetPrefixAndSuffixLengths(rareEndsOnly bool) (prefixLength, suffixLength int)
- func (p *SharedEndsData) HasPrefixOrSuffix() bool
- func (p *SharedEndsData) HasRarePrefixOrSuffix() bool
- func (p *SharedEndsData) PrefixAndSuffixOverlap(rareEndsOnly bool) bool
type SharedEndsKey
type SideBySideConfig
type SimilarityFactors
- func (s *SimilarityFactors) SimilarityOfRangeLines(pair FileRangePair, aOffset, bOffset int) float32

Constants ¶

This section is empty.

Variables ¶

View Source

// DefaultSideBySideConfig is a ready-made SideBySideConfig: 80 display
// columns, line numbers displayed, long lines wrapped, 8 spaces per tab,
// 3 context lines, and line numbering that is not zero-based.
var DefaultSideBySideConfig = SideBySideConfig{
	DisplayColumns:       80,
	DisplayLineNumbers:   true,
	WrapLongLines:        true,
	SpacesPerTab:         8,
	ContextLines:         3,
	ZeroBasedLineNumbers: false,
}

View Source

var TO_BE_DELETED = glog.CopyStandardLogTo

Functions ¶

func BlockPairsAreNeighbors ¶

func BlockPairsAreNeighbors(p, o *BlockPair) bool

Is p immediately before o, in both A and B.

func BlockPairsAreSameType ¶

func BlockPairsAreSameType(p, o *BlockPair) bool

func ComputeIsProbablyCommon ¶

func ComputeIsProbablyCommon(normalizedLine []byte) bool

func DigitCount ¶

func DigitCount(i int) int

func FileRangeIsEmpty ¶

func FileRangeIsEmpty(p FileRange) bool

func FileRangeLength ¶

func FileRangeLength(p FileRange) int

func FindGapsInRangePair ¶

func FindGapsInRangePair(
	frp FileRangePair, blockPairs BlockPairs) (aRanges []FileRange, bRanges []FileRange)

Creates two slices, aRanges and bRanges, each the same length. For each index N in the slices, at most one of the two slices will have a nil value.

func FindMiddleAndSharedEnds ¶

func FindMiddleAndSharedEnds(frp FileRangePair, config DifferencerConfig) *middleAndSharedEnds

func FormatInterleaved ¶

func FormatInterleaved(pairs []*BlockPair, aIsPrimary bool, aFile, bFile *File,
	w io.Writer, printLineNumbers bool) error

func FormatLineNum ¶

func FormatLineNum(i, maxDigits int) string

func FormatSideBySide ¶

func FormatSideBySide(aFile, bFile *File, pairs []*BlockPair, aIsPrimary bool,
	w io.Writer, config SideBySideConfig)

func FormatSideBySideToString ¶

func FormatSideBySideToString(aFile, bFile *File, pairs []*BlockPair,
	aIsPrimary bool, config SideBySideConfig) string

func GetLPHash ¶

func GetLPHash(lp LinePos) uint32

func GetLPNormalizedHash ¶

func GetLPNormalizedHash(lp LinePos) uint32

func IsSentinal ¶

func IsSentinal(p *BlockPair) bool

func MakeBlockPairAdjacencies ¶

func MakeBlockPairAdjacencies(blockPairs BlockPairs) (
	adjacencies map[*BlockPair]*BlockPairAdjacency)

func MatchCommonEnds ¶

func MatchCommonEnds(aRange, bRange FileRange, prefix, suffix, normalized bool) (
	aRest, bRest FileRange, pairs []*BlockPair)

func MatchCommonPrefix ¶

func MatchCommonPrefix(aRange, bRange FileRange, normalized bool) (
	aRemaining, bRemaining FileRange, commonPrefix *BlockPair)

Find all lines at the start that are the same (the common prefix). Produces at most one match; if normalized==true, then that match may contain both full and normalized line matches (separating those will need to happen elsewhere).

func MatchCommonSuffix ¶

func MatchCommonSuffix(aRange, bRange FileRange, normalized bool) (
	aRemaining, bRemaining FileRange, commonSuffix *BlockPair)

Find all lines at the end that are the same (the common suffix). Produces at most one match; if normalized==true, then that match may contain both full and normalized line matches (separating those will need to happen elsewhere).

func MaxFloat32 ¶

func MaxFloat32(u float32, vs ...float32) float32

func MaxInt ¶

func MaxInt(i, j int) int

func MinFloat32 ¶

func MinFloat32(u float32, vs ...float32) float32

func MinInt ¶

func MinInt(i, j int) int

func PerformLCS ¶

func PerformLCS(fileRangePair FileRangePair, config DifferencerConfig, sf SimilarityFactors) *lcsOfFileRangePair

Compute Longest Common Subsequence of lines in two file ranges. Returns nil if there is no match at all (i.e. the LCS is empty).

func SelectAllBlockPairs ¶

func SelectAllBlockPairs(pair *BlockPair) bool

func SelectHashGetter ¶

func SelectHashGetter(normalized bool) func(lp LinePos) uint32

func SortBlockMatchesByAIndex ¶

func SortBlockMatchesByAIndex(a []BlockMatch)

func SortBlockMatchesByBIndex ¶

func SortBlockMatchesByBIndex(a []BlockMatch)

func SortBlockPairsByAIndex ¶

func SortBlockPairsByAIndex(a []*BlockPair)

func SortBlockPairsByBIndex ¶

func SortBlockPairsByBIndex(a []*BlockPair)

func SortIndexPairsByIndex1 ¶

func SortIndexPairsByIndex1(a []IndexPair)

func SortIndexPairsByIndex2 ¶

func SortIndexPairsByIndex2(a []IndexPair)

Types ¶

type BlockMatch ¶

type BlockMatch struct {
	// Index is same as LinePos.Index of starting line of match.
	// Length is number of lines that match.
	AIndex, BIndex, Length int
}

Represents a match between files A and B.

func BasicTichyMaximalBlockMoves ¶

func BasicTichyMaximalBlockMoves(
	aLines, bLines []LinePos, getHash func(lp LinePos) uint32) []BlockMatch

Find maximal blocks that can be matched between a and b, where each line in b is matched with at most one in a; vice versa is not necessarily true, unless a and b consist only of lines that are locally unique (i.e. no hash appears twice in a, nor twice in b).

type BlockMatchByAIndex ¶

type BlockMatchByAIndex []BlockMatch

BlockMatchByAIndex implements sort.Interface for []BlockMatch based on the AIndex field, then BIndex.

func (BlockMatchByAIndex) Len ¶

func (a BlockMatchByAIndex) Len() int

func (BlockMatchByAIndex) Less ¶

func (a BlockMatchByAIndex) Less(i, j int) bool

func (BlockMatchByAIndex) Swap ¶

func (a BlockMatchByAIndex) Swap(i, j int)

type BlockMatchByBIndex ¶

type BlockMatchByBIndex []BlockMatch

BlockMatchByBIndex implements sort.Interface for []BlockMatch based on the BIndex field, then AIndex.

func (BlockMatchByBIndex) Len ¶

func (a BlockMatchByBIndex) Len() int

func (BlockMatchByBIndex) Less ¶

func (a BlockMatchByBIndex) Less(i, j int) bool

func (BlockMatchByBIndex) Swap ¶

func (a BlockMatchByBIndex) Swap(i, j int)

type BlockPair ¶

// BlockPair pairs a range of lines in file A with a range of lines in file B.
type BlockPair struct {
	// Location and extent of the paired ranges in A and B (presumably a
	// starting line index and a line count for each file; confirm).
	AIndex, ALength int
	BIndex, BLength int
	MoveId          int // An attempt to tell one move from another.
	// If IsMatch and IsNormalizedMatch are both true, this means that the
	// lines match after normalization, and it is possible that some or even
	// all of them are exact matches, but we've not recorded that.
	IsMatch           bool
	IsNormalizedMatch bool
	IsMove            bool // Does this represent a move?
}

Represents a pairing of ranges in files A and B, primarily for output, as we can produce different pairings based on which file we consider primary (i.e. in the face of block moves we may print A in order, but B out of order).

func CombineBlockPairs ¶

func CombineBlockPairs(sortedInput []*BlockPair) (output []*BlockPair)

Sort by AIndex or BIndex before calling CombineBlockPairs.

func FillGapsWithEasyMatches ¶

func FillGapsWithEasyMatches(frp FileRangePair, blockPairs []*BlockPair) (filledGaps []*BlockPair)

func MatchingRangePairOffsetsToBlockPairs ¶

func MatchingRangePairOffsetsToBlockPairs(
	frp FileRangePair, matchingOffsets []IndexPair, matchedNormalizedLines bool,
	maxRareOccurrences uint8) (blockPairs []*BlockPair)

Assuming here that there are no moves (relative to aRange and bRange).

func PerformDiff ¶

func PerformDiff(aFile, bFile *File, config DifferencerConfig) (pairs []*BlockPair)

func PerformDiff2 ¶

func PerformDiff2(aFile, bFile *File, config DifferencerConfig) (pairs []*BlockPair)

func WeightedLCSBlockPairsOfRangePair ¶

func WeightedLCSBlockPairsOfRangePair(
	pair FileRangePair, sf SimilarityFactors) (blockPairs []*BlockPair, score float32)

func (*BlockPair) ABeyond ¶

func (p *BlockPair) ABeyond() int

func (*BlockPair) BBeyond ¶

func (p *BlockPair) BBeyond() int

func (*BlockPair) IsSentinal ¶

func (p *BlockPair) IsSentinal() bool

type BlockPairAdjacency ¶

type BlockPairAdjacency struct {
	// contains filtered or unexported fields
}

type BlockPairByAIndex ¶

type BlockPairByAIndex []*BlockPair

BlockPairByAIndex implements sort.Interface for []*BlockPair based on the AIndex field, then BIndex.

func (BlockPairByAIndex) Len ¶

func (a BlockPairByAIndex) Len() int

func (BlockPairByAIndex) Less ¶

func (a BlockPairByAIndex) Less(i, j int) bool

func (BlockPairByAIndex) Swap ¶

func (a BlockPairByAIndex) Swap(i, j int)

type BlockPairByBIndex ¶

type BlockPairByBIndex []*BlockPair

BlockPairByBIndex implements sort.Interface for []*BlockPair based on the BIndex field, then AIndex.

func (BlockPairByBIndex) Len ¶

func (a BlockPairByBIndex) Len() int

func (BlockPairByBIndex) Less ¶

func (a BlockPairByBIndex) Less(i, j int) bool

func (BlockPairByBIndex) Swap ¶

func (a BlockPairByBIndex) Swap(i, j int)

type BlockPairs ¶

type BlockPairs []*BlockPair

func ExtendMatchesBackward ¶

func ExtendMatchesBackward(filePair FilePair, inputPairs BlockPairs) (outputPairs BlockPairs)

func ExtendMatchesForward ¶

func ExtendMatchesForward(filePair FilePair, inputPairs BlockPairs) (outputPairs BlockPairs)

func FillRemainingBGapsWithMismatches ¶

func FillRemainingBGapsWithMismatches(filePair FilePair, inputPairs BlockPairs) (
	outputPairs BlockPairs)

func PerformMoveDetectionInGaps ¶

func PerformMoveDetectionInGaps(
	frp FileRangePair, blockPairs BlockPairs, config DifferencerConfig,
	sf SimilarityFactors) (
	outputBlockPairs BlockPairs)

func PerformSmallEditDetectionInGaps ¶

func PerformSmallEditDetectionInGaps(
	frp FileRangePair, blockPairs BlockPairs, config DifferencerConfig) (
	outputBlockPairs BlockPairs)

func (BlockPairs) AssignMoveId ¶

func (s BlockPairs) AssignMoveId()

func (BlockPairs) CountLinesInPairs ¶

func (s BlockPairs) CountLinesInPairs() (numALines, numBLines int)

func (BlockPairs) IsInStrictOrder ¶

func (s BlockPairs) IsInStrictOrder() bool

func (BlockPairs) Len ¶

func (s BlockPairs) Len() int

func (BlockPairs) LimitIndexPairs ¶

func (s BlockPairs) LimitIndexPairs() (limitsInA, limitsInB IndexPair)

func (BlockPairs) MakeReverseIndex ¶

func (s BlockPairs) MakeReverseIndex() (pair2Index map[*BlockPair]int)

func (BlockPairs) Swap ¶

func (s BlockPairs) Swap(i, j int)

type Diff3Triple ¶

type Diff3Triple struct {
	TripleType Diff3TripleType

	// The lines in base that are the anchor for this block.
	BaseStart, BaseBeyond int

	// The BlockPairs that are the basis of this triple.
	B2YPair, B2TPair *BlockPair
}

type Diff3TripleType ¶

type Diff3TripleType int

const (
	UnchangedTriple Diff3TripleType = iota
	YoursChangedTriple
	TheirsChangedTriple
	BothSameTriple
	ConflictTriple
)

type Diff3Triples ¶

type Diff3Triples []*Diff3Triple

func PerformDiff3 ¶

func PerformDiff3(
	yours, base, theirs *File, b2yPairs, b2tPairs BlockPairs,
	cfg DifferencerConfig) (triples Diff3Triples, conflictsExist bool)

Given 3 files where both yours and theirs are different from base, compute a 3-way diff, analogous to diff3. The goal is to identify those blocks that are unchanged, are different in only one file, or are changed in both yours and theirs. No attempt is made to resolve the differences. Every BlockPair in the inputs will appear in at least one of the returned Diff3Triples, and possibly in more than one (based on the alignment of changes/moves/conflicts).

type DifferencerConfig ¶

// DifferencerConfig collects the options that control how the lines of two
// files are aligned and compared.
type DifferencerConfig struct {
	// Before computing the alignment between lines of two files, should
	// the common prefix and suffix be identified, reducing the number of
	// lines being aligned by the more general technique? (Improves the
	// alignment of inserted functions in C-like languages, as the trailing
	// curly braces get matched to the correct function more often.)
	MatchEnds bool

	// When matching the common prefix and suffix, after matching full lines,
	// should common normalized prefix and suffix lines be matched?
	MatchNormalizedEnds bool

	// When computing an alignment between files, should lines be normalized
	// before comparing (i.e. compare hashes of normalized lines, not of full
	// lines).
	AlignNormalizedLines bool

	// When computing an alignment between files, should unique/rare lines be
	// used for computing the alignment, or all lines?
	AlignRareLines bool

	// When deciding which lines are rare in a region being aligned, how many
	// times may a line appear (actually, how many times may its hash appear)
	// and still be considered rare?
	MaxRareLineOccurrencesInRange int

	// When selecting rare lines, discard those lines whose hash, after
	// normalization, appears more than this many times. If 0, not applied.
	MaxRareLineOccurrencesInFile int

	// When deciding which lines are rare in two regions being aligned,
	// must those lines appear the same number of times in each region?
	RequireSameRarity bool

	// When computing an alignment between files, should blocks of moved lines
	// be detected (i.e. detect re-ordering of paragraphs/functions)?
	DetectBlockMoves bool

	// When computing the longest common subsequence of two file ranges,
	// how similar are two normalized lines to be considered, where 0 is
	// completely dissimilar, and 1 is equal.
	LcsNormalizedSimilarity float64

	// When computing an LCS alignment between files, should longer equal lines
	// be weighted more heavily than short lines?
	LengthWeightedSimilarity bool

	// When doing alignment (initial or move/copy detection), omit from
	// consideration the lines that are probably common (e.g. "/*" or "}").
	OmitProbablyCommonLines bool
}

func (*DifferencerConfig) CreateFlags ¶

func (p *DifferencerConfig) CreateFlags(f *flag.FlagSet)

type File ¶

type File struct {
	Name  string    // Command line arg
	Body  []byte    // Body of the file
	Lines []LinePos // Locations and hashes of the file lines.

	FullRange  FileRange
	FileRanges map[IndexPair]FileRange
}

func ReadFile ¶

func ReadFile(name string) (*File, error)

func (*File) BriefDebugString ¶

func (p *File) BriefDebugString() string

func (*File) GetFullRange ¶

func (p *File) GetFullRange() FileRange

func (*File) GetHashOfLine ¶

func (p *File) GetHashOfLine(n int) uint32

func (*File) GetLineBytes ¶

func (p *File) GetLineBytes(n int) []byte

func (*File) GetNormalizedHashOfLine ¶

func (p *File) GetNormalizedHashOfLine(n int) uint32

func (*File) GetUnindentedLineBytes ¶

func (p *File) GetUnindentedLineBytes(n int) []byte

func (*File) LineCount ¶

func (p *File) LineCount() int

func (*File) MakeSubRange ¶

func (p *File) MakeSubRange(start, length int) FileRange

func (*File) Select ¶

func (p *File) Select(fn func(lp LinePos) bool) []LinePos

type FilePair ¶

type FilePair interface {
	AFile() *File
	BFile() *File

	FullFileRangePair() FileRangePair
	MakeSubRangePair(aIndex, aLength, bIndex, bLength int) FileRangePair

	MakeFileRangePair(aRange, bRange FileRange) FileRangePair

	ALength() int
	BLength() int

	BriefDebugString() string

	CompareFileLines(aIndex, bIndex int, maxRareOccurrences uint8) (equal, approx, rare bool)

	CanFillGapWithMatches(pair1, pair2 *BlockPair) (equal, approx bool)
}

func MakeFilePair ¶

func MakeFilePair(aFile, bFile *File) FilePair

type FileRange ¶

// FileRange describes a contiguous range of lines within a File.
type FileRange interface {
	File() *File

	// Is the FileRange empty (i.e. Length() == 0)?
	IsEmpty() bool

	// Returns the number of lines in the range.
	Length() int

	// Returns the index of the first line (zero for the whole file).
	FirstIndex() int

	// Returns index of line immediately after the range.
	BeyondIndex() int

	// Returns the LinePos for the line at offset within this range (where zero
	// is the first line in the range).
	LinePosAtOffset(offsetInRange int) LinePos

	// Returns the hash of the line (full or normalized) at the offset within this
	// range (where zero is the first line in the range).
	LineHashAtOffset(offsetInRange int, normalized bool) uint32

	// Returns those lines for which fn returns true.
	Select(fn func(lp LinePos) bool) []LinePos

	// Returns the positions (line numbers, zero-based) within
	// the underlying file at which the full line hashes appear.
	HashPositions() map[uint32][]int

	// Returns the positions (line numbers, zero-based) within
	// the underlying file at which the normalized line hashes appear.
	NormalizedHashPositions() map[uint32][]int

	// Returns a FileRange for the specified subset.
	MakeSubRange(startOffsetInRange, length int) FileRange

	// Going by the parameter and result names, these convert between an
	// offset within this range and a line index in the underlying file
	// (NOTE(review): confirm against the implementation).
	ToFileIndex(offsetInRange int) (indexInFile int)
	ToRangeOffset(indexInFile int) (offsetInRange int)
}

func CreateFileRange ¶

func CreateFileRange(file *File, start, length int) FileRange

type FileRangePair ¶

type FileRangePair interface {
	// The files.
	BaseFilePair() FilePair

	ARange() FileRange
	BRange() FileRange

	ALength() int
	BLength() int

	ToFileIndices(aOffset, bOffset int) (aIndex, bIndex int)
	ToRangeOffsets(aIndex, bIndex int) (aOffset, bOffset int)
	MakeSubRangePair(aOffset, aLength, bOffset, bLength int) FileRangePair

	BriefDebugString() string

	MeasureSharedEnds(onlyExactMatches bool, maxRareOccurrences uint8) SharedEndsData
	CompareLines(aOffset, bOffset int, maxRareOccurrences uint8) (equal, approx, rare bool)
	MakeSharedEndBlockPairs(rareEndsOnly, onlyExactMatches bool, maxRareOccurrences uint8) (prefixPairs, suffixPairs []*BlockPair)
	MakeMiddleRangePair(rareEndsOnly, onlyExactMatches bool, maxRareOccurrences uint8) FileRangePair
}

type Float32UnaryFunction ¶

type Float32UnaryFunction interface {
	Compute(input float64) float64
}

type FourIndices ¶

type FourIndices [4]int

type GeneralizedLogisticFunction ¶

type GeneralizedLogisticFunction struct {
	A, K float64 // Lower and upper asymptotes which the output will approach.
	M    float64 // Origin of input.
	B    float64 // Exponential growth rate, used in: e ^ (-B(t-M))
	Q    float64 // Scalar growth rate, and Y(M) anchor, used in denominator: 1 + Q e ^ (-B(t-M))
	V    float64 // Asymmetry Factor (skews the curve left or right, so that one asymptote or another is "longer")
}

func MakeSymmetricLogisticFunction ¶

func MakeSymmetricLogisticFunction(
	inputLo, inputHi, outputLo, outputHi float64) *GeneralizedLogisticFunction

func (*GeneralizedLogisticFunction) Compute ¶

func (p *GeneralizedLogisticFunction) Compute(input float64) float64

type GetIntervalFn ¶

type GetIntervalFn func(pair *BlockPair) (begin, beyond int)

func MakeGetAInterval ¶

func MakeGetAInterval(selector SelectBlockPairFn) GetIntervalFn

func MakeGetBInterval ¶

func MakeGetBInterval(selector SelectBlockPairFn) GetIntervalFn

type HashPositions ¶

type HashPositions map[uint32][]int

func (HashPositions) CopyMap ¶

func (m HashPositions) CopyMap() HashPositions

type IndexPair ¶

type IndexPair struct {
	Index1, Index2 int
}

func WeightedLCS ¶

func WeightedLCS(aLength, bLength int, getSimilarity func(aIndex, bIndex int) float32) (
	result []IndexPair, score float32)

Dynamic programming solution to produce a weighted LCS of two "strings" A and B of length aLength and bLength, respectively. getSimilarity must return a value between 0 and 1, inclusive. TODO: In some scenarios we prefer the shortest of several LCS candidates (i.e. they have the same number of symbols in the string, but one may have fewer other symbols in it, i.e. insertions/deletions, between its first match and its last than the other LCS candidates). It would be best if we could produce all candidates, along with their weights, and then score them. Basically, the shortcoming of the basic LCS approach is that it considers no measure other than the number of symbols (and here the weight of pairs of symbols); it has no other objective function. TODO: Lots of opportunity here for (well-known) optimizations.

func WeightedLCSOffsetsOfRangePair ¶

func WeightedLCSOffsetsOfRangePair(pair FileRangePair, sf SimilarityFactors) (lcsOffsetPairs []IndexPair, score float32)

type IndexPairByIndex1 ¶

type IndexPairByIndex1 []IndexPair

IndexPairByIndex1 implements sort.Interface for []IndexPair based on the Index1 field, then Index2.

func (IndexPairByIndex1) Len ¶

func (a IndexPairByIndex1) Len() int

func (IndexPairByIndex1) Less ¶

func (a IndexPairByIndex1) Less(i, j int) bool

func (IndexPairByIndex1) Swap ¶

func (a IndexPairByIndex1) Swap(i, j int)

type IndexPairByIndex2 ¶

type IndexPairByIndex2 []IndexPair

IndexPairByIndex2 implements sort.Interface for []IndexPair based on the Index2 field, then Index1.

func (IndexPairByIndex2) Len ¶

func (a IndexPairByIndex2) Len() int

func (IndexPairByIndex2) Less ¶

func (a IndexPairByIndex2) Less(i, j int) bool

func (IndexPairByIndex2) Swap ¶

func (a IndexPairByIndex2) Swap(i, j int)

type IntervalSet ¶

type IntervalSet interface {
	InsertInterval(begin, beyond int)
	ContainsSome(begin, beyond int) bool
	ContainsAll(begin, beyond int) bool
	Contains(position int) bool

	// If position is in an interval, returns that interval. If position is
	// between two intervals, returns those two; if beyond last interval, returns
	// the last interval; if before first interval, returns the first interval.
	IntervalsAround(position int) (result []IndexPair, isContained bool)
}

Simplistic interval set, which supports determining whether a point or interval is covered by the inserted intervals, but not which intervals cover it; it doesn't support removal. Effectively, therefore, an integer set.

func AIndexBlockPairsToIntervalSet ¶

func AIndexBlockPairsToIntervalSet(
	blockPairs BlockPairs, selector func(pair *BlockPair) bool) IntervalSet

func BIndexBlockPairsToIntervalSet ¶

func BIndexBlockPairsToIntervalSet(
	blockPairs BlockPairs, selector func(pair *BlockPair) bool) IntervalSet

func BlockPairsToIntervalSet ¶

func BlockPairsToIntervalSet(
	blockPairs BlockPairs, getInterval GetIntervalFn) IntervalSet

func MakeIntervalSet ¶

func MakeIntervalSet() IntervalSet

type LeadingWhitespaceStatistics ¶

type LeadingWhitespaceStatistics struct {
	NumFilesAdded             uint64
	NumValidLines             uint64
	NumInvalidLines           uint64
	NumLeadingTabs            map[uint8]int
	NumLeadingSpaces          map[uint8]int
	NumLeadingSpacesAfterTab  map[uint8]int
	FracLeadingTabs           map[uint8]float32
	FracLeadingSpaces         map[uint8]float32
	FracLeadingSpacesAfterTab map[uint8]float32
}

func MeasureLeadingWhitespace ¶

func MeasureLeadingWhitespace(files ...*File) (stats LeadingWhitespaceStatistics)

func (*LeadingWhitespaceStatistics) AddFile ¶

func (stats *LeadingWhitespaceStatistics) AddFile(file *File)

func (*LeadingWhitespaceStatistics) ComputeFractions ¶

func (stats *LeadingWhitespaceStatistics) ComputeFractions()

func (*LeadingWhitespaceStatistics) Totals ¶

func (stats *LeadingWhitespaceStatistics) Totals() (totalLeadingTabs, totalLeadingSpaces, totalLeadingSpacesAfterTab uint64)

type LineHasher ¶

type LineHasher interface {
	Compute(line []byte) (fullHash, normalizedHash uint32)
	Compute2(line, normalizedLine []byte) (fullHash, normalizedHash uint32)
}

func GetLineHasher ¶

func GetLineHasher() LineHasher

type LinePos ¶

type LinePos struct {
	Start, Length, Index int

	// Hash of the full line (including newline and/or carriage return at end).
	Hash uint32
	// Hash for a "normalized" version of the line, with the thought
	// that if there is a very large amount of difference between two files, it
	// may be due to relatively minor formatting changes (e.g. indentation or
	// justification) rather than other kinds of changes.
	// Possible normalizations:
	// * leading and trailing whitespace removed
	// * all interior whitespace runs collapsed to a single space
	//   or maybe completely removed
	// * convert all letters to a single case (very aggressive)
	NormalizedHash uint32

	// Count of the normalized hash in the file.
	// Maximum is 255, but that is OK for rare-ness checking.
	CountInFile uint8

	// Length of line after normalization.
	// Maximum is 255, but that is OK for deciding whether a line is short or not.
	NormalizedLength uint8

	// Number of leading tabs and then leading spaces of a line. Capped at 255,
	// and set to 255 if the leading whitespace is not well-formed.
	LeadingTabs, LeadingSpaces uint8

	// Is this a well known common line (e.g. "/*" or "#", or an empty line).
	ProbablyCommon bool // Based solely on normalized content, not other lines.
}

func FindRareLinesInRanges ¶

func FindRareLinesInRanges(aRange, bRange FileRange,
	normalizedMatch, sameCount, omitProbablyCommon bool,
	maxCountInRange, maxCountInFile int) (aRareLines, bRareLines []LinePos)

Given two files, and a range of lines in each file, find rare lines in those two ranges that are equally rare in both of the ranges. normalizedMatch == true to use the hashes of the normalized lines. sameCount == true to require the rare lines to appear the same number of times in each range. maxCountInRange is the maximum number of times a hash may appear in the range and still be considered rare; maxCountInRange==1 is the Patience Diff approach.

func (*LinePos) ValidLeadingWhiteSpace ¶

func (p *LinePos) ValidLeadingWhiteSpace() bool

type MatchCommonXFunc ¶

type MatchCommonXFunc func(aRange, bRange FileRange, normalized bool) (
	aRest, bRest FileRange, bp *BlockPair)

type MoveCandidate ¶

type MoveCandidate struct {
	// contains filtered or unexported fields
}

func MakeMoveCandidate ¶

func MakeMoveCandidate(aRange, bRange FileRange, pairs []*BlockPair) *MoveCandidate

Pairs must be sorted ascending in both AIndex and BIndex (i.e. no crossings).

func (*MoveCandidate) AExtent ¶

func (p *MoveCandidate) AExtent() int

func (*MoveCandidate) ExtendPairs ¶

func (p *MoveCandidate) ExtendPairs(state *diffState)

Find common prefixes and suffixes between the pairs. Should really refactor diffState etc. to allow recursive use of diffState.

type MoveCandidate2 ¶

type MoveCandidate2 struct {
	// contains filtered or unexported fields
}

func (*MoveCandidate2) AExtent ¶

func (p *MoveCandidate2) AExtent() int

func (*MoveCandidate2) BExtent ¶

func (p *MoveCandidate2) BExtent() int

func (*MoveCandidate2) SetScore ¶

func (p *MoveCandidate2) SetScore()

We want to order the candidates by various factors, not strictly ordered, so we compute a score for each candidate.

Higher LCS similarity score is better, but this can be due to lots of common lines, depending on the SimilarityFactors.
Similar A and B extents is good, but they don't have to be exact.
Proximity to the original location is preferred, but not essential.

type MoveCandidate2s ¶

type MoveCandidate2s []*MoveCandidate2

func (MoveCandidate2s) Len ¶

func (v MoveCandidate2s) Len() int

func (MoveCandidate2s) Less ¶

func (v MoveCandidate2s) Less(i, j int) bool

func (MoveCandidate2s) SetScores ¶

func (v MoveCandidate2s) SetScores()

func (MoveCandidate2s) Swap ¶

func (v MoveCandidate2s) Swap(i, j int)

type MoveCandidates ¶

type MoveCandidates []*MoveCandidate

func (MoveCandidates) Len ¶

func (v MoveCandidates) Len() int

func (MoveCandidates) Less ¶

func (v MoveCandidates) Less(i, j int) bool

Sort by ascending number of matched lines (higher is better), then by descending length of limits (lower is better), then by AIndex.

func (MoveCandidates) Swap ¶

func (v MoveCandidates) Swap(i, j int)

type SelectBlockPairFn ¶

type SelectBlockPairFn func(pair *BlockPair) bool

type SharedEndsData ¶

type SharedEndsData struct {
	SharedEndsKey
	// If the lines in the range are equal, or equal after normalization
	// (approximately equal), then one or both of these booleans are true,
	// and the prefix and suffix lengths are 0.
	RangesAreEqual, RangesAreApproximatelyEqual bool

	// The FileRangePair which was measured to produce this.
	Source FileRangePair

	NonRarePrefixLength, NonRareSuffixLength int
	RarePrefixLength, RareSuffixLength       int
}

func (*SharedEndsData) GetPrefixAndSuffixLengths ¶

func (p *SharedEndsData) GetPrefixAndSuffixLengths(rareEndsOnly bool) (prefixLength, suffixLength int)

func (*SharedEndsData) HasPrefixOrSuffix ¶

func (p *SharedEndsData) HasPrefixOrSuffix() bool

func (*SharedEndsData) HasRarePrefixOrSuffix ¶

func (p *SharedEndsData) HasRarePrefixOrSuffix() bool

func (*SharedEndsData) PrefixAndSuffixOverlap ¶

func (p *SharedEndsData) PrefixAndSuffixOverlap(rareEndsOnly bool) bool

type SharedEndsKey ¶

type SharedEndsKey struct {
	OnlyExactMatches   bool
	MaxRareOccurrences uint8
}

type SideBySideConfig ¶

type SideBySideConfig struct {
	// How many columns (mono-spaced characters) does the output 'device' have?
	DisplayColumns int

	DisplayLineNumbers bool
	WrapLongLines      bool // Wrap (vs. truncate) long lines.

	SpacesPerTab int // Defaults to 8

	// Number of lines of context (exact match lines) to output adjacent to
	// changes. If 0, then all exact match lines are output.
	ContextLines int

	ZeroBasedLineNumbers bool
}

Inputs to the display process, unrelated to the actual files.

type SimilarityFactors ¶

type SimilarityFactors struct {
	ExactRare          float32
	NormalizedRare     float32
	ExactNonRare       float32
	NormalizedNonRare  float32
	MaxRareOccurrences uint8
}

func (*SimilarityFactors) SimilarityOfRangeLines ¶

func (s *SimilarityFactors) SimilarityOfRangeLines(pair FileRangePair, aOffset, bOffset int) float32

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL