cfr

package module
v0.0.0-...-13f8a94 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 7, 2022 License: GPL-3.0 Imports: 10 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type ChanceNode

type ChanceNode interface {
	// GetChildProbability returns the probability of the ith child of this node.
	// It may only be called for nodes whose Type() is ChanceNodeType.
	GetChildProbability(i int) float64

	// SampleChild samples a single child from this chance node according to the
	// probability distribution over its children.
	//
	// Implementations may reuse sampling.SampleChanceNode to sample from the CDF
	// (by scanning over GetChildProbability), or implement their own more
	// efficient sampling.
	//
	// NOTE(review): p is presumably the probability with which the returned
	// child was sampled — confirm against the implementations.
	SampleChild() (child GameTreeNode, p float64)
}

ChanceNode is a node that has a pre-defined probability distribution over its children.

type DiscountParams

// DiscountParams selects the CFR weighting scheme. The zero value is valid
// and corresponds to traditional (MC)CFR without weighting.
type DiscountParams struct {
	UseRegretMatchingPlus bool    // Enables CFR+ (regret matching plus).
	LinearWeighting       bool    // Enables Linear CFR weighting.
	DiscountAlpha         float32 // Alpha parameter of Discounted CFR.
	DiscountBeta          float32 // Beta parameter of Discounted CFR.
	DiscountGamma         float32 // Gamma parameter of Discounted CFR.
	// NOTE(review): presumably alpha/beta/gamma drive the positive-regret,
	// negative-regret and strategy-sum factors returned by GetDiscountFactors,
	// in that order — confirm against the implementation.
}

DiscountParams modify how regret is accumulated. An empty DiscountParams is valid and corresponds to traditional (MC)CFR without weighting.

func (DiscountParams) GetDiscountFactors

func (p DiscountParams) GetDiscountFactors(iter int) (positive, negative, sum float32)

GetDiscountFactors returns the discount factors for the given iteration, as configured by the parameters for the various CFR weighting schemes: CFR+, linear CFR, etc.

type GameTreeNode

type GameTreeNode interface {
	// GetNode returns the node identified by the given history string.
	GetNode(history string) GameTreeNode
	// Type returns the type of this game node (see NodeType).
	Type() NodeType
	// Close releases resources held by this node (including any children).
	Close()

	// Every game tree node exposes the tree-navigation, chance-node and
	// player-node method sets; which of them may legally be called depends
	// on the value returned by Type().
	TreeNode
	ChanceNode
	PlayerNode
}

GameTreeNode is the interface for a node in an extensive-form game tree.

type InfoSet

type InfoSet interface {
	// Key is an identifier used to uniquely look up this InfoSet
	// when accumulating probabilities in tabular CFR.
	//
	// It may be an arbitrary string of bytes and does not need to be
	// human-readable. For example, it could be a simplified abstraction
	// or hash of the full game history.
	Key() []byte
	// An InfoSet must also be convertible to and from a binary form.
	encoding.BinaryMarshaler
	encoding.BinaryUnmarshaler
}

InfoSet is the observable game history from the point of view of one player.

type MCCFR

// MCCFR has no exported fields; create one with NewMCCFR and drive it by
// calling Run on a GameTreeNode.
type MCCFR struct {
	// contains filtered or unexported fields
}

func NewMCCFR

func NewMCCFR(strategyProfile StrategyProfile, sampler Sampler) *MCCFR

func (*MCCFR) Run

func (c *MCCFR) Run(node GameTreeNode) float32

type NodePolicy

type NodePolicy interface {
	// AddRegret provides new observed instantaneous regrets
	// to add to the total accumulated regret with the given weight.
	//
	// NOTE(review): samplingQ is presumably the per-action sampling
	// probability vector (as produced by Sampler.Sample) — confirm.
	AddRegret(w float32, samplingQ, instantaneousRegrets []float32)
	// GetStrategy gets the current vector of probabilities with which the ith
	// available action should be played.
	GetStrategy() []float32
	// SetStrategy presumably overwrites the current strategy vector with
	// strat — TODO confirm (e.g. whether strat is copied or retained).
	SetStrategy(strat []float32)

	// NextStrategy advances this policy to its next strategy.
	//
	// NOTE(review): the three arguments look like the (positive, negative, sum)
	// factors returned by DiscountParams.GetDiscountFactors — confirm.
	NextStrategy(discountPositiveRegret, discountNegativeRegret, discountstrategySum float32)

	// GetBaseline gets the current vector of action-dependent baseline values,
	// used in VR-MCCFR.
	GetBaseline() []float32
	// UpdateBaseline updates the current vector of baseline values.
	UpdateBaseline(w float32, action int, value float32)

	// AddStrategyWeight adds the current strategy with weight w to the average.
	AddStrategyWeight(w float32)
	// GetAverageStrategy returns the average strategy over all iterations.
	GetAverageStrategy() []float32

	// IsEmpty returns true if the NodePolicy is new and has no accumulated regret.
	IsEmpty() bool
}

NodePolicy maintains the action policy for a single Player node.

type NodeType

// NodeType identifies the kind of a node in an extensive-form game tree.
type NodeType uint8

NodeType is the type of node in an extensive-form game tree.

// The possible values of NodeType, numbered from zero via iota.
const (
	ChanceNodeType NodeType = iota // 0
	TerminalNodeType               // 1
	PlayerNodeType                 // 2
)

type PlayerNode

type PlayerNode interface {
	// Player returns this current node's acting player.
	// It may only be called for non-chance nodes.
	//
	// NOTE(review): this contract was originally phrased as
	// "IsChance() == false", but no IsChance method is declared on the
	// interfaces shown here; presumably Type() == PlayerNodeType is meant —
	// confirm.
	Player() int
	// InfoSet returns the information set for this node for the given player.
	InfoSet(player int) InfoSet
	// InfoSetKey returns the equivalent of InfoSet(player).Key(),
	// but can be used to avoid allocations incurred by the InfoSet interface.
	InfoSetKey(player int) []byte
	// Utility returns this node's utility for the given player.
	// It must only be called for nodes whose Type() is TerminalNodeType.
	Utility(player int) float64
}

PlayerNode is a node in which one of the players acts.

type PolicyTable

type PolicyTable struct {

	// Map of InfoSet Key -> the policy for that infoset.
	// NOTE(review): InfoSet.Key() returns []byte while this map is keyed by
	// string; presumably the key is string(Key()) — confirm.
	PoliciesByKey map[string]*policy.Policy
	// contains filtered or unexported fields
}

PolicyTable implements traditional (tabular) CFR by storing accumulated regrets and strategy sums for each InfoSet, which is looked up by its Key().

func NewPolicyTable

func NewPolicyTable(params DiscountParams) *PolicyTable

NewPolicyTable creates a new PolicyTable with the given DiscountParams.

func (*PolicyTable) Close

func (pt *PolicyTable) Close() error

func (*PolicyTable) GetPolicy

func (pt *PolicyTable) GetPolicy(node GameTreeNode) NodePolicy

func (*PolicyTable) GetPolicyByKey

func (pt *PolicyTable) GetPolicyByKey(key string) (NodePolicy, bool)

func (*PolicyTable) GetPolicyTable

func (pt *PolicyTable) GetPolicyTable() map[string]*policy.Policy

func (*PolicyTable) Iter

func (pt *PolicyTable) Iter() int

func (*PolicyTable) Iterate

func (pt *PolicyTable) Iterate(iterator func(key string, strat []float32))

func (*PolicyTable) MarshalBinary

func (pt *PolicyTable) MarshalBinary() ([]byte, error)

MarshalBinary implements encoding.BinaryMarshaler.

func (*PolicyTable) SetIter

func (pt *PolicyTable) SetIter(val int)

func (*PolicyTable) SetStrategy

func (pt *PolicyTable) SetStrategy(key string, strat []float32)

func (*PolicyTable) UnmarshalBinary

func (pt *PolicyTable) UnmarshalBinary(buf []byte) error

UnmarshalBinary implements encoding.BinaryUnmarshaler.

func (*PolicyTable) Update

func (pt *PolicyTable) Update()

Update performs regret matching for all nodes within this strategy profile that have been touched since the last call to Update().

type Sampler

type Sampler interface {
	// Sample returns a vector of sampling probabilities for a
	// subset of the N children of this NodePolicy. Children with
	// p > 0 will be traversed. The returned slice may be reused
	// between calls to Sample; a caller must therefore copy the
	// values before the next call to Sample.
	Sample(GameTreeNode, NodePolicy) []float32
}

Sampler selects a subset of child nodes to traverse.

type StrategyProfile

type StrategyProfile interface {
	// GetPolicy returns the NodePolicy for the given node.
	GetPolicy(node GameTreeNode) NodePolicy
	// GetPolicyByKey looks up a NodePolicy by key.
	// NOTE(review): the key is presumably an InfoSet key and the bool
	// presumably reports whether a policy was found — confirm.
	GetPolicyByKey(key string) (NodePolicy, bool)
	// SetStrategy presumably sets the strategy of the policy stored under
	// key to strat — TODO confirm behavior for unknown keys.
	SetStrategy(key string, strat []float32)

	// Update calculates the next strategy profile for all visited nodes.
	Update()
	// Iter returns the current iteration (number of times Update has been called).
	Iter() int

	// A StrategyProfile can be converted to and from a binary form, and
	// must be closed (io.Closer) when no longer needed.
	encoding.BinaryMarshaler
	encoding.BinaryUnmarshaler
	io.Closer
}

StrategyProfile maintains a collection of regret-matching policies for each player node in the game tree.

The policytable and deepcfr packages provide implementations of StrategyProfile.

type TreeNode

type TreeNode interface {
	// NumChildren returns the number of direct children of this node.
	NumChildren() int
	// GetChild returns the ith child of this node.
	GetChild(i int) GameTreeNode
	// Parent returns the parent of this node.
	Parent() GameTreeNode
}

TreeNode represents a node in a directed rooted tree.

Directories

Path Synopsis
internal
f32

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL