cfr

package module
v0.0.0-...-2c859fb Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 8, 2023 License: GPL-3.0 Imports: 10 Imported by: 7

README

go-cfr

go-cfr is a package that implements several forms of Counterfactual Regret Minimization in Go. CFR can be used to solve for an approximate Nash Equilibrium in an imperfect information extensive-form game.

This project is a research library used to study different forms of CFR. For a similar alternative in C++/Python/Swift, see OpenSpiel.

No Maintenance Intended GoDoc Build Status Coverage Status Go Report Card

Usage

To use CFR, you must implement the extensive-form game tree for your game, by implementing the GameTreeNode interface.

An implementation of Kuhn Poker is included as an example.

package main

import (
	"fmt"

	"github.com/timpalpant/go-cfr"
	"github.com/timpalpant/go-cfr/kuhn"
	"github.com/timpalpant/go-cfr/tree"
)

func main() {
	poker := kuhn.NewGame()
	policy := cfr.NewPolicyTable(cfr.DiscountParams{})
	vanillaCFR := cfr.New(policy)
	nIter := 10000
	expectedValue := float32(0.0)
	for i := 1; i <= nIter; i++ {
		expectedValue += vanillaCFR.Run(poker)
	}

	expectedValue /= float32(nIter)
	fmt.Printf("Expected value is: %v\n", expectedValue)

	seen := make(map[string]struct{})
	tree.Visit(poker, func(node cfr.GameTreeNode) {
		if node.Type() != cfr.PlayerNodeType {
			return
		}

		key := node.InfoSet(node.Player()).Key()
		if _, ok := seen[string(key)]; ok {
			return
		}

		actionProbs := policy.GetPolicy(node).GetAverageStrategy()
		if actionProbs != nil {
			fmt.Printf("[player %d] %6s: check=%.2f bet=%.2f\n",
				node.Player(), key, actionProbs[0], actionProbs[1])
		}

		seen[string(key)] = struct{}{}
	})
}

Variants implemented

License

go-cfr is released under the GNU Lesser General Public License, Version 3.0.

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type CFR

type CFR struct {
	// contains filtered or unexported fields
}

func New

func New(strategyProfile StrategyProfile) *CFR

func (*CFR) Run

func (c *CFR) Run(node GameTreeNode) float32

type ChanceNode

type ChanceNode interface {
	// Get the probability of the ith child of this node.
	// May only be called for nodes with Type == Chance.
	GetChildProbability(i int) float64

	// Sample a single child from this Chance node according to the probability
	// distribution over children.
	//
	// Implementations may reuse sampling.SampleChanceNode to sample from the CDF,
	// (by scanning over GetChildProbability) or implement their own more efficient
	// sampling.
	SampleChild() (child GameTreeNode, p float64)
}

ChanceNode is a node that has a pre-defined probability distribution over its children.

type ChanceSamplingCFR

type ChanceSamplingCFR struct {
	// contains filtered or unexported fields
}

func NewChanceSampling

func NewChanceSampling(strategyProfile StrategyProfile) *ChanceSamplingCFR

func (*ChanceSamplingCFR) Run

func (c *ChanceSamplingCFR) Run(node GameTreeNode) float32

type DiscountParams

type DiscountParams struct {
	UseRegretMatchingPlus bool    // CFR+
	LinearWeighting       bool    // Linear CFR
	DiscountAlpha         float32 // Discounted CFR
	DiscountBeta          float32 // Discounted CFR
	DiscountGamma         float32 // Discounted CFR
}

DiscountParams modify how regret is accumulated. An empty DiscountParams is valid and corresponds to traditional (MC)CFR without weighting.

func (DiscountParams) GetDiscountFactors

func (p DiscountParams) GetDiscountFactors(iter int) (positive, negative, sum float32)

Gets the discount factors as configured by the parameters for the various CFR weighting schemes: CFR+, linear CFR, etc.

type GameTreeNode

type GameTreeNode interface {
	// NodeType returns the type of game node.
	Type() NodeType
	// Release resources held by this node (including any children).
	Close()

	TreeNode
	ChanceNode
	PlayerNode
}

GameTreeNode is the interface for a node in an extensive-form game tree.

type GeneralizedSamplingCFR

type GeneralizedSamplingCFR struct {
	// contains filtered or unexported fields
}

func NewGeneralizedSampling

func NewGeneralizedSampling(strategyProfile StrategyProfile, sampler Sampler) *GeneralizedSamplingCFR

func (*GeneralizedSamplingCFR) Run

type InfoSet

type InfoSet interface {
	// Key is an identifier used to uniquely look up this InfoSet
	// when accumulating probabilities in tabular CFR.
	//
	// It may be an arbitrary string of bytes and does not need to be
	// human-readable. For example, it could be a simplified abstraction
	// or hash of the full game history.
	Key() []byte
	encoding.BinaryMarshaler
	encoding.BinaryUnmarshaler
}

InfoSet is the observable game history from the point of view of one player.

type MCCFR

type MCCFR struct {
	// contains filtered or unexported fields
}

func NewMCCFR

func NewMCCFR(strategyProfile StrategyProfile, sampler Sampler) *MCCFR

func (*MCCFR) Run

func (c *MCCFR) Run(node GameTreeNode) float32

type NodePolicy

type NodePolicy interface {
	// AddRegret provides new observed instantaneous regrets
	// to add to the total accumulated regret with the given weight.
	AddRegret(w float32, samplingQ, instantaneousRegrets []float32)
	// GetStrategy gets the current vector of probabilities with which the ith
	// available action should be played.
	GetStrategy() []float32

	// GetBaseline gets the current vector of action-dependend baseline values,
	// used in VR-MCCFR.
	GetBaseline() []float32
	// UpdateBaseline updates the current vector of baseline values.
	UpdateBaseline(w float32, action int, value float32)

	// AddStrategyWeight adds the current strategy with weight w to the average.
	AddStrategyWeight(w float32)
	// GetAverageStrategy returns the average strategy over all iterations.
	GetAverageStrategy() []float32

	// IsEmpty returns true if the NodePolicy is new and has no accumulated regret.
	IsEmpty() bool
}

NodePolicy maintains the action policy for a single Player node.

type NodeType

type NodeType int

NodeType is the type of node in an extensive-form game tree.

const (
	ChanceNodeType NodeType = iota
	TerminalNodeType
	PlayerNodeType
)

type OnlineOutcomeSamplingCFR

type OnlineOutcomeSamplingCFR struct {
	// contains filtered or unexported fields
}

func NewOnlineOutcomeSamplingCFR

func NewOnlineOutcomeSamplingCFR(strategyProfile StrategyProfile, sampler Sampler) *OnlineOutcomeSamplingCFR

func (*OnlineOutcomeSamplingCFR) Run

type PlayerNode

type PlayerNode interface {
	// Player returns this current node's acting player.
	// It may only be called for nodes with IsChance() == false.
	Player() int
	// InfoSet returns the information set for this node for the given player.
	InfoSet(player int) InfoSet
	// InfoSetKey returns the equivalent of InfoSet(player).Key(),
	// but can be used to avoid allocations incurred by the InfoSet interface.
	InfoSetKey(player int) []byte
	// Utility returns this node's utility for the given player.
	// It must only be called for nodes with type == Terminal.
	Utility(player int) float64
}

PlayerNode is a node in which one of the player's acts.

type PolicyTable

type PolicyTable struct {
	// contains filtered or unexported fields
}

PolicyTable implements traditional (tabular) CFR by storing accumulated regrets and strategy sums for each InfoSet, which is looked up by its Key().

func NewPolicyTable

func NewPolicyTable(params DiscountParams) *PolicyTable

NewPolicyTable creates a new PolicyTable with the given DiscountParams.

func (*PolicyTable) Close

func (pt *PolicyTable) Close() error

func (*PolicyTable) GetPolicy

func (pt *PolicyTable) GetPolicy(node GameTreeNode) NodePolicy

func (*PolicyTable) Iter

func (pt *PolicyTable) Iter() int

func (*PolicyTable) MarshalBinary

func (pt *PolicyTable) MarshalBinary() ([]byte, error)

MarshalBinary implements encoding.BinaryMarshaler.

func (*PolicyTable) UnmarshalBinary

func (pt *PolicyTable) UnmarshalBinary(buf []byte) error

UnmarshalBinary implements encoding.BinaryUnmarshaler.

func (*PolicyTable) Update

func (pt *PolicyTable) Update()

Update performs regret matching for all nodes within this strategy profile that have been touched since the lapt call to Update().

type Sampler

type Sampler interface {
	// Sample returns a vector of sampling probabilities for a
	// subset of the N children of this NodePolicy. Children with
	// p > 0 will be traversed. The returned slice may be reused
	// between calls to sample; a caller must therefore copy the
	// values before the next call to Sample.
	Sample(GameTreeNode, NodePolicy) []float32
}

Sampler selects a subset of child nodes to traverse.

type StrategyProfile

type StrategyProfile interface {
	// GetPolicy returns the NodePolicy for the given node.
	GetPolicy(node GameTreeNode) NodePolicy

	// Calculate the next strategy profile for all visited nodes.
	Update()
	// Get the current iteration (number of times update has been called).
	Iter() int

	encoding.BinaryMarshaler
	encoding.BinaryUnmarshaler
	io.Closer
}

StrategyProfile maintains a collection of regret-matching policies for each player node in the game tree.

The policytable and deepcfr packages provide implementations of StrategyProfile.

type TreeNode

type TreeNode interface {
	// The number of direct children of this node.
	NumChildren() int
	// Get the ith child of this node.
	GetChild(i int) GameTreeNode
	// Get the parent of this node.
	Parent() GameTreeNode
}

Tree node represents a node in a directed rooted tree.

type VRMCCFR

type VRMCCFR struct {
	// contains filtered or unexported fields
}

func NewVRMCCFR

func NewVRMCCFR(strategyProfile StrategyProfile, traversingSampler, notTraversingSampler Sampler) *VRMCCFR

func (*VRMCCFR) Run

func (c *VRMCCFR) Run(node GameTreeNode) float32

Directories

Path Synopsis
internal
f32
Package kuhn implements an extensive-form game tree for Kuhn Poker, adapted from: https://justinsermeno.com/posts/cfr/.
Package kuhn implements an extensive-form game tree for Kuhn Poker, adapted from: https://justinsermeno.com/posts/cfr/.
Package rdbstore implements CFR storage components that keep data in a RocksDB database, rather than in memory datastructures.
Package rdbstore implements CFR storage components that keep data in a RocksDB database, rather than in memory datastructures.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL