cu

package
v0.9.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 20, 2018 License: GPL-3.0 Imports: 3 Imported by: 0

Documentation

Index

Constants

View Source
// Flags for SetDeviceFlags. Each constant aliases the C.cudaDevice* value
// named on its right-hand side.
const (
	// DeviceAuto is the default: decides to yield or not based on active CUDA threads and processors.
	DeviceAuto = C.cudaDeviceScheduleAuto
	// DeviceSpin actively spins while waiting for the device.
	DeviceSpin = C.cudaDeviceScheduleSpin
	// DeviceYield yields when waiting.
	DeviceYield = C.cudaDeviceScheduleYield
	// DeviceScheduleBlockingSync blocks the CPU on sync.
	DeviceScheduleBlockingSync = C.cudaDeviceScheduleBlockingSync
	// DeviceBlockingSync blocks the CPU on sync.
	//
	// Deprecated: deprecated since CUDA 4.0.
	// NOTE(review): DeviceScheduleBlockingSync looks like the replacement — confirm against the CUDA documentation.
	DeviceBlockingSync = C.cudaDeviceBlockingSync
	// DeviceMapHost is for use with pinned host memory.
	DeviceMapHost = C.cudaDeviceMapHost
	// DeviceLmemResizeToMax: do not reduce local memory, to try and prevent thrashing.
	DeviceLmemResizeToMax = C.cudaDeviceLmemResizeToMax
)

Flags for SetDeviceFlags

View Source
// Flags for memory copy types. Each constant aliases the C.cudaMemcpy* value
// named on its right-hand side.
const (
	// HtoH copies from host to host.
	HtoH = C.cudaMemcpyHostToHost
	// HtoD copies from host to device.
	HtoD = C.cudaMemcpyHostToDevice
	// DtoH copies from device to host.
	DtoH = C.cudaMemcpyDeviceToHost
	// DtoD copies from device to device.
	DtoD = C.cudaMemcpyDeviceToDevice
	// Virt is the default, for a unified virtual address space.
	Virt = C.cudaMemcpyDefault
)

Flags for memory copy types

Variables

This section is empty.

Functions

func DeviceComputeCapability

func DeviceComputeCapability(device Device) (major, minor int)

Returns the compute capability of the device.

func DeviceGetAttribute

func DeviceGetAttribute(attrib DeviceAttribute, dev Device) int

Gets the value of a device attribute.

func DeviceGetCount

func DeviceGetCount() int

Returns the number of devices with compute capability greater than or equal to 1.0 that are available for execution.

func DeviceGetName

func DeviceGetName(dev Device) string

Gets the name of the device.

func DeviceReset

func DeviceReset()

Reset the state of the current device.

func DeviceTotalMem

func DeviceTotalMem(device Device) int64

Returns the total amount of memory available on the device in bytes.

func FreeHost

func FreeHost(ptr unsafe.Pointer)

func Init

func Init(flags int)

Initialize the CUDA driver API. Currently, flags must be 0. If Init() has not been called, any function from the driver API will panic with ERROR_NOT_INITIALIZED.

func MallocHost

func MallocHost(bytes int64) unsafe.Pointer

func MemCpy

func MemCpy(dst, src unsafe.Pointer, bytes int64, flags uint)

Copies the given number of bytes from src to dst in the direction specified by flags.

func SetDevice

func SetDevice(device Device)

Set the device as current.

func SetDeviceFlags

func SetDeviceFlags(flags uint)

Set CUDA device flags.

func Version

func Version() int

Returns the CUDA driver version.

Types

type DevProp

// DevProp holds device properties, as returned by DeviceGetProperties and
// Device.Properties.
//
// NOTE(review): the field set looks like it mirrors the CUDA driver API's
// CUdevprop struct; units and exact meanings are not visible here — confirm
// against the CUDA documentation before relying on them.
type DevProp struct {
	MaxThreadsPerBlock  int
	MaxThreadsDim       [3]int
	MaxGridSize         [3]int
	SharedMemPerBlock   int
	TotalConstantMemory int
	SIMDWidth           int
	MemPitch            int
	RegsPerBlock        int
	ClockRate           int
	TextureAlign        int
}

Device properties

func DeviceGetProperties

func DeviceGetProperties(dev Device) (prop DevProp)

Returns the device's properties.

type Device

type Device int

CUDA Device number.

func DeviceGet

func DeviceGet(ordinal int) Device

Returns a device handle for the given ordinal, which must be in the range [0, DeviceGetCount()-1].

func (Device) Attribute

func (dev Device) Attribute(attrib DeviceAttribute) int

Gets the value of a device attribute.

func (Device) ComputeCapability

func (device Device) ComputeCapability() (major, minor int)

Returns the compute capability of the device.

func (Device) Name

func (dev Device) Name() string

Gets the name of the device.

func (Device) Properties

func (dev Device) Properties() DevProp

Returns the device's properties.

func (Device) TotalMem

func (device Device) TotalMem() int64

Returns the total amount of memory available on the device in bytes.

type DeviceAttribute

type DeviceAttribute int
// DeviceAttribute values accepted by DeviceGetAttribute and Device.Attribute.
// Each constant aliases the C.CU_DEVICE_ATTRIBUTE_* value of the same name.
const (
	MAX_THREADS_PER_BLOCK            DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK            // Maximum number of threads per block
	MAX_BLOCK_DIM_X                  DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X                  // Maximum block dimension X
	MAX_BLOCK_DIM_Y                  DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y                  // Maximum block dimension Y
	MAX_BLOCK_DIM_Z                  DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z                  // Maximum block dimension Z
	MAX_GRID_DIM_X                   DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X                   // Maximum grid dimension X
	MAX_GRID_DIM_Y                   DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y                   // Maximum grid dimension Y
	MAX_GRID_DIM_Z                   DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z                   // Maximum grid dimension Z
	MAX_SHARED_MEMORY_PER_BLOCK      DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK      // Maximum shared memory available per block in bytes
	TOTAL_CONSTANT_MEMORY            DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY            // Memory available on device for __constant__ variables in a CUDA C kernel in bytes
	WARP_SIZE                        DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_WARP_SIZE                        // Warp size in threads
	MAX_PITCH                        DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAX_PITCH                        // Maximum pitch in bytes allowed by memory copies
	MAX_REGISTERS_PER_BLOCK          DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK          // Maximum number of 32-bit registers available per block
	CLOCK_RATE                       DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_CLOCK_RATE                       // Peak clock frequency in kilohertz
	TEXTURE_ALIGNMENT                DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT                // Alignment requirement for textures
	MULTIPROCESSOR_COUNT             DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT             // Number of multiprocessors on device
	KERNEL_EXEC_TIMEOUT              DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT              // Specifies whether there is a run time limit on kernels
	INTEGRATED                       DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_INTEGRATED                       // Device is integrated with host memory
	CAN_MAP_HOST_MEMORY              DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY              // Device can map host memory into CUDA address space
	COMPUTE_MODE                     DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE                     // Compute mode (See ::CUcomputemode for details)
	MAXIMUM_TEXTURE1D_WIDTH          DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH          // Maximum 1D texture width
	MAXIMUM_TEXTURE2D_WIDTH          DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH          // Maximum 2D texture width
	MAXIMUM_TEXTURE2D_HEIGHT         DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT         // Maximum 2D texture height
	MAXIMUM_TEXTURE3D_WIDTH          DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH          // Maximum 3D texture width
	MAXIMUM_TEXTURE3D_HEIGHT         DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT         // Maximum 3D texture height
	MAXIMUM_TEXTURE3D_DEPTH          DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH          // Maximum 3D texture depth
	MAXIMUM_TEXTURE2D_LAYERED_WIDTH  DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH  // Maximum 2D layered texture width
	MAXIMUM_TEXTURE2D_LAYERED_HEIGHT DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT // Maximum 2D layered texture height
	MAXIMUM_TEXTURE2D_LAYERED_LAYERS DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS // Maximum layers in a 2D layered texture
	SURFACE_ALIGNMENT                DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT                // Alignment requirement for surfaces
	CONCURRENT_KERNELS               DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS               // Device can possibly execute multiple kernels concurrently
	ECC_ENABLED                      DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_ECC_ENABLED                      // Device has ECC support enabled
	PCI_BUS_ID                       DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID                       // PCI bus ID of the device
	PCI_DEVICE_ID                    DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID                    // PCI device ID of the device
	TCC_DRIVER                       DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_TCC_DRIVER                       // Device is using TCC driver model
	MEMORY_CLOCK_RATE                DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE                // Peak memory clock frequency in kilohertz
	GLOBAL_MEMORY_BUS_WIDTH          DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH          // Global memory bus width in bits
	L2_CACHE_SIZE                    DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE                    // Size of L2 cache in bytes
	MAX_THREADS_PER_MULTIPROCESSOR   DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR   // Maximum resident threads per multiprocessor
	ASYNC_ENGINE_COUNT               DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT               // Number of asynchronous engines
	UNIFIED_ADDRESSING               DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING               // Device shares a unified address space with the host
	MAXIMUM_TEXTURE1D_LAYERED_WIDTH  DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH  // Maximum 1D layered texture width
	MAXIMUM_TEXTURE1D_LAYERED_LAYERS DeviceAttribute = C.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS // Maximum layers in a 1D layered texture
)

type DevicePtr

type DevicePtr uintptr

func Malloc

func Malloc(bytes int64) DevicePtr

type Result

type Result int

CUDA error status. CUDA error statuses are not returned by functions but checked and passed to panic() when not successful. If desired, they can be caught by recover().

// Result values. Most constants alias the C.CUDA_* status code of the same
// name; a few near the end are written as numeric literals (see the note there).
const (
	SUCCESS                              Result = C.CUDA_SUCCESS
	ERROR_INVALID_VALUE                  Result = C.CUDA_ERROR_INVALID_VALUE
	ERROR_OUT_OF_MEMORY                  Result = C.CUDA_ERROR_OUT_OF_MEMORY
	ERROR_NOT_INITIALIZED                Result = C.CUDA_ERROR_NOT_INITIALIZED
	ERROR_DEINITIALIZED                  Result = C.CUDA_ERROR_DEINITIALIZED
	ERROR_PROFILER_DISABLED              Result = C.CUDA_ERROR_PROFILER_DISABLED
	ERROR_PROFILER_NOT_INITIALIZED       Result = C.CUDA_ERROR_PROFILER_NOT_INITIALIZED
	ERROR_PROFILER_ALREADY_STARTED       Result = C.CUDA_ERROR_PROFILER_ALREADY_STARTED
	ERROR_PROFILER_ALREADY_STOPPED       Result = C.CUDA_ERROR_PROFILER_ALREADY_STOPPED
	ERROR_NO_DEVICE                      Result = C.CUDA_ERROR_NO_DEVICE
	ERROR_INVALID_DEVICE                 Result = C.CUDA_ERROR_INVALID_DEVICE
	ERROR_INVALID_IMAGE                  Result = C.CUDA_ERROR_INVALID_IMAGE
	ERROR_INVALID_CONTEXT                Result = C.CUDA_ERROR_INVALID_CONTEXT
	ERROR_CONTEXT_ALREADY_CURRENT        Result = C.CUDA_ERROR_CONTEXT_ALREADY_CURRENT
	ERROR_MAP_FAILED                     Result = C.CUDA_ERROR_MAP_FAILED
	ERROR_UNMAP_FAILED                   Result = C.CUDA_ERROR_UNMAP_FAILED
	ERROR_ARRAY_IS_MAPPED                Result = C.CUDA_ERROR_ARRAY_IS_MAPPED
	ERROR_ALREADY_MAPPED                 Result = C.CUDA_ERROR_ALREADY_MAPPED
	ERROR_NO_BINARY_FOR_GPU              Result = C.CUDA_ERROR_NO_BINARY_FOR_GPU
	ERROR_ALREADY_ACQUIRED               Result = C.CUDA_ERROR_ALREADY_ACQUIRED
	ERROR_NOT_MAPPED                     Result = C.CUDA_ERROR_NOT_MAPPED
	ERROR_NOT_MAPPED_AS_ARRAY            Result = C.CUDA_ERROR_NOT_MAPPED_AS_ARRAY
	ERROR_NOT_MAPPED_AS_POINTER          Result = C.CUDA_ERROR_NOT_MAPPED_AS_POINTER
	ERROR_ECC_UNCORRECTABLE              Result = C.CUDA_ERROR_ECC_UNCORRECTABLE
	ERROR_UNSUPPORTED_LIMIT              Result = C.CUDA_ERROR_UNSUPPORTED_LIMIT
	ERROR_CONTEXT_ALREADY_IN_USE         Result = C.CUDA_ERROR_CONTEXT_ALREADY_IN_USE
	ERROR_INVALID_SOURCE                 Result = C.CUDA_ERROR_INVALID_SOURCE
	ERROR_FILE_NOT_FOUND                 Result = C.CUDA_ERROR_FILE_NOT_FOUND
	ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND Result = C.CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND
	ERROR_SHARED_OBJECT_INIT_FAILED      Result = C.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
	ERROR_OPERATING_SYSTEM               Result = C.CUDA_ERROR_OPERATING_SYSTEM
	ERROR_INVALID_HANDLE                 Result = C.CUDA_ERROR_INVALID_HANDLE
	ERROR_NOT_FOUND                      Result = C.CUDA_ERROR_NOT_FOUND
	ERROR_NOT_READY                      Result = C.CUDA_ERROR_NOT_READY
	ERROR_LAUNCH_FAILED                  Result = C.CUDA_ERROR_LAUNCH_FAILED
	ERROR_LAUNCH_OUT_OF_RESOURCES        Result = C.CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES
	ERROR_LAUNCH_TIMEOUT                 Result = C.CUDA_ERROR_LAUNCH_TIMEOUT
	ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  Result = C.CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING
	ERROR_PEER_ACCESS_ALREADY_ENABLED    Result = C.CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED
	ERROR_PEER_ACCESS_NOT_ENABLED        Result = C.CUDA_ERROR_PEER_ACCESS_NOT_ENABLED
	ERROR_PRIMARY_CONTEXT_ACTIVE         Result = C.CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE
	ERROR_CONTEXT_IS_DESTROYED           Result = C.CUDA_ERROR_CONTEXT_IS_DESTROYED
	ERROR_ASSERT                         Result = C.CUDA_ERROR_ASSERT
	ERROR_TOO_MANY_PEERS                 Result = C.CUDA_ERROR_TOO_MANY_PEERS
	ERROR_HOST_MEMORY_ALREADY_REGISTERED Result = C.CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED
	ERROR_HOST_MEMORY_NOT_REGISTERED     Result = C.CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED
	// The next seven codes are numeric literals, with the corresponding C
	// name kept in a trailing comment rather than referenced.
	// NOTE(review): presumably so the package builds against CUDA headers
	// that do not define these names — confirm, and verify the values
	// against the CUresult enum.
	ERROR_HARDWARE_STACK_ERROR           Result = 714 //C.CUDA_ERROR_HARDWARE_STACK_ERROR
	ERROR_ILLEGAL_INSTRUCTION            Result = 715 //C.CUDA_ERROR_ILLEGAL_INSTRUCTION
	ERROR_MISALIGNED_ADDRESS             Result = 716 //C.CUDA_ERROR_MISALIGNED_ADDRESS
	ERROR_INVALID_ADDRESS_SPACE          Result = 717 //C.CUDA_ERROR_INVALID_ADDRESS_SPACE
	ERROR_INVALID_PC                     Result = 718 //C.CUDA_ERROR_INVALID_PC
	ERROR_NOT_PERMITTED                  Result = 800 //C.CUDA_ERROR_NOT_PERMITTED
	ERROR_NOT_SUPPORTED                  Result = 801 //C.CUDA_ERROR_NOT_SUPPORTED
	ERROR_UNKNOWN                        Result = C.CUDA_ERROR_UNKNOWN
)

func (Result) String

func (err Result) String() string

Message string for the error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL