cudart

package v0.0.0-...-c9f06ed
Published: May 13, 2020 License: MIT Imports: 6 Imported by: 16

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func GetDeviceCount

func GetDeviceCount() (n int32, err error)

GetDeviceCount returns the number of devices.
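
A minimal sketch of enumerating devices, using GetDeviceCount together with CreateDevice, Major, and Minor from later in this package (the import path is an assumption; adjust it to wherever this module lives):

	package main

	import (
		"fmt"
		"log"

		"github.com/dereklstinson/GoCudnn/cudart" // assumed import path
	)

	func main() {
		n, err := cudart.GetDeviceCount()
		if err != nil {
			log.Fatal(err)
		}
		for i := int32(0); i < n; i++ {
			d := cudart.CreateDevice(i) // creates the handle only; does not set the device
			major, _ := d.Major()
			minor, _ := d.Minor()
			fmt.Printf("device %d: compute capability %d.%d\n", i, major, minor)
		}
	}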

func Malloc

func Malloc(mem cutil.Mem, sizet uint) error

Malloc allocates device memory of the size that was passed in and stores the allocation in mem. It also sets a finalizer on the memory for GC.

func Malloc3D

func Malloc3D(p *PitchedPtr, e Extent) error

Malloc3D - Allocates logical 1D, 2D, or 3D memory objects on the device.

func Malloc3dArray

func Malloc3dArray(a *Array, desc *ChannelFormatDesc, e Extent, flag ArrayFlag) error

Malloc3dArray - Allocate an array on the device.

func MallocArray

func MallocArray(a *Array, desc *ChannelFormatDesc, width, height uint, flag ArrayFlag) error

MallocArray - Allocate an array on the device.

func MallocEx

func MallocEx(w *gocu.Worker, mem cutil.Mem, sizet uint) error

MallocEx is like Malloc, but it takes a worker; the memory assigned to mem will be allocated in the context in use on that worker's host thread. If w is nil, it behaves like Malloc.

func MallocHost

func MallocHost(mem cutil.Mem, sizet uint) error

MallocHost will allocate memory on the host for cuda use.

func MallocHostEx

func MallocHostEx(w *gocu.Worker, mem cutil.Mem, sizet uint) error

MallocHostEx is like MallocHost, but it takes a worker; the memory assigned to mem will be allocated in the context in use on that worker's host thread. If w is nil, it behaves like MallocHost.

func MallocManagedGlobal

func MallocManagedGlobal(mem cutil.Mem, size uint) error

MallocManagedGlobal allocates managed memory that can be accessed by any device (global attach).

func MallocManagedGlobalEx

func MallocManagedGlobalEx(w *gocu.Worker, mem cutil.Mem, size uint) error

MallocManagedGlobalEx is like MallocManagedGlobal, but it takes a worker; the memory assigned to mem will be allocated in the context in use on that worker's host thread. If w is nil, it behaves like MallocManagedGlobal.

func MallocManagedHost

func MallocManagedHost(mem cutil.Mem, size uint) error

MallocManagedHost uses the unified memory management system and starts the allocation off on the host. Memory is set to 0. It will also set a finalizer on the memory for GC.

func MallocManagedHostEx

func MallocManagedHostEx(w *gocu.Worker, mem cutil.Mem, size uint) error

MallocManagedHostEx is like MallocManagedHost, but it takes a worker; the memory assigned to mem will be allocated in the context in use on that worker's host thread. If w is nil, it behaves like MallocManagedHost.

func Memcpy

func Memcpy(dest, src cutil.Pointer, sizet uint, kind MemcpyKind) error

Memcpy copies sizet bytes from src to dest. If the Default kind is selected and the system supports unified virtual addressing, the direction of the transfer is inferred.

func Memcpy2D

func Memcpy2D(dest cutil.Pointer, dpitch uint, src cutil.Pointer, spitch uint, width, height uint, kind MemcpyKind) error

Memcpy2D copies a 2D region from src to dest using the given pitches. If the Default kind is selected and the system supports unified virtual addressing, the direction of the transfer is inferred.

func Memcpy3D

func Memcpy3D(m *Memcpy3DParams) error

Memcpy3D - Copies data between 3D objects. The source, destination, extent, and kind of the copy are specified by m, which is created using CreateMemcpy3DParams.
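
A hedged sketch of a device-to-device 3D copy built from Malloc3D, MakeCudaExtent, MakeCudaPos, and CreateMemcpy3DParams. It assumes cudart is imported as above, that a nil *Array is acceptable when the corresponding pitched pointer is supplied, and that the extent width is given in bytes for linear memory, as in the C API:

	func copy3D() error {
		ext := cudart.MakeCudaExtent(64*4, 32, 8) // 64x32x8 block of float32s
		var src, dst cudart.PitchedPtr
		if err := cudart.Malloc3D(&src, ext); err != nil {
			return err
		}
		if err := cudart.Malloc3D(&dst, ext); err != nil {
			return err
		}
		var kind cudart.MemcpyKind
		p := cudart.CreateMemcpy3DParams(
			nil, cudart.MakeCudaPos(0, 0, 0), src, // source described by a pitched pointer
			nil, cudart.MakeCudaPos(0, 0, 0), dst, // destination described by a pitched pointer
			ext, kind.DeviceToDevice())
		return cudart.Memcpy3D(p)
	}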

func Memcpy3DAsync

func Memcpy3DAsync(m *Memcpy3DParams, s gocu.Streamer) error

Memcpy3DAsync - The asynchronous version of Memcpy3D, ordered in stream s. The source, destination, extent, and kind of the copy are specified by m, which is created using CreateMemcpy3DParams.

func MemcpyAsync

func MemcpyAsync(dest, src cutil.Pointer, sizet uint, kind MemcpyKind, stream gocu.Streamer) error

MemcpyAsync copies data between host and device asynchronously, ordered in the given stream.

func MemcpyPeer

func MemcpyPeer(dest cutil.Pointer, ddev Device, src cutil.Pointer, sdev Device, sizet uint) error

MemcpyPeer copies memory between two devices.

func MemcpyPeerAsync

func MemcpyPeerAsync(dest cutil.Pointer, ddev Device, src cutil.Pointer, sdev Device, sizet uint, stream gocu.Streamer) error

MemcpyPeerAsync copies memory between two devices asynchronously, ordered in the given stream.

func MemcpyUS

func MemcpyUS(dest, src unsafe.Pointer, sizet uint, kind MemcpyKind) error

MemcpyUS performs a memory copy using unsafe pointers. It is a little lower level than the regular Memcpy.

func Memset

func Memset(mem cutil.Mem, value int32, count uint) error

Memset sets each of count bytes of device memory to value.

func MemsetUS

func MemsetUS(mem unsafe.Pointer, value int32, count uint) error

MemsetUS is like Memset but takes an unsafe.Pointer.

func SetValidDevices

func SetValidDevices(devices []Device) error

SetValidDevices takes a list of devices, ordered by user priority, that may be used for CUDA execution.
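
A short sketch using only functions documented on this page (cudart assumed imported as above):

	func preferDevices() error {
		// devices listed in order of user priority
		devs := []cudart.Device{cudart.CreateDevice(1), cudart.CreateDevice(0)}
		return cudart.SetValidDevices(devs)
	}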

func SyncNillStream

func SyncNillStream() error

SyncNillStream synchronizes the null (default) stream.

Types

type Array

type Array struct {
	// contains filtered or unexported fields
}

Array is a cudaArray_t

type ArrayFlag

type ArrayFlag C.uint

ArrayFlag holds flags used for array allocation.

func (*ArrayFlag) Cubemap

func (a *ArrayFlag) Cubemap() ArrayFlag

Cubemap - Allocates a cubemap CUDA array. Width must be equal to height, and depth must be six. If the cudaArrayLayered flag is also set, depth must be a multiple of six.

func (*ArrayFlag) Default

func (a *ArrayFlag) Default() ArrayFlag

Default - This flag's value is defined to be 0 and provides default array allocation

func (*ArrayFlag) Layered

func (a *ArrayFlag) Layered() ArrayFlag

Layered - Allocates a layered CUDA array, with the depth extent indicating the number of layers

func (*ArrayFlag) SurfaceLoadStore

func (a *ArrayFlag) SurfaceLoadStore() ArrayFlag

SurfaceLoadStore - Allocates a CUDA array that could be read from or written to using a surface reference.

func (*ArrayFlag) TextureGather

func (a *ArrayFlag) TextureGather() ArrayFlag

TextureGather - This flag indicates that texture gather operations will be performed on the CUDA array. Texture gather can only be performed on 2D CUDA arrays.

type Atribs

type Atribs struct {
	Type    MemType
	Device  int32
	DPtr    unsafe.Pointer
	HPtr    unsafe.Pointer
	Managed bool
}

Atribs holds a memory's attributes on the device side.

func PointerGetAttributes

func PointerGetAttributes(mem cutil.Pointer) (Atribs, error)

PointerGetAttributes returns the attributes of mem.
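
A small sketch of inspecting a pointer's attributes; p can be any cutil.Pointer obtained from one of the Malloc variants (cutil is the helper package that provides the pointer interfaces used throughout cudart):

	func describe(p cutil.Pointer) error {
		a, err := cudart.PointerGetAttributes(p)
		if err != nil {
			return err
		}
		fmt.Printf("memtype=%v device=%d managed=%v\n", a.Type, a.Device, a.Managed)
		return nil
	}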

type ChannelFormatDesc

type ChannelFormatDesc C.struct_cudaChannelFormatDesc

ChannelFormatDesc describes a channel's format.

func CreateChannelFormatDesc

func CreateChannelFormatDesc(x, y, z, w int32, f ChannelFormatKind) ChannelFormatDesc

CreateChannelFormatDesc - Returns a channel descriptor with format f and the number of bits of each component x, y, z, and w.

A float component must be 32 bits. Unsigned and signed components are 8 or 32 bits.
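
A sketch of pairing CreateChannelFormatDesc with MallocArray to allocate a 512x512 CUDA array of single-channel 32-bit floats (cudart assumed imported as above):

	func allocFloatArray() (*cudart.Array, error) {
		var kind cudart.ChannelFormatKind
		var flag cudart.ArrayFlag
		desc := cudart.CreateChannelFormatDesc(32, 0, 0, 0, kind.Float())
		arr := new(cudart.Array)
		if err := cudart.MallocArray(arr, &desc, 512, 512, flag.Default()); err != nil {
			return nil, err
		}
		return arr, nil
	}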

type ChannelFormatKind

type ChannelFormatKind C.enum_cudaChannelFormatKind

ChannelFormatKind is the kind of format the channel is in

func (*ChannelFormatKind) Float

func (c *ChannelFormatKind) Float() ChannelFormatKind

Float - sets the channel format to Float

func (*ChannelFormatKind) Signed

func (c *ChannelFormatKind) Signed() ChannelFormatKind

Signed - sets the channel format to Signed

func (*ChannelFormatKind) UnSigned

func (c *ChannelFormatKind) UnSigned() ChannelFormatKind

UnSigned - sets the channel format to UnSigned

type Device

type Device C.int

Device holds a device's id.

func CreateDevice

func CreateDevice(device int32) Device

CreateDevice just creates a Device; it doesn't set it as the current device.

func GetDevice

func GetDevice() (Device, error)

GetDevice gets the device currently set for use on the host thread.

func (Device) AttrAsyncEngineCount

func (d Device) AttrAsyncEngineCount() (int, error)

AttrAsyncEngineCount - Number of asynchronous engines

func (Device) AttrCanFlushRemoteWrites

func (d Device) AttrCanFlushRemoteWrites() (int, error)

AttrCanFlushRemoteWrites - Device supports flushing of outstanding remote writes.

func (Device) AttrCanMapHostMemory

func (d Device) AttrCanMapHostMemory() (int, error)

AttrCanMapHostMemory - Device can map host memory into CUDA address space

func (Device) AttrCanUseHostPointerForRegisteredMem

func (d Device) AttrCanUseHostPointerForRegisteredMem() (int, error)

AttrCanUseHostPointerForRegisteredMem - Device can access host registered memory at the same virtual address as the CPU

func (Device) AttrClockRate

func (d Device) AttrClockRate() (int, error)

AttrClockRate - Peak clock frequency in kilohertz

func (Device) AttrComputeCapabilityMajor

func (d Device) AttrComputeCapabilityMajor() (int, error)

AttrComputeCapabilityMajor - Major compute capability version number

func (Device) AttrComputeCapabilityMinor

func (d Device) AttrComputeCapabilityMinor() (int, error)

AttrComputeCapabilityMinor - Minor compute capability version number

func (Device) AttrComputeMode

func (d Device) AttrComputeMode() (int, error)

AttrComputeMode - Compute mode (See cudaComputeMode for details)

func (Device) AttrComputePreemptionSupported

func (d Device) AttrComputePreemptionSupported() (int, error)

AttrComputePreemptionSupported - Device supports Compute Preemption

func (Device) AttrConcurrentKernels

func (d Device) AttrConcurrentKernels() (int, error)

AttrConcurrentKernels - Device can possibly execute multiple kernels concurrently

func (Device) AttrConcurrentManagedAccess

func (d Device) AttrConcurrentManagedAccess() (int, error)

AttrConcurrentManagedAccess - Device can coherently access managed memory concurrently with the CPU

func (Device) AttrCooperativeLaunch

func (d Device) AttrCooperativeLaunch() (int, error)

AttrCooperativeLaunch - Device supports launching cooperative kernels via cudaLaunchCooperativeKernel

func (Device) AttrCooperativeMultiDeviceLaunch

func (d Device) AttrCooperativeMultiDeviceLaunch() (int, error)

AttrCooperativeMultiDeviceLaunch - Device can participate in cooperative kernels launched via cudaLaunchCooperativeKernelMultiDevice

func (Device) AttrDirectManagedMemAccessFromHost

func (d Device) AttrDirectManagedMemAccessFromHost() (int, error)

AttrDirectManagedMemAccessFromHost - Host can directly access managed memory on the device without migration.

func (Device) AttrEccEnabled

func (d Device) AttrEccEnabled() (int, error)

AttrEccEnabled - Device has ECC support enabled

func (Device) AttrGlobalL1CacheSupported

func (d Device) AttrGlobalL1CacheSupported() (int, error)

AttrGlobalL1CacheSupported - Device supports caching globals in L1

func (Device) AttrGlobalMemoryBusWidth

func (d Device) AttrGlobalMemoryBusWidth() (int, error)

AttrGlobalMemoryBusWidth - Global memory bus width in bits

func (Device) AttrGpuOverlap

func (d Device) AttrGpuOverlap() (int, error)

AttrGpuOverlap - Device can possibly copy memory and execute a kernel concurrently

func (Device) AttrHostNativeAtomicSupported

func (d Device) AttrHostNativeAtomicSupported() (int, error)

AttrHostNativeAtomicSupported - Link between the device and the host supports native atomic operations

func (Device) AttrHostRegisterSupported

func (d Device) AttrHostRegisterSupported() (int, error)

AttrHostRegisterSupported - Device supports host memory registration via cudaHostRegister.

func (Device) AttrIntegrated

func (d Device) AttrIntegrated() (int, error)

AttrIntegrated - Device is integrated with host memory

func (Device) AttrIsMultiGpuBoard

func (d Device) AttrIsMultiGpuBoard() (int, error)

AttrIsMultiGpuBoard - Device is on a multi-GPU board

func (Device) AttrKernelExecTimeout

func (d Device) AttrKernelExecTimeout() (int, error)

AttrKernelExecTimeout - Specifies whether there is a run time limit on kernels

func (Device) AttrL2CacheSize

func (d Device) AttrL2CacheSize() (int, error)

AttrL2CacheSize - Size of L2 cache in bytes

func (Device) AttrLocalL1CacheSupported

func (d Device) AttrLocalL1CacheSupported() (int, error)

AttrLocalL1CacheSupported - Device supports caching locals in L1

func (Device) AttrManagedMemory

func (d Device) AttrManagedMemory() (int, error)

AttrManagedMemory - Device can allocate managed memory on this system

func (Device) AttrMaxBlockDimX

func (d Device) AttrMaxBlockDimX() (int, error)

AttrMaxBlockDimX - Maximum block dimension X

func (Device) AttrMaxBlockDimY

func (d Device) AttrMaxBlockDimY() (int, error)

AttrMaxBlockDimY - Maximum block dimension Y

func (Device) AttrMaxBlockDimZ

func (d Device) AttrMaxBlockDimZ() (int, error)

AttrMaxBlockDimZ - Maximum block dimension Z

func (Device) AttrMaxGridDimX

func (d Device) AttrMaxGridDimX() (int, error)

AttrMaxGridDimX - Maximum grid dimension X

func (Device) AttrMaxGridDimY

func (d Device) AttrMaxGridDimY() (int, error)

AttrMaxGridDimY - Maximum grid dimension Y

func (Device) AttrMaxGridDimZ

func (d Device) AttrMaxGridDimZ() (int, error)

AttrMaxGridDimZ - Maximum grid dimension Z

func (Device) AttrMaxPitch

func (d Device) AttrMaxPitch() (int, error)

AttrMaxPitch - Maximum pitch in bytes allowed by memory copies

func (Device) AttrMaxRegistersPerBlock

func (d Device) AttrMaxRegistersPerBlock() (int, error)

AttrMaxRegistersPerBlock - Maximum number of 32-bit registers available per block

func (Device) AttrMaxRegistersPerMultiprocessor

func (d Device) AttrMaxRegistersPerMultiprocessor() (int, error)

AttrMaxRegistersPerMultiprocessor - Maximum number of 32-bit registers available per multiprocessor

func (Device) AttrMaxSharedMemoryPerBlock

func (d Device) AttrMaxSharedMemoryPerBlock() (int, error)

AttrMaxSharedMemoryPerBlock - Maximum shared memory available per block in bytes

func (Device) AttrMaxSharedMemoryPerBlockOptin

func (d Device) AttrMaxSharedMemoryPerBlockOptin() (int, error)

AttrMaxSharedMemoryPerBlockOptin - The maximum optin shared memory per block. This value may vary by chip. See cudaFuncSetAttribute

func (Device) AttrMaxSharedMemoryPerMultiprocessor

func (d Device) AttrMaxSharedMemoryPerMultiprocessor() (int, error)

AttrMaxSharedMemoryPerMultiprocessor - Maximum shared memory available per multiprocessor in bytes

func (Device) AttrMaxSurface1DLayeredLayers

func (d Device) AttrMaxSurface1DLayeredLayers() (int, error)

AttrMaxSurface1DLayeredLayers - Maximum layers in a 1D layered surface

func (Device) AttrMaxSurface1DLayeredWidth

func (d Device) AttrMaxSurface1DLayeredWidth() (int, error)

AttrMaxSurface1DLayeredWidth - Maximum 1D layered surface width

func (Device) AttrMaxSurface1DWidth

func (d Device) AttrMaxSurface1DWidth() (int, error)

AttrMaxSurface1DWidth - Maximum 1D surface width

func (Device) AttrMaxSurface2DHeight

func (d Device) AttrMaxSurface2DHeight() (int, error)

AttrMaxSurface2DHeight - Maximum 2D surface height

func (Device) AttrMaxSurface2DLayeredHeight

func (d Device) AttrMaxSurface2DLayeredHeight() (int, error)

AttrMaxSurface2DLayeredHeight - Maximum 2D layered surface height

func (Device) AttrMaxSurface2DLayeredLayers

func (d Device) AttrMaxSurface2DLayeredLayers() (int, error)

AttrMaxSurface2DLayeredLayers - Maximum layers in a 2D layered surface

func (Device) AttrMaxSurface2DLayeredWidth

func (d Device) AttrMaxSurface2DLayeredWidth() (int, error)

AttrMaxSurface2DLayeredWidth - Maximum 2D layered surface width

func (Device) AttrMaxSurface2DWidth

func (d Device) AttrMaxSurface2DWidth() (int, error)

AttrMaxSurface2DWidth - Maximum 2D surface width

func (Device) AttrMaxSurface3DDepth

func (d Device) AttrMaxSurface3DDepth() (int, error)

AttrMaxSurface3DDepth - Maximum 3D surface depth

func (Device) AttrMaxSurface3DHeight

func (d Device) AttrMaxSurface3DHeight() (int, error)

AttrMaxSurface3DHeight - Maximum 3D surface height

func (Device) AttrMaxSurface3DWidth

func (d Device) AttrMaxSurface3DWidth() (int, error)

AttrMaxSurface3DWidth - Maximum 3D surface width

func (Device) AttrMaxSurfaceCubemapLayeredLayers

func (d Device) AttrMaxSurfaceCubemapLayeredLayers() (int, error)

AttrMaxSurfaceCubemapLayeredLayers - Maximum layers in a cubemap layered surface

func (Device) AttrMaxSurfaceCubemapLayeredWidth

func (d Device) AttrMaxSurfaceCubemapLayeredWidth() (int, error)

AttrMaxSurfaceCubemapLayeredWidth - Maximum cubemap layered surface width

func (Device) AttrMaxSurfaceCubemapWidth

func (d Device) AttrMaxSurfaceCubemapWidth() (int, error)

AttrMaxSurfaceCubemapWidth - Maximum cubemap surface width

func (Device) AttrMaxTexture1DLayeredLayers

func (d Device) AttrMaxTexture1DLayeredLayers() (int, error)

AttrMaxTexture1DLayeredLayers - Maximum layers in a 1D layered texture

func (Device) AttrMaxTexture1DLayeredWidth

func (d Device) AttrMaxTexture1DLayeredWidth() (int, error)

AttrMaxTexture1DLayeredWidth - Maximum 1D layered texture width

func (Device) AttrMaxTexture1DLinearWidth

func (d Device) AttrMaxTexture1DLinearWidth() (int, error)

AttrMaxTexture1DLinearWidth - Maximum 1D linear texture width

func (Device) AttrMaxTexture1DMipmappedWidth

func (d Device) AttrMaxTexture1DMipmappedWidth() (int, error)

AttrMaxTexture1DMipmappedWidth - Maximum mipmapped 1D texture width

func (Device) AttrMaxTexture1DWidth

func (d Device) AttrMaxTexture1DWidth() (int, error)

AttrMaxTexture1DWidth - Maximum 1D texture width

func (Device) AttrMaxTexture2DGatherHeight

func (d Device) AttrMaxTexture2DGatherHeight() (int, error)

AttrMaxTexture2DGatherHeight - Maximum 2D texture height if cudaArrayTextureGather is set

func (Device) AttrMaxTexture2DGatherWidth

func (d Device) AttrMaxTexture2DGatherWidth() (int, error)

AttrMaxTexture2DGatherWidth - Maximum 2D texture width if cudaArrayTextureGather is set

func (Device) AttrMaxTexture2DHeight

func (d Device) AttrMaxTexture2DHeight() (int, error)

AttrMaxTexture2DHeight - Maximum 2D texture height

func (Device) AttrMaxTexture2DLayeredHeight

func (d Device) AttrMaxTexture2DLayeredHeight() (int, error)

AttrMaxTexture2DLayeredHeight - Maximum 2D layered texture height

func (Device) AttrMaxTexture2DLayeredLayers

func (d Device) AttrMaxTexture2DLayeredLayers() (int, error)

AttrMaxTexture2DLayeredLayers - Maximum layers in a 2D layered texture

func (Device) AttrMaxTexture2DLayeredWidth

func (d Device) AttrMaxTexture2DLayeredWidth() (int, error)

AttrMaxTexture2DLayeredWidth - Maximum 2D layered texture width

func (Device) AttrMaxTexture2DLinearHeight

func (d Device) AttrMaxTexture2DLinearHeight() (int, error)

AttrMaxTexture2DLinearHeight - Maximum 2D linear texture height

func (Device) AttrMaxTexture2DLinearPitch

func (d Device) AttrMaxTexture2DLinearPitch() (int, error)

AttrMaxTexture2DLinearPitch - Maximum 2D linear texture pitch in bytes

func (Device) AttrMaxTexture2DLinearWidth

func (d Device) AttrMaxTexture2DLinearWidth() (int, error)

AttrMaxTexture2DLinearWidth - Maximum 2D linear texture width

func (Device) AttrMaxTexture2DMipmappedHeight

func (d Device) AttrMaxTexture2DMipmappedHeight() (int, error)

AttrMaxTexture2DMipmappedHeight - Maximum mipmapped 2D texture height

func (Device) AttrMaxTexture2DMipmappedWidth

func (d Device) AttrMaxTexture2DMipmappedWidth() (int, error)

AttrMaxTexture2DMipmappedWidth - Maximum mipmapped 2D texture width

func (Device) AttrMaxTexture2DWidth

func (d Device) AttrMaxTexture2DWidth() (int, error)

AttrMaxTexture2DWidth - Maximum 2D texture width

func (Device) AttrMaxTexture3DDepth

func (d Device) AttrMaxTexture3DDepth() (int, error)

AttrMaxTexture3DDepth - Maximum 3D texture depth

func (Device) AttrMaxTexture3DDepthAlt

func (d Device) AttrMaxTexture3DDepthAlt() (int, error)

AttrMaxTexture3DDepthAlt - Alternate maximum 3D texture depth

func (Device) AttrMaxTexture3DHeight

func (d Device) AttrMaxTexture3DHeight() (int, error)

AttrMaxTexture3DHeight - Maximum 3D texture height

func (Device) AttrMaxTexture3DHeightAlt

func (d Device) AttrMaxTexture3DHeightAlt() (int, error)

AttrMaxTexture3DHeightAlt - Alternate maximum 3D texture height

func (Device) AttrMaxTexture3DWidth

func (d Device) AttrMaxTexture3DWidth() (int, error)

AttrMaxTexture3DWidth - Maximum 3D texture width

func (Device) AttrMaxTexture3DWidthAlt

func (d Device) AttrMaxTexture3DWidthAlt() (int, error)

AttrMaxTexture3DWidthAlt - Alternate maximum 3D texture width

func (Device) AttrMaxTextureCubemapLayeredLayers

func (d Device) AttrMaxTextureCubemapLayeredLayers() (int, error)

AttrMaxTextureCubemapLayeredLayers - Maximum layers in a cubemap layered texture

func (Device) AttrMaxTextureCubemapLayeredWidth

func (d Device) AttrMaxTextureCubemapLayeredWidth() (int, error)

AttrMaxTextureCubemapLayeredWidth - Maximum cubemap layered texture width/height

func (Device) AttrMaxTextureCubemapWidth

func (d Device) AttrMaxTextureCubemapWidth() (int, error)

AttrMaxTextureCubemapWidth - Maximum cubemap texture width/height

func (Device) AttrMaxThreadsPerBlock

func (d Device) AttrMaxThreadsPerBlock() (int, error)

AttrMaxThreadsPerBlock - Maximum number of threads per block

func (Device) AttrMaxThreadsPerMultiProcessor

func (d Device) AttrMaxThreadsPerMultiProcessor() (int, error)

AttrMaxThreadsPerMultiProcessor - Maximum resident threads per multiprocessor

func (Device) AttrMemoryClockRate

func (d Device) AttrMemoryClockRate() (int, error)

AttrMemoryClockRate - Peak memory clock frequency in kilohertz

func (Device) AttrMultiGpuBoardGroupID

func (d Device) AttrMultiGpuBoardGroupID() (int, error)

AttrMultiGpuBoardGroupID - Unique identifier for a group of devices on the same multi-GPU board

func (Device) AttrMultiProcessorCount

func (d Device) AttrMultiProcessorCount() (int, error)

AttrMultiProcessorCount - Number of multiprocessors on device

func (Device) AttrPageableMemoryAccess

func (d Device) AttrPageableMemoryAccess() (int, error)

AttrPageableMemoryAccess - Device supports coherently accessing pageable memory without calling cudaHostRegister on it

func (Device) AttrPageableMemoryAccessUsesHostPageTables

func (d Device) AttrPageableMemoryAccessUsesHostPageTables() (int, error)

AttrPageableMemoryAccessUsesHostPageTables - Device accesses pageable memory via the host page tables.

func (Device) AttrPciBusID

func (d Device) AttrPciBusID() (int, error)

AttrPciBusID - PCI bus ID of the device

func (Device) AttrPciDeviceID

func (d Device) AttrPciDeviceID() (int, error)

AttrPciDeviceID - PCI device ID of the device

func (Device) AttrPciDomainID

func (d Device) AttrPciDomainID() (int, error)

AttrPciDomainID - PCI domain ID of the device

func (Device) AttrSingleToDoublePrecisionPerfRatio

func (d Device) AttrSingleToDoublePrecisionPerfRatio() (int, error)

AttrSingleToDoublePrecisionPerfRatio - Ratio of single precision performance (in floating-point operations per second) to double precision performance

func (Device) AttrStreamPrioritiesSupported

func (d Device) AttrStreamPrioritiesSupported() (int, error)

AttrStreamPrioritiesSupported - Device supports stream priorities

func (Device) AttrSurfaceAlignment

func (d Device) AttrSurfaceAlignment() (int, error)

AttrSurfaceAlignment - Alignment requirement for surfaces

func (Device) AttrTccDriver

func (d Device) AttrTccDriver() (int, error)

AttrTccDriver - Device is using TCC driver model

func (Device) AttrTextureAlignment

func (d Device) AttrTextureAlignment() (int, error)

AttrTextureAlignment - Alignment requirement for textures

func (Device) AttrTexturePitchAlignment

func (d Device) AttrTexturePitchAlignment() (int, error)

AttrTexturePitchAlignment - Pitch alignment requirement for textures

func (Device) AttrTotalConstantMemory

func (d Device) AttrTotalConstantMemory() (int, error)

AttrTotalConstantMemory - Memory available on device for __constant__ variables in a CUDA C kernel in bytes

func (Device) AttrUnifiedAddressing

func (d Device) AttrUnifiedAddressing() (int, error)

AttrUnifiedAddressing - Device shares a unified address space with the host

func (Device) AttrWarpSize

func (d Device) AttrWarpSize() (int, error)

AttrWarpSize - Warp size in threads

func (Device) CanAccessPeer

func (d Device) CanAccessPeer(peer Device) (bool, error)

CanAccessPeer checks whether peer's memory can be accessed by the device calling the method. The device calling the method doesn't get set.

func (Device) DeviceSync

func (d Device) DeviceSync() error

DeviceSync blocks until the device has completed all preceding requested tasks. DeviceSync() returns an error if one of the preceding tasks has failed. If the cudaDeviceScheduleBlockingSync flag was set for this device, the host thread will block until the device has finished its work. Will set the device.

func (Device) DisablePeerAccess

func (d Device) DisablePeerAccess(peer Device) error

DisablePeerAccess - see cudaDeviceDisablePeerAccess. The device calling the method will be set.

func (Device) EnablePeerAccess

func (d Device) EnablePeerAccess(peer Device) error

EnablePeerAccess enables memory access between devices. The device calling the method will be set.

func (Device) Major

func (d Device) Major() (int, error)

Major returns the major compute capability of device

func (Device) MaxBlockDimXYZ

func (d Device) MaxBlockDimXYZ() ([]int32, error)

MaxBlockDimXYZ returns an array of the maximum block dimensions in x, y, z order, and an error.

Will not set device

func (Device) MaxGridDimXYZ

func (d Device) MaxGridDimXYZ() ([]int32, error)

MaxGridDimXYZ returns an array of the maximum grid dimensions in x, y, z order, and an error. Will not set device.

func (Device) MaxThreadsPerBlock

func (d Device) MaxThreadsPerBlock() (int32, error)

MaxThreadsPerBlock returns the max number of threads per block and the runtime error. Will not set device.

func (Device) MaxThreadsPerMultiProcessor

func (d Device) MaxThreadsPerMultiProcessor() (int32, error)

MaxThreadsPerMultiProcessor returns the maximum number of resident threads per multiprocessor on the device and the runtime error. Will not set device.

func (Device) MemGetInfo

func (d Device) MemGetInfo() (free, total int, err error)

MemGetInfo returns the free and total memory for the device. Will set the device.

func (Device) MemPrefetchAsync

func (d Device) MemPrefetchAsync(mem cutil.Mem, size uint, s gocu.Streamer) error

MemPrefetchAsync - Prefetches memory to the specified destination device.

From Cuda Documentation:

Prefetches memory to the specified destination device. devPtr is the base device pointer of the memory to be prefetched and dstDevice is the destination device. count specifies the number of bytes to copy. stream is the stream in which the operation is enqueued. The memory range must refer to managed memory allocated via cudaMallocManaged or declared via __managed__ variables.

Passing in cudaCpuDeviceId for dstDevice will prefetch the data to host memory. If dstDevice is a GPU, then the device attribute cudaDevAttrConcurrentManagedAccess must be non-zero. Additionally, stream must be associated with a device that has a non-zero value for the device attribute cudaDevAttrConcurrentManagedAccess.

The start address and end address of the memory range will be rounded down and rounded up respectively to be aligned to CPU page size before the prefetch operation is enqueued in the stream.

If no physical memory has been allocated for this region, then this memory region will be populated and mapped on the destination device. If there's insufficient memory to prefetch the desired region, the Unified Memory driver may evict pages from other cudaMallocManaged allocations to host memory in order to make room. Device memory allocated using cudaMalloc or cudaMallocArray will not be evicted.

By default, any mappings to the previous location of the migrated pages are removed and mappings for the new location are only setup on dstDevice. The exact behavior however also depends on the settings applied to this memory range via cudaMemAdvise as described below:

If cudaMemAdviseSetReadMostly was set on any subset of this memory range, then that subset will create a read-only copy of the pages on dstDevice.

If cudaMemAdviseSetPreferredLocation was called on any subset of this memory range, then the pages will be migrated to dstDevice even if dstDevice is not the preferred location of any pages in the memory range.

If cudaMemAdviseSetAccessedBy was called on any subset of this memory range, then mappings to those pages from all the appropriate processors are updated to refer to the new location if establishing such a mapping is possible. Otherwise, those mappings are cleared.

Note that this API is not required for functionality and only serves to improve performance by allowing the application to migrate data to a suitable location before it is accessed. Memory accesses to this range are always coherent and are allowed even when the data is actively being migrated.

Note that this function is asynchronous with respect to the host and all work on other devices.
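
A heavily hedged sketch of prefetching a managed allocation to a device before it is used. gocu.NewWorker is an assumed constructor from the gocu package, and it is assumed that *Stream satisfies gocu.Streamer; check those packages before relying on this:

	func prefetch(d cudart.Device) error {
		w := gocu.NewWorker(d) // assumed constructor
		m, err := cudart.CreateMemManager(w)
		if err != nil {
			return err
		}
		const sib = 1 << 20
		mem, err := m.Malloc(sib) // allocation under unified memory management
		if err != nil {
			return err
		}
		s, err := cudart.CreateNonBlockingStream()
		if err != nil {
			return err
		}
		if err := d.MemPrefetchAsync(mem, sib, s); err != nil {
			return err
		}
		return s.Sync()
	}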

func (Device) Minor

func (d Device) Minor() (int, error)

Minor returns the minor compute capability of device

func (Device) MultiProcessorCount

func (d Device) MultiProcessorCount() (int32, error)

MultiProcessorCount returns the number of multiprocessors on the device and the runtime error. Will not set device.

func (Device) Reset

func (d Device) Reset() error

Reset resets the device. If the device isn't set on the current host thread, this function will set it automatically. Make sure that the device previously in use on the host thread is set back afterwards.

func (Device) Set

func (d Device) Set() error

Set sets the device to use. This will change the device residing on the current host thread. There is no synchronization with the previous or new device on the host thread.
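
The CUDA runtime tracks the current device per host thread, and Go goroutines migrate between OS threads, so a common pattern is to lock the goroutine before calling Set (or to route work through a gocu.Worker). A sketch using only this package plus the standard runtime package:

	func useDevice(d cudart.Device) error {
		runtime.LockOSThread()
		defer runtime.UnlockOSThread()
		if err := d.Set(); err != nil {
			return err
		}
		// ... launch work on d from this goroutine ...
		return d.DeviceSync()
	}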

type Error

type Error struct {
	// Context is typically a C function name.
	Context string

	// Name is the C constant name for the error,
	// such as "CURAND_STATUS_INTERNAL_ERROR".
	Name string

	// Message is the main error message.
	//
	// This may be human-readable, although it may often be
	// the same as Name.
	Message string
}

Error is a CUDA-related error.

func (*Error) Error

func (e *Error) Error() string

Error generates a message "context: message".

type Event

type Event struct {
	// contains filtered or unexported fields
}

Event is a cuda event

func CreateEvent

func CreateEvent() (event *Event, err error)

CreateEvent will create and return an Event

func (*Event) ElapsedTime

func (e *Event) ElapsedTime(previous *Event) (float32, error)

ElapsedTime compares the current event to a previous event and returns the time difference in ms.

func (*Event) Record

func (e *Event) Record(s gocu.Streamer) error

Record records an event

func (*Event) Status

func (e *Event) Status() (bool, error)

Status is the function cudaEventQuery; the name was changed because of how the function is handled. err will be returned as nil if cudaSuccess or cudaErrorNotReady is returned. It will return true if the event is complete and false if it is not.

func (*Event) Sync

func (e *Event) Sync() error

Sync waits for an event to complete
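
A sketch of timing work submitted to a stream with two events (it assumes *Stream satisfies gocu.Streamer):

	func timeWork(s *cudart.Stream) (float32, error) {
		start, err := cudart.CreateEvent()
		if err != nil {
			return 0, err
		}
		stop, err := cudart.CreateEvent()
		if err != nil {
			return 0, err
		}
		if err = start.Record(s); err != nil {
			return 0, err
		}
		// ... enqueue kernels or async copies on s here ...
		if err = stop.Record(s); err != nil {
			return 0, err
		}
		if err = stop.Sync(); err != nil {
			return 0, err
		}
		return stop.ElapsedTime(start) // milliseconds
	}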

type Extent

type Extent C.struct_cudaExtent

Extent is a cuda struct cudaExtent

func MakeCudaExtent

func MakeCudaExtent(w, h, d uint) Extent

MakeCudaExtent - returns a cudaExtent based on input parameters.

func (Extent) Depth

func (e Extent) Depth() uint

Depth returns e.depth

func (Extent) Height

func (e Extent) Height() uint

Height returns e.height

func (Extent) Width

func (e Extent) Width() uint

Width returns e.width

type MemAttach

type MemAttach C.uint

MemAttach - This is a new type derived from a list of the defines for cudart

func (*MemAttach) Global

func (m *MemAttach) Global() MemAttach

Global sets m to Global and returns m - Memory can be accessed by any stream on any device

func (*MemAttach) Host

func (m *MemAttach) Host() MemAttach

Host sets m to Host and returns m - Memory cannot be accessed by any stream on any device

func (*MemAttach) Single

func (m *MemAttach) Single() MemAttach

Single sets m to Single and returns m - Memory can only be accessed by a single stream on the associated device

func (MemAttach) String

func (m MemAttach) String() string

type MemManager

type MemManager struct {
	// contains filtered or unexported fields
}

MemManager allocates memory to a cuda context/device under unified memory management, and handles copies between memory under unified memory management as well as copies to and from Go memory.

func CreateMemManager

func CreateMemManager(w *gocu.Worker) (*MemManager, error)

CreateMemManager creates an allocator that is bound to cuda's unified memory management.

func (*MemManager) AsyncCopy

func (m *MemManager) AsyncCopy(dest, src cutil.Pointer, sib uint, s gocu.Streamer) error

AsyncCopy does an AsyncCopy with the mem manager.

func (*MemManager) Copy

func (m *MemManager) Copy(dest, src cutil.Pointer, sib uint) error

Copy copies sib bytes from src to dest.

func (*MemManager) Malloc

func (m *MemManager) Malloc(sib uint) (cuda cutil.Mem, err error)

Malloc allocates memory to either the host or the device. sib = size in bytes
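
A hedged sketch of allocating with a MemManager and copying a Go slice to the allocation with MemcpyUS. gocu.NewWorker is an assumed constructor, and it is assumed that the returned cutil.Mem exposes Ptr() unsafe.Pointer; check the gocu and cutil packages:

	func upload(d cudart.Device, host []float32) (cutil.Mem, error) {
		m, err := cudart.CreateMemManager(gocu.NewWorker(d)) // assumed constructor
		if err != nil {
			return nil, err
		}
		sib := uint(len(host) * 4)
		mem, err := m.Malloc(sib)
		if err != nil {
			return nil, err
		}
		var kind cudart.MemcpyKind
		// Default lets the runtime infer the direction under unified virtual addressing
		err = cudart.MemcpyUS(mem.Ptr(), unsafe.Pointer(&host[0]), sib, kind.Default())
		return mem, err
	}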

func (*MemManager) SetHost

func (m *MemManager) SetHost(onhost bool)

SetHost sets a host allocation flag. SetHost can be changed at anytime.

	-onhost=true:  all mallocs with the allocator will allocate to the host
	-onhost=false: all mallocs with the allocator will allocate to the device assigned to the allocator (default)

type MemType

type MemType C.cudaMemoryType

MemType is a typedefed C.cudaMemoryType

type Memcpy3DParams

type Memcpy3DParams C.struct_cudaMemcpy3DParms

Memcpy3DParams is used for Memcpy3D

func CreateMemcpy3DParams

func CreateMemcpy3DParams(srcArray *Array, srcPos Pos, srcPtr PitchedPtr, dstArray *Array, dstPos Pos, dstPtr PitchedPtr, ext Extent, kind MemcpyKind) (m *Memcpy3DParams)

CreateMemcpy3DParams builds the parameters for Memcpy3D. The source and destination pitched pointers (srcPtr, dstPtr) are optional and can be zero values.

type MemcpyKind

type MemcpyKind C.enum_cudaMemcpyKind

MemcpyKind holds the enum flags for memory copies. The flags can be set using its methods.

func (*MemcpyKind) Default

func (m *MemcpyKind) Default() MemcpyKind

Default returns MemcpyKind(C.cudaMemcpyDefault)

func (*MemcpyKind) DeviceToDevice

func (m *MemcpyKind) DeviceToDevice() MemcpyKind

DeviceToDevice returns MemcpyKind(C.cudaMemcpyDeviceToDevice)

func (*MemcpyKind) DeviceToHost

func (m *MemcpyKind) DeviceToHost() MemcpyKind

DeviceToHost returns MemcpyKind(C.cudaMemcpyDeviceToHost)

func (*MemcpyKind) HostToDevice

func (m *MemcpyKind) HostToDevice() MemcpyKind

HostToDevice returns MemcpyKind(C.cudaMemcpyHostToDevice)

func (*MemcpyKind) HostToHost

func (m *MemcpyKind) HostToHost() MemcpyKind

HostToHost returns MemcpyKind(C.cudaMemcpyHostToHost)

type PitchedPtr

type PitchedPtr C.struct_cudaPitchedPtr

PitchedPtr is a cudaPitchedPtr

func MakeCudaPitchedPtr

func MakeCudaPitchedPtr(ptr cutil.Pointer, pitch, xsize, ysize uint) PitchedPtr

MakeCudaPitchedPtr makes a pitched pointer

func (PitchedPtr) Pitch

func (p PitchedPtr) Pitch() uint

Pitch returns the pitch

func (PitchedPtr) Pointer

func (p PitchedPtr) Pointer() cutil.Pointer

Pointer returns the pitched pointer

func (*PitchedPtr) Ptr

func (p *PitchedPtr) Ptr() unsafe.Pointer

Ptr satisfies the cutil.Pointer interface

func (PitchedPtr) Xsize

func (p PitchedPtr) Xsize() uint

Xsize returns the xsize

func (PitchedPtr) Ysize

func (p PitchedPtr) Ysize() uint

Ysize returns the ysize

type Pos

type Pos C.struct_cudaPos

Pos is a cuda struct cudaPos

func MakeCudaPos

func MakeCudaPos(x, y, z uint) Pos

MakeCudaPos returns a cudaPos based on input parameters.

func (Pos) X

func (p Pos) X() uint

X returns x position

func (Pos) Y

func (p Pos) Y() uint

Y returns y position

func (Pos) Z

func (p Pos) Z() uint

Z returns z position

type Stream

type Stream struct {
	// contains filtered or unexported fields
}

Stream holds a C.cudaStream_t

func CreateBlockingPriorityStream

func CreateBlockingPriorityStream(priority int32) (*Stream, error)

CreateBlockingPriorityStream creates a blocking stream with the given priority

func CreateBlockingStream

func CreateBlockingStream() (*Stream, error)

CreateBlockingStream creates a blocking stream for the user

func CreateNonBlockingPriorityStream

func CreateNonBlockingPriorityStream(priority int32) (*Stream, error)

CreateNonBlockingPriorityStream creates a non-blocking stream with the given priority

func CreateNonBlockingStream

func CreateNonBlockingStream() (*Stream, error)

CreateNonBlockingStream creates a non-blocking stream

func ExternalWrapper

func ExternalWrapper(x unsafe.Pointer) *Stream

ExternalWrapper is used for other packages that might return a C.cudaStream_t

func (*Stream) AttachMemAsync

func (s *Stream) AttachMemAsync(mem cutil.Pointer, size uint, attachmode MemAttach) error

AttachMemAsync - Enqueues an operation in stream to specify stream association of length bytes of memory starting from devPtr. This function is a stream-ordered operation, meaning that it is dependent on, and will only take effect when, previous work in stream has completed. Any previous association is automatically replaced.

From Cuda documentation:

devPtr must point to one of the following types of memory:

managed memory declared using the __managed__ keyword or allocated with cudaMallocManaged.

a valid host-accessible region of system-allocated pageable memory. This type of memory may only be specified if the device associated with the stream reports a non-zero value for the device attribute cudaDevAttrPageableMemoryAccess.

For managed allocations, length must be either zero or the entire allocation's size. Both indicate that the entire allocation's stream association is being changed. Currently, it is not possible to change stream association for a portion of a managed allocation.

For pageable allocations, length must be non-zero.

The stream association is specified using flags which must be one of cudaMemAttachGlobal, cudaMemAttachHost or cudaMemAttachSingle. The default value for flags is cudaMemAttachSingle If the cudaMemAttachGlobal flag is specified, the memory can be accessed by any stream on any device. If the cudaMemAttachHost flag is specified, the program makes a guarantee that it won't access the memory on the device from any stream on a device that has a zero value for the device attribute cudaDevAttrConcurrentManagedAccess. If the cudaMemAttachSingle flag is specified and stream is associated with a device that has a zero value for the device attribute cudaDevAttrConcurrentManagedAccess, the program makes a guarantee that it will only access the memory on the device from stream. It is illegal to attach singly to the NULL stream, because the NULL stream is a virtual global stream and not a specific stream. An error will be returned in this case.

When memory is associated with a single stream, the Unified Memory system will allow CPU access to this memory region so long as all operations in stream have completed, regardless of whether other streams are active. In effect, this constrains exclusive ownership of the managed memory region by an active GPU to per-stream activity instead of whole-GPU activity.

Accessing memory on the device from streams that are not associated with it will produce undefined results. No error checking is performed by the Unified Memory system to ensure that kernels launched into other streams do not access this region.

It is a program's responsibility to order calls to cudaStreamAttachMemAsync via events, synchronization or other means to ensure legal access to memory at all times. Data visibility and coherency will be changed appropriately for all kernels which follow a stream-association change.

If stream is destroyed while data is associated with it, the association is removed and the association reverts to the default visibility of the allocation as specified at cudaMallocManaged. For __managed__ variables, the default association is always cudaMemAttachGlobal. Note that destroying a stream is an asynchronous operation, and as a result, the change to default association won't happen until all work in the stream has completed.
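
A sketch of attaching a managed allocation to a single stream (mem is a managed allocation, e.g. from MallocManagedGlobal or a MemManager; for managed memory, size must be 0 or the entire allocation's size):

	func attachSingle(s *cudart.Stream, mem cutil.Pointer, totalBytes uint) error {
		var attach cudart.MemAttach
		return s.AttachMemAsync(mem, totalBytes, attach.Single())
	}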

func (*Stream) Ptr

func (s *Stream) Ptr() unsafe.Pointer

Ptr returns an unsafe pointer to the hidden stream. This allows the stream to be used with other cuda libraries in other go packages; if a C function calls for a stream, you can cast the unsafe pointer to a C.cudaStream_t.

func (*Stream) Query

func (s *Stream) Query() (b bool, err error)

Query - Queries an asynchronous stream for completion status.

It returns true if ready, false if not.

If an error occurs, err will not be nil.
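
A sketch of polling a stream instead of blocking on Sync:

	func waitBusy(s *cudart.Stream) error {
		for {
			done, err := s.Query()
			if err != nil {
				return err
			}
			if done {
				return nil
			}
			time.Sleep(time.Millisecond) // do other host-side work or back off briefly
		}
	}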

func (*Stream) Sync

func (s *Stream) Sync() error

Sync synchronizes the stream

func (*Stream) WaitEvent

func (s *Stream) WaitEvent(event *Event, flags uint32) error

WaitEvent - Make a compute stream wait on an event.

Flags must be zero

type StreamCaptureMode

type StreamCaptureMode C.enum_cudaStreamCaptureMode

StreamCaptureMode - Possible modes for stream capture thread interactions

func (*StreamCaptureMode) Global

func (s *StreamCaptureMode) Global() StreamCaptureMode

Global sets s to Global and returns s

func (*StreamCaptureMode) Relaxed

func (s *StreamCaptureMode) Relaxed() StreamCaptureMode

Relaxed sets s to Relaxed and returns s

func (StreamCaptureMode) String

func (s StreamCaptureMode) String() string

func (*StreamCaptureMode) ThreadLocal

func (s *StreamCaptureMode) ThreadLocal() StreamCaptureMode

ThreadLocal sets s to ThreadLocal and returns s

type StreamCaptureStatus

type StreamCaptureStatus C.enum_cudaStreamCaptureStatus

StreamCaptureStatus - Possible stream capture statuses returned by cudaStreamIsCapturing. Even though this is meant for return values, it can still be used for switches.

func (*StreamCaptureStatus) Active

func (s *StreamCaptureStatus) Active() StreamCaptureStatus

Active sets s to Active and returns s

func (*StreamCaptureStatus) Invalid

func (s *StreamCaptureStatus) Invalid() StreamCaptureStatus

Invalid sets s to Invalid and returns s

func (*StreamCaptureStatus) None

func (s *StreamCaptureStatus) None() StreamCaptureStatus

None sets s to None and returns s

func (StreamCaptureStatus) String

func (s StreamCaptureStatus) String() string

Directories

Path	Synopsis
crtutil	Package crtutil allows cudart to work with Go's io Reader and Writer interfaces.
