| Package | Description |
|---|---|
| org.bytedeco.cuda.cudart | |
| org.bytedeco.cuda.cudnn | |
| org.bytedeco.cuda.global | |
| org.bytedeco.cuda.nppc | |
| Modifier and Type | Method and Description |
|---|---|
CUstream_st |
CUDA_LAUNCH_PARAMS.hStream()
Stream identifier
|
CUstream_st |
cudaLaunchParams.stream()
Stream identifier
|
| Modifier and Type | Method and Description |
|---|---|
void |
CUstreamCallback.call(CUstream_st hStream,
int status,
Pointer userData) |
void |
cudaStreamCallback_t.call(CUstream_st stream,
int status,
Pointer userData) |
CUDA_LAUNCH_PARAMS |
CUDA_LAUNCH_PARAMS.hStream(CUstream_st setter) |
cudaLaunchParams |
cudaLaunchParams.stream(CUstream_st setter) |
| Modifier and Type | Method and Description |
|---|---|
CUstream_st |
cudnnDebug_t.stream() |
| Modifier and Type | Method and Description |
|---|---|
cudnnDebug_t |
cudnnDebug_t.stream(CUstream_st setter) |
| Modifier and Type | Field and Description |
|---|---|
static CUstream_st |
cudart.CU_STREAM_LEGACY |
static CUstream_st |
cudart.CU_STREAM_PER_THREAD |
static CUstream_st |
cudart.cudaStreamLegacy |
static CUstream_st |
cudart.cudaStreamPerThread |
| Modifier and Type | Method and Description |
|---|---|
static CUstream_st |
cudart.CU_STREAM_LEGACY()
Legacy stream handle
Stream handle that can be passed as a CUstream to use an implicit stream
with legacy synchronization behavior.
|
static CUstream_st |
cudart.CU_STREAM_PER_THREAD()
Per-thread stream handle
Stream handle that can be passed as a CUstream to use an implicit stream
with per-thread synchronization behavior.
|
static CUstream_st |
cudart.cudaStreamLegacy()
Legacy stream handle
Stream handle that can be passed as a cudaStream_t to use an implicit stream
with legacy synchronization behavior.
|
static CUstream_st |
cudart.cudaStreamPerThread()
Per-thread stream handle
Stream handle that can be passed as a cudaStream_t to use an implicit stream
with per-thread synchronization behavior.
|
static CUstream_st |
nppc.nppGetStream()
Get the NPP CUDA stream.
|
| Modifier and Type | Method and Description |
|---|---|
static int |
cublas.cublasGetMatrixAsync(int rows,
int cols,
int elemSize,
Pointer A,
int lda,
Pointer B,
int ldb,
CUstream_st stream) |
static int |
cublas.cublasGetStream_v2(cublasContext handle,
CUstream_st streamId) |
static int |
cublas.cublasGetVectorAsync(int n,
int elemSize,
Pointer devicePtr,
int incx,
Pointer hostPtr,
int incy,
CUstream_st stream) |
static int |
cublas.cublasLtMatmul(cublasLtContext lightHandle,
cublasLtMatmulDescOpaque_t computeDesc,
Pointer alpha,
Pointer A,
cublasLtMatrixLayoutOpaque_t Adesc,
Pointer B,
cublasLtMatrixLayoutOpaque_t Bdesc,
Pointer beta,
Pointer C,
cublasLtMatrixLayoutOpaque_t Cdesc,
Pointer D,
cublasLtMatrixLayoutOpaque_t Ddesc,
cublasLtMatmulAlgo_t algo,
Pointer workspace,
long workspaceSizeInBytes,
CUstream_st stream)
Execute matrix multiplication (D = alpha * op(A) * op(B) + beta * C).
|
static int |
cublas.cublasLtMatrixTransform(cublasLtContext lightHandle,
cublasLtMatrixTransformDescOpaque_t transformDesc,
Pointer alpha,
Pointer A,
cublasLtMatrixLayoutOpaque_t Adesc,
Pointer beta,
Pointer B,
cublasLtMatrixLayoutOpaque_t Bdesc,
Pointer C,
cublasLtMatrixLayoutOpaque_t Cdesc,
CUstream_st stream)
Matrix layout conversion helper (C = alpha * op(A) + beta * op(B))
Can be used to change memory order of data or to scale and shift the values.
|
static int |
cublas.cublasSetKernelStream(CUstream_st stream) |
static int |
cublas.cublasSetMatrixAsync(int rows,
int cols,
int elemSize,
Pointer A,
int lda,
Pointer B,
int ldb,
CUstream_st stream) |
static int |
cublas.cublasSetStream_v2(cublasContext handle,
CUstream_st streamId) |
static int |
cublas.cublasSetVectorAsync(int n,
int elemSize,
Pointer hostPtr,
int incx,
Pointer devicePtr,
int incy,
CUstream_st stream) |
static int |
cudart.cudaEventRecord(CUevent_st event,
CUstream_st stream)
\brief Records an event
Captures in \p event the contents of \p stream at the time of this call.
|
static int |
cudart.cudaGraphicsMapResources(int count,
cudaGraphicsResource resources,
CUstream_st stream)
\brief Map graphics resources for access by CUDA
Maps the \p count graphics resources in \p resources for access by CUDA.
|
static int |
cudart.cudaGraphicsUnmapResources(int count,
cudaGraphicsResource resources,
CUstream_st stream)
\brief Unmap graphics resources.
|
static int |
cudart.cudaGraphLaunch(CUgraphExec_st graphExec,
CUstream_st stream)
\brief Launches an executable graph in a stream
Executes \p graphExec in \p stream.
|
static int |
cudart.cudaLaunchCooperativeKernel(Pointer func,
dim3 gridDim,
dim3 blockDim,
Pointer args,
long sharedMem,
CUstream_st stream) |
static int |
cudart.cudaLaunchCooperativeKernel(Pointer func,
dim3 gridDim,
dim3 blockDim,
PointerPointer args,
long sharedMem,
CUstream_st stream)
\brief Launches a device function where thread blocks can cooperate and synchronize as they execute
The function invokes kernel \p func on \p gridDim (\p gridDim.x × \p gridDim.y
× \p gridDim.z) grid of blocks.
|
static int |
cudart.cudaLaunchHostFunc(CUstream_st stream,
cudaHostFn_t fn,
Pointer userData)
\brief Enqueues a host function call in a stream
Enqueues a host function to run in a stream.
|
static int |
cudart.cudaLaunchKernel(Pointer func,
dim3 gridDim,
dim3 blockDim,
Pointer args,
long sharedMem,
CUstream_st stream) |
static int |
cudart.cudaLaunchKernel(Pointer func,
dim3 gridDim,
dim3 blockDim,
PointerPointer args,
long sharedMem,
CUstream_st stream)
\brief Launches a device function
The function invokes kernel \p func on \p gridDim (\p gridDim.x × \p gridDim.y
× \p gridDim.z) grid of blocks.
|
static int |
cudart.cudaMemcpy2DAsync(Pointer dst,
long dpitch,
Pointer src,
long spitch,
long width,
long height,
int kind,
CUstream_st stream)
\brief Copies data between host and device
Copies a matrix (\p height rows of \p width bytes each) from the memory
area pointed to by \p src to the memory area pointed to by \p dst, where
\p kind specifies the direction of the copy, and must be one of
::cudaMemcpyHostToHost, ::cudaMemcpyHostToDevice, ::cudaMemcpyDeviceToHost,
::cudaMemcpyDeviceToDevice, or ::cudaMemcpyDefault.
|
static int |
cudart.cudaMemcpy2DFromArrayAsync(Pointer dst,
long dpitch,
cudaArray src,
long wOffset,
long hOffset,
long width,
long height,
int kind,
CUstream_st stream)
\brief Copies data between host and device
Copies a matrix (\p height rows of \p width bytes each) from the CUDA
array \p src starting at the upper left corner
(\p wOffset, \p hOffset) to the memory area pointed to by \p dst, where
\p kind specifies the direction of the copy, and must be one of
::cudaMemcpyHostToHost, ::cudaMemcpyHostToDevice, ::cudaMemcpyDeviceToHost,
::cudaMemcpyDeviceToDevice, or ::cudaMemcpyDefault.
|
static int |
cudart.cudaMemcpy2DToArrayAsync(cudaArray dst,
long wOffset,
long hOffset,
Pointer src,
long spitch,
long width,
long height,
int kind,
CUstream_st stream)
\brief Copies data between host and device
Copies a matrix (\p height rows of \p width bytes each) from the memory
area pointed to by \p src to the CUDA array \p dst starting at the
upper left corner (\p wOffset, \p hOffset) where \p kind specifies the
direction of the copy, and must be one of ::cudaMemcpyHostToHost,
::cudaMemcpyHostToDevice, ::cudaMemcpyDeviceToHost,
::cudaMemcpyDeviceToDevice, or ::cudaMemcpyDefault.
|
static int |
cudart.cudaMemcpy3DAsync(cudaMemcpy3DParms p,
CUstream_st stream)
\brief Copies data between 3D objects
|
static int |
cudart.cudaMemcpy3DPeerAsync(cudaMemcpy3DPeerParms p,
CUstream_st stream)
\brief Copies memory between devices asynchronously.
|
static int |
cudart.cudaMemcpyAsync(Pointer dst,
Pointer src,
long count,
int kind,
CUstream_st stream)
\brief Copies data between host and device
Copies \p count bytes from the memory area pointed to by \p src to the
memory area pointed to by \p dst, where \p kind specifies the
direction of the copy, and must be one of ::cudaMemcpyHostToHost,
::cudaMemcpyHostToDevice, ::cudaMemcpyDeviceToHost,
::cudaMemcpyDeviceToDevice, or ::cudaMemcpyDefault.
|
static int |
cudart.cudaMemcpyFromArrayAsync(Pointer dst,
cudaArray src,
long wOffset,
long hOffset,
long count,
int kind,
CUstream_st stream)
Deprecated.
Copies \p count bytes from the CUDA array \p src starting at the upper
left corner (\p wOffset, \p hOffset) to the memory area pointed to by \p dst,
where \p kind specifies the direction of the copy, and must be one of
::cudaMemcpyHostToHost, ::cudaMemcpyHostToDevice, ::cudaMemcpyDeviceToHost,
::cudaMemcpyDeviceToDevice, or ::cudaMemcpyDefault. Passing
::cudaMemcpyDefault is recommended, in which case the type of transfer is
inferred from the pointer values. However, ::cudaMemcpyDefault is only
allowed on systems that support unified virtual addressing.
::cudaMemcpyFromArrayAsync() is asynchronous with respect to the host, so
the call may return before the copy is complete. The copy can optionally
be associated to a stream by passing a non-zero \p stream argument. If \p
kind is ::cudaMemcpyHostToDevice or ::cudaMemcpyDeviceToHost and \p stream
is non-zero, the copy may overlap with operations in other streams.
|
static int |
cudart.cudaMemcpyFromSymbolAsync(Pointer dst,
Pointer symbol,
long count,
long offset,
int kind,
CUstream_st stream)
\brief Copies data from the given symbol on the device
Copies \p count bytes from the memory area pointed to by \p offset bytes
from the start of symbol \p symbol to the memory area pointed to by \p dst.
|
static int |
cudart.cudaMemcpyPeerAsync(Pointer dst,
int dstDevice,
Pointer src,
int srcDevice,
long count,
CUstream_st stream)
\brief Copies memory between two devices asynchronously.
|
static int |
cudart.cudaMemcpyToArrayAsync(cudaArray dst,
long wOffset,
long hOffset,
Pointer src,
long count,
int kind,
CUstream_st stream)
Deprecated.
Copies \p count bytes from the memory area pointed to by \p src to the
CUDA array \p dst starting at the upper left corner
(\p wOffset, \p hOffset), where \p kind specifies the
direction of the copy, and must be one of ::cudaMemcpyHostToHost,
::cudaMemcpyHostToDevice, ::cudaMemcpyDeviceToHost,
::cudaMemcpyDeviceToDevice, or ::cudaMemcpyDefault. Passing
::cudaMemcpyDefault is recommended, in which case the type of transfer is
inferred from the pointer values. However, ::cudaMemcpyDefault is only
allowed on systems that support unified virtual addressing.
::cudaMemcpyToArrayAsync() is asynchronous with respect to the host, so
the call may return before the copy is complete. The copy can optionally
be associated to a stream by passing a non-zero \p stream argument. If \p
kind is ::cudaMemcpyHostToDevice or ::cudaMemcpyDeviceToHost and \p stream
is non-zero, the copy may overlap with operations in other streams.
|
static int |
cudart.cudaMemcpyToSymbolAsync(Pointer symbol,
Pointer src,
long count,
long offset,
int kind,
CUstream_st stream)
\brief Copies data to the given symbol on the device
Copies \p count bytes from the memory area pointed to by \p src
to the memory area pointed to by \p offset bytes from the start of symbol
\p symbol.
|
static int |
cudart.cudaMemPrefetchAsync(Pointer devPtr,
long count,
int dstDevice,
CUstream_st stream)
\brief Prefetches memory to the specified destination device
Prefetches memory to the specified destination device.
|
static int |
cudart.cudaMemset2DAsync(Pointer devPtr,
long pitch,
int value,
long width,
long height,
CUstream_st stream)
\brief Initializes or sets device memory to a value
Sets to the specified value \p value a matrix (\p height rows of \p width
bytes each) pointed to by \p devPtr.
|
static int |
cudart.cudaMemset3DAsync(cudaPitchedPtr pitchedDevPtr,
int value,
cudaExtent extent,
CUstream_st stream)
\brief Initializes or sets device memory to a value
Initializes each element of a 3D array to the specified value \p value.
|
static int |
cudart.cudaMemsetAsync(Pointer devPtr,
int value,
long count,
CUstream_st stream)
\brief Initializes or sets device memory to a value
Fills the first \p count bytes of the memory area pointed to by \p devPtr
with the constant byte value \p value.
|
static int |
cudart.cudaSignalExternalSemaphoresAsync(CUexternalSemaphore_st extSemArray,
cudaExternalSemaphoreSignalParams paramsArray,
int numExtSems,
CUstream_st stream)
\brief Signals a set of external semaphore objects
Enqueues a signal operation on a set of externally allocated
semaphore objects in the specified stream.
|
static int |
cudart.cudaStreamAddCallback(CUstream_st stream,
cudaStreamCallback_t callback,
Pointer userData,
int flags)
\brief Add a callback to a compute stream
\note This function is slated for eventual deprecation and removal.
|
static int |
cudart.cudaStreamAttachMemAsync(CUstream_st stream,
Pointer devPtr) |
static int |
cudart.cudaStreamAttachMemAsync(CUstream_st stream,
Pointer devPtr,
long length,
int flags)
\brief Attach memory to a stream asynchronously
Enqueues an operation in \p stream to specify stream association of
\p length bytes of memory starting from \p devPtr.
|
static int |
cudart.cudaStreamBeginCapture(CUstream_st stream,
int mode)
\brief Begins graph capture on a stream
Begin graph capture on \p stream.
|
static int |
cudart.cudaStreamCopyAttributes(CUstream_st dst,
CUstream_st src)
\brief Copies attributes from source stream to destination stream.
|
static int |
cudart.cudaStreamCreate(CUstream_st pStream)
\brief Create an asynchronous stream
Creates a new asynchronous stream.
|
static int |
cudart.cudaStreamCreateWithFlags(CUstream_st pStream,
int flags)
\brief Create an asynchronous stream
Creates a new asynchronous stream.
|
static int |
cudart.cudaStreamCreateWithPriority(CUstream_st pStream,
int flags,
int priority)
\brief Create an asynchronous stream with the specified priority
Creates a stream with the specified priority and returns a handle in \p pStream.
|
static int |
cudart.cudaStreamDestroy(CUstream_st stream)
\brief Destroys and cleans up an asynchronous stream
Destroys and cleans up the asynchronous stream specified by \p stream.
|
static int |
cudart.cudaStreamEndCapture(CUstream_st stream,
CUgraph_st pGraph)
\brief Ends capture on a stream, returning the captured graph
End capture on \p stream, returning the captured graph via \p pGraph.
|
static int |
cudart.cudaStreamGetAttribute(CUstream_st hStream,
int attr,
cudaStreamAttrValue value_out)
\brief Queries stream attribute.
|
static int |
cudart.cudaStreamGetCaptureInfo(CUstream_st stream,
int[] pCaptureStatus,
long[] pId) |
static int |
cudart.cudaStreamGetCaptureInfo(CUstream_st stream,
IntBuffer pCaptureStatus,
LongBuffer pId) |
static int |
cudart.cudaStreamGetCaptureInfo(CUstream_st stream,
IntPointer pCaptureStatus,
LongPointer pId)
\brief Query capture status of a stream
Query the capture status of a stream and get a unique id representing
the capture sequence over the lifetime of the process.
|
static int |
cudart.cudaStreamGetFlags(CUstream_st hStream,
int[] flags) |
static int |
cudart.cudaStreamGetFlags(CUstream_st hStream,
IntBuffer flags) |
static int |
cudart.cudaStreamGetFlags(CUstream_st hStream,
IntPointer flags)
\brief Query the flags of a stream
Query the flags of a stream.
|
static int |
cudart.cudaStreamGetPriority(CUstream_st hStream,
int[] priority) |
static int |
cudart.cudaStreamGetPriority(CUstream_st hStream,
IntBuffer priority) |
static int |
cudart.cudaStreamGetPriority(CUstream_st hStream,
IntPointer priority)
\brief Query the priority of a stream
Query the priority of a stream.
|
static int |
cudart.cudaStreamIsCapturing(CUstream_st stream,
int[] pCaptureStatus) |
static int |
cudart.cudaStreamIsCapturing(CUstream_st stream,
IntBuffer pCaptureStatus) |
static int |
cudart.cudaStreamIsCapturing(CUstream_st stream,
IntPointer pCaptureStatus)
\brief Returns a stream's capture status
Return the capture status of \p stream via \p pCaptureStatus.
|
static int |
cudart.cudaStreamQuery(CUstream_st stream)
\brief Queries an asynchronous stream for completion status
Returns ::cudaSuccess if all operations in \p stream have
completed, or ::cudaErrorNotReady if not.
|
static int |
cudart.cudaStreamSetAttribute(CUstream_st hStream,
int attr,
cudaStreamAttrValue value)
\brief Sets stream attribute.
|
static int |
cudart.cudaStreamSynchronize(CUstream_st stream)
\brief Waits for stream tasks to complete
Blocks until \p stream has completed all operations.
|
static int |
cudart.cudaStreamWaitEvent(CUstream_st stream,
CUevent_st event,
int flags)
\brief Make a compute stream wait on an event
Makes all future work submitted to \p stream wait for all work captured in
\p event.
|
static int |
cudart.cudaWaitExternalSemaphoresAsync(CUexternalSemaphore_st extSemArray,
cudaExternalSemaphoreWaitParams paramsArray,
int numExtSems,
CUstream_st stream)
\brief Waits on a set of external semaphore objects
Enqueues a wait operation on a set of externally allocated
semaphore objects in the specified stream.
|
static int |
cudnn.cudnnGetStream(cudnnContext handle,
CUstream_st streamId) |
static int |
cudnn.cudnnSetStream(cudnnContext handle,
CUstream_st streamId) |
static int |
cudart.cuEventRecord(CUevent_st hEvent,
CUstream_st hStream)
\brief Records an event
Captures in \p hEvent the contents of \p hStream at the time of this call.
|
static int |
cufft.cufftSetStream(int plan,
CUstream_st stream) |
static int |
cudart.cuGraphicsMapResources(int count,
CUgraphicsResource_st resources,
CUstream_st hStream)
\brief Map graphics resources for access by CUDA
Maps the \p count graphics resources in \p resources for access by CUDA.
|
static int |
cudart.cuGraphicsUnmapResources(int count,
CUgraphicsResource_st resources,
CUstream_st hStream)
\brief Unmap graphics resources.
|
static int |
cudart.cuGraphLaunch(CUgraphExec_st hGraphExec,
CUstream_st hStream)
\brief Launches an executable graph in a stream
Executes \p hGraphExec in \p hStream.
|
static int |
cudart.cuLaunchCooperativeKernel(CUfunc_st f,
int gridDimX,
int gridDimY,
int gridDimZ,
int blockDimX,
int blockDimY,
int blockDimZ,
int sharedMemBytes,
CUstream_st hStream,
Pointer kernelParams) |
static int |
cudart.cuLaunchCooperativeKernel(CUfunc_st f,
int gridDimX,
int gridDimY,
int gridDimZ,
int blockDimX,
int blockDimY,
int blockDimZ,
int sharedMemBytes,
CUstream_st hStream,
PointerPointer kernelParams)
\brief Launches a CUDA function where thread blocks can cooperate and synchronize as they execute
Invokes the kernel \p f on a \p gridDimX x \p gridDimY x \p gridDimZ
grid of blocks.
|
static int |
cudart.cuLaunchGridAsync(CUfunc_st f,
int grid_width,
int grid_height,
CUstream_st hStream)
Deprecated.
Invokes the kernel \p f on a \p grid_width x \p grid_height grid of
blocks. Each block contains the number of threads specified by a previous
call to ::cuFuncSetBlockShape().
The block shape, dynamic shared memory size, and parameter information
must be set using
::cuFuncSetBlockShape(),
::cuFuncSetSharedSize(),
::cuParamSetSize(),
::cuParamSeti(),
::cuParamSetf(), and
::cuParamSetv()
prior to calling this function.
Launching a function via ::cuLaunchKernel() invalidates the function's
block shape, dynamic shared memory size, and parameter information. After
launching via cuLaunchKernel, this state must be re-initialized prior to
calling this function. Failure to do so results in undefined behavior.
|
static int |
cudart.cuLaunchHostFunc(CUstream_st hStream,
CUhostFn fn,
Pointer userData)
\brief Enqueues a host function call in a stream
Enqueues a host function to run in a stream.
|
static int |
cudart.cuLaunchKernel(CUfunc_st f,
int gridDimX,
int gridDimY,
int gridDimZ,
int blockDimX,
int blockDimY,
int blockDimZ,
int sharedMemBytes,
CUstream_st hStream,
Pointer kernelParams,
Pointer extra) |
static int |
cudart.cuLaunchKernel(CUfunc_st f,
int gridDimX,
int gridDimY,
int gridDimZ,
int blockDimX,
int blockDimY,
int blockDimZ,
int sharedMemBytes,
CUstream_st hStream,
PointerPointer kernelParams,
PointerPointer extra)
\brief Launches a CUDA function
Invokes the kernel \p f on a \p gridDimX x \p gridDimY x \p gridDimZ
grid of blocks.
|
static int |
cudart.cuMemcpy2DAsync(CUDA_MEMCPY2D pCopy,
CUstream_st hStream)
\brief Copies memory for 2D arrays
Perform a 2D memory copy according to the parameters specified in \p pCopy.
|
static int |
cudart.cuMemcpy3DAsync(CUDA_MEMCPY3D pCopy,
CUstream_st hStream)
\brief Copies memory for 3D arrays
Perform a 3D memory copy according to the parameters specified in
\p pCopy.
|
static int |
cudart.cuMemcpy3DPeerAsync(CUDA_MEMCPY3D_PEER pCopy,
CUstream_st hStream)
\brief Copies memory between contexts asynchronously.
|
static int |
cudart.cuMemcpyAsync(long dst,
long src,
long ByteCount,
CUstream_st hStream)
\brief Copies memory asynchronously
Copies data between two pointers.
|
static int |
cudart.cuMemcpyAtoHAsync(Pointer dstHost,
CUarray_st srcArray,
long srcOffset,
long ByteCount,
CUstream_st hStream)
\brief Copies memory from Array to Host
Copies from one 1D CUDA array to host memory.
|
static int |
cudart.cuMemcpyDtoDAsync(long dstDevice,
long srcDevice,
long ByteCount,
CUstream_st hStream)
\brief Copies memory from Device to Device
Copies from device memory to device memory.
|
static int |
cudart.cuMemcpyDtoHAsync(Pointer dstHost,
long srcDevice,
long ByteCount,
CUstream_st hStream)
\brief Copies memory from Device to Host
Copies from device to host memory.
|
static int |
cudart.cuMemcpyHtoAAsync(CUarray_st dstArray,
long dstOffset,
Pointer srcHost,
long ByteCount,
CUstream_st hStream)
\brief Copies memory from Host to Array
Copies from host memory to a 1D CUDA array.
|
static int |
cudart.cuMemcpyHtoDAsync(long dstDevice,
Pointer srcHost,
long ByteCount,
CUstream_st hStream)
\brief Copies memory from Host to Device
Copies from host memory to device memory.
|
static int |
cudart.cuMemcpyPeerAsync(long dstDevice,
CUctx_st dstContext,
long srcDevice,
CUctx_st srcContext,
long ByteCount,
CUstream_st hStream)
\brief Copies device memory between two contexts asynchronously.
|
static int |
cudart.cuMemPrefetchAsync(long devPtr,
long count,
int dstDevice,
CUstream_st hStream)
\brief Prefetches memory to the specified destination device
Prefetches memory to the specified destination device.
|
static int |
cudart.cuMemsetD16Async(long dstDevice,
short us,
long N,
CUstream_st hStream)
\brief Sets device memory
Sets the memory range of \p N 16-bit values to the specified value
\p us.
|
static int |
cudart.cuMemsetD2D16Async(long dstDevice,
long dstPitch,
short us,
long Width,
long Height,
CUstream_st hStream)
\brief Sets device memory
Sets the 2D memory range of \p Width 16-bit values to the specified value
\p us.
|
static int |
cudart.cuMemsetD2D32Async(long dstDevice,
long dstPitch,
int ui,
long Width,
long Height,
CUstream_st hStream)
\brief Sets device memory
Sets the 2D memory range of \p Width 32-bit values to the specified value
\p ui.
|
static int |
cudart.cuMemsetD2D8Async(long dstDevice,
long dstPitch,
byte uc,
long Width,
long Height,
CUstream_st hStream)
\brief Sets device memory
Sets the 2D memory range of \p Width 8-bit values to the specified value
\p uc.
|
static int |
cudart.cuMemsetD32Async(long dstDevice,
int ui,
long N,
CUstream_st hStream)
\brief Sets device memory
Sets the memory range of \p N 32-bit values to the specified value
\p ui.
|
static int |
cudart.cuMemsetD8Async(long dstDevice,
byte uc,
long N,
CUstream_st hStream)
\brief Sets device memory
Sets the memory range of \p N 8-bit values to the specified value
\p uc.
|
static int |
curand.curandSetStream(curandGenerator_st generator,
CUstream_st stream)
\brief Set the current stream for CURAND kernel launches.
|
static int |
cudart.cuSignalExternalSemaphoresAsync(CUextSemaphore_st extSemArray,
CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS paramsArray,
int numExtSems,
CUstream_st stream)
\brief Signals a set of external semaphore objects
Enqueues a signal operation on a set of externally allocated
semaphore objects in the specified stream.
|
static int |
cusolver.cusolverDnGetStream(cusolverDnContext handle,
CUstream_st streamId) |
static int |
cusolver.cusolverDnSetStream(cusolverDnContext handle,
CUstream_st streamId) |
static int |
cusolver.cusolverSpGetStream(cusolverSpContext handle,
CUstream_st streamId) |
static int |
cusolver.cusolverSpSetStream(cusolverSpContext handle,
CUstream_st streamId) |
static int |
cusparse.cusparseGetStream(cusparseContext handle,
CUstream_st streamId) |
static int |
cusparse.cusparseSetStream(cusparseContext handle,
CUstream_st streamId) |
static int |
cudart.cuStreamAddCallback(CUstream_st hStream,
CUstreamCallback callback,
Pointer userData,
int flags)
\brief Add a callback to a compute stream
\note This function is slated for eventual deprecation and removal.
|
static int |
cudart.cuStreamAttachMemAsync(CUstream_st hStream,
long dptr,
long length,
int flags)
\brief Attach memory to a stream asynchronously
Enqueues an operation in \p hStream to specify stream association of
\p length bytes of memory starting from \p dptr.
|
static int |
cudart.cuStreamBatchMemOp(CUstream_st stream,
int count,
CUstreamBatchMemOpParams paramArray,
int flags)
\brief Batch operations to synchronize the stream via memory operations
This is a batch version of ::cuStreamWaitValue32() and ::cuStreamWriteValue32().
|
static int |
cudart.cuStreamBeginCapture(CUstream_st hStream,
int mode)
\brief Begins graph capture on a stream
Begin graph capture on \p hStream.
|
static int |
cudart.cuStreamCopyAttributes(CUstream_st dst,
CUstream_st src)
\brief Copies attributes from source stream to destination stream
Copies attributes from source stream \p src to destination stream \p dst.
|
static int |
cudart.cuStreamCreate(CUstream_st phStream,
int Flags)
\brief Create a stream
Creates a stream and returns a handle in \p phStream.
|
static int |
cudart.cuStreamCreateWithPriority(CUstream_st phStream,
int flags,
int priority)
\brief Create a stream with the given priority
Creates a stream with the specified priority and returns a handle in \p phStream.
|
static int |
cudart.cuStreamDestroy(CUstream_st hStream)
\brief Destroys a stream
Destroys the stream specified by \p hStream.
|
static int |
cudart.cuStreamEndCapture(CUstream_st hStream,
CUgraph_st phGraph)
\brief Ends capture on a stream, returning the captured graph
End capture on \p hStream, returning the captured graph via \p phGraph.
|
static int |
cudart.cuStreamGetAttribute(CUstream_st hStream,
int attr,
CUstreamAttrValue value_out)
\brief Queries stream attribute.
|
static int |
cudart.cuStreamGetCaptureInfo(CUstream_st hStream,
int[] captureStatus,
int[] id) |
static int |
cudart.cuStreamGetCaptureInfo(CUstream_st hStream,
IntBuffer captureStatus,
IntBuffer id) |
static int |
cudart.cuStreamGetCaptureInfo(CUstream_st hStream,
IntPointer captureStatus,
IntPointer id)
\brief Query capture status of a stream
Query the capture status of a stream and get an id for
the capture sequence, which is unique over the lifetime of the process.
|
static int |
cudart.cuStreamGetCtx(CUstream_st hStream,
CUctx_st pctx)
\brief Query the context associated with a stream
Returns the CUDA context that the stream is associated with.
|
static int |
cudart.cuStreamGetFlags(CUstream_st hStream,
int[] flags) |
static int |
cudart.cuStreamGetFlags(CUstream_st hStream,
IntBuffer flags) |
static int |
cudart.cuStreamGetFlags(CUstream_st hStream,
IntPointer flags)
\brief Query the flags of a given stream
Query the flags of a stream created using ::cuStreamCreate or ::cuStreamCreateWithPriority
and return the flags in \p flags.
|
static int |
cudart.cuStreamGetPriority(CUstream_st hStream,
int[] priority) |
static int |
cudart.cuStreamGetPriority(CUstream_st hStream,
IntBuffer priority) |
static int |
cudart.cuStreamGetPriority(CUstream_st hStream,
IntPointer priority)
\brief Query the priority of a given stream
Query the priority of a stream created using ::cuStreamCreate or ::cuStreamCreateWithPriority
and return the priority in \p priority.
|
static int |
cudart.cuStreamIsCapturing(CUstream_st hStream,
int[] captureStatus) |
static int |
cudart.cuStreamIsCapturing(CUstream_st hStream,
IntBuffer captureStatus) |
static int |
cudart.cuStreamIsCapturing(CUstream_st hStream,
IntPointer captureStatus)
\brief Returns a stream's capture status
Return the capture status of \p hStream via \p captureStatus.
|
static int |
cudart.cuStreamQuery(CUstream_st hStream)
\brief Determine status of a compute stream
Returns ::CUDA_SUCCESS if all operations in the stream specified by
\p hStream have completed, or ::CUDA_ERROR_NOT_READY if not.
|
static int |
cudart.cuStreamSetAttribute(CUstream_st hStream,
int attr,
CUstreamAttrValue value)
\brief Sets stream attribute.
|
static int |
cudart.cuStreamSynchronize(CUstream_st hStream)
\brief Wait until a stream's tasks are completed
Waits until the device has completed all operations in the stream specified
by \p hStream.
|
static int |
cudart.cuStreamWaitEvent(CUstream_st hStream,
CUevent_st hEvent,
int Flags)
\brief Make a compute stream wait on an event
Makes all future work submitted to \p hStream wait for all work captured in
\p hEvent.
|
static int |
cudart.cuStreamWaitValue32(CUstream_st stream,
long addr,
int value,
int flags)
\brief Wait on a memory location
Enqueues a synchronization of the stream on the given memory location.
|
static int |
cudart.cuStreamWaitValue64(CUstream_st stream,
long addr,
int value,
int flags)
\brief Wait on a memory location
Enqueues a synchronization of the stream on the given memory location.
|
static int |
cudart.cuStreamWriteValue32(CUstream_st stream,
long addr,
int value,
int flags)
\brief Write a value to memory
Write a value to memory.
|
static int |
cudart.cuStreamWriteValue64(CUstream_st stream,
long addr,
int value,
int flags)
\brief Write a value to memory
Write a value to memory.
|
static int |
cudart.cuWaitExternalSemaphoresAsync(CUextSemaphore_st extSemArray,
CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS paramsArray,
int numExtSems,
CUstream_st stream)
\brief Waits on a set of external semaphore objects
Enqueues a wait operation on a set of externally allocated
semaphore objects in the specified stream.
|
static int |
nccl.ncclAllGather(Pointer sendbuff,
Pointer recvbuff,
long sendcount,
int datatype,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.ncclAllReduce(Pointer sendbuff,
Pointer recvbuff,
long count,
int datatype,
int op,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.ncclBcast(Pointer buff,
long count,
int datatype,
int root,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.ncclBroadcast(Pointer sendbuff,
Pointer recvbuff,
long count,
int datatype,
int root,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.ncclRecv(Pointer recvbuff,
long count,
int datatype,
int peer,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.ncclReduce(Pointer sendbuff,
Pointer recvbuff,
long count,
int datatype,
int op,
int root,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.ncclReduceScatter(Pointer sendbuff,
Pointer recvbuff,
long recvcount,
int datatype,
int op,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.ncclSend(Pointer sendbuff,
long count,
int datatype,
int peer,
ncclComm comm,
CUstream_st stream) |
static int |
nppc.nppSetStream(CUstream_st hStream)
Set the NPP CUDA stream.
|
static int |
nccl.pncclAllGather(Pointer sendbuff,
Pointer recvbuff,
long sendcount,
int datatype,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.pncclAllReduce(Pointer sendbuff,
Pointer recvbuff,
long count,
int datatype,
int op,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.pncclBcast(Pointer buff,
long count,
int datatype,
int root,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.pncclBroadcast(Pointer sendbuff,
Pointer recvbuff,
long count,
int datatype,
int root,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.pncclRecv(Pointer recvbuff,
long count,
int datatype,
int peer,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.pncclReduce(Pointer sendbuff,
Pointer recvbuff,
long count,
int datatype,
int op,
int root,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.pncclReduceScatter(Pointer sendbuff,
Pointer recvbuff,
long recvcount,
int datatype,
int op,
ncclComm comm,
CUstream_st stream) |
static int |
nccl.pncclSend(Pointer sendbuff,
long count,
int datatype,
int peer,
ncclComm comm,
CUstream_st stream) |
| Modifier and Type | Method and Description |
|---|---|
CUstream_st |
NppStreamContext.hStream() |
| Modifier and Type | Method and Description |
|---|---|
NppStreamContext |
NppStreamContext.hStream(CUstream_st setter) |
Copyright © 2020. All rights reserved.