C++#
C++ API Changes#
Transition from enqueueV2 to enqueueV3 for C++
1// Create RAII buffer manager object.
2samplesCommon::BufferManager buffers(mEngine);
3
4auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
5if (!context)
6{
7 return false;
8}
9
10// Pick a random digit to try to infer.
11srand(time(NULL));
12int32_t const digit = rand() % 10;
13
14// Read the input data into the managed buffers.
15// There should be just 1 input tensor.
16ASSERT(mParams.inputTensorNames.size() == 1);
17
18if (!processInput(buffers, mParams.inputTensorNames[0], digit))
19{
20 return false;
21}
22// Create a CUDA stream to execute this inference.
23cudaStream_t stream;
24CHECK(cudaStreamCreate(&stream));
25
26// Asynchronously copy data from host input buffers to device input buffers.
27buffers.copyInputToDeviceAsync(stream);
28
29// Asynchronously enqueue the inference work
30if (!context->enqueueV2(buffers.getDeviceBindings().data(), stream, nullptr))
31{
32 return false;
33}
34// Asynchronously copy data from device output buffers to host output buffers.
35buffers.copyOutputToHostAsync(stream);
36
37// Wait for the work in the stream to complete.
38CHECK(cudaStreamSynchronize(stream));
39
40// Release stream.
41CHECK(cudaStreamDestroy(stream));
1// Create RAII buffer manager object.
2samplesCommon::BufferManager buffers(mEngine);
3
4auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
5if (!context)
6{
7 return false;
8}
9
10for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
11{
12 auto const name = mEngine->getIOTensorName(i);
13 context->setTensorAddress(name, buffers.getDeviceBuffer(name));
14}
15
16// Pick a random digit to try to infer.
17srand(time(NULL));
18int32_t const digit = rand() % 10;
19
20// Read the input data into the managed buffers.
21// There should be just 1 input tensor.
22ASSERT(mParams.inputTensorNames.size() == 1);
23
24if (!processInput(buffers, mParams.inputTensorNames[0], digit))
25{
26 return false;
27}
28// Create a CUDA stream to execute this inference.
29cudaStream_t stream;
30CHECK(cudaStreamCreate(&stream));
31
32// Asynchronously copy data from host input buffers to device input buffers.
33buffers.copyInputToDeviceAsync(stream);
34
35// Asynchronously enqueue the inference work
36if (!context->enqueueV3(stream))
37{
38 return false;
39}
40
41// Asynchronously copy data from device output buffers to host output buffers.
42buffers.copyOutputToHostAsync(stream);
43
44// Wait for the work in the stream to complete.
45CHECK(cudaStreamSynchronize(stream));
46
47// Release stream.
48CHECK(cudaStreamDestroy(stream));
64-Bit Dimension Changes#
The dimensions held by Dims changed from int32_t to int64_t. However, in TensorRT 10.x, TensorRT will generally reject networks that use dimensions exceeding the range of int32_t. The tensor type returned by IShapeLayer is now DataType::kINT64. Use ICastLayer to cast the result to a tensor of type DataType::kINT32 if 32-bit dimensions are required.
Inspect code that bitwise copies to and from Dims to ensure it is correct for int64_t dimensions.
Added C++ APIs#
Enums
ActivationType::kGELU_ERF
ActivationType::kGELU_TANH
BuilderFlag::kREFIT_IDENTICAL
BuilderFlag::kSTRIP_PLAN
BuilderFlag::kWEIGHT_STREAMING
BuilderFlag::kSTRICT_NANS
DataType::kINT4
LayerType::kPLUGIN_V3
Types
APILanguage
Dims64
ExecutionContextAllocationStrategy
IGpuAsyncAllocator
InterfaceInfo
IPluginResource
IPluginV3
IStreamReader
IVersionedInterface
Methods and Properties
getInferLibBuildVersion
getInferLibMajorVersion
getInferLibMinorVersion
getInferLibPatchVersion
IBuilderConfig::setMaxNbTactics
IBuilderConfig::getMaxNbTactics
ICudaEngine::createRefitter
ICudaEngine::getMinimumWeightStreamingBudget
ICudaEngine::getStreamableWeightsSize
ICudaEngine::getWeightStreamingBudget
ICudaEngine::isDebugTensor
ICudaEngine::setWeightStreamingBudget
IExecutionContext::getDebugListener
IExecutionContext::getTensorDebugState
IExecutionContext::setAllTensorsDebugState
IExecutionContext::setDebugListener
IExecutionContext::setOutputTensorAddress
IExecutionContext::setTensorDebugState
IExecutionContext::updateDeviceMemorySizeForShapes
IGpuAllocator::allocateAsync
IGpuAllocator::deallocateAsync
INetworkDefinition::addPluginV3
INetworkDefinition::isDebugTensor
INetworkDefinition::markDebug
INetworkDefinition::unmarkDebug
IPluginRegistry::acquirePluginResource
IPluginRegistry::deregisterCreator
IPluginRegistry::getAllCreators
IPluginRegistry::getCreator
IPluginRegistry::registerCreator
IPluginRegistry::releasePluginResource
Removed C++ APIs#
The following removed C++ APIs are listed next to the API that supersedes each of them.
BuilderFlag::kENABLE_TACTIC_HEURISTIC
> Builder optimization level 2
BuilderFlag::kSTRICT_TYPES
> Use all three flags: kREJECT_EMPTY_ALGORITHMS, kDIRECT_IO, kPREFER_PRECISION_CONSTRAINTS
EngineCapability::kDEFAULT
>EngineCapability::kSTANDARD
EngineCapability::kSAFE_DLA
>EngineCapability::kDLA_STANDALONE
EngineCapability::kSAFE_GPU
>EngineCapability::kSAFETY
IAlgorithm::getAlgorithmIOInfo()
>IAlgorithm::getAlgorithmIOInfoByIndex()
IAlgorithmIOInfo::getTensorFormat()
> The strides, data type, and vectorization information are sufficient to identify tensor formats uniquely.
IBuilder::buildEngineWithConfig()
>IBuilder::buildSerializedNetwork()
IBuilder::destroy()
>delete ObjectName
IBuilder::getMaxBatchSize()
> Implicit batch support was removed
IBuilder::setMaxBatchSize()
> Implicit batch support was removed
IBuilderConfig::destroy()
>delete ObjectName
IBuilderConfig::getMaxWorkspaceSize()
>IBuilderConfig::getMemoryPoolLimit() with MemoryPoolType::kWORKSPACE
IBuilderConfig::getMinTimingIterations()
>IBuilderConfig::getAvgTimingIterations()
IBuilderConfig::setMaxWorkspaceSize()
>IBuilderConfig::setMemoryPoolLimit() with MemoryPoolType::kWORKSPACE
IBuilderConfig::setMinTimingIterations()
>IBuilderConfig::setAvgTimingIterations()
IConvolutionLayer::getDilation()
>IConvolutionLayer::getDilationNd()
IConvolutionLayer::getKernelSize()
>IConvolutionLayer::getKernelSizeNd()
IConvolutionLayer::getPadding()
>IConvolutionLayer::getPaddingNd()
IConvolutionLayer::getStride()
>IConvolutionLayer::getStrideNd()
IConvolutionLayer::setDilation()
>IConvolutionLayer::setDilationNd()
IConvolutionLayer::setKernelSize()
>IConvolutionLayer::setKernelSizeNd()
IConvolutionLayer::setPadding()
>IConvolutionLayer::setPaddingNd()
IConvolutionLayer::setStride()
>IConvolutionLayer::setStrideNd()
ICudaEngine::bindingIsInput()
>ICudaEngine::getTensorIOMode()
ICudaEngine::destroy()
>delete ObjectName
ICudaEngine::getBindingBytesPerComponent()
>ICudaEngine::getTensorBytesPerComponent()
ICudaEngine::getBindingComponentsPerElement()
>ICudaEngine::getTensorComponentsPerElement()
ICudaEngine::getBindingDataType()
>ICudaEngine::getTensorDataType()
ICudaEngine::getBindingDimensions()
>ICudaEngine::getTensorShape()
ICudaEngine::getBindingFormat()
>ICudaEngine::getTensorFormat()
ICudaEngine::getBindingFormatDesc()
>ICudaEngine::getTensorFormatDesc()
ICudaEngine::getBindingIndex()
> Name-based methods
ICudaEngine::getBindingName()
> Name-based methods
ICudaEngine::getBindingVectorizedDim()
>ICudaEngine::getTensorVectorizedDim()
ICudaEngine::getLocation()
>ITensor::getLocation()
ICudaEngine::getMaxBatchSize()
> Implicit batch support was removed
ICudaEngine::getNbBindings()
>ICudaEngine::getNbIOTensors()
ICudaEngine::getProfileDimensions()
>ICudaEngine::getProfileShape()
ICudaEngine::getProfileShapeValues()
>ICudaEngine::getShapeValues()
ICudaEngine::hasImplicitBatchDimension()
> Implicit batch support was removed
ICudaEngine::isExecutionBinding()
> No name-based equivalent replacement
ICudaEngine::isShapeBinding()
>ICudaEngine::isShapeInferenceIO()
IDeconvolutionLayer::getKernelSize()
>IDeconvolutionLayer::getKernelSizeNd()
IDeconvolutionLayer::getPadding()
>IDeconvolutionLayer::getPaddingNd()
IDeconvolutionLayer::getStride()
>IDeconvolutionLayer::getStrideNd()
IDeconvolutionLayer::setKernelSize()
>IDeconvolutionLayer::setKernelSizeNd()
IDeconvolutionLayer::setPadding()
>IDeconvolutionLayer::setPaddingNd()
IDeconvolutionLayer::setStride()
>IDeconvolutionLayer::setStrideNd()
IExecutionContext::destroy()
>delete ObjectName
IExecutionContext::enqueue()
>IExecutionContext::enqueueV3()
IExecutionContext::enqueueV2()
>IExecutionContext::enqueueV3()
IExecutionContext::execute()
>IExecutionContext::executeV2()
IExecutionContext::getBindingDimensions()
>IExecutionContext::getTensorShape()
IExecutionContext::getShapeBinding()
>IExecutionContext::getTensorAddress() or getOutputTensorAddress()
IExecutionContext::getStrides()
>IExecutionContext::getTensorStrides()
IExecutionContext::setBindingDimensions()
>IExecutionContext::setInputShape()
IExecutionContext::setInputShapeBinding()
>IExecutionContext::setInputTensorAddress() or setTensorAddress()
IExecutionContext::setOptimizationProfile()
>IExecutionContext::setOptimizationProfileAsync()
IFullyConnectedLayer
>IMatrixMultiplyLayer
IGpuAllocator::free()
>IGpuAllocator::deallocate()
IHostMemory::destroy()
>delete ObjectName
INetworkDefinition::addConvolution()
>INetworkDefinition::addConvolutionNd()
INetworkDefinition::addDeconvolution()
>INetworkDefinition::addDeconvolutionNd()
INetworkDefinition::addFullyConnected()
>INetworkDefinition::addMatrixMultiply()
INetworkDefinition::addPadding()
>INetworkDefinition::addPaddingNd()
INetworkDefinition::addPooling()
>INetworkDefinition::addPoolingNd()
INetworkDefinition::addRNNv2()
>INetworkDefinition::addLoop()
INetworkDefinition::destroy()
>delete ObjectName
INetworkDefinition::hasExplicitPrecision()
> Explicit precision support was removed in 10.0
INetworkDefinition::hasImplicitBatchDimension()
> Implicit batch support was removed
IOnnxConfig::destroy()
>delete ObjectName
IPaddingLayer::getPostPadding()
>IPaddingLayer::getPostPaddingNd()
IPaddingLayer::getPrePadding()
>IPaddingLayer::getPrePaddingNd()
IPaddingLayer::setPostPadding()
>IPaddingLayer::setPostPaddingNd()
IPaddingLayer::setPrePadding()
>IPaddingLayer::setPrePaddingNd()
IPoolingLayer::getPadding()
>IPoolingLayer::getPaddingNd()
IPoolingLayer::getStride()
>IPoolingLayer::getStrideNd()
IPoolingLayer::getWindowSize()
>IPoolingLayer::getWindowSizeNd()
IPoolingLayer::setPadding()
>IPoolingLayer::setPaddingNd()
IPoolingLayer::setStride()
>IPoolingLayer::setStrideNd()
IPoolingLayer::setWindowSize()
>IPoolingLayer::setWindowSizeNd()
IRefitter::destroy()
>delete ObjectName
IResizeLayer::getAlignCorners()
>IResizeLayer::getCoordinateTransformation()
IResizeLayer::setAlignCorners()
>IResizeLayer::setCoordinateTransformation()
IRuntime::deserializeCudaEngine(void const* blob, std::size_t size, IPluginFactory* pluginFactory)
> Use deserializeCudaEngine with two parameters
IRuntime::destroy()
>delete ObjectName
IRNNv2Layer
>ILoop
kNV_TENSORRT_VERSION_IMPL
>define NV_TENSORRT_VERSION_INT(major, minor, patch) ((major) *10000L + (minor) *100L + (patch) *1L). TensorRT version encoding was changed to accommodate a two-digit minor version.
NetworkDefinitionCreationFlag::kEXPLICIT_BATCH
> Support was removed in 10.0
NetworkDefinitionCreationFlag::kEXPLICIT_PRECISION
> Support was removed in 10.0
NV_TENSORRT_SONAME_MAJOR
>NV_TENSORRT_MAJOR
NV_TENSORRT_SONAME_MINOR
>NV_TENSORRT_MINOR
NV_TENSORRT_SONAME_PATCH
>NV_TENSORRT_PATCH
PaddingMode::kCAFFE_ROUND_DOWN
> Caffe support was removed
PaddingMode::kCAFFE_ROUND_UP
> Caffe support was removed
PreviewFeature::kDISABLE_EXTERNAL_TACTIC_SOURCES_FOR_CORE_0805
> External tactics are always disabled for core code
PreviewFeature::kFASTER_DYNAMIC_SHAPES_0805
> This flag is on by default
ProfilingVerbosity::kDEFAULT
>ProfilingVerbosity::kLAYER_NAMES_ONLY
ProfilingVerbosity::kVERBOSE
>ProfilingVerbosity::kDETAILED
ResizeMode
> Use InterpolationMode. Alias was removed.
RNNDirection
> RNN-related data structures were removed
RNNGateType
> RNN-related data structures were removed
RNNInputMode
> RNN-related data structures were removed
RNNOperation
> RNN-related data structures were removed
SampleMode::kDEFAULT
>SampleMode::kSTRICT_BOUNDS
SliceMode
> Use SampleMode. Alias was removed.
Removed C++ Plugins#
The following removed C++ plugins are listed next to the plugin or API that supersedes each of them.
createAnchorGeneratorPlugin()
>GridAnchorPluginCreator::createPlugin()
createBatchedNMSPlugin()
>BatchedNMSPluginCreator::createPlugin()
createInstanceNormalizationPlugin()
>InstanceNormalizationPluginCreator::createPlugin()
createNMSPlugin()
>NMSPluginCreator::createPlugin()
createNormalizePlugin()
>NormalizePluginCreator::createPlugin()
createPriorBoxPlugin()
>PriorBoxPluginCreator::createPlugin()
createRegionPlugin()
>RegionPluginCreator::createPlugin()
createReorgPlugin()
>ReorgPluginCreator::createPlugin()
createRPNROIPlugin()
>RPROIPluginCreator::createPlugin()
createSplitPlugin()
>INetworkDefinition::addSlice()
struct Quadruple
> Related plugins were removed