C++#

C++ API Changes#

Transition from enqueueV2 to enqueueV3 for C++

enqueueV2() consumed an array of device bindings indexed by binding position, whereas enqueueV3() reads the tensor addresses bound by name on the execution context through setTensorAddress(). The first listing shows the original enqueueV2-based code; the second shows the equivalent enqueueV3-based code.

// Create RAII buffer manager object.
samplesCommon::BufferManager buffers(mEngine);

auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
if (!context)
{
    return false;
}

// Pick a random digit to try to infer.
srand(time(NULL));
int32_t const digit = rand() % 10;

// Read the input data into the managed buffers.
// There should be just 1 input tensor.
ASSERT(mParams.inputTensorNames.size() == 1);

if (!processInput(buffers, mParams.inputTensorNames[0], digit))
{
    return false;
}

// Create a CUDA stream to execute this inference.
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));

// Asynchronously copy data from host input buffers to device input buffers.
buffers.copyInputToDeviceAsync(stream);

// Asynchronously enqueue the inference work.
if (!context->enqueueV2(buffers.getDeviceBindings().data(), stream, nullptr))
{
    return false;
}

// Asynchronously copy data from device output buffers to host output buffers.
buffers.copyOutputToHostAsync(stream);

// Wait for the work in the stream to complete.
CHECK(cudaStreamSynchronize(stream));

// Release stream.
CHECK(cudaStreamDestroy(stream));

The equivalent enqueueV3-based code:

// Create RAII buffer manager object.
samplesCommon::BufferManager buffers(mEngine);

auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
if (!context)
{
    return false;
}

// Bind each I/O tensor address to the execution context by name.
for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
{
    auto const name = mEngine->getIOTensorName(i);
    context->setTensorAddress(name, buffers.getDeviceBuffer(name));
}

// Pick a random digit to try to infer.
srand(time(NULL));
int32_t const digit = rand() % 10;

// Read the input data into the managed buffers.
// There should be just 1 input tensor.
ASSERT(mParams.inputTensorNames.size() == 1);

if (!processInput(buffers, mParams.inputTensorNames[0], digit))
{
    return false;
}

// Create a CUDA stream to execute this inference.
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));

// Asynchronously copy data from host input buffers to device input buffers.
buffers.copyInputToDeviceAsync(stream);

// Asynchronously enqueue the inference work.
if (!context->enqueueV3(stream))
{
    return false;
}

// Asynchronously copy data from device output buffers to host output buffers.
buffers.copyOutputToHostAsync(stream);

// Wait for the work in the stream to complete.
CHECK(cudaStreamSynchronize(stream));

// Release stream.
CHECK(cudaStreamDestroy(stream));

64-Bit Dimension Changes#

The dimensions held by Dims changed from int32_t to int64_t. However, TensorRT 10.x generally rejects networks whose dimensions exceed the range of int32_t. The tensor produced by IShapeLayer is now of type DataType::kINT64; use ICastLayer to cast the result to DataType::kINT32 if 32-bit dimensions are required.

Inspect code that bitwise copies to and from Dims to ensure it is correct for int64_t dimensions.
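
For illustration, a minimal sketch of the cast, assuming network is an existing nvinfer1::INetworkDefinition* and input is an nvinfer1::ITensor* that already belongs to the network:

// IShapeLayer now produces a tensor of type DataType::kINT64.
nvinfer1::IShapeLayer* shapeLayer = network->addShape(*input);
nvinfer1::ITensor* shape64 = shapeLayer->getOutput(0);

// Cast back to 32-bit if downstream layers or host code expect kINT32 dimensions.
nvinfer1::ICastLayer* castLayer = network->addCast(*shape64, nvinfer1::DataType::kINT32);
nvinfer1::ITensor* shape32 = castLayer->getOutput(0);
// Use shape32 wherever a 32-bit shape tensor was used previously.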

Added C++ APIs#

Enums

  • ActivationType::kGELU_ERF

  • ActivationType::kGELU_TANH

  • BuilderFlag::kREFIT_IDENTICAL

  • BuilderFlag::kSTRIP_PLAN

  • BuilderFlag::kWEIGHT_STREAMING

  • BuilderFlag::kSTRICT_NANS

  • DataType::kINT4

  • LayerType::kPLUGIN_V3

Types

  • APILanguage

  • Dims64

  • ExecutionContextAllocationStrategy

  • IGpuAsyncAllocator

  • InterfaceInfo

  • IPluginResource

  • IPluginV3

  • IStreamReader

  • IVersionedInterface

Methods and Properties

  • getInferLibBuildVersion

  • getInferLibMajorVersion

  • getInferLibMinorVersion

  • getInferLibPatchVersion

  • IBuilderConfig::setMaxNbTactics

  • IBuilderConfig::getMaxNbTactics

  • ICudaEngine::createRefitter

  • ICudaEngine::getMinimumWeightStreamingBudget

  • ICudaEngine::getStreamableWeightsSize

  • ICudaEngine::getWeightStreamingBudget

  • ICudaEngine::isDebugTensor

  • ICudaEngine::setWeightStreamingBudget

  • IExecutionContext::getDebugListener

  • IExecutionContext::getTensorDebugState

  • IExecutionContext::setAllTensorsDebugState

  • IExecutionContext::setDebugListener

  • IExecutionContext::setOutputTensorAddress

  • IExecutionContext::setTensorDebugState

  • IExecutionContext::updateDeviceMemorySizeForShapes

  • IGpuAllocator::allocateAsync

  • IGpuAllocator::deallocateAsync

  • INetworkDefinition::addPluginV3

  • INetworkDefinition::isDebugTensor

  • INetworkDefinition::markDebug

  • INetworkDefinition::unmarkDebug

  • IPluginRegistry::acquirePluginResource

  • IPluginRegistry::deregisterCreator

  • IPluginRegistry::getAllCreators

  • IPluginRegistry::getCreator

  • IPluginRegistry::registerCreator

  • IPluginRegistry::releasePluginResource

Removed C++ APIs#

The following removed C++ APIs are listed next to the APIs that supersede them. A short migration sketch covering a few of the most common replacements appears after the list.

  • BuilderFlag::kENABLE_TACTIC_HEURISTIC > Builder optimization level 2

  • BuilderFlag::kSTRICT_TYPES > Use all three flags: kREJECT_EMPTY_ALGORITHMS, kDIRECT_IO, kPREFER_PRECISION_CONSTRAINTS

  • EngineCapability::kDEFAULT > EngineCapability::kSTANDARD

  • EngineCapability::kSAFE_DLA > EngineCapability::kDLA_STANDALONE

  • EngineCapability::kSAFE_GPU > EngineCapability::kSAFETY

  • IAlgorithm::getAlgorithmIOInfo() > IAlgorithm::getAlgorithmIOInfoByIndex()

  • IAlgorithmIOInfo::getTensorFormat() > The strides, data type, and vectorization information are sufficient to identify tensor formats uniquely.

  • IBuilder::buildEngineWithConfig() > IBuilder::buildSerializedNetwork()

  • IBuilder::destroy() > delete ObjectName

  • IBuilder::getMaxBatchSize() > Implicit batch support was removed

  • IBuilder::setMaxBatchSize() > Implicit batch support was removed

  • IBuilderConfig::destroy() > delete ObjectName

  • IBuilderConfig::getMaxWorkspaceSize() > IBuilderConfig::getMemoryPoolLimit() with MemoryPoolType::kWORKSPACE

  • IBuilderConfig::getMinTimingIterations() > IBuilderConfig::getAvgTimingIterations()

  • IBuilderConfig::setMaxWorkspaceSize() > IBuilderConfig::setMemoryPoolLimit() with MemoryPoolType::kWORKSPACE

  • IBuilderConfig::setMinTimingIterations() > IBuilderConfig::setAvgTimingIterations()

  • IConvolutionLayer::getDilation() > IConvolutionLayer::getDilationNd()

  • IConvolutionLayer::getKernelSize() > IConvolutionLayer::getKernelSizeNd()

  • IConvolutionLayer::getPadding() > IConvolutionLayer::getPaddingNd()

  • IConvolutionLayer::getStride() > IConvolutionLayer::getStrideNd()

  • IConvolutionLayer::setDilation() > IConvolutionLayer::setDilationNd()

  • IConvolutionLayer::setKernelSize() > IConvolutionLayer::setKernelSizeNd()

  • IConvolutionLayer::setPadding() > IConvolutionLayer::setPaddingNd()

  • IConvolutionLayer::setStride() > IConvolutionLayer::setStrideNd()

  • ICudaEngine::bindingIsInput() > ICudaEngine::getTensorIOMode()

  • ICudaEngine::destroy() > delete ObjectName

  • ICudaEngine::getBindingBytesPerComponent() > ICudaEngine::getTensorBytesPerComponent()

  • ICudaEngine::getBindingComponentsPerElement() > ICudaEngine::getTensorComponentsPerElement()

  • ICudaEngine::getBindingDataType() > ICudaEngine::getTensorDataType()

  • ICudaEngine::getBindingDimensions() > ICudaEngine::getTensorShape()

  • ICudaEngine::getBindingFormat() > ICudaEngine::getTensorFormat()

  • ICudaEngine::getBindingFormatDesc() > ICudaEngine::getTensorFormatDesc()

  • ICudaEngine::getBindingIndex() > Name-based methods

  • ICudaEngine::getBindingName() > Name-based methods

  • ICudaEngine::getBindingVectorizedDim() > ICudaEngine::getTensorVectorizedDim()

  • ICudaEngine::getLocation() > ITensor::getLocation()

  • ICudaEngine::getMaxBatchSize() > Implicit batch support was removed

  • ICudaEngine::getNbBindings() > ICudaEngine::getNbIOTensors()

  • ICudaEngine::getProfileDimensions() > ICudaEngine::getProfileShape()

  • ICudaEngine::getProfileShapeValues() > ICudaEngine::getShapeValues()

  • ICudaEngine::hasImplicitBatchDimension() > Implicit batch support was removed

  • ICudaEngine::isExecutionBinding() > No name-based equivalent replacement

  • ICudaEngine::isShapeBinding() > ICudaEngine::isShapeInferenceIO()

  • IDeconvolutionLayer::getKernelSize() > IDeconvolutionLayer::getKernelSizeNd()

  • IDeconvolutionLayer::getPadding() > IDeconvolutionLayer::getPaddingNd()

  • IDeconvolutionLayer::getStride() > IDeconvolutionLayer::getStrideNd()

  • IDeconvolutionLayer::setKernelSize() > IDeconvolutionLayer::setKernelSizeNd()

  • IDeconvolutionLayer::setPadding() > IDeconvolutionLayer::setPaddingNd()

  • IDeconvolutionLayer::setStride() > IDeconvolutionLayer::setStrideNd()

  • IExecutionContext::destroy() > delete ObjectName

  • IExecutionContext::enqueue() > IExecutionContext::enqueueV3()

  • IExecutionContext::enqueueV2() > IExecutionContext::enqueueV3()

  • IExecutionContext::execute() > IExecutionContext::executeV2()

  • IExecutionContext::getBindingDimensions() > IExecutionContext::getTensorShape()

  • IExecutionContext::getShapeBinding() > IExecutionContext::getTensorAddress() or getOutputTensorAddress()

  • IExecutionContext::getStrides() > IExecutionContext::getTensorStrides()

  • IExecutionContext::setBindingDimensions() > IExecutionContext::setInputShape()

  • IExecutionContext::setInputShapeBinding() > IExecutionContext::setInputTensorAddress() or setTensorAddress()

  • IExecutionContext::setOptimizationProfile() > IExecutionContext::setOptimizationProfileAsync()

  • IFullyConnectedLayer > IMatrixMultiplyLayer

  • IGpuAllocator::free() > IGpuAllocator::deallocate()

  • IHostMemory::destroy() > delete ObjectName

  • INetworkDefinition::addConvolution() > INetworkDefinition::addConvolutionNd()

  • INetworkDefinition::addDeconvolution() > INetworkDefinition::addDeconvolutionNd()

  • INetworkDefinition::addFullyConnected() > INetworkDefinition::addMatrixMultiply()

  • INetworkDefinition::addPadding() > INetworkDefinition::addPaddingNd()

  • INetworkDefinition::addPooling() > INetworkDefinition::addPoolingNd()

  • INetworkDefinition::addRNNv2() > INetworkDefinition::addLoop()

  • INetworkDefinition::destroy() > delete ObjectName

  • INetworkDefinition::hasExplicitPrecision() > Explicit precision support was removed in 10.0

  • INetworkDefinition::hasImplicitBatchDimension() > Implicit batch support was removed

  • IOnnxConfig::destroy() > delete ObjectName

  • IPaddingLayer::getPostPadding() > IPaddingLayer::getPostPaddingNd()

  • IPaddingLayer::getPrePadding() > IPaddingLayer::getPrePaddingNd()

  • IPaddingLayer::setPostPadding() > IPaddingLayer::setPostPaddingNd()

  • IPaddingLayer::setPrePadding() > IPaddingLayer::setPrePaddingNd()

  • IPoolingLayer::getPadding() > IPoolingLayer::getPaddingNd()

  • IPoolingLayer::getStride() > IPoolingLayer::getStrideNd()

  • IPoolingLayer::getWindowSize() > IPoolingLayer::getWindowSizeNd()

  • IPoolingLayer::setPadding() > IPoolingLayer::setPaddingNd()

  • IPoolingLayer::setStride() > IPoolingLayer::setStrideNd()

  • IPoolingLayer::setWindowSize() > IPoolingLayer::setWindowSizeNd()

  • IRefitter::destroy() > delete ObjectName

  • IResizeLayer::getAlignCorners() > IResizeLayer::getCoordinateTransformation()

  • IResizeLayer::setAlignCorners() > IResizeLayer::setCoordinateTransformation()

  • IRuntime::deserializeCudaEngine(void const* blob, std::size_t size, IPluginFactory* pluginFactory) > Use deserializeCudaEngine with two parameters

  • IRuntime::destroy() > delete ObjectName

  • IRNNv2Layer > ILoop

  • kNV_TENSORRT_VERSION_IMPL > define NV_TENSORRT_VERSION_INT(major, minor, patch) ((major) *10000L + (minor) *100L + (patch) *1L). TensorRT version encoding was changed to accommodate a two-digit minor version.

  • NetworkDefinitionCreationFlag::kEXPLICIT_BATCH > The flag was removed; networks always use an explicit batch dimension in 10.0

  • NetworkDefinitionCreationFlag::kEXPLICIT_PRECISION > Support was removed in 10.0

  • NV_TENSORRT_SONAME_MAJOR > NV_TENSORRT_MAJOR

  • NV_TENSORRT_SONAME_MINOR > NV_TENSORRT_MINOR

  • NV_TENSORRT_SONAME_PATCH > NV_TENSORRT_PATCH

  • PaddingMode::kCAFFE_ROUND_DOWN > Caffe support was removed

  • PaddingMode::kCAFFE_ROUND_UP > Caffe support was removed

  • PreviewFeature::kDISABLE_EXTERNAL_TACTIC_SOURCES_FOR_CORE_0805 > External tactics are always disabled for core code

  • PreviewFeature::kFASTER_DYNAMIC_SHAPES_0805 > The behavior is now always enabled

  • ProfilingVerbosity::kDEFAULT > ProfilingVerbosity::kLAYER_NAMES_ONLY

  • ProfilingVerbosity::kVERBOSE > ProfilingVerbosity::kDETAILED

  • ResizeMode > Use InterpolationMode. Alias was removed.

  • RNNDirection > RNN-related data structures were removed

  • RNNGateType > RNN-related data structures were removed

  • RNNInputMode > RNN-related data structures were removed

  • RNNOperation > RNN-related data structures were removed

  • SampleMode::kDEFAULT > SampleMode::kSTRICT_BOUNDS

  • SliceMode > Use SampleMode. Alias was removed.
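
As a rough illustration of a few of the most common replacements above (workspace limits, name-based tensor queries, and object destruction), the following sketch assumes config (nvinfer1::IBuilderConfig*), engine (nvinfer1::ICudaEngine*), and context (nvinfer1::IExecutionContext*) already exist:

// setMaxWorkspaceSize() was removed; use a memory-pool limit instead.
config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, 1ULL << 30);

// Binding indices were removed; query I/O tensors by name instead.
char const* tensorName = engine->getIOTensorName(0);
nvinfer1::Dims shape = engine->getTensorShape(tensorName);
nvinfer1::DataType type = engine->getTensorDataType(tensorName);
bool isInput = engine->getTensorIOMode(tensorName) == nvinfer1::TensorIOMode::kINPUT;

// destroy() methods were removed; delete objects directly or hold them in
// smart pointers.
delete context;
delete engine;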

Removed C++ Plugins#

The following removed C++ plugin functions are listed next to the plugin creators that supersede them. A sketch of the registry-based replacement appears after the list.

  • createAnchorGeneratorPlugin() > GridAnchorPluginCreator::createPlugin()

  • createBatchedNMSPlugin() > BatchedNMSPluginCreator::createPlugin()

  • createInstanceNormalizationPlugin() > InstanceNormalizationPluginCreator::createPlugin()

  • createNMSPlugin() > NMSPluginCreator::createPlugin()

  • createNormalizePlugin() > NormalizePluginCreator::createPlugin()

  • createPriorBoxPlugin() > PriorBoxPluginCreator::createPlugin()

  • createRegionPlugin() > RegionPluginCreator::createPlugin()

  • createReorgPlugin() > ReorgPluginCreator::createPlugin()

  • createRPNROIPlugin() > RPROIPluginCreator::createPlugin()

  • createSplitPlugin() > INetworkDefinition::addSlice()

  • struct Quadruple > Related plugins were removed
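
In place of the removed helper functions, plugins are created through their registered creators. The following is a sketch only: the creator name "GridAnchor_TRT", version "1", the tensor name, and the (empty) field collection are assumptions that must be checked against the actual plugin, and initLibNvInferPlugins() must have been called so the creators are registered.

// Look up the registered creator instead of calling a removed create*Plugin() helper.
auto* creator = getPluginRegistry()->getPluginCreator("GridAnchor_TRT", "1");
if (creator != nullptr)
{
    // Populate the PluginFieldCollection with the fields this plugin expects;
    // creator->getFieldNames() lists them. Left empty here for brevity.
    nvinfer1::PluginFieldCollection fc{};
    nvinfer1::IPluginV2* plugin = creator->createPlugin("anchor_generator", &fc);
    // Add the plugin to the network (for example, with INetworkDefinition::addPluginV2()),
    // then release the local reference with plugin->destroy() once the network owns it.
}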