C++
C++ API Changes
Transition from enqueueV2 to enqueueV3 for C++
The two listings below show the same inference routine before and after the migration. With enqueueV3, each I/O tensor's device address is registered by name through IExecutionContext::setTensorAddress, so the enqueue call no longer takes a bindings array.
Before the migration, using enqueueV2:
// Create RAII buffer manager object.
samplesCommon::BufferManager buffers(mEngine);

auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
if (!context)
{
    return false;
}

// Pick a random digit to try to infer.
srand(time(NULL));
int32_t const digit = rand() % 10;

// Read the input data into the managed buffers.
// There should be just 1 input tensor.
ASSERT(mParams.inputTensorNames.size() == 1);

if (!processInput(buffers, mParams.inputTensorNames[0], digit))
{
    return false;
}

// Create a CUDA stream to execute this inference.
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));

// Asynchronously copy data from host input buffers to device input buffers.
buffers.copyInputToDeviceAsync(stream);

// Asynchronously enqueue the inference work.
if (!context->enqueueV2(buffers.getDeviceBindings().data(), stream, nullptr))
{
    return false;
}

// Asynchronously copy data from device output buffers to host output buffers.
buffers.copyOutputToHostAsync(stream);

// Wait for the work in the stream to complete.
CHECK(cudaStreamSynchronize(stream));

// Release stream.
CHECK(cudaStreamDestroy(stream));
After the migration, using enqueueV3:

// Create RAII buffer manager object.
samplesCommon::BufferManager buffers(mEngine);

auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
if (!context)
{
    return false;
}

// Associate each I/O tensor, by name, with its device buffer.
for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
{
    auto const name = mEngine->getIOTensorName(i);
    context->setTensorAddress(name, buffers.getDeviceBuffer(name));
}

// Pick a random digit to try to infer.
srand(time(NULL));
int32_t const digit = rand() % 10;

// Read the input data into the managed buffers.
// There should be just 1 input tensor.
ASSERT(mParams.inputTensorNames.size() == 1);

if (!processInput(buffers, mParams.inputTensorNames[0], digit))
{
    return false;
}

// Create a CUDA stream to execute this inference.
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));

// Asynchronously copy data from host input buffers to device input buffers.
buffers.copyInputToDeviceAsync(stream);

// Asynchronously enqueue the inference work.
if (!context->enqueueV3(stream))
{
    return false;
}

// Asynchronously copy data from device output buffers to host output buffers.
buffers.copyOutputToHostAsync(stream);

// Wait for the work in the stream to complete.
CHECK(cudaStreamSynchronize(stream));

// Release stream.
CHECK(cudaStreamDestroy(stream));
64-Bit Dimension Changes
The dimensions held by Dims changed from int32_t to int64_t. However, TensorRT 10.x still generally rejects networks that use dimensions exceeding the range of int32_t. The tensor type returned by IShapeLayer is now DataType::kINT64; use ICastLayer to cast the result to a tensor of type DataType::kINT32 if 32-bit dimensions are required.
Inspect code that bitwise copies to and from Dims to ensure it is correct for int64_t dimensions.
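For example, a minimal sketch of recovering 32-bit dimensions from a shape tensor might look like the following; the helper name and the surrounding network and input objects are illustrative assumptions:

// Sketch: produce the shape of `input` as a 32-bit tensor.
// Assumes `network` is an nvinfer1::INetworkDefinition and `input` already
// belongs to that network.
nvinfer1::ITensor* shapeAsInt32(nvinfer1::INetworkDefinition& network, nvinfer1::ITensor& input)
{
    // IShapeLayer now produces a tensor of type DataType::kINT64.
    nvinfer1::IShapeLayer* shape = network.addShape(input);
    // Cast it back down where 32-bit dimensions are required.
    nvinfer1::ICastLayer* cast = network.addCast(*shape->getOutput(0), nvinfer1::DataType::kINT32);
    return cast->getOutput(0);
}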
Added C++ APIs
Enums
ActivationType::kGELU_ERF
ActivationType::kGELU_TANH
BuilderFlag::kREFIT_IDENTICAL
BuilderFlag::kSTRIP_PLAN
BuilderFlag::kWEIGHT_STREAMING
BuilderFlag::kSTRICT_NANS
DataType::kINT4
LayerType::kPLUGIN_V3
Types
APILanguage
Dims64
ExecutionContextAllocationStrategy
IGpuAsyncAllocator
InterfaceInfo
IPluginResource
IPluginV3
IStreamReader
IVersionedInterface
Methods and Properties
getInferLibBuildVersion
getInferLibMajorVersion
getInferLibMinorVersion
getInferLibPatchVersion
IBuilderConfig::setMaxNbTactics
IBuilderConfig::getMaxNbTactics
ICudaEngine::createRefitter
ICudaEngine::getMinimumWeightStreamingBudget
ICudaEngine::getStreamableWeightsSize
ICudaEngine::getWeightStreamingBudget
ICudaEngine::isDebugTensor
ICudaEngine::setWeightStreamingBudget
IExecutionContext::getDebugListener
IExecutionContext::getTensorDebugState
IExecutionContext::setAllTensorsDebugState
IExecutionContext::setDebugListener
IExecutionContext::setOutputTensorAddress
IExecutionContext::setTensorDebugState
IExecutionContext::updateDeviceMemorySizeForShapes
IGpuAllocator::allocateAsync
IGpuAllocator::deallocateAsync
INetworkDefinition::addPluginV3
INetworkDefinition::isDebugTensor
INetworkDefinition::markDebug
INetworkDefinition::unmarkDebug
IPluginRegistry::acquirePluginResource
IPluginRegistry::deregisterCreator
IPluginRegistry::getAllCreators
IPluginRegistry::getCreator
IPluginRegistry::registerCreator
IPluginRegistry::releasePluginResource
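As one illustration of the additions above, the new weight streaming controls on ICudaEngine might be exercised as in the following sketch; it assumes the engine was built with BuilderFlag::kWEIGHT_STREAMING and that the budget methods take and return byte counts:

#include <algorithm>
#include <cstdint>
#include "NvInfer.h"

// Sketch: choose a weight streaming budget for a deserialized engine.
// Assumes the plan was built with BuilderFlag::kWEIGHT_STREAMING and that the
// budget-related methods operate on byte counts.
void configureWeightStreaming(nvinfer1::ICudaEngine& engine)
{
    int64_t const streamable = engine.getStreamableWeightsSize();
    int64_t const minimum = engine.getMinimumWeightStreamingBudget();

    // Illustrative policy: keep roughly half of the streamable weights resident
    // on the GPU, but never less than the engine's required minimum.
    int64_t const budget = std::max(minimum, streamable / 2);
    engine.setWeightStreamingBudget(budget);
}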
Removed C++ APIs
Each removed C++ API is listed alongside the API or approach that supersedes it.
BuilderFlag::kENABLE_TACTIC_HEURISTIC > Builder optimization level 2
BuilderFlag::kSTRICT_TYPES > Use all three flags: kREJECT_EMPTY_ALGORITHMS, kDIRECT_IO, kPREFER_PRECISION_CONSTRAINTS
EngineCapability::kDEFAULT > EngineCapability::kSTANDARD
EngineCapability::kSAFE_DLA > EngineCapability::kDLA_STANDALONE
EngineCapability::kSAFE_GPU > EngineCapability::kSAFETY
IAlgorithm::getAlgorithmIOInfo() > IAlgorithm::getAlgorithmIOInfoByIndex()
IAlgorithmIOInfo::getTensorFormat() > The strides, data type, and vectorization information are sufficient to identify tensor formats uniquely.
IBuilder::buildEngineWithConfig() > IBuilder::buildSerializedNetwork()
IBuilder::destroy() > delete ObjectName
IBuilder::getMaxBatchSize() > Implicit batch support was removed
IBuilder::setMaxBatchSize() > Implicit batch support was removed
IBuilderConfig::destroy() > delete ObjectName
IBuilderConfig::getMaxWorkspaceSize() > IBuilderConfig::getMemoryPoolLimit() with MemoryPoolType::kWORKSPACE
IBuilderConfig::getMinTimingIterations() > IBuilderConfig::getAvgTimingIterations()
IBuilderConfig::setMaxWorkspaceSize() > IBuilderConfig::setMemoryPoolLimit() with MemoryPoolType::kWORKSPACE
IBuilderConfig::setMinTimingIterations() > IBuilderConfig::setAvgTimingIterations()
IConvolutionLayer::getDilation() > IConvolutionLayer::getDilationNd()
IConvolutionLayer::getKernelSize() > IConvolutionLayer::getKernelSizeNd()
IConvolutionLayer::getPadding() > IConvolutionLayer::getPaddingNd()
IConvolutionLayer::getStride() > IConvolutionLayer::getStrideNd()
IConvolutionLayer::setDilation() > IConvolutionLayer::setDilationNd()
IConvolutionLayer::setKernelSize() > IConvolutionLayer::setKernelSizeNd()
IConvolutionLayer::setPadding() > IConvolutionLayer::setPaddingNd()
IConvolutionLayer::setStride() > IConvolutionLayer::setStrideNd()
ICudaEngine::bindingIsInput() > ICudaEngine::getTensorIOMode()
ICudaEngine::destroy() > delete ObjectName
ICudaEngine::getBindingBytesPerComponent() > ICudaEngine::getTensorBytesPerComponent()
ICudaEngine::getBindingComponentsPerElement() > ICudaEngine::getTensorComponentsPerElement()
ICudaEngine::getBindingDataType() > ICudaEngine::getTensorDataType()
ICudaEngine::getBindingDimensions() > ICudaEngine::getTensorShape()
ICudaEngine::getBindingFormat() > ICudaEngine::getTensorFormat()
ICudaEngine::getBindingFormatDesc() > ICudaEngine::getTensorFormatDesc()
ICudaEngine::getBindingIndex() > Name-based methods
ICudaEngine::getBindingName() > Name-based methods
ICudaEngine::getBindingVectorizedDim() > ICudaEngine::getTensorVectorizedDim()
ICudaEngine::getLocation() > ITensor::getLocation()
ICudaEngine::getMaxBatchSize() > Implicit batch support was removed
ICudaEngine::getNbBindings() > ICudaEngine::getNbIOTensors()
ICudaEngine::getProfileDimensions() > ICudaEngine::getProfileShape()
ICudaEngine::getProfileShapeValues() > ICudaEngine::getShapeValues()
ICudaEngine::hasImplicitBatchDimension() > Implicit batch support was removed
ICudaEngine::isExecutionBinding() > No name-based equivalent replacement
ICudaEngine::isShapeBinding() > ICudaEngine::isShapeInferenceIO()
IDeconvolutionLayer::getKernelSize() > IDeconvolutionLayer::getKernelSizeNd()
IDeconvolutionLayer::getPadding() > IDeconvolutionLayer::getPaddingNd()
IDeconvolutionLayer::getStride() > IDeconvolutionLayer::getStrideNd()
IDeconvolutionLayer::setKernelSize() > IDeconvolutionLayer::setKernelSizeNd()
IDeconvolutionLayer::setPadding() > IDeconvolutionLayer::setPaddingNd()
IDeconvolutionLayer::setStride() > IDeconvolutionLayer::setStrideNd()
IExecutionContext::destroy() > delete ObjectName
IExecutionContext::enqueue() > IExecutionContext::enqueueV3()
IExecutionContext::enqueueV2() > IExecutionContext::enqueueV3()
IExecutionContext::execute() > IExecutionContext::executeV2()
IExecutionContext::getBindingDimensions() > IExecutionContext::getTensorShape()
IExecutionContext::getShapeBinding() > IExecutionContext::getTensorAddress() or getOutputTensorAddress()
IExecutionContext::getStrides() > IExecutionContext::getTensorStrides()
IExecutionContext::setBindingDimensions() > IExecutionContext::setInputShape()
IExecutionContext::setInputShapeBinding() > IExecutionContext::setInputTensorAddress() or setTensorAddress()
IExecutionContext::setOptimizationProfile() > IExecutionContext::setOptimizationProfileAsync()
IFullyConnectedLayer > IMatrixMultiplyLayer
IGpuAllocator::free() > IGpuAllocator::deallocate()
IHostMemory::destroy() > delete ObjectName
INetworkDefinition::addConvolution() > INetworkDefinition::addConvolutionNd()
INetworkDefinition::addDeconvolution() > INetworkDefinition::addDeconvolutionNd()
INetworkDefinition::addFullyConnected() > INetworkDefinition::addMatrixMultiply()
INetworkDefinition::addPadding() > INetworkDefinition::addPaddingNd()
INetworkDefinition::addPooling() > INetworkDefinition::addPoolingNd()
INetworkDefinition::addRNNv2() > INetworkDefinition::addLoop()
INetworkDefinition::destroy() > delete ObjectName
INetworkDefinition::hasExplicitPrecision() > Explicit precision support was removed in 10.0
INetworkDefinition::hasImplicitBatchDimension() > Implicit batch support was removed
IOnnxConfig::destroy() > delete ObjectName
IPaddingLayer::getPostPadding() > IPaddingLayer::getPostPaddingNd()
IPaddingLayer::getPrePadding() > IPaddingLayer::getPrePaddingNd()
IPaddingLayer::setPostPadding() > IPaddingLayer::setPostPaddingNd()
IPaddingLayer::setPrePadding() > IPaddingLayer::setPrePaddingNd()
IPoolingLayer::getPadding() > IPoolingLayer::getPaddingNd()
IPoolingLayer::getStride() > IPoolingLayer::getStrideNd()
IPoolingLayer::getWindowSize() > IPoolingLayer::getWindowSizeNd()
IPoolingLayer::setPadding() > IPoolingLayer::setPaddingNd()
IPoolingLayer::setStride() > IPoolingLayer::setStrideNd()
IPoolingLayer::setWindowSize() > IPoolingLayer::setWindowSizeNd()
IRefitter::destroy() > delete ObjectName
IResizeLayer::getAlignCorners() > IResizeLayer::getAlignCornersNd()
IResizeLayer::setAlignCorners() > IResizeLayer::setAlignCornersNd()
IRuntime::deserializeCudaEngine(void const* blob, std::size_t size, IPluginFactory* pluginFactory) > Use deserializeCudaEngine with two parameters
IRuntime::destroy() > delete ObjectName
IRNNv2Layer > ILoop
kNV_TENSORRT_VERSION_IMPL > define NV_TENSORRT_VERSION_INT(major, minor, patch) ((major) * 10000L + (minor) * 100L + (patch) * 1L). TensorRT version encoding was changed to accommodate a two-digit minor version.
NetworkDefinitionCreationFlag::kEXPLICIT_BATCH > Support was removed in 10.0
NetworkDefinitionCreationFlag::kEXPLICIT_PRECISION > Support was removed in 10.0
NV_TENSORRT_SONAME_MAJOR > NV_TENSORRT_MAJOR
NV_TENSORRT_SONAME_MINOR > NV_TENSORRT_MINOR
NV_TENSORRT_SONAME_PATCH > NV_TENSORRT_PATCH
PaddingMode::kCAFFE_ROUND_DOWN > Caffe support was removed
PaddingMode::kCAFFE_ROUND_UP > Caffe support was removed
PreviewFeature::kDISABLE_EXTERNAL_TACTIC_SOURCES_FOR_CORE_0805 > External tactics are always disabled for core code
PreviewFeature::kFASTER_DYNAMIC_SHAPES_0805 > This flag is on by default
ProfilingVerbosity::kDEFAULT > ProfilingVerbosity::kLAYER_NAMES_ONLY
ProfilingVerbosity::kVERBOSE > ProfilingVerbosity::kDETAILED
ResizeMode > Use InterpolationMode. Alias was removed.
RNNDirection > RNN-related data structures were removed
RNNGateType > RNN-related data structures were removed
RNNInputMode > RNN-related data structures were removed
RNNOperation > RNN-related data structures were removed
SampleMode::kDEFAULT > SampleMode::kSTRICT_BOUNDS
SliceMode > Use SampleMode. Alias was removed.
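For instance, two common migrations from the table above, the workspace-size limit and the binding-index tensor queries, might look like the following sketch; the variable and tensor names follow the fragments earlier in this section and are illustrative:

// Was (TensorRT 8.x): config->setMaxWorkspaceSize(1U << 30);
config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, 1U << 30);

// Was (TensorRT 8.x): mEngine->getBindingDimensions(mEngine->getBindingIndex("output"));
// Name-based queries replace binding indices; "output" is an illustrative tensor name.
nvinfer1::Dims const outputDims = mEngine->getTensorShape("output");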
Removed C++ Plugins
Each removed C++ plugin interface is listed alongside the plugin creator or API that supersedes it.
createAnchorGeneratorPlugin() > GridAnchorPluginCreator::createPlugin()
createBatchedNMSPlugin() > BatchedNMSPluginCreator::createPlugin()
createInstanceNormalizationPlugin() > InstanceNormalizationPluginCreator::createPlugin()
createNMSPlugin() > NMSPluginCreator::createPlugin()
createNormalizePlugin() > NormalizePluginCreator::createPlugin()
createPriorBoxPlugin() > PriorBoxPluginCreator::createPlugin()
createRegionPlugin() > RegionPluginCreator::createPlugin()
createReorgPlugin() > ReorgPluginCreator::createPlugin()
createRPNROIPlugin() > RPROIPluginCreator::createPlugin()
createSplitPlugin() > INetworkDefinition::addSlice()
struct Quadruple > Related plugins were removed
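One common way to reach the superseding creators is through the plugin registry. The sketch below assumes the standard plugins have already been registered (for example, via initLibNvInferPlugins); the plugin name and version strings ("BatchedNMS_TRT", "1"), the empty field collection, and the surrounding network, inputs, and nbInputs variables are illustrative assumptions, so consult the specific creator for its registered values and required fields:

// Sketch: creator-based replacement for a removed factory such as createBatchedNMSPlugin().
auto* creator = getPluginRegistry()->getPluginCreator("BatchedNMS_TRT", "1");
if (creator != nullptr)
{
    nvinfer1::PluginFieldCollection const fields{0, nullptr}; // plugin parameters omitted in this sketch
    nvinfer1::IPluginV2* plugin = creator->createPlugin("nms", &fields);
    if (plugin != nullptr)
    {
        network->addPluginV2(inputs, nbInputs, *plugin);
    }
}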