C++#

C++ API Changes#

Transition from enqueueV2 to enqueueV3 for C++

enqueueV2() consumed an array of device bindings indexed by binding position, whereas enqueueV3() reads the tensor addresses bound by name on the execution context through setTensorAddress(). The first listing shows the original enqueueV2-based code; the second shows the equivalent enqueueV3-based code.

// Create RAII buffer manager object.
samplesCommon::BufferManager buffers(mEngine);

auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
if (!context)
{
    return false;
}

// Pick a random digit to try to infer.
srand(time(NULL));
int32_t const digit = rand() % 10;

// Read the input data into the managed buffers.
// There should be just 1 input tensor.
ASSERT(mParams.inputTensorNames.size() == 1);

if (!processInput(buffers, mParams.inputTensorNames[0], digit))
{
    return false;
}

// Create a CUDA stream to execute this inference.
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));

// Asynchronously copy data from host input buffers to device input buffers.
buffers.copyInputToDeviceAsync(stream);

// Asynchronously enqueue the inference work.
if (!context->enqueueV2(buffers.getDeviceBindings().data(), stream, nullptr))
{
    return false;
}

// Asynchronously copy data from device output buffers to host output buffers.
buffers.copyOutputToHostAsync(stream);

// Wait for the work in the stream to complete.
CHECK(cudaStreamSynchronize(stream));

// Release stream.
CHECK(cudaStreamDestroy(stream));

The equivalent enqueueV3-based code:

// Create RAII buffer manager object.
samplesCommon::BufferManager buffers(mEngine);

auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
if (!context)
{
    return false;
}

// Bind each I/O tensor address to the execution context by name.
for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
{
    auto const name = mEngine->getIOTensorName(i);
    context->setTensorAddress(name, buffers.getDeviceBuffer(name));
}

// Pick a random digit to try to infer.
srand(time(NULL));
int32_t const digit = rand() % 10;

// Read the input data into the managed buffers.
// There should be just 1 input tensor.
ASSERT(mParams.inputTensorNames.size() == 1);

if (!processInput(buffers, mParams.inputTensorNames[0], digit))
{
    return false;
}

// Create a CUDA stream to execute this inference.
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));

// Asynchronously copy data from host input buffers to device input buffers.
buffers.copyInputToDeviceAsync(stream);

// Asynchronously enqueue the inference work.
if (!context->enqueueV3(stream))
{
    return false;
}

// Asynchronously copy data from device output buffers to host output buffers.
buffers.copyOutputToHostAsync(stream);

// Wait for the work in the stream to complete.
CHECK(cudaStreamSynchronize(stream));

// Release stream.
CHECK(cudaStreamDestroy(stream));

64-Bit Dimension Changes#

The dimensions held by Dims changed from int32_t to int64_t. However, TensorRT 10.x generally rejects networks whose dimensions exceed the range of int32_t. The tensor produced by IShapeLayer is now of type DataType::kINT64; use ICastLayer to cast the result to DataType::kINT32 if 32-bit dimensions are required.

Inspect code that bitwise copies to and from Dims to ensure it is correct for int64_t dimensions.
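
For illustration, a minimal sketch of the cast, assuming network is an existing nvinfer1::INetworkDefinition* and input is an nvinfer1::ITensor* that already belongs to the network:

// IShapeLayer now produces a tensor of type DataType::kINT64.
nvinfer1::IShapeLayer* shapeLayer = network->addShape(*input);
nvinfer1::ITensor* shape64 = shapeLayer->getOutput(0);

// Cast back to 32-bit if downstream layers or host code expect kINT32 dimensions.
nvinfer1::ICastLayer* castLayer = network->addCast(*shape64, nvinfer1::DataType::kINT32);
nvinfer1::ITensor* shape32 = castLayer->getOutput(0);
// Use shape32 wherever a 32-bit shape tensor was used previously.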

Added C++ APIs#

Enums

  • ActivationType::kGELU_ERF

  • ActivationType::kGELU_TANH

  • BuilderFlag::kREFIT_IDENTICAL

  • BuilderFlag::kSTRIP_PLAN

  • BuilderFlag::kWEIGHT_STREAMING

  • BuilderFlag::kSTRICT_NANS

  • DataType::kINT4

  • LayerType::kPLUGIN_V3

Types

  • APILanguage

  • Dims64

  • ExecutionContextAllocationStrategy

  • IGpuAsyncAllocator

  • InterfaceInfo

  • IPluginResource

  • IPluginV3

  • IStreamReader

  • IVersionedInterface

Methods and Properties

  • getInferLibBuildVersion

  • getInferLibMajorVersion

  • getInferLibMinorVersion

  • getInferLibPatchVersion

  • IBuilderConfig::setMaxNbTactics

  • IBuilderConfig::getMaxNbTactics

  • ICudaEngine::createRefitter

  • ICudaEngine::getMinimumWeightStreamingBudget

  • ICudaEngine::getStreamableWeightsSize

  • ICudaEngine::getWeightStreamingBudget

  • ICudaEngine::isDebugTensor

  • ICudaEngine::setWeightStreamingBudget

  • IExecutionContext::getDebugListener

  • IExecutionContext::getTensorDebugState

  • IExecutionContext::setAllTensorsDebugState

  • IExecutionContext::setDebugListener

  • IExecutionContext::setOutputTensorAddress

  • IExecutionContext::setTensorDebugState

  • IExecutionContext::updateDeviceMemorySizeForShapes

  • IGpuAllocator::allocateAsync

  • IGpuAllocator::deallocateAsync

  • INetworkDefinition::addPluginV3

  • INetworkDefinition::isDebugTensor

  • INetworkDefinition::markDebug

  • INetworkDefinition::unmarkDebug

  • IPluginRegistry::acquirePluginResource

  • IPluginRegistry::deregisterCreator

  • IPluginRegistry::getAllCreators

  • IPluginRegistry::getCreator

  • IPluginRegistry::registerCreator

  • IPluginRegistry::releasePluginResource

Removed C++ APIs#

The following removed C++ APIs are listed next to the APIs that supersede them. A short migration sketch covering a few of the most common replacements appears after the list.

  • BuilderFlag::kENABLE_TACTIC_HEURISTIC > Builder optimization level 2

  • BuilderFlag::kSTRICT_TYPES > Use all three flags: kREJECT_EMPTY_ALGORITHMS, kDIRECT_IO, kPREFER_PRECISION_CONSTRAINTS

  • EngineCapability::kDEFAULT > EngineCapability::kSTANDARD

  • EngineCapability::kSAFE_DLA > EngineCapability::kDLA_STANDALONE

  • EngineCapability::kSAFE_GPU > EngineCapability::kSAFETY

  • IAlgorithm::getAlgorithmIOInfo() > IAlgorithm::getAlgorithmIOInfoByIndex()

  • IAlgorithmIOInfo::getTensorFormat() > The strides, data type, and vectorization information are sufficient to identify tensor formats uniquely.

  • IBuilder::buildEngineWithConfig() > IBuilder::buildSerializedNetwork()

  • IBuilder::destroy() > delete ObjectName

  • IBuilder::getMaxBatchSize() > Implicit batch support was removed

  • IBuilder::setMaxBatchSize() > Implicit batch support was removed

  • IBuilderConfig::destroy() > delete ObjectName

  • IBuilderConfig::getMaxWorkspaceSize() > IBuilderConfig::getMemoryPoolLimit() with MemoryPoolType::kWORKSPACE

  • IBuilderConfig::getMinTimingIterations() > IBuilderConfig::getAvgTimingIterations()

  • IBuilderConfig::setMaxWorkspaceSize() > IBuilderConfig::setMemoryPoolLimit() with MemoryPoolType::kWORKSPACE

  • IBuilderConfig::setMinTimingIterations() > IBuilderConfig::setAvgTimingIterations()

  • IConvolutionLayer::getDilation() > IConvolutionLayer::getDilationNd()

  • IConvolutionLayer::getKernelSize() > IConvolutionLayer::getKernelSizeNd()

  • IConvolutionLayer::getPadding() > IConvolutionLayer::getPaddingNd()

  • IConvolutionLayer::getStride() > IConvolutionLayer::getStrideNd()

  • IConvolutionLayer::setDilation() > IConvolutionLayer::setDilationNd()

  • IConvolutionLayer::setKernelSize() > IConvolutionLayer::setKernelSizeNd()

  • IConvolutionLayer::setPadding() > IConvolutionLayer::setPaddingNd()

  • IConvolutionLayer::setStride() > IConvolutionLayer::setStrideNd()

  • ICudaEngine::bindingIsInput() > ICudaEngine::getTensorIOMode()

  • ICudaEngine::destroy() > delete ObjectName

  • ICudaEngine::getBindingBytesPerComponent() > ICudaEngine::getTensorBytesPerComponent()

  • ICudaEngine::getBindingComponentsPerElement() > ICudaEngine::getTensorComponentsPerElement()

  • ICudaEngine::getBindingDataType() > ICudaEngine::getTensorDataType()

  • ICudaEngine::getBindingDimensions() > ICudaEngine::getTensorShape()

  • ICudaEngine::getBindingFormat() > ICudaEngine::getTensorFormat()

  • ICudaEngine::getBindingFormatDesc() > ICudaEngine::getTensorFormatDesc()

  • ICudaEngine::getBindingIndex() > Name-based methods

  • ICudaEngine::getBindingName() > Name-based methods

  • ICudaEngine::getBindingVectorizedDim() > ICudaEngine::getTensorVectorizedDim()

  • ICudaEngine::getLocation() > ITensor::getLocation()

  • ICudaEngine::getMaxBatchSize() > Implicit batch support was removed

  • ICudaEngine::getNbBindings() > ICudaEngine::getNbIOTensors()

  • ICudaEngine::getProfileDimensions() > ICudaEngine::getProfileShape()

  • ICudaEngine::getProfileShapeValues() > ICudaEngine::getShapeValues()

  • ICudaEngine::hasImplicitBatchDimension() > Implicit batch support was removed

  • ICudaEngine::isExecutionBinding() > No name-based equivalent replacement

  • ICudaEngine::isShapeBinding() > ICudaEngine::isShapeInferenceIO()

  • IDeconvolutionLayer::getKernelSize() > IDeconvolutionLayer::getKernelSizeNd()

  • IDeconvolutionLayer::getPadding() > IDeconvolutionLayer::getPaddingNd()

  • IDeconvolutionLayer::getStride() > IDeconvolutionLayer::getStrideNd()

  • IDeconvolutionLayer::setKernelSize() > IDeconvolutionLayer::setKernelSizeNd()

  • IDeconvolutionLayer::setPadding() > IDeconvolutionLayer::setPaddingNd()

  • IDeconvolutionLayer::setStride() > IDeconvolutionLayer::setStrideNd()

  • IExecutionContext::destroy() > delete ObjectName

  • IExecutionContext::enqueue() > IExecutionContext::enqueueV3()

  • IExecutionContext::enqueueV2() > IExecutionContext::enqueueV3()

  • IExecutionContext::execute() > IExecutionContext::executeV2()

  • IExecutionContext::getBindingDimensions() > IExecutionContext::getTensorShape()

  • IExecutionContext::getShapeBinding() > IExecutionContext::getTensorAddress() or getOutputTensorAddress()

  • IExecutionContext::getStrides() > IExecutionContext::getTensorStrides()

  • IExecutionContext::setBindingDimensions() > IExecutionContext::setInputShape()

  • IExecutionContext::setInputShapeBinding() > IExecutionContext::setInputTensorAddress() or setTensorAddress()

  • IExecutionContext::setOptimizationProfile() > IExecutionContext::setOptimizationProfileAsync()

  • IFullyConnectedLayer > IMatrixMultiplyLayer

  • IGpuAllocator::free() > IGpuAllocator::deallocate()

  • IHostMemory::destroy() > delete ObjectName

  • INetworkDefinition::addConvolution() > INetworkDefinition::addConvolutionNd()

  • INetworkDefinition::addDeconvolution() > INetworkDefinition::addDeconvolutionNd()

  • INetworkDefinition::addFullyConnected() > INetworkDefinition::addMatrixMultiply()

  • INetworkDefinition::addPadding() > INetworkDefinition::addPaddingNd()

  • INetworkDefinition::addPooling() > INetworkDefinition::addPoolingNd()

  • INetworkDefinition::addRNNv2() > INetworkDefinition::addLoop()

  • INetworkDefinition::destroy() > delete ObjectName

  • INetworkDefinition::hasExplicitPrecision() > Explicit precision support was removed in 10.0

  • INetworkDefinition::hasImplicitBatchDimension() > Implicit batch support was removed

  • IOnnxConfig::destroy() > delete ObjectName

  • IPaddingLayer::getPostPadding() > IPaddingLayer::getPostPaddingNd()

  • IPaddingLayer::getPrePadding() > IPaddingLayer::getPrePaddingNd()

  • IPaddingLayer::setPostPadding() > IPaddingLayer::setPostPaddingNd()

  • IPaddingLayer::setPrePadding() > IPaddingLayer::setPrePaddingNd()

  • IPoolingLayer::getPadding() > IPoolingLayer::getPaddingNd()

  • IPoolingLayer::getStride() > IPoolingLayer::getStrideNd()

  • IPoolingLayer::getWindowSize() > IPoolingLayer::getWindowSizeNd()

  • IPoolingLayer::setPadding() > IPoolingLayer::setPaddingNd()

  • IPoolingLayer::setStride() > IPoolingLayer::setStrideNd()

  • IPoolingLayer::setWindowSize() > IPoolingLayer::setWindowSizeNd()

  • IRefitter::destroy() > delete ObjectName

  • IResizeLayer::getAlignCorners() > IResizeLayer::getCoordinateTransformation()

  • IResizeLayer::setAlignCorners() > IResizeLayer::setCoordinateTransformation()

  • IRuntime::deserializeCudaEngine(void const* blob, std::size_t size, IPluginFactory* pluginFactory) > Use deserializeCudaEngine with two parameters

  • IRuntime::destroy() > delete ObjectName

  • IRNNv2Layer > ILoop

  • kNV_TENSORRT_VERSION_IMPL > define NV_TENSORRT_VERSION_INT(major, minor, patch) ((major) *10000L + (minor) *100L + (patch) *1L). TensorRT version encoding was changed to accommodate a two-digit minor version.

  • NetworkDefinitionCreationFlag::kEXPLICIT_BATCH > The flag was removed; networks always use an explicit batch dimension in 10.0

  • NetworkDefinitionCreationFlag::kEXPLICIT_PRECISION > Support was removed in 10.0

  • NV_TENSORRT_SONAME_MAJOR > NV_TENSORRT_MAJOR

  • NV_TENSORRT_SONAME_MINOR > NV_TENSORRT_MINOR

  • NV_TENSORRT_SONAME_PATCH > NV_TENSORRT_PATCH

  • PaddingMode::kCAFFE_ROUND_DOWN > Caffe support was removed

  • PaddingMode::kCAFFE_ROUND_UP > Caffe support was removed

  • PreviewFeature::kDISABLE_EXTERNAL_TACTIC_SOURCES_FOR_CORE_0805 > External tactics are always disabled for core code

  • PreviewFeature::kFASTER_DYNAMIC_SHAPES_0805 > The behavior is now always enabled

  • ProfilingVerbosity::kDEFAULT > ProfilingVerbosity::kLAYER_NAMES_ONLY

  • ProfilingVerbosity::kVERBOSE > ProfilingVerbosity::kDETAILED

  • ResizeMode > Use InterpolationMode. Alias was removed.

  • RNNDirection > RNN-related data structures were removed

  • RNNGateType > RNN-related data structures were removed

  • RNNInputMode > RNN-related data structures were removed

  • RNNOperation > RNN-related data structures were removed

  • SampleMode::kDEFAULT > SampleMode::kSTRICT_BOUNDS

  • SliceMode > Use SampleMode. Alias was removed.
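
As a rough illustration of a few of the most common replacements above (workspace limits, name-based tensor queries, and object destruction), the following sketch assumes config (nvinfer1::IBuilderConfig*), engine (nvinfer1::ICudaEngine*), and context (nvinfer1::IExecutionContext*) already exist:

// setMaxWorkspaceSize() was removed; use a memory-pool limit instead.
config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, 1ULL << 30);

// Binding indices were removed; query I/O tensors by name instead.
char const* tensorName = engine->getIOTensorName(0);
nvinfer1::Dims shape = engine->getTensorShape(tensorName);
nvinfer1::DataType type = engine->getTensorDataType(tensorName);
bool isInput = engine->getTensorIOMode(tensorName) == nvinfer1::TensorIOMode::kINPUT;

// destroy() methods were removed; delete objects directly or hold them in
// smart pointers.
delete context;
delete engine;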

Removed C++ Plugins#

The following removed C++ plugin functions are listed next to the plugin creators that supersede them. A sketch of the registry-based replacement appears after the list.

  • createAnchorGeneratorPlugin() > GridAnchorPluginCreator::createPlugin()

  • createBatchedNMSPlugin() > BatchedNMSPluginCreator::createPlugin()

  • createInstanceNormalizationPlugin() > InstanceNormalizationPluginCreator::createPlugin()

  • createNMSPlugin() > NMSPluginCreator::createPlugin()

  • createNormalizePlugin() > NormalizePluginCreator::createPlugin()

  • createPriorBoxPlugin() > PriorBoxPluginCreator::createPlugin()

  • createRegionPlugin() > RegionPluginCreator::createPlugin()

  • createReorgPlugin() > ReorgPluginCreator::createPlugin()

  • createRPNROIPlugin() > RPROIPluginCreator::createPlugin()

  • createSplitPlugin() > INetworkDefinition::addSlice()

  • struct Quadruple > Related plugins were removed
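
In place of the removed helper functions, plugins are created through their registered creators. The following is a sketch only: the creator name "GridAnchor_TRT", version "1", the tensor name, and the (empty) field collection are assumptions that must be checked against the actual plugin, and initLibNvInferPlugins() must have been called so the creators are registered.

// Look up the registered creator instead of calling a removed create*Plugin() helper.
auto* creator = getPluginRegistry()->getPluginCreator("GridAnchor_TRT", "1");
if (creator != nullptr)
{
    // Populate the PluginFieldCollection with the fields this plugin expects;
    // creator->getFieldNames() lists them. Left empty here for brevity.
    nvinfer1::PluginFieldCollection fc{};
    nvinfer1::IPluginV2* plugin = creator->createPlugin("anchor_generator", &fc);
    // Add the plugin to the network (for example, with INetworkDefinition::addPluginV2()),
    // then release the local reference with plugin->destroy() once the network owns it.
}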