ONNX-LRE (Latent Runtime Engine)
C++ API Documentation

The ONNX-LRE library provides a machine learning runtime environment for executing ONNX (Open Neural Network Exchange) models.

ONNX-LRE C++ APIs offer an easy-to-use interface to onboard and execute ONNX models from LEIP Optimize.

Inference Options

ONNX-LRE supports three different input formats for inference; the examples below cover DLPack (DLManagedTensor) tensors and ONNX Runtime (Ort::Value) tensors.

Each approach offers a different tradeoff between ease of use, performance, and integration complexity. The examples below demonstrate practical usage patterns for the ONNX-LRE C++ API.
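Both examples construct the engine from a model path and an OnnxLre::Options value. A minimal configuration sketch, using the Options::executionProvider and Options::precision fields documented at the end of this page:

```cpp
#include "onnx_lre.hpp"

// Configure precision and hardware backend before constructing the engine
OnnxLre::Options options;
options.executionProvider = OnnxLre::ExecutionProvider::CUDA;  // or ExecutionProvider::CPU as a fallback
options.precision = OnnxLre::Precision::Float16;               // ~50% memory reduction on compatible hardware

OnnxLre::LatentRuntimeEngine engine("/path/to/model.onnx", options);
```

Any field left unset keeps its default; the configuration fragment above is illustrative rather than exhaustive.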

Examples

Example 1: DLPack Tensors with Smart Pointers

#include "onnx_lre.hpp"
#include <memory>
#include <functional>
#include <iostream>

// Custom deleter for DLManagedTensor
struct DLTensorDeleter {
    void operator()(DLManagedTensor* tensor) const {
        if (tensor && tensor->deleter) {
            tensor->deleter(tensor);
        }
    }
};

// Use unique_ptr to manage the DLManagedTensor lifecycle
using DLTensorPtr = std::unique_ptr<DLManagedTensor, DLTensorDeleter>;

int main() {
    try {
        // Configure the engine (CPU execution as the universal fallback)
        OnnxLre::Options options;
        options.executionProvider = OnnxLre::ExecutionProvider::CPU;

        OnnxLre::LatentRuntimeEngine engine("/path/to/model.onnx", options);

        // ... prepare inputs and call engine.infer(...) here ...

        // Get results and immediately wrap them in smart pointers
        std::vector<DLTensorPtr> outputTensors;
        for (auto* tensor : engine.getOutput()) {
            outputTensors.emplace_back(tensor);
        }

        // Process results safely - everything is cleaned up automatically
        for (const auto& tensor : outputTensors) {
            if (!tensor) continue;
            const auto& dl_tensor = tensor->dl_tensor;
            std::cout << "Shape: [";
            for (int j = 0; j < dl_tensor.ndim; j++) {
                std::cout << dl_tensor.shape[j] << " ";
            }
            std::cout << "]" << std::endl;
        }
        // All resources are freed automatically when outputTensors goes out of scope
    } catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}

Example 2: Using ONNX Runtime Tensors with RAII

This approach uses ONNX Runtime's tensor types with automatic memory management:

#include "onnx_lre.hpp"
#include <memory>
#include <vector>
#include <iostream>

int main() {
    try {
        // Configuration with scope-limited lifetime
        OnnxLre::Options options;
        options.executionProvider = OnnxLre::ExecutionProvider::CUDA;
        options.precision = OnnxLre::Precision::Float16;

        // Create engine (automatically cleaned up when going out of scope)
        OnnxLre::LatentRuntimeEngine engine("/path/to/model.onnx", options);

        // Fetch model requirements
        const auto& inputShapes = engine.getInputShapes();

        // Create environment and memory info for tensor creation
        Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "example");
        Ort::MemoryInfo memInfo = Ort::MemoryInfo::CreateCpu(
            OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);

        // Input buffers must outlive the Ort::Value tensors that view them,
        // so keep them in a container that lives until inference completes
        std::vector<std::vector<float>> inputData;
        std::vector<Ort::Value> inputTensors;
        for (size_t i = 0; i < engine.getNumberOfInputs(); i++) {
            // Calculate the number of elements needed
            size_t totalElements = 1;
            for (auto dim : inputShapes[i]) {
                totalElements *= (dim > 0) ? dim : 1; // Handle dynamic dimensions
            }

            // Use std::vector for memory safety
            inputData.emplace_back(totalElements, 0.5f);
            auto& data = inputData.back();

            // Create tensor (the templated CreateTensor overload takes an
            // element count, not a byte count)
            inputTensors.push_back(Ort::Value::CreateTensor<float>(
                memInfo, data.data(), data.size(),
                inputShapes[i].data(), inputShapes[i].size()));
        }

        // Run inference (Ort::Value has proper move semantics)
        engine.infer(inputTensors);

        // Get results with ownership transfer
        auto outputTensors = engine.getOutputOrt();

        // Process results (no cleanup needed - RAII handles it)
        for (size_t i = 0; i < outputTensors.size(); i++) {
            // Tensors are automatically released when going out of scope
            auto info = outputTensors[i].GetTensorTypeAndShapeInfo();
            std::cout << "Output " << i << " shape: [";
            for (auto dim : info.GetShape()) {
                std::cout << dim << " ";
            }
            std::cout << "]" << std::endl;
        }
    } catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}
API Reference Notes

OnnxLre::Options - Configuration parameters for the inference engine (onnx_lre.hpp:157)
OnnxLre::Options::precision - Numerical precision for calculations (onnx_lre.hpp:161)
OnnxLre::Options::executionProvider - Hardware acceleration backend (onnx_lre.hpp:160)
OnnxLre::Precision::Float16 - 16-bit floating point: reduced precision, ~50% memory reduction, faster on compatible hardware
OnnxLre::ExecutionProvider::CPU - CPU execution: universal fallback with no special hardware requirements
OnnxLre::ExecutionProvider::CUDA - NVIDIA CUDA: GPU acceleration without TensorRT optimizations
OnnxLre::LatentRuntimeEngine - The inference engine (onnx_lre.hpp:167)