Instructions to use Respair/NeMo_Canary with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- NeMo
How to use Respair/NeMo_Canary with NeMo:
# tag did not correspond to a valid NeMo domain.
- Notebooks
- Google Colab
- Kaggle
| # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| import os | |
| import tempfile | |
| import unittest | |
| from unittest.mock import MagicMock, patch | |
| import pytest | |
| import torch.nn as nn | |
class SimpleModel(nn.Module):
    """Minimal convolution + ReLU network used as the model under test."""

    def __init__(self):
        super().__init__()
        # 3 input channels -> 64 output channels; a 3x3 kernel with padding=1
        # (default stride 1) leaves the spatial dimensions unchanged.
        self.conv = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``relu(conv(x))``."""
        return self.relu(self.conv(x))
class TestTensorRTLazyCompiler(unittest.TestCase):
    """Unit tests for the helpers in ``nemo.export.tensorrt_lazy_compiler``.

    Each test imports the function or class it needs from that module inside
    the test body, so a missing optional dependency only affects the tests
    that actually touch it.
    """

    def setUp(self):
        """Create a fresh model and a private temporary directory for the plan file."""
        self.model = SimpleModel()
        # TemporaryDirectory + addCleanup removes the whole tree after every
        # test, even if the test fails or compilation leaves extra files next
        # to the plan (a bare os.rmdir would raise on a non-empty directory).
        tmp_dir = tempfile.TemporaryDirectory()
        self.addCleanup(tmp_dir.cleanup)
        self.temp_dir = tmp_dir.name
        self.plan_path = os.path.join(self.temp_dir, "test_model.plan")

    def test_get_profile_shapes(self):
        """get_profile_shapes swaps the batch dimension for each dynamic batch size."""
        from nemo.export.tensorrt_lazy_compiler import get_profile_shapes

        input_shape = [1, 3, 224, 224]
        dynamic_batchsize = [1, 4, 8]
        min_shape, opt_shape, max_shape = get_profile_shapes(input_shape, dynamic_batchsize)
        self.assertEqual(min_shape, [1, 3, 224, 224])
        self.assertEqual(opt_shape, [4, 3, 224, 224])
        self.assertEqual(max_shape, [8, 3, 224, 224])

        # Test with None dynamic_batchsize
        min_shape, opt_shape, max_shape = get_profile_shapes(input_shape, None)
        self.assertEqual(min_shape, input_shape)
        self.assertEqual(opt_shape, input_shape)
        self.assertEqual(max_shape, input_shape)

    def test_get_dynamic_axes(self):
        """get_dynamic_axes reports the axes whose size differs across a profile."""
        from nemo.export.tensorrt_lazy_compiler import get_dynamic_axes

        profiles = [{"input": [[1, 3, 224, 224], [4, 3, 224, 224], [8, 3, 224, 224]]}]
        dynamic_axes = get_dynamic_axes(profiles)
        self.assertEqual(dynamic_axes, {"input": [0]})

        # Test with empty profiles
        dynamic_axes = get_dynamic_axes([])
        self.assertEqual(dynamic_axes, {})

    # The patch supplies the ``mock_cuda_available`` argument; without it the
    # test cannot even be called by the runner.
    # NOTE(review): the assertion on ``_trt_compiler`` presumably also needs
    # TensorRT to be installed -- confirm whether a GPU-only marker was lost.
    @patch("torch.cuda.is_available", return_value=True)
    def test_trt_compile_basic(self, mock_cuda_available):
        """trt_compile returns the same model object with a ``_trt_compiler`` attached."""
        from nemo.export.tensorrt_lazy_compiler import trt_compile

        # Test basic compilation
        compiled_model = trt_compile(
            self.model,
            self.plan_path,
            args={"method": "onnx", "precision": "fp16", "build_args": {"builder_optimization_level": 5}},
        )
        self.assertEqual(compiled_model, self.model)
        self.assertTrue(hasattr(compiled_model, '_trt_compiler'))

    def test_trt_compile_no_tensorrt(self):
        """trt_compile leaves the model untouched when TensorRT is unavailable."""
        from nemo.export.tensorrt_lazy_compiler import trt_compile

        # Test when TensorRT is not available
        # NOTE(review): nothing visible here forces TensorRT to be unavailable,
        # so this only holds in environments without it -- confirm whether a
        # patch decorator belongs on this test.
        compiled_model = trt_compile(self.model, self.plan_path)
        self.assertEqual(compiled_model, self.model)
        self.assertFalse(hasattr(compiled_model, '_trt_compiler'))

    def test_trt_compiler_initialization(self):
        """TrtCompiler stores its constructor arguments on the instance."""
        from nemo.export.tensorrt_lazy_compiler import TrtCompiler

        compiler = TrtCompiler(
            self.model,
            self.plan_path,
            precision="fp16",
            method="onnx",
            input_names=["x"],
            output_names=["output"],
            logger=MagicMock(),
        )
        self.assertEqual(compiler.plan_path, self.plan_path)
        self.assertEqual(compiler.precision, "fp16")
        self.assertEqual(compiler.method, "onnx")
        self.assertEqual(compiler.input_names, ["x"])
        self.assertEqual(compiler.output_names, ["output"])

    def test_trt_compiler_invalid_precision(self):
        """An unknown precision string is rejected with ValueError."""
        from nemo.export.tensorrt_lazy_compiler import TrtCompiler

        with self.assertRaises(ValueError):
            TrtCompiler(self.model, self.plan_path, precision="invalid_precision")

    def test_trt_compiler_invalid_method(self):
        """An unknown conversion method is rejected with ValueError."""
        from nemo.export.tensorrt_lazy_compiler import TrtCompiler

        with self.assertRaises(ValueError):
            TrtCompiler(self.model, self.plan_path, method="invalid_method")

    # The patch supplies the ``mock_cuda_available`` argument (see
    # test_trt_compile_basic for the same pattern).
    @patch("torch.cuda.is_available", return_value=True)
    def test_trt_compile_with_submodule(self, mock_cuda_available):
        """With ``submodule`` given, the compiler is attached to the named child module."""
        from nemo.export.tensorrt_lazy_compiler import trt_compile

        class NestedModel(nn.Module):
            def __init__(self):
                super().__init__()
                self.submodule = SimpleModel()

        model = NestedModel()
        compiled_model = trt_compile(model, self.plan_path, submodule=["submodule"])
        self.assertEqual(compiled_model, model)
        self.assertTrue(hasattr(model.submodule, '_trt_compiler'))
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()