Add Python bindings for "compile" and "auto EP" APIs (#24614)
### Description
Python API updates for automatic execution provider (EP) selection and the model compilation API.
- Adds Python API `SessionOptions.add_provider()` (equivalent to C API's
`SessionOptionsAppendExecutionProvider`)
- Adds Python API `SessionOptions.add_provider_for_devices()`
(equivalent to C API's `SessionOptionsAppendExecutionProvider_V2`)
- Adds Python API `SessionOptions.set_provider_selection_policy()`
(equivalent to C API's `SessionOptionsSetEpSelectionPolicy`)
- Adds Python API class `ModelCompiler` to compile models (wraps C API's
`OrtModelCompilationOptions` and `CompileModel()`)
- TODO: Finish support for the delegate callback. This requires adding a
`void*` parameter to the delegate function.
### Sample program that uses the auto EP APIs
Adapted from a unit test.
```python
def test_cuda_prefer_gpu_and_inference(self):
    """
    Test selecting CUDA EP via the PREFER_GPU policy and running inference.

    The test is skipped when not running on Windows or when the CUDA EP
    library file cannot be found. The EP library registered by this test is
    unregistered again before the test returns, whether it passes or fails.
    """
    ep_lib_path = "onnxruntime_providers_cuda.dll"
    ep_registration_name = "CUDAExecutionProvider"

    if sys.platform != "win32":
        self.skipTest("Skipping test because device discovery is only supported on Windows")

    if not os.path.exists(ep_lib_path):
        self.skipTest(f"Skipping test because EP library '{ep_lib_path}' cannot be found")

    onnxrt.register_execution_provider_library(ep_registration_name, os.path.realpath(ep_lib_path))

    sess = None
    try:
        # Set a policy to prefer GPU. CUDA should be selected.
        sess_options = onnxrt.SessionOptions()
        sess_options.set_provider_selection_policy(onnxrt.OrtExecutionProviderDevicePolicy.PREFER_GPU)
        self.assertTrue(sess_options.has_providers())

        # Run sample model and check output
        sess = onnxrt.InferenceSession(get_name("mul_1.onnx"), sess_options=sess_options)

        x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
        input_name = sess.get_inputs()[0].name
        res = sess.run([], {input_name: x})
        output_expected = np.array([[1.0, 4.0], [9.0, 16.0], [25.0, 36.0]], dtype=np.float32)
        np.testing.assert_allclose(output_expected, res[0], rtol=1e-05, atol=1e-08)
    finally:
        # Drop the session first, since it may still be using the EP library,
        # then unregister the library so the registration does not leak into
        # other tests.
        sess = None
        onnxrt.unregister_execution_provider_library(ep_registration_name)
```
### Sample program that uses compile APIs
Adapted from a unit test that compiles using an EP selection policy.
```python
def test_compile_with_files_prefer_npu_policy(self):
    """
    Tests compiling a model (to/from files) using an EP selection policy (PREFER_NPU).

    The QNN EP library is registered for the duration of the test and is
    unregistered afterwards, even if compilation or an assertion fails.
    """
    ep_lib_path = "onnxruntime_providers_qnn.dll"
    ep_registration_name = "QNNExecutionProvider"
    onnxrt.register_execution_provider_library(ep_registration_name, ep_lib_path)

    try:
        input_model_path = get_name("nhwc_resize_scales_opset18.onnx")
        output_model_path = os.path.join(self._tmp_dir_path, "model.compiled0.onnx")

        # Let the PREFER_NPU policy choose the EP instead of adding one explicitly.
        session_options = onnxrt.SessionOptions()
        session_options.set_provider_selection_policy(onnxrt.OrtExecutionProviderDevicePolicy.PREFER_NPU)

        model_compiler = onnxrt.ModelCompiler(
            session_options,
            input_model_path,
            embed_compiled_data_into_model=True,
            external_initializers_file_path=None,
        )
        model_compiler.compile_to_file(output_model_path)
        self.assertTrue(os.path.exists(output_model_path))
    finally:
        # Always undo the registration so a failure above does not leave the
        # EP library registered for subsequent tests.
        onnxrt.unregister_execution_provider_library(ep_registration_name)
```
Adapted from a unit test that compiles using explicit EPs.
```python
def test_compile_with_input_and_output_files(self):
    """
    Compile a model from an input file to an output file with an explicitly added EP.

    QNN (with the "htp" backend type) is added to the session options when it
    is listed in `available_providers`; otherwise no provider is added. The
    test passes when the compiled model file exists on disk.
    """
    # TODO(adrianlizarraga): Allow test to run for other compiling EPs (e.g., OpenVINO)
    qnn_ep = "QNNExecutionProvider"
    ep_name, ep_options = (qnn_ep, {"backend_type": "htp"}) if qnn_ep in available_providers else (None, {})

    source_model = get_name("nhwc_resize_scales_opset18.onnx")
    compiled_model = os.path.join(self._tmp_dir_path, "model.compiled1.onnx")

    options = onnxrt.SessionOptions()
    if ep_name is not None:
        options.add_provider(ep_name, ep_options)

    compiler = onnxrt.ModelCompiler(
        options,
        source_model,
        embed_compiled_data_into_model=True,
        external_initializers_file_path=None,
    )
    compiler.compile_to_file(compiled_model)

    self.assertTrue(os.path.exists(compiled_model))
```
### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->