Enable optimizations on Microsoft.ML.OnnxRuntime with RelWithDebInfo config (#23463)
Microsoft.ML.OnnxRuntime is not built with the Release configuration but with
RelWithDebInfo, which is not recognized by the MSBuild SDK (the SDK only turns on
the `Optimize` property automatically for Release). As a consequence, the managed
assemblies are built without optimizations. One fix would be to force the
configuration to Release when building the .NET code even when RelWithDebInfo is
passed on the command line, but I could not find an easy way to do that. Instead,
I mimic the behavior of the Release configuration by setting the `Optimize`
property explicitly.
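As a rough sketch of what that means (the exact project file and condition touched by this PR may differ), enabling `Optimize` for the RelWithDebInfo configuration looks like this:

```xml
<!-- Hypothetical sketch: emit optimized IL when the native build configuration is RelWithDebInfo. -->
<PropertyGroup Condition="'$(Configuration)' == 'RelWithDebInfo'">
  <Optimize>true</Optimize>
</PropertyGroup>
```

Setting `Optimize` makes the C# compiler emit optimized IL, which is the main codegen difference between the Debug and Release configurations.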
With this simple model that sums up the 3 inputs, I can see a roughly 15%
performance improvement (the Before/After rows below are without/with the fix):
```csharp
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Running;
using Microsoft.ML.OnnxRuntime;

var config = DefaultConfig.Instance; //.WithOptions(ConfigOptions.DisableOptimizationsValidator);
BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args, config);

public class OnnxBench
{
    private const int Iterations = 100_000;
    private const int BatchSize = 50;

    private InferenceSession _session = default!;
    private string[] _inputNames = default!;
    private OrtValue[] _inputValues = default!;
    private RunOptions _runOptions = default!;

    [GlobalSetup]
    public void GlobalSetup()
    {
        using SessionOptions sessionOptions = new();
        sessionOptions.InterOpNumThreads = 1;
        sessionOptions.IntraOpNumThreads = 1;
        sessionOptions.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;
        sessionOptions.ExecutionMode = ExecutionMode.ORT_SEQUENTIAL;

        // Tiny ONNX model (base64-encoded) that adds the three inputs A, B and C.
        _session = new InferenceSession(
            Convert.FromBase64String("CAo6cAoOCgFBCgFCEgFEIgNBZGQKDgoBQwoBRBIBWCIDQWRkEgJscloRCgFBEgwKCggBEgYKAAoCCAFaEQoBQhIMCgoIARIGCgAKAggBWhEKAUMSDAoKCAESBgoACgIIAWIRCgFYEgwKCggBEgYKAAoCCAFCBAoAEBU="),
            sessionOptions);

        _inputNames = ["A", "B", "C"];
        _inputValues =
        [
            OrtValue.CreateTensorValueFromMemory(new float[BatchSize], [BatchSize, 1]),
            OrtValue.CreateTensorValueFromMemory(new float[BatchSize], [BatchSize, 1]),
            OrtValue.CreateTensorValueFromMemory(new float[BatchSize], [BatchSize, 1]),
        ];
        _runOptions = new RunOptions();
    }

    [Benchmark(OperationsPerInvoke = Iterations)]
    public float Run()
    {
        // Refill the pre-allocated input tensors with random values.
        var inputValues0Span = _inputValues[0].GetTensorMutableDataAsSpan<float>();
        var inputValues1Span = _inputValues[1].GetTensorMutableDataAsSpan<float>();
        var inputValues2Span = _inputValues[2].GetTensorMutableDataAsSpan<float>();
        for (int i = 0; i < BatchSize; i += 1)
        {
            inputValues0Span[i] = Random.Shared.NextSingle();
            inputValues1Span[i] = Random.Shared.NextSingle();
            inputValues2Span[i] = Random.Shared.NextSingle();
        }

        // Run inference repeatedly and accumulate the output so nothing is optimized away.
        float sum = 0f;
        for (int i = 0; i < Iterations; i += 1)
        {
            using var output = _session.Run(_runOptions, _inputNames, _inputValues, _session.OutputNames);
            ReadOnlySpan<float> outputData = output[0].GetTensorDataAsSpan<float>();
            for (int j = 0; j < outputData.Length; j += 1)
            {
                sum += outputData[j];
            }
        }

        return sum;
    }
}
```
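Results from BenchmarkDotNet (run with something like `dotnet run -c Release` from the benchmark project; exact invocation assumed):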
| Method | Mean | Error | StdDev |
|------- |---------:|----------:|----------:|
| Before | 5.003 μs | 0.0318 μs | 0.0297 μs |
| After | 4.325 μs | 0.0568 μs | 0.0503 μs |