diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 3e69923330..b321b2b2ba 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -3598,7 +3598,7 @@ struct OrtApi { * "rpc_control_latency": QNN RPC control latency. * "vtcm_mb": QNN VTCM size in MB. default to 0(not set). * "htp_performance_mode": QNN performance mode, options: "burst", "balanced", "default", "high_performance", - * "high_power_saver", "low_balanced", "low_power_saver", "power_saver", "sustained_high_performance". Default to "default". + * "high_power_saver", "low_balanced", "extreme_power_saver", "low_power_saver", "power_saver", "sustained_high_performance". Default to "default". * "qnn_saver_path": File path to the QNN Saver backend library. If specified, QNN Saver will be enabled and will * dump QNN API calls to disk for replay/debugging. QNN Saver produces incorrect model inference results and * may alter model/EP partitioning. Use only for debugging. 
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc index ca6a2238e5..193e4f5ff2 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc @@ -693,6 +693,18 @@ Status QnnBackendManager::SetHtpPowerConfig() { dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS_PLUS; dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS_PLUS; break; + case HtpPerformanceMode::kHtpExtremePowerSaver: + dcvs_v3.setSleepLatency = 1; // true + dcvs_v3.sleepLatency = kSleepMediumLatency; + dcvs_v3.setBusParams = 1; + dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.setCoreParams = 1; + dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE; + break; case HtpPerformanceMode::kHtpLowBalanced: dcvs_v3.setSleepLatency = 1; // true dcvs_v3.sleepLatency = kSleepMediumLatency; @@ -721,13 +733,13 @@ Status QnnBackendManager::SetHtpPowerConfig() { ORT_THROW("Invalid performance profile %d", static_cast<int>(htp_performance_mode_)); break; } - std::vector<const QnnHtpPerfInfrastructure_PowerConfig_t*> perf_power_configs_ptr_ = ObtainNullTermPtrVector(power_configs); - status = htp_perf_infra.setPowerConfig(htp_power_config_client_id_, perf_power_configs_ptr_.data()); + std::vector<const QnnHtpPerfInfrastructure_PowerConfig_t*> perf_power_configs_ptr = ObtainNullTermPtrVector(power_configs); + status = htp_perf_infra.setPowerConfig(htp_power_config_client_id_, perf_power_configs_ptr.data()); ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for HTP performance mode."); // Set rpc control latency here, but note that v68 doesn't support rpc polling mode. 
if (rpc_control_latency_ != 0) { - constexpr int kNumRpcPollingPowerConfigs = 1; + constexpr int kNumRpcPollingPowerConfigs = 2; std::vector<QnnHtpPerfInfrastructure_PowerConfig_t> rpc_power_configs(kNumRpcPollingPowerConfigs); QnnHtpPerfInfrastructure_PowerConfig_t& rpc_control_latency = rpc_power_configs[0]; // v68 doesn't support this. @@ -735,8 +747,8 @@ Status QnnBackendManager::SetHtpPowerConfig() { rpc_control_latency.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_CONTROL_LATENCY; rpc_polling_time.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_POLLING_TIME; rpc_control_latency.rpcControlLatencyConfig = rpc_control_latency_; - perf_power_configs_ptr_ = ObtainNullTermPtrVector(rpc_power_configs); - status = htp_perf_infra.setPowerConfig(htp_power_config_client_id_, perf_power_configs_ptr_.data()); + perf_power_configs_ptr = ObtainNullTermPtrVector(rpc_power_configs); + status = htp_perf_infra.setPowerConfig(htp_power_config_client_id_, perf_power_configs_ptr.data()); ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for RPC control latency."); } diff --git a/onnxruntime/core/providers/qnn/builder/qnn_def.h b/onnxruntime/core/providers/qnn/builder/qnn_def.h index c202f2bf79..cb6344b4e7 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_def.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_def.h @@ -46,6 +46,7 @@ enum class HtpPerformanceMode : uint8_t { kHtpHighPowerSaver, kHtpLowBalanced, kHtpBalanced, + kHtpExtremePowerSaver, }; enum class ContextPriority : uint8_t { diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index e5856e85e1..04bd58c237 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -64,6 +64,8 @@ static void ParseHtpPerformanceMode(std::string htp_performance_mode_string, htp_performance_mode = qnn::HtpPerformanceMode::kHtpLowPowerSaver; } else if (htp_performance_mode_string == 
"power_saver") { htp_performance_mode = qnn::HtpPerformanceMode::kHtpPowerSaver; + } else if (htp_performance_mode_string == "extreme_power_saver") { + htp_performance_mode = qnn::HtpPerformanceMode::kHtpExtremePowerSaver; } else if (htp_performance_mode_string == "sustained_high_performance") { htp_performance_mode = qnn::HtpPerformanceMode::kHtpSustainedHighPerformance; } else { diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index 51edb91b5d..7e0a811b7d 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -54,7 +54,7 @@ void usage() { "\t [QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10.\n" "\t [QNN only] [vtcm_mb]: QNN VTCM size in MB. default to 0(not set).\n" "\t [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n" - "\t 'high_power_saver', 'low_balanced', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n" + "\t 'high_power_saver', 'low_balanced', 'extreme_power_saver', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n" "\t [QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal', 'normal_high', 'high'. Default to 'normal'. \n" "\t 0 means dump the QNN context binary into separate bin file and set the path in the Onnx skeleton model.\n" "\t [QNN only] [qnn_saver_path]: QNN Saver backend path. 
e.g '/folderpath/libQnnSaver.so'.\n" @@ -487,7 +487,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) { // no validation } else if (key == "htp_performance_mode") { std::set<std::string> supported_htp_perf_mode = {"burst", "balanced", "default", "high_performance", - "high_power_saver", "low_balanced", "low_power_saver", + "high_power_saver", "low_balanced", "extreme_power_saver", "low_power_saver", "power_saver", "sustained_high_performance"}; if (supported_htp_perf_mode.find(value) == supported_htp_perf_mode.end()) { std::ostringstream str_stream; diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index f1b9f05a21..ef04e2be8f 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -73,7 +73,7 @@ namespace perftest { "\t [QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10.\n" "\t [QNN only] [vtcm_mb]: QNN VTCM size in MB. default to 0(not set).\n" "\t [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n" - "\t 'high_power_saver', 'low_balanced', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n" + "\t 'high_power_saver', 'low_balanced', 'extreme_power_saver', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n" "\t [QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal', 'normal_high', 'high'. Default to 'normal'. \n" "\t [QNN only] [qnn_saver_path]: QNN Saver backend path. 
e.g '/folderpath/libQnnSaver.so'.\n" "\t [QNN only] [htp_graph_finalization_optimization_mode]: QNN graph finalization optimization mode, options: \n" diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 27385d44e2..f8a012af5b 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -347,7 +347,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device // no validation } else if (key == "htp_performance_mode") { std::set<std::string> supported_htp_perf_mode = {"burst", "balanced", "default", "high_performance", - "high_power_saver", "low_balanced", "low_power_saver", + "high_power_saver", "low_balanced", "extreme_power_saver", "low_power_saver", "power_saver", "sustained_high_performance"}; if (supported_htp_perf_mode.find(value) == supported_htp_perf_mode.end()) { std::ostringstream str_stream;