mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-18 21:21:17 +00:00
Add extreme_power_saver for htp_performance_mode (#19111)
### Description

Add extreme_power_saver mode for htp_performance_mode.
This commit is contained in:
parent
443aeb851c
commit
62a4e9103e
7 changed files with 25 additions and 10 deletions
|
|
@ -3598,7 +3598,7 @@ struct OrtApi {
|
|||
* "rpc_control_latency": QNN RPC control latency.
|
||||
* "vtcm_mb": QNN VTCM size in MB. default to 0(not set).
|
||||
* "htp_performance_mode": QNN performance mode, options: "burst", "balanced", "default", "high_performance",
|
||||
* "high_power_saver", "low_balanced", "low_power_saver", "power_saver", "sustained_high_performance". Default to "default".
|
||||
* "high_power_saver", "low_balanced", "extreme_power_saver", "low_power_saver", "power_saver", "sustained_high_performance". Default to "default".
|
||||
* "qnn_saver_path": File path to the QNN Saver backend library. If specified, QNN Saver will be enabled and will
|
||||
* dump QNN API calls to disk for replay/debugging. QNN Saver produces incorrect model inference results and
|
||||
* may alter model/EP partitioning. Use only for debugging.
|
||||
|
|
|
|||
|
|
@ -693,6 +693,18 @@ Status QnnBackendManager::SetHtpPowerConfig() {
|
|||
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS_PLUS;
|
||||
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS_PLUS;
|
||||
break;
|
||||
case HtpPerformanceMode::kHtpExtremePowerSaver:
|
||||
dcvs_v3.setSleepLatency = 1; // true
|
||||
dcvs_v3.sleepLatency = kSleepMediumLatency;
|
||||
dcvs_v3.setBusParams = 1;
|
||||
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE;
|
||||
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE;
|
||||
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE;
|
||||
dcvs_v3.setCoreParams = 1;
|
||||
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE;
|
||||
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE;
|
||||
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE;
|
||||
break;
|
||||
case HtpPerformanceMode::kHtpLowBalanced:
|
||||
dcvs_v3.setSleepLatency = 1; // true
|
||||
dcvs_v3.sleepLatency = kSleepMediumLatency;
|
||||
|
|
@ -721,13 +733,13 @@ Status QnnBackendManager::SetHtpPowerConfig() {
|
|||
ORT_THROW("Invalid performance profile %d", static_cast<int>(htp_performance_mode_));
|
||||
break;
|
||||
}
|
||||
std::vector<const QnnHtpPerfInfrastructure_PowerConfig_t*> perf_power_configs_ptr_ = ObtainNullTermPtrVector(power_configs);
|
||||
status = htp_perf_infra.setPowerConfig(htp_power_config_client_id_, perf_power_configs_ptr_.data());
|
||||
std::vector<const QnnHtpPerfInfrastructure_PowerConfig_t*> perf_power_configs_ptr = ObtainNullTermPtrVector(power_configs);
|
||||
status = htp_perf_infra.setPowerConfig(htp_power_config_client_id_, perf_power_configs_ptr.data());
|
||||
ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for HTP performance mode.");
|
||||
|
||||
// Set rpc control latency here, but note that v68 doesn't support rpc polling mode.
|
||||
if (rpc_control_latency_ != 0) {
|
||||
constexpr int kNumRpcPollingPowerConfigs = 1;
|
||||
constexpr int kNumRpcPollingPowerConfigs = 2;
|
||||
std::vector<QnnHtpPerfInfrastructure_PowerConfig_t> rpc_power_configs(kNumRpcPollingPowerConfigs);
|
||||
QnnHtpPerfInfrastructure_PowerConfig_t& rpc_control_latency = rpc_power_configs[0];
|
||||
// v68 doesn't support this.
|
||||
|
|
@ -735,8 +747,8 @@ Status QnnBackendManager::SetHtpPowerConfig() {
|
|||
rpc_control_latency.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_CONTROL_LATENCY;
|
||||
rpc_polling_time.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_POLLING_TIME;
|
||||
rpc_control_latency.rpcControlLatencyConfig = rpc_control_latency_;
|
||||
perf_power_configs_ptr_ = ObtainNullTermPtrVector(rpc_power_configs);
|
||||
status = htp_perf_infra.setPowerConfig(htp_power_config_client_id_, perf_power_configs_ptr_.data());
|
||||
perf_power_configs_ptr = ObtainNullTermPtrVector(rpc_power_configs);
|
||||
status = htp_perf_infra.setPowerConfig(htp_power_config_client_id_, perf_power_configs_ptr.data());
|
||||
ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for RPC control latency.");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ enum class HtpPerformanceMode : uint8_t {
|
|||
kHtpHighPowerSaver,
|
||||
kHtpLowBalanced,
|
||||
kHtpBalanced,
|
||||
kHtpExtremePowerSaver,
|
||||
};
|
||||
|
||||
enum class ContextPriority : uint8_t {
|
||||
|
|
|
|||
|
|
@ -64,6 +64,8 @@ static void ParseHtpPerformanceMode(std::string htp_performance_mode_string,
|
|||
htp_performance_mode = qnn::HtpPerformanceMode::kHtpLowPowerSaver;
|
||||
} else if (htp_performance_mode_string == "power_saver") {
|
||||
htp_performance_mode = qnn::HtpPerformanceMode::kHtpPowerSaver;
|
||||
} else if (htp_performance_mode_string == "extreme_power_saver") {
|
||||
htp_performance_mode = qnn::HtpPerformanceMode::kHtpExtremePowerSaver;
|
||||
} else if (htp_performance_mode_string == "sustained_high_performance") {
|
||||
htp_performance_mode = qnn::HtpPerformanceMode::kHtpSustainedHighPerformance;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ void usage() {
|
|||
"\t [QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10.\n"
|
||||
"\t [QNN only] [vtcm_mb]: QNN VTCM size in MB. default to 0(not set).\n"
|
||||
"\t [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n"
|
||||
"\t 'high_power_saver', 'low_balanced', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n"
|
||||
"\t 'high_power_saver', 'low_balanced', 'extreme_power_saver', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n"
|
||||
"\t [QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal', 'normal_high', 'high'. Default to 'normal'. \n"
|
||||
"\t 0 means dump the QNN context binary into separate bin file and set the path in the Onnx skeleton model.\n"
|
||||
"\t [QNN only] [qnn_saver_path]: QNN Saver backend path. e.g '/folderpath/libQnnSaver.so'.\n"
|
||||
|
|
@ -487,7 +487,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
|
|||
// no validation
|
||||
} else if (key == "htp_performance_mode") {
|
||||
std::set<std::string> supported_htp_perf_mode = {"burst", "balanced", "default", "high_performance",
|
||||
"high_power_saver", "low_balanced", "low_power_saver",
|
||||
"high_power_saver", "low_balanced", "extreme_power_saver", "low_power_saver",
|
||||
"power_saver", "sustained_high_performance"};
|
||||
if (supported_htp_perf_mode.find(value) == supported_htp_perf_mode.end()) {
|
||||
std::ostringstream str_stream;
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ namespace perftest {
|
|||
"\t [QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10.\n"
|
||||
"\t [QNN only] [vtcm_mb]: QNN VTCM size in MB. default to 0(not set).\n"
|
||||
"\t [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n"
|
||||
"\t 'high_power_saver', 'low_balanced', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n"
|
||||
"\t 'high_power_saver', 'low_balanced', 'extreme_power_saver', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n"
|
||||
"\t [QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal', 'normal_high', 'high'. Default to 'normal'. \n"
|
||||
"\t [QNN only] [qnn_saver_path]: QNN Saver backend path. e.g '/folderpath/libQnnSaver.so'.\n"
|
||||
"\t [QNN only] [htp_graph_finalization_optimization_mode]: QNN graph finalization optimization mode, options: \n"
|
||||
|
|
|
|||
|
|
@ -347,7 +347,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
|
|||
// no validation
|
||||
} else if (key == "htp_performance_mode") {
|
||||
std::set<std::string> supported_htp_perf_mode = {"burst", "balanced", "default", "high_performance",
|
||||
"high_power_saver", "low_balanced", "low_power_saver",
|
||||
"high_power_saver", "low_balanced", "extreme_power_saver", "low_power_saver",
|
||||
"power_saver", "sustained_high_performance"};
|
||||
if (supported_htp_perf_mode.find(value) == supported_htp_perf_mode.end()) {
|
||||
std::ostringstream str_stream;
|
||||
|
|
|
|||
Loading…
Reference in a new issue