Add extreme_power_saver for htp_performance_mode (#19111)

### Description
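Add an `extreme_power_saver` option to the QNN `htp_performance_mode` setting. The new mode requests medium sleep latency and sets the bus and core voltage corners to `DCVS_VOLTAGE_CORNER_DISABLE`; the API documentation, the option parser, and the test tools' usage text and option validation are updated to accept it.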
This commit is contained in:
Hector Li 2024-01-12 19:07:02 -08:00 committed by GitHub
parent 443aeb851c
commit 62a4e9103e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 25 additions and 10 deletions

View file

@ -3598,7 +3598,7 @@ struct OrtApi {
* "rpc_control_latency": QNN RPC control latency.
* "vtcm_mb": QNN VTCM size in MB. default to 0(not set).
* "htp_performance_mode": QNN performance mode, options: "burst", "balanced", "default", "high_performance",
* "high_power_saver", "low_balanced", "low_power_saver", "power_saver", "sustained_high_performance". Default to "default".
* "high_power_saver", "low_balanced", "extreme_power_saver", "low_power_saver", "power_saver", "sustained_high_performance". Default to "default".
* "qnn_saver_path": File path to the QNN Saver backend library. If specified, QNN Saver will be enabled and will
* dump QNN API calls to disk for replay/debugging. QNN Saver produces incorrect model inference results and
* may alter model/EP partitioning. Use only for debugging.

View file

@ -693,6 +693,18 @@ Status QnnBackendManager::SetHtpPowerConfig() {
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS_PLUS;
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS_PLUS;
break;
case HtpPerformanceMode::kHtpExtremePowerSaver:
dcvs_v3.setSleepLatency = 1; // true
dcvs_v3.sleepLatency = kSleepMediumLatency;
dcvs_v3.setBusParams = 1;
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE;
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE;
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE;
dcvs_v3.setCoreParams = 1;
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE;
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE;
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE;
break;
case HtpPerformanceMode::kHtpLowBalanced:
dcvs_v3.setSleepLatency = 1; // true
dcvs_v3.sleepLatency = kSleepMediumLatency;
@ -721,13 +733,13 @@ Status QnnBackendManager::SetHtpPowerConfig() {
ORT_THROW("Invalid performance profile %d", static_cast<int>(htp_performance_mode_));
break;
}
std::vector<const QnnHtpPerfInfrastructure_PowerConfig_t*> perf_power_configs_ptr_ = ObtainNullTermPtrVector(power_configs);
status = htp_perf_infra.setPowerConfig(htp_power_config_client_id_, perf_power_configs_ptr_.data());
std::vector<const QnnHtpPerfInfrastructure_PowerConfig_t*> perf_power_configs_ptr = ObtainNullTermPtrVector(power_configs);
status = htp_perf_infra.setPowerConfig(htp_power_config_client_id_, perf_power_configs_ptr.data());
ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for HTP performance mode.");
// Set rpc control latency here, but note that v68 doesn't support rpc polling mode.
if (rpc_control_latency_ != 0) {
constexpr int kNumRpcPollingPowerConfigs = 1;
constexpr int kNumRpcPollingPowerConfigs = 2;
std::vector<QnnHtpPerfInfrastructure_PowerConfig_t> rpc_power_configs(kNumRpcPollingPowerConfigs);
QnnHtpPerfInfrastructure_PowerConfig_t& rpc_control_latency = rpc_power_configs[0];
// v68 doesn't support this.
@ -735,8 +747,8 @@ Status QnnBackendManager::SetHtpPowerConfig() {
rpc_control_latency.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_CONTROL_LATENCY;
rpc_polling_time.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_POLLING_TIME;
rpc_control_latency.rpcControlLatencyConfig = rpc_control_latency_;
perf_power_configs_ptr_ = ObtainNullTermPtrVector(rpc_power_configs);
status = htp_perf_infra.setPowerConfig(htp_power_config_client_id_, perf_power_configs_ptr_.data());
perf_power_configs_ptr = ObtainNullTermPtrVector(rpc_power_configs);
status = htp_perf_infra.setPowerConfig(htp_power_config_client_id_, perf_power_configs_ptr.data());
ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for RPC control latency.");
}

View file

@ -46,6 +46,7 @@ enum class HtpPerformanceMode : uint8_t {
kHtpHighPowerSaver,
kHtpLowBalanced,
kHtpBalanced,
kHtpExtremePowerSaver,
};
enum class ContextPriority : uint8_t {

View file

@ -64,6 +64,8 @@ static void ParseHtpPerformanceMode(std::string htp_performance_mode_string,
htp_performance_mode = qnn::HtpPerformanceMode::kHtpLowPowerSaver;
} else if (htp_performance_mode_string == "power_saver") {
htp_performance_mode = qnn::HtpPerformanceMode::kHtpPowerSaver;
} else if (htp_performance_mode_string == "extreme_power_saver") {
htp_performance_mode = qnn::HtpPerformanceMode::kHtpExtremePowerSaver;
} else if (htp_performance_mode_string == "sustained_high_performance") {
htp_performance_mode = qnn::HtpPerformanceMode::kHtpSustainedHighPerformance;
} else {

View file

@ -54,7 +54,7 @@ void usage() {
"\t [QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10.\n"
"\t [QNN only] [vtcm_mb]: QNN VTCM size in MB. default to 0(not set).\n"
"\t [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n"
"\t 'high_power_saver', 'low_balanced', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n"
"\t 'high_power_saver', 'low_balanced', 'extreme_power_saver', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n"
"\t [QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal', 'normal_high', 'high'. Default to 'normal'. \n"
"\t 0 means dump the QNN context binary into separate bin file and set the path in the Onnx skeleton model.\n"
"\t [QNN only] [qnn_saver_path]: QNN Saver backend path. e.g '/folderpath/libQnnSaver.so'.\n"
@ -487,7 +487,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
// no validation
} else if (key == "htp_performance_mode") {
std::set<std::string> supported_htp_perf_mode = {"burst", "balanced", "default", "high_performance",
"high_power_saver", "low_balanced", "low_power_saver",
"high_power_saver", "low_balanced", "extreme_power_saver", "low_power_saver",
"power_saver", "sustained_high_performance"};
if (supported_htp_perf_mode.find(value) == supported_htp_perf_mode.end()) {
std::ostringstream str_stream;

View file

@ -73,7 +73,7 @@ namespace perftest {
"\t [QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10.\n"
"\t [QNN only] [vtcm_mb]: QNN VTCM size in MB. default to 0(not set).\n"
"\t [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n"
"\t 'high_power_saver', 'low_balanced', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n"
"\t 'high_power_saver', 'low_balanced', 'extreme_power_saver', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n"
"\t [QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal', 'normal_high', 'high'. Default to 'normal'. \n"
"\t [QNN only] [qnn_saver_path]: QNN Saver backend path. e.g '/folderpath/libQnnSaver.so'.\n"
"\t [QNN only] [htp_graph_finalization_optimization_mode]: QNN graph finalization optimization mode, options: \n"

View file

@ -347,7 +347,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
// no validation
} else if (key == "htp_performance_mode") {
std::set<std::string> supported_htp_perf_mode = {"burst", "balanced", "default", "high_performance",
"high_power_saver", "low_balanced", "low_power_saver",
"high_power_saver", "low_balanced", "extreme_power_saver", "low_power_saver",
"power_saver", "sustained_high_performance"};
if (supported_htp_perf_mode.find(value) == supported_htp_perf_mode.end()) {
std::ostringstream str_stream;