mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-18 21:21:17 +00:00
enable embedding sparse optimization by default (#19714)
This commit is contained in:
parent
7e613ee821
commit
cd56ea4a74
3 changed files with 11 additions and 7 deletions
|
|
@ -246,7 +246,7 @@ to standard outputs.
|
|||
#### ORTMODULE_ENABLE_EMBEDDING_SPARSE_OPTIMIZER
|
||||
|
||||
- **Feature Area**: *ORTMODULE/Optimizations*
|
||||
- **Description**: By default, this is disabled. This env var can be used for enabling or disabling the embedding input
|
||||
- **Description**: By default, this is enabled. This env var can be used for enabling or disabling the embedding input
|
||||
data sparsity-based performance optimizations.
|
||||
|
||||
```bash
|
||||
|
|
|
|||
|
|
@ -681,11 +681,15 @@ class GraphExecutionManager(GraphExecutionInterface):
|
|||
)
|
||||
|
||||
if self._runtime_options.enable_embedding_sparse_optimizer and len(embed_sparsity_results) > 0:
|
||||
graph_transformer_config.sparse_embedding_input_names = list(embed_sparsity_results.keys())
|
||||
self._logger.info("Embedding sparsity-based optimization is ON for %s", embed_sparsity_results)
|
||||
self._runtime_options.embed_sparsity_ratio = ",".join(
|
||||
[f"{k}:{v:.0f}%" for k, v in embed_sparsity_results.items()]
|
||||
)
|
||||
if detected_device.type == "cuda":
|
||||
# Embedding sparsity optimization is only supported on CUDA devices.
|
||||
graph_transformer_config.sparse_embedding_input_names = list(embed_sparsity_results.keys())
|
||||
self._logger.info("Embedding sparsity-based optimization is ON for %s", embed_sparsity_results)
|
||||
self._runtime_options.embed_sparsity_ratio = ",".join(
|
||||
[f"{k}:{v:.0f}%" for k, v in embed_sparsity_results.items()]
|
||||
)
|
||||
else:
|
||||
self._logger.info("Embedding sparsity-based optimization is not supported on non-CUDA devices.")
|
||||
|
||||
# If users don't want to print input density, disable the input density observer to avoid overhead
|
||||
# when looping through inputs during training.
|
||||
|
|
|
|||
|
|
@ -271,7 +271,7 @@ class _RuntimeOptions:
|
|||
self.enable_sparse_optimizer = True
|
||||
self.label_sparsity_ratio = ""
|
||||
self.embed_sparsity_ratio = ""
|
||||
self.enable_embedding_sparse_optimizer = False # TODO(pengwa): remove once validation on more models are done.
|
||||
self.enable_embedding_sparse_optimizer = True
|
||||
|
||||
# Configuration for memory optimization.
|
||||
self.memory_optimization_level = (
|
||||
|
|
|
|||
Loading…
Reference in a new issue