enable embedding sparse optimization by default (#19714)

This commit is contained in:
guyang3532 2024-03-05 13:15:30 +08:00 committed by GitHub
parent 7e613ee821
commit cd56ea4a74
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 11 additions and 7 deletions

View file

@ -246,7 +246,7 @@ to standard outputs.
#### ORTMODULE_ENABLE_EMBEDDING_SPARSE_OPTIMIZER
- **Feature Area**: *ORTMODULE/Optimizations*
- **Description**: By default, this is disabled. This env var can be used for enabling or disabling the embedding input
- **Description**: By default, this is enabled. This env var can be used for enabling or disabling the embedding input
data sparsity-based performance optimizations.
```bash

View file

@ -681,11 +681,15 @@ class GraphExecutionManager(GraphExecutionInterface):
)
if self._runtime_options.enable_embedding_sparse_optimizer and len(embed_sparsity_results) > 0:
graph_transformer_config.sparse_embedding_input_names = list(embed_sparsity_results.keys())
self._logger.info("Embedding sparsity-based optimization is ON for %s", embed_sparsity_results)
self._runtime_options.embed_sparsity_ratio = ",".join(
[f"{k}:{v:.0f}%" for k, v in embed_sparsity_results.items()]
)
if detected_device.type == "cuda":
# Embedding sparsity optimization is only supported on CUDA devices.
graph_transformer_config.sparse_embedding_input_names = list(embed_sparsity_results.keys())
self._logger.info("Embedding sparsity-based optimization is ON for %s", embed_sparsity_results)
self._runtime_options.embed_sparsity_ratio = ",".join(
[f"{k}:{v:.0f}%" for k, v in embed_sparsity_results.items()]
)
else:
self._logger.info("Embedding sparsity-based optimization is not supported on non-CUDA devices.")
# If users don't want to print input density, disable the input density observer to avoid overhead
# when looping through inputs during training.

View file

@ -271,7 +271,7 @@ class _RuntimeOptions:
self.enable_sparse_optimizer = True
self.label_sparsity_ratio = ""
self.embed_sparsity_ratio = ""
self.enable_embedding_sparse_optimizer = False # TODO(pengwa): remove once validation on more models are done.
self.enable_embedding_sparse_optimizer = True
# Configuration for memory optimization.
self.memory_optimization_level = (