onnxruntime/.lintrunner.toml
Chen Fu 06e684c9f2
Adding cuda kernel (optimized for sm80) for block-wise 4b quantized float 16 GEMM. (#18619)
### Description
Adding CUDA kernel for block-wise 4b quantized float 16 GEMM, this is
specially optimized for Nvidia Ampere GPUs.


### Motivation and Context
Trying to improve quantized LLM inference performance on Nvidia Ampere
GPUs

### Note:
This is implemented by extending CUTLASS, so it has a hard dependency on
CUTLASS. However, in the current build system, loading of the CUTLASS
dependency is guarded with:

(onnxruntime_USE_FLASH_ATTENTION OR
onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION)

If both of these options are turned off, then compilation will fail.

Why is the CUTLASS dependency guarded at all? It's a header-only
library that does not introduce any binary code if not instantiated. What's
the downside of removing all the guards and just including CUTLASS
unconditionally?
2024-03-05 09:37:45 -08:00

158 lines
3.4 KiB
TOML

# Configuration for lintrunner https://github.com/suo/lintrunner
# You can install the dependencies and initialize with
#
# ```sh
# pip install lintrunner lintrunner-adapters
# lintrunner init
# ```
#
# This will install lintrunner on your system and download all the necessary
# dependencies to run linters locally.
# If you want to see what lintrunner init will install, run
# `lintrunner init --dry-run`.
#
# To lint local changes:
#
# ```bash
# lintrunner
# ```
#
# To lint all files:
#
# ```bash
# lintrunner --all-files
# ```
#
# To format files:
#
# ```bash
# lintrunner f --all-files
# ```
#
# To read more about lintrunner, see [wiki](https://github.com/pytorch/pytorch/wiki/lintrunner).
# To update an existing linting rule or create a new one, modify this file or create a
# new adapter following examples in https://github.com/justinchuby/lintrunner-adapters.
# Lint only the files changed relative to origin/main by default.
merge_base_with = 'origin/main'
# Ruff: Python linting (with autofix), configured via pyproject.toml.
[[linter]]
code = 'RUFF'
include_patterns = [
    '**/*.py',
    '**/*.pyi',
]
exclude_patterns = [
    'cmake/external/**',
    # ignore generated flatbuffers code
    'onnxruntime/core/flatbuffers/ort_flatbuffers_py/**',
    'orttraining/orttraining/python/training/optim/_ds_code_store.py',
]
command = [
    'python',
    '-m',
    'lintrunner_adapters',
    'run',
    'ruff_linter',
    '--config=pyproject.toml',
    '@{{PATHSFILE}}',
]
init_command = [
    'python',
    '-m',
    'lintrunner_adapters',
    'run',
    'pip_init',
    '--dry-run={{DRYRUN}}',
    '--requirement=requirements-lintrunner.txt',
]
is_formatter = true
# Black + isort: Python code and import formatting.
[[linter]]
code = 'BLACK-ISORT'
include_patterns = [
    '**/*.py',
]
exclude_patterns = [
    'cmake/**',
    'orttraining/*',
    'onnxruntime/core/flatbuffers/**',
    'orttraining/orttraining/python/training/optim/_ds_code_store.py',
]
command = [
    'python',
    '-m',
    'lintrunner_adapters',
    'run',
    'black_isort_linter',
    '--',
    '@{{PATHSFILE}}',
]
init_command = [
    'python',
    '-m',
    'lintrunner_adapters',
    'run',
    'pip_init',
    '--dry-run={{DRYRUN}}',
    '--requirement=requirements-lintrunner.txt',
]
is_formatter = true
# rustfmt: Rust formatting. Expects a `rustfmt` binary already on PATH
# (no pip init_command here), configured by rust/rustfmt.toml.
[[linter]]
code = 'RUSTFMT'
include_patterns = [
    '**/*.rs',
]
command = [
    'python',
    '-m',
    'lintrunner_adapters',
    'run',
    'rustfmt_linter',
    '--binary=rustfmt',
    '--config-path=rust/rustfmt.toml',
    '--',
    '@{{PATHSFILE}}',
]
is_formatter = true
# clang-format: C/C++/Objective-C formatting.
[[linter]]
code = 'CLANGFORMAT'
include_patterns = [
    '**/*.h',
    '**/*.cc',
    '**/*.hpp',
    '**/*.cpp',
    '**/*.m',
    '**/*.mm',
]
exclude_patterns = [
    'java/**', # FIXME: Enable clang-format for java
    'js/**',
    'onnxruntime/contrib_ops/cuda/bert/tensorrt_fused_multihead_attention/**', # Contains data chunks
    'onnxruntime/core/flatbuffers/schema/*.fbs.h', # Generated code
    'onnxruntime/core/graph/contrib_ops/quantization_defs.cc',
    'onnxruntime/core/mlas/**', # Contains assembly code
    'onnxruntime/core/mickey/cutlass_ext/**', # CUTLASS lib recommends NO automatic code formatting
    'winml/lib/Api.Image/shaders/**', # Contains data chunks
]
command = [
    'python',
    '-m',
    'lintrunner_adapters',
    'run',
    'clangformat_linter',
    '--binary=clang-format',
    '--fallback',
    '--',
    '@{{PATHSFILE}}',
]
init_command = [
    'python',
    '-m',
    'lintrunner_adapters',
    'run',
    'pip_init',
    '--dry-run={{DRYRUN}}',
    '--requirement=requirements-lintrunner.txt',
]
is_formatter = true