# .lintrunner.toml — lintrunner configuration for pytorch/pytorch.
# Each [[linter]] table registers one linter: the file patterns it covers,
# the command used to invoke it, and an optional init_command for setup.
# FLAKE8: Python style checking via flake8 and a pinned set of plugins.
# (Removed stray commit-message text that had been pasted into the middle
# of exclude_patterns, which made the file unparseable as TOML.)
[[linter]]
code = 'FLAKE8'
include_patterns = ['**/*.py']
exclude_patterns = [
'.git/**',
'build_test_custom_build/**',
'build/**',
'caffe2/**',
'docs/caffe2/**',
'docs/cpp/src/**',
'docs/src/**',
'fb/**',
'**/fb/**',
'functorch/docs/**',
'functorch/examples/**',
'functorch/notebooks/**',
'torch/_inductor/fx_passes/serialized_patterns/**',
'torch/_inductor/autoheuristic/artifacts/**',
'scripts/**',
'test/generated_type_hints_smoketest.py',
# Tests from the NumPy test suite
'test/torch_np/numpy_test/**/*.py',
'third_party/**',
'torch/include/**',
'torch/lib/**',
'venv/**',
'**/*.pyi',
'tools/test/test_selective_build.py',
]
command = [
'python3',
'tools/linter/adapters/flake8_linter.py',
'--',
'@{{PATHSFILE}}'
]
# init_command installs the pinned flake8 toolchain with pip.
init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'flake8==6.1.0',
'flake8-bugbear==23.3.23',
'flake8-comprehensions==3.15.0',
'flake8-executable==2.1.3',
'flake8-logging-format==0.9.0',
'flake8-pyi==23.3.1',
'flake8-simplify==0.19.3',
'mccabe==0.7.0',
'pycodestyle==2.11.1',
'pyflakes==3.1.0',
'torchfix==0.4.0 ; python_version >= "3.9" and python_version < "3.13"',
]
# CLANGFORMAT: formats C++/ObjC++/Metal sources with a pinned clang-format
# binary that s3_init.py downloads into .lintbin.
[[linter]]
code = 'CLANGFORMAT'
include_patterns = [
'aten/src/ATen/*.h',
'aten/src/ATen/mps/**/*.mm',
'aten/src/ATen/mps/**/*.h',
'aten/src/ATen/xpu/**/*.h',
'aten/src/ATen/xpu/**/*.cpp',
'aten/src/ATen/core/boxing/**/*.h',
'aten/src/ATen/core/dispatch/**/*.h',
'aten/src/ATen/native/mps/**/*.metal',
'aten/src/ATen/native/mps/**/*.mm',
'aten/src/ATen/native/mps/**/*.h',
'aten/src/ATen/native/vulkan/**/*.h',
'aten/src/ATen/native/vulkan/**/*.cpp',
'aten/src/ATen/native/cuda/MultiTensorApply.cuh',
'aten/src/ATen/native/**/Foreach*.*',
'aten/src/ATen/native/cuda/fused*.*',
'aten/src/ATen/native/cuda/Fused*.cu',
'aten/src/ATen/native/cudnn/*.h',
'aten/src/ATen/native/cudnn/*.cpp',
'aten/src/ATen/native/mkldnn/xpu/**/*.h',
'aten/src/ATen/native/mkldnn/xpu/**/*.cpp',
'aten/src/ATen/native/Tensor*.h',
'aten/src/ATen/native/Tensor*.cpp',
'c10/**/*.h',
'c10/**/*.cpp',
'torch/csrc/**/*.h',
'torch/csrc/**/*.hpp',
'torch/csrc/**/*.cpp',
'test/cpp/**/*.h',
'test/cpp/**/*.cpp',
]
# Vendored / generated headers are excluded from formatting.
exclude_patterns = [
'aten/src/ATen/native/vulkan/api/vk_mem_alloc.h',
'aten/src/ATen/native/mps/kernels/Quantized.metal',
'c10/util/strong_type.h',
'**/fb/**',
'torch/csrc/inductor/aoti_torch/generated/**',
'torch/csrc/jit/serialization/mobile_bytecode_generated.h',
'torch/csrc/utils/pythoncapi_compat.h',
'aten/src/ATen/dlpack.h',
]
init_command = [
'python3',
'tools/linter/adapters/s3_init.py',
'--config-json=tools/linter/adapters/s3_init_config.json',
'--linter=clang-format',
'--dry-run={{DRYRUN}}',
'--output-dir=.lintbin',
'--output-name=clang-format',
]
command = [
'python3',
'tools/linter/adapters/clangformat_linter.py',
'--binary=.lintbin/clang-format',
'--',
'@{{PATHSFILE}}'
]
# is_formatter means this linter can rewrite files, not just report.
is_formatter = true
# MYPY: type-checks Python sources against mypy.ini; init_command installs
# pinned mypy, type stubs, and runtime deps needed for checking.
[[linter]]
code = 'MYPY'
include_patterns = [
'torch/**/*.py',
'torch/**/*.pyi',
'caffe2/**/*.py',
'caffe2/**/*.pyi',
'test/test_bundled_images.py',
'test/test_bundled_inputs.py',
'test/test_complex.py',
'test/test_datapipe.py',
'test/test_futures.py',
# 'test/test_numpy_interop.py',
'test/test_torch.py',
'test/test_type_hints.py',
'test/test_type_info.py',
'test/test_utils.py',
]
exclude_patterns = [
'**/fb/**',
]
command = [
'python3',
'tools/linter/adapters/mypy_linter.py',
'--config=mypy.ini',
'--',
'@{{PATHSFILE}}'
]
init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'numpy==1.26.4 ; python_version >= "3.9" and python_version <= "3.11"',
'numpy==2.1.0 ; python_version >= "3.12"',
'expecttest==0.3.0',
'mypy==1.13.0',
'sympy==1.13.0 ; python_version >= "3.9"',
'types-requests==2.27.25',
'types-PyYAML==6.0.7',
'types-tabulate==0.8.8',
'types-protobuf==3.19.18',
'types-pkg-resources==0.1.3',
'types-Jinja2==2.11.9',
'types-colorama==0.4.6',
'filelock==3.13.1',
'junitparser==2.1.1',
'rich==10.9.0',
'pyyaml==6.0.1',
'optree==0.13.0',
'dataclasses_json==0.6.7',
'pandas==2.2.3',
]
# MYPYSTRICT: stricter mypy run (mypy-strict.ini) for tooling and codegen
# scripts; reuses the same adapter with a different config and code.
[[linter]]
code = 'MYPYSTRICT'
include_patterns = [
'.github/**/*.py',
'benchmarks/instruction_counts/**/*.py',
'tools/**/*.py',
'torchgen/**/*.py',
'torch/utils/_pytree.py',
'torch/utils/_cxx_pytree.py',
'torch/utils/benchmark/utils/common.py',
'torch/utils/benchmark/utils/timer.py',
'torch/utils/benchmark/utils/valgrind_wrapper/**/*.py',
]
exclude_patterns = [
# (linbinyu) copied from internal repo
'**/fb/**',
'tools/code_analyzer/gen_operators_yaml.py',
'tools/dynamo/verify_dynamo.py',
'tools/gen_vulkan_spv.py',
'tools/test/gen_operators_yaml_test.py',
'tools/test/gen_oplist_test.py',
'tools/test/test_selective_build.py',
]
command = [
'python3',
'tools/linter/adapters/mypy_linter.py',
'--config=mypy-strict.ini',
'--code=MYPYSTRICT',
'--',
'@{{PATHSFILE}}'
]
# CLANGTIDY: runs a pinned clang-tidy (fetched into .lintbin) over selected
# C++ sources; requires a compile database under ./build (--build_dir).
[[linter]]
code = 'CLANGTIDY'
include_patterns = [
# Enable coverage of headers in aten/src/ATen
# and excluding most sub-directories for now.
'aten/src/ATen/*.h',
'aten/src/ATen/*.cpp',
'aten/src/ATen/cuda/*.cpp',
'aten/src/ATen/cpu/*.h',
'aten/src/ATen/cpu/*.cpp',
'aten/src/ATen/core/*.h',
'aten/src/ATen/core/*.cpp',
'aten/src/ATen/cudnn/*.h',
'aten/src/ATen/cudnn/*.cpp',
'aten/src/ATen/native/mkldnn/xpu/**/*.h',
'aten/src/ATen/native/mkldnn/xpu/**/*.cpp',
'aten/src/ATen/detail/*',
'aten/src/ATen/functorch/*.h',
'aten/src/ATen/functorch/*.cpp',
'aten/src/ATen/native/nested/cuda/*.cpp',
'aten/src/ATen/native/nested/cuda/*.h',
'aten/src/ATen/native/nested/*.cpp',
'aten/src/ATen/native/nested/*.h',
'c10/**/*.cpp',
'c10/**/*.h',
'torch/*.h',
'torch/csrc/*.h',
'torch/csrc/*.cpp',
'torch/csrc/**/*.h',
'torch/csrc/**/*.cpp',
'torch/csrc/jit/serialization/*.h',
'torch/csrc/jit/serialization/*.cpp',
]
exclude_patterns = [
# The negative filters below are to exclude files that include onnx_pb.h or
# caffe2_pb.h, otherwise we'd have to build protos as part of this CI job.
# CUDA files are also excluded.
'**/fb/**',
'**/generated/**',
'**/*pb.h',
'**/*inl.h',
'aten/src/ATen/cpu/FlushDenormal.cpp',
'aten/src/ATen/cpu/Utils.cpp',
'aten/src/ATen/cpu/vml.h',
'aten/src/ATen/CPUFixedAllocator.h',
'aten/src/ATen/Parallel*.h',
'c10/xpu/**/*.h',
'c10/xpu/**/*.cpp',
'c10/benchmark/intrusive_ptr_benchmark.cpp',
'c10/cuda/CUDAAlgorithm.h',
'c10/util/complex_math.h',
'c10/util/complex_utils.h',
'c10/util/flat_hash_map.h',
'c10/util/logging*.h',
'c10/metal/*.h',
'c10/util/hash.h',
'c10/util/strong_type.h',
'c10/util/SmallVector.h',
'c10/util/win32-headers.h',
'c10/test/**/*.h',
'third_party/**/*',
'torch/csrc/api/include/torch/nn/modules/common.h',
'torch/csrc/api/include/torch/linalg.h',
'torch/csrc/autograd/generated/**',
'torch/csrc/distributed/**/*.cu',
'torch/csrc/distributed/c10d/WinSockUtils.hpp',
'torch/csrc/distributed/c10d/quantization/quantization_gpu.h',
'torch/csrc/dynamo/eval_frame.h',
'torch/csrc/inductor/aoti_torch/c/shim.h',
'torch/csrc/jit/**/*',
'torch/csrc/jit/serialization/mobile_bytecode_generated.h',
'torch/csrc/utils/generated_serialization_types.h',
'torch/csrc/utils/pythoncapi_compat.h',
'torch/csrc/inductor/aoti_runtime/sycl_runtime_wrappers.h',
]
init_command = [
'python3',
'tools/linter/adapters/s3_init.py',
'--config-json=tools/linter/adapters/s3_init_config.json',
'--linter=clang-tidy',
'--dry-run={{DRYRUN}}',
'--output-dir=.lintbin',
'--output-name=clang-tidy',
]
command = [
'python3',
'tools/linter/adapters/clangtidy_linter.py',
'--binary=.lintbin/clang-tidy',
'--build_dir=./build',
'--',
'@{{PATHSFILE}}'
]
# TYPEIGNORE: grep-based check forbidding unqualified `# type: ignore`
# comments; they must carry an error code, e.g. `type: ignore[arg-type]`.
[[linter]]
code = 'TYPEIGNORE'
include_patterns = ['**/*.py', '**/*.pyi']
exclude_patterns = [
'fb/**',
'**/fb/**',
'test/test_jit.py',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=# type:\s*ignore([^\[]|$)',
'--linter-name=TYPEIGNORE',
'--error-name=unqualified type: ignore',
"""--error-description=\
This line has an unqualified `type: ignore`; \
please convert it to `type: ignore[xxxx]`\
""",
'--',
'@{{PATHSFILE}}'
]
# TYPENOSKIP: forbids `follow_imports = skip` anywhere in mypy.ini, since
# it silently disables type checking for the affected modules.
[[linter]]
code = 'TYPENOSKIP'
include_patterns = ['mypy.ini']
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=follow_imports\s*=\s*skip',
'--linter-name=TYPENOSKIP',
'--error-name=use of follow_imports = skip',
"""--error-description=\
follow_imports = skip is forbidden from mypy.ini configuration as it \
is extremely easy to accidentally turn off type checking unintentionally. If \
you need to suppress type errors, use a top level # mypy: ignore-errors. \
Do not rely on automatic Any substitution; instead, manually # type: ignore \
at use sites or define a pyi type stub with more relaxed types. \
""",
'--',
'@{{PATHSFILE}}'
]
# NOQA: grep-based check forbidding bare `# noqa`; suppressions must name
# the silenced code, e.g. `noqa: F401`.
[[linter]]
code = 'NOQA'
include_patterns = ['**/*.py', '**/*.pyi']
exclude_patterns = [
'caffe2/**',
'fb/**',
'**/fb/**'
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=# noqa([^:]|$)',
'--linter-name=NOQA',
'--error-name=unqualified noqa',
"""--error-description=\
This line has an unqualified `noqa`; \
please convert it to `noqa: XXXX`\
""",
'--',
'@{{PATHSFILE}}'
]
# NATIVEFUNCTIONS: validates (and can reformat) native_functions.yaml via a
# dedicated adapter; needs ruamel.yaml for round-trip-safe YAML editing.
[[linter]]
code = 'NATIVEFUNCTIONS'
include_patterns=['aten/src/ATen/native/native_functions.yaml']
command = [
'python3',
'tools/linter/adapters/nativefunctions_linter.py',
'--native-functions-yml=aten/src/ATen/native/native_functions.yaml',
]
init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'ruamel.yaml==0.17.4',
]
is_formatter = true
# GHA: project-specific linter for GitHub Actions workflow YAML files.
[[linter]]
code = 'GHA'
include_patterns=['.github/workflows/**/*.yml']
command = [
'python3',
'tools/linter/adapters/gha_linter.py',
'--',
'@{{PATHSFILE}}'
]
# NEWLINE: newline-hygiene formatter run over the whole tree, excluding
# binary-ish and vendored files.
[[linter]]
code = 'NEWLINE'
include_patterns=['**']
exclude_patterns=[
'**/contrib/**',
'third_party/**',
'**/*.bat',
'**/*.expect',
'**/*.ipynb',
'**/*.ps1',
'**/*.ptl',
'fb/**',
'**/fb/**',
'tools/clang_format_hash/**',
'test/cpp/jit/upgrader_models/*.ptl',
'test/cpp/jit/upgrader_models/*.ptl.ff',
'**/*.png',
'**/*.gz',
]
command = [
'python3',
'tools/linter/adapters/newlines_linter.py',
'--',
'@{{PATHSFILE}}',
]
is_formatter = true
# SPACES: grep-based check for trailing whitespace, auto-fixable through
# --replace-pattern. (Removed stray commit-message text that had been
# pasted into the middle of exclude_patterns, breaking the TOML.)
[[linter]]
code = 'SPACES'
include_patterns = ['**']
exclude_patterns = [
'**/contrib/**',
'**/*.diff',
'**/*.patch',
'third_party/**',
'aten/src/ATen/native/vulkan/api/vk_mem_alloc.h',
'fb/**',
'**/fb/**',
'test/cpp/jit/upgrader_models/*.ptl',
'test/cpp/jit/upgrader_models/*.ptl.ff',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=[[:blank:]]$',
'--linter-name=SPACES',
'--error-name=trailing spaces',
'--replace-pattern=s/[[:blank:]]+$//',
"""--error-description=\
This line has trailing spaces; please remove them.\
""",
'--',
'@{{PATHSFILE}}'
]
# TABS: grep-based check replacing literal tab characters with spaces,
# auto-fixable through --replace-pattern. (Removed stray commit-message
# text that had been pasted into the middle of exclude_patterns.)
[[linter]]
code = 'TABS'
include_patterns = ['**']
exclude_patterns = [
'**/*.svg',
'**/*Makefile',
'**/contrib/**',
'third_party/**',
'**/.gitattributes',
'**/.gitmodules',
'fb/**',
'**/fb/**',
'aten/src/ATen/native/vulkan/api/vk_mem_alloc.h',
'test/cpp/jit/upgrader_models/*.ptl',
'test/cpp/jit/upgrader_models/*.ptl.ff',
'.ci/docker/common/install_rocm_drm.sh',
'.lintrunner.toml',
'.ci/magma/package_files/*.patch',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
# @lint-ignore TXT2
'--pattern=	',
'--linter-name=TABS',
'--error-name=saw some tabs',
'--replace-pattern=s/\t/ /',
"""--error-description=\
This line has tabs; please replace them with spaces.\
""",
'--',
'@{{PATHSFILE}}'
]
# C10_UNUSED: auto-fixable migration of the deprecated C10_UNUSED macro to
# the standard [[maybe_unused]] attribute.
[[linter]]
code = 'C10_UNUSED'
include_patterns = [
'**/*.cpp',
'**/*.h',
]
exclude_patterns = [
# The macro is defined here, so the definition site is exempt.
'c10/macros/Macros.h',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=C10_UNUSED',
'--linter-name=C10_UNUSED',
'--error-name=deprecated C10_UNUSED macro',
'--replace-pattern=s/C10_UNUSED/[[maybe_unused]]/',
"""--error-description=\
Deprecated macro, use [[maybe_unused]] directly\
""",
'--',
'@{{PATHSFILE}}'
]
# C10_NODISCARD: auto-fixable migration of the deprecated C10_NODISCARD
# macro to the standard [[nodiscard]] attribute.
[[linter]]
code = 'C10_NODISCARD'
include_patterns = [
'**/*.cpp',
'**/*.h',
]
exclude_patterns = [
# The macro is defined here, so the definition site is exempt.
'c10/macros/Macros.h',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=C10_NODISCARD',
'--linter-name=C10_NODISCARD',
'--error-name=deprecated C10_NODISCARD macro',
'--replace-pattern=s/C10_NODISCARD/[[nodiscard]]/',
"""--error-description=\
Deprecated macro, use [[nodiscard]] directly\
""",
'--',
'@{{PATHSFILE}}'
]
# INCLUDE: auto-fixable check converting quoted includes (#include "x")
# to angle-bracket includes (#include <x>) in core C++ directories.
[[linter]]
code = 'INCLUDE'
include_patterns = [
'c10/**',
'aten/**',
'torch/csrc/**',
]
exclude_patterns = [
'aten/src/ATen/native/quantized/cpu/qnnpack/**',
'aten/src/ATen/native/vulkan/api/vk_mem_alloc.h',
'aten/src/ATen/native/vulkan/glsl/**',
'**/fb/**',
'torch/csrc/jit/serialization/mobile_bytecode_generated.h',
'torch/csrc/utils/pythoncapi_compat.h',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=#include "',
'--linter-name=INCLUDE',
'--error-name=quoted include',
'--replace-pattern=s/#include "(.*)"$/#include <\1>/',
"""--error-description=\
This #include uses quotes; please convert it to #include <xxxx>\
""",
'--',
'@{{PATHSFILE}}'
]
# PYBIND11_INCLUDE: files that include pybind11 headers directly must also
# include torch/csrc/utils/pybind.h (it carries required specializations);
# the replace-pattern auto-inserts that include before the first pybind11 one.
[[linter]]
code = 'PYBIND11_INCLUDE'
include_patterns = [
'**/*.cpp',
'**/*.h',
]
exclude_patterns = [
'torch/csrc/utils/pybind.h',
'torch/utils/benchmark/utils/valgrind_wrapper/compat_bindings.cpp',
'caffe2/**/*',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=#include <pybind11\/(^|[^(gil\.h)])',
'--allowlist-pattern=#include <torch\/csrc\/utils\/pybind.h>',
'--linter-name=PYBIND11_INCLUDE',
'--match-first-only',
'--error-name=direct include of pybind11',
# https://stackoverflow.com/a/33416489/23845
# NB: this won't work if the pybind11 include is on the first line;
# but that's fine because it will just mean the lint will still fail
# after applying the change and you will have to fix it manually
'--replace-pattern=1,/(#include <pybind11\/)/ s/(#include <pybind11\/)/#include <torch\/csrc\/utils\/pybind.h>\n\1/',
"""--error-description=\
This #include directly includes pybind11 without also including \
#include <torch/csrc/utils/pybind.h>; this means some important \
specializations may not be included.\
""",
'--',
'@{{PATHSFILE}}'
]
# ERROR_PRONE_ISINSTANCE: flags isinstance(..., int/float) in symbolic-shape
# aware code, where SymInt/SymFloat may also need to be accepted. (Removed
# stray commit-message text that had been pasted into the middle of the
# --error-description triple-quoted string, corrupting it.)
[[linter]]
code = 'ERROR_PRONE_ISINSTANCE'
include_patterns = [
'torch/_refs/**/*.py',
'torch/_prims/**/*.py',
'torch/_prims_common/**/*.py',
'torch/_decomp/**/*.py',
'torch/_meta_registrations.py',
]
exclude_patterns = [
'**/fb/**',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=isinstance\([^)]+(int|float)\)',
'--linter-name=ERROR_PRONE_ISINSTANCE',
'--error-name=error prone isinstance',
"""--error-description=\
This line has an isinstance call that directly refers to \
int or float. This is error-prone because you may also \
have wanted to allow SymInt or SymFloat in your test. \
To suppress this lint, use an appropriate type alias defined \
in torch._prims_common; use IntLike/FloatLike when you would accept \
both regular and symbolic numbers, Dim for ints representing \
dimensions, or IntWithoutSymInt/FloatWithoutSymFloat if you really \
meant to exclude symbolic numbers.
""",
'--',
'@{{PATHSFILE}}'
]
# PYBIND11_SPECIALIZATION: PYBIND11_DECLARE_HOLDER_TYPE specializations must
# live in torch/csrc/utils/pybind.h (or a listed exemption) so every py::cast
# site sees them.
[[linter]]
code = 'PYBIND11_SPECIALIZATION'
include_patterns = [
'**/*.cpp',
'**/*.h',
]
exclude_patterns = [
# The place for all orphan specializations
'torch/csrc/utils/pybind.h',
# These specializations are non-orphan
'torch/csrc/distributed/c10d/init.cpp',
'torch/csrc/jit/python/pybind.h',
'fb/**',
'**/fb/**',
# These are safe to exclude as they do not have Python
'c10/**/*',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=PYBIND11_DECLARE_HOLDER_TYPE',
'--linter-name=PYBIND11_SPECIALIZATION',
'--error-name=pybind11 specialization in non-standard location',
"""--error-description=\
This pybind11 specialization (PYBIND11_DECLARE_HOLDER_TYPE) should \
be placed in torch/csrc/utils/pybind.h so that it is guaranteed to be \
included at any site that may potentially make use of it via py::cast. \
If your specialization is in the same header file as the definition \
of the holder type, you can ignore this lint by adding your header to \
the exclude_patterns for this lint in .lintrunner.toml. For more \
information see https://github.com/pybind/pybind11/issues/4099 \
""",
'--',
'@{{PATHSFILE}}'
]
# PYPIDEP: flags `pip install somepackage` invocations in CI config that do
# not pin an exact version.
[[linter]]
code = 'PYPIDEP'
include_patterns = ['.github/**']
exclude_patterns = [
'**/*.rst',
'**/*.py',
'**/*.md',
'**/*.diff',
'**/fb/**',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
"""--pattern=\
(pip|pip3|python -m pip|python3 -m pip|python3 -mpip|python -mpip) \
install ([a-zA-Z0-9][A-Za-z0-9\\._\\-]+)([^/=<>~!]+)[A-Za-z0-9\\._\\-\\*\\+\\!]*$\
""",
'--linter-name=PYPIDEP',
'--error-name=unpinned PyPI install',
"""--error-description=\
This line has unpinned PyPi installs; \
please pin them to a specific version: e.g. 'thepackage==1.2'\
""",
'--',
'@{{PATHSFILE}}'
]
# EXEC: flags files with the executable bit set that are not expected to be
# executable (scripts and known launchers are excluded).
[[linter]]
code = 'EXEC'
include_patterns = ['**']
exclude_patterns = [
'third_party/**',
'torch/bin/**',
'**/*.so',
'**/*.py',
'**/*.sh',
'**/*.bash',
'**/git-pre-commit',
'**/git-clang-format',
'**/gradlew',
'fb/**',
'**/fb/**',
]
command = [
'python3',
'tools/linter/adapters/exec_linter.py',
'--',
'@{{PATHSFILE}}',
]
# CUBINCLUDE: forbids direct `#include <cub/...>` in ATen; use the
# ATen/cuda/cub.cuh wrapper instead.
[[linter]]
code = 'CUBINCLUDE'
include_patterns = ['aten/**']
exclude_patterns = [
# The wrapper itself is allowed to include cub directly.
'aten/src/ATen/cuda/cub*.cuh',
'**/fb/**',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=#include <cub/',
'--linter-name=CUBINCLUDE',
'--error-name=direct cub include',
"""--error-description=\
This line has a direct cub include; please include \
ATen/cuda/cub.cuh instead and wrap your cub calls in \
at::native namespace if necessary.
""",
'--',
'@{{PATHSFILE}}'
]
# RAWCUDA: forbids raw cudaStreamSynchronize calls in aten/c10; use the
# at::cuda wrappers instead.
[[linter]]
code = 'RAWCUDA'
include_patterns = [
'aten/**',
'c10/**',
]
exclude_patterns = [
'aten/src/ATen/test/**',
'c10/cuda/CUDAFunctions.h',
'c10/cuda/CUDACachingAllocator.cpp',
'**/fb/**',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=cudaStreamSynchronize',
'--linter-name=RAWCUDA',
'--error-name=raw CUDA API usage',
"""--error-description=\
This line calls raw CUDA APIs directly; please use at::cuda wrappers instead.
""",
'--',
'@{{PATHSFILE}}'
]
# RAWCUDADEVICE: forbids raw cudaSetDevice/cudaGetDevice calls; use the
# c10::cuda wrappers instead. Exclusions are the wrappers themselves and
# a few grandfathered call sites.
[[linter]]
code = 'RAWCUDADEVICE'
include_patterns = [
'aten/**',
'c10/**',
'torch/csrc/**',
]
exclude_patterns = [
'aten/src/ATen/cuda/CUDAContext.cpp',
'aten/src/ATen/cuda/CUDAGeneratorImpl.cpp',
'aten/src/ATen/test/**',
'c10/core/impl/InlineDeviceGuard.h',
'c10/cuda/CUDAFunctions.cpp',
'c10/cuda/CUDAGuard.h',
'c10/cuda/impl/CUDATest.cpp',
'torch/csrc/cuda/nccl.cpp',
'**/fb/**',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=cudaSetDevice(',
'--pattern=cudaGetDevice(',
'--linter-name=RAWCUDADEVICE',
'--error-name=raw CUDA API usage',
"""--error-description=\
This line calls raw CUDA APIs directly; please use c10::cuda wrappers instead.
""",
'--',
'@{{PATHSFILE}}'
]
# ROOT_LOGGING: forbids calls on the root logger (logging.info etc.) in
# library code; auto-fixes to log.<level>() assuming a module-level logger.
[[linter]]
code = 'ROOT_LOGGING'
include_patterns = [
'**/*.py',
]
# These are not library code, but scripts in their own right, and so
# therefore are permitted to use logging
exclude_patterns = [
'tools/**',
'test/**',
'benchmarks/**',
'torch/distributed/run.py',
'functorch/benchmarks/**',
# Grandfathered in
'caffe2/**',
'fb/**',
'**/fb/**',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=logging\.(debug|info|warn|warning|error|critical|log|exception)\(',
'--replace-pattern=s/logging\.(debug|info|warn|warning|error|critical|log|exception)\(/log.\1(/',
'--linter-name=ROOT_LOGGING',
'--error-name=use of root logger',
"""--error-description=\
Do not use root logger (logging.info, etc) directly; instead \
define 'log = logging.getLogger(__name__)' and call, e.g., log.info().
""",
'--',
'@{{PATHSFILE}}'
]
# DEPLOY_DETECTION: forbids detecting torch::deploy via sys.executable;
# auto-fixes to torch._running_with_deploy().
[[linter]]
code = 'DEPLOY_DETECTION'
include_patterns = [
'**/*.py',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=sys\.executable == .torch_deploy.',
'--replace-pattern=s/sys\.executable == .torch_deploy./torch._running_with_deploy\(\)/',
'--linter-name=DEPLOY_DETECTION',
'--error-name=properly detect deploy runner',
"""--error-description=\
Do not use sys.executable to detect if running within deploy/multipy, use torch._running_with_deploy().
""",
'--',
'@{{PATHSFILE}}'
]
# CMAKE: runs cmakelint (pinned via pip) with .cmakelintrc over CMake files,
# excluding vendored modules and generated config templates.
[[linter]]
code = 'CMAKE'
include_patterns = [
"**/*.cmake",
"**/*.cmake.in",
"**/CMakeLists.txt",
]
exclude_patterns = [
'cmake/Modules/**',
'cmake/Modules_CUDA_fix/**',
'cmake/Caffe2Config.cmake.in',
'aten/src/ATen/ATenConfig.cmake.in',
'cmake/TorchConfig.cmake.in',
'cmake/TorchConfigVersion.cmake.in',
# NOTE(review): '.cmake.i' looks like a typo for '.cmake.in' — confirm
# against the actual filename before changing.
'cmake/cmake_uninstall.cmake.i',
'fb/**',
'**/fb/**',
]
command = [
'python3',
'tools/linter/adapters/cmake_linter.py',
'--config=.cmakelintrc',
'--',
'@{{PATHSFILE}}',
]
init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'cmakelint==1.4.1',
]
# SHELLCHECK: runs shellcheck (via the shellcheck-py pip package) over CI
# shell scripts.
[[linter]]
code = 'SHELLCHECK'
include_patterns = [
'.ci/pytorch/**/*.sh'
]
exclude_patterns = [
'**/fb/**',
]
command = [
'python3',
'tools/linter/adapters/shellcheck_linter.py',
'--',
'@{{PATHSFILE}}',
]
init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'shellcheck-py==0.7.2.1',
]
# ACTIONLINT: lints GitHub Actions workflows with a pinned actionlint binary
# fetched into .lintbin by s3_init.py.
[[linter]]
code = 'ACTIONLINT'
include_patterns = [
'.github/workflows/*.yml',
'.github/workflows/*.yaml',
# actionlint does not support composite actions yet
# '.github/actions/**/*.yml',
# '.github/actions/**/*.yaml',
]
exclude_patterns = [
'**/fb/**',
]
command = [
'python3',
'tools/linter/adapters/actionlint_linter.py',
'--binary=.lintbin/actionlint',
'--',
'@{{PATHSFILE}}',
]
init_command = [
'python3',
'tools/linter/adapters/s3_init.py',
'--config-json=tools/linter/adapters/s3_init_config.json',
'--linter=actionlint',
'--dry-run={{DRYRUN}}',
'--output-dir=.lintbin',
'--output-name=actionlint',
]
# TESTOWNERS: checks that test files declare owners via the testowners
# adapter.
[[linter]]
code = 'TESTOWNERS'
include_patterns = [
'test/**/test_*.py',
'test/**/*_test.py',
]
exclude_patterns = [
'test/run_test.py',
'**/fb/**',
]
command = [
'python3',
'tools/linter/adapters/testowners_linter.py',
'--',
'@{{PATHSFILE}}',
]
# TEST_HAS_MAIN: runs tools/linter/adapters/test_has_main_linter.py over
# test/**/test_*.py. The exclude list covers suites that are driven through
# other runner scripts (see inline comments) plus individually grandfathered
# files.
[[linter]]
code = 'TEST_HAS_MAIN'
include_patterns = [
'test/**/test_*.py',
]
exclude_patterns = [
'test/run_test.py',
'**/fb/**',
'test/quantization/**', # should be run through test/test_quantization.py
'test/jit/**', # should be run through test/test_jit.py
'test/ao/sparsity/**', # should be run through test/test_ao_sparsity.py
'test/fx/**', # should be run through test/test_fx.py
'test/bottleneck_test/**', # excluded by test/run_test.py
'test/package/**', # excluded by test/run_test.py
'test/distributed/argparse_util_test.py',
'test/distributed/bin/test_script.py',
'test/distributed/elastic/agent/server/test/local_elastic_agent_test.py',
'test/distributed/elastic/multiprocessing/bin/test_script.py',
'test/distributed/elastic/multiprocessing/bin/zombie_test.py',
'test/distributed/elastic/multiprocessing/errors/api_test.py',
'test/distributed/elastic/multiprocessing/errors/error_handler_test.py',
'test/distributed/elastic/multiprocessing/redirects_test.py',
'test/distributed/elastic/multiprocessing/tail_log_test.py',
'test/distributed/elastic/rendezvous/api_test.py',
'test/distributed/elastic/rendezvous/c10d_rendezvous_backend_test.py',
'test/distributed/elastic/rendezvous/dynamic_rendezvous_test.py',
'test/distributed/elastic/rendezvous/etcd_rendezvous_backend_test.py',
'test/distributed/elastic/rendezvous/etcd_rendezvous_test.py',
'test/distributed/elastic/rendezvous/etcd_server_test.py',
'test/distributed/elastic/rendezvous/rendezvous_backend_test.py',
'test/distributed/elastic/rendezvous/static_rendezvous_test.py',
'test/distributed/elastic/rendezvous/utils_test.py',
'test/distributed/elastic/timer/api_test.py',
'test/distributed/elastic/utils/data/cycling_iterator_test.py',
'test/distributed/launcher/api_test.py',
'test/distributed/launcher/bin/test_script.py',
'test/distributed/launcher/bin/test_script_init_method.py',
'test/distributed/launcher/bin/test_script_is_torchelastic_launched.py',
'test/distributed/launcher/bin/test_script_local_rank.py',
'test/distributed/launcher/launch_test.py',
'test/distributed/launcher/run_test.py',
'test/distributed/optim/test_apply_optimizer_in_backward.py',
'test/distributed/optim/test_named_optimizer.py',
'test/distributed/test_c10d_spawn.py',
'test/distributed/test_collective_utils.py',
'test/distributions/test_distributions.py',
'test/inductor/test_aot_inductor_utils.py',
'test/lazy/test_bindings.py',
'test/lazy/test_extract_compiled_graph.py',
'test/lazy/test_meta_kernel.py',
'test/nn/test_init.py',
'test/onnx/model_defs/op_test.py',
'test/onnx/test_models_quantized_onnxruntime.py',
'test/onnx/test_onnxscript_no_runtime.py',
'test/onnx_caffe2/test_caffe2_common.py',
'test/optim/test_lrscheduler.py',
'test/optim/test_optim.py',
'test/optim/test_swa_utils.py',
'test/run_test.py',
'test/test_bundled_images.py',
'test/test_cuda_expandable_segments.py',
'test/test_hub.py',
]
command = [
'python3',
'tools/linter/adapters/test_has_main_linter.py',
'--',
'@{{PATHSFILE}}',
]
# CALL_ONCE: grep-based lint that forbids std::call_once in c10/aten/torch
# C++ sources and auto-replaces it with c10::call_once via the sed-style
# --replace-pattern. c10/util/CallOnce.h (the implementation) is exempt.
[[linter]]
code = 'CALL_ONCE'
include_patterns = [
'c10/**',
'aten/**',
'torch/csrc/**',
]
exclude_patterns = [
'c10/util/CallOnce.h',
'**/fb/**',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=std::call_once',
'--linter-name=CALL_ONCE',
'--error-name=invalid call_once',
'--replace-pattern=s/std::call_once/c10::call_once/',
"""--error-description=\
Use of std::call_once is forbidden and should be replaced with c10::call_once\
""",
'--',
'@{{PATHSFILE}}'
]
# CONTEXT_DECORATOR: flags `@`-decorator usage of the listed context managers
# under torch/; per the error text, using them as decorators breaks cProfile
# traces, so they must be used as `with` context managers instead.
[[linter]]
code = 'CONTEXT_DECORATOR'
include_patterns = [
'torch/**',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=@.*(dynamo_timed|preserve_rng_state|clear_frame|with_fresh_cache_if_config|use_lazy_graph_module|_disable_current_modes)',
'--linter-name=CONTEXT_DECORATOR',
'--error-name=avoid context decorator',
"""--error-description=\
Do not use context manager as decorator as it breaks cProfile traces. Use it as \
a context manager instead\
""",
'--',
'@{{PATHSFILE}}'
]
# ONCE_FLAG: companion to the CALL_ONCE lint above; forbids std::once_flag in
# c10/aten/torch C++ sources and auto-replaces it with c10::once_flag.
[[linter]]
code = 'ONCE_FLAG'
include_patterns = [
'c10/**',
'aten/**',
'torch/csrc/**',
]
exclude_patterns = [
'**/fb/**',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=std::once_flag',
'--linter-name=ONCE_FLAG',
'--error-name=invalid once_flag',
'--replace-pattern=s/std::once_flag/c10::once_flag/',
"""--error-description=\
Use of std::once_flag is forbidden and should be replaced with c10::once_flag\
""",
'--',
'@{{PATHSFILE}}'
]
# WORKFLOWSYNC: runs workflow_consistency_linter.py over the listed CI
# workflow files (a consistency check across them, per the adapter's name).
# Requires PyYAML to parse the workflows.
[[linter]]
code = 'WORKFLOWSYNC'
include_patterns = [
'.github/workflows/pull.yml',
'.github/workflows/trunk.yml',
'.github/workflows/periodic.yml',
'.github/workflows/mac-mps.yml',
'.github/workflows/slow.yml',
]
command = [
'python3',
'tools/linter/adapters/workflow_consistency_linter.py',
'--',
'@{{PATHSFILE}}'
]
init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'PyYAML==6.0.1',
]
# NO_WORKFLOWS_ON_FORK: runs tools/linter/adapters/no_workflows_on_fork.py
# over all GitHub workflow/action YAML files. Requires PyYAML.
[[linter]]
code = 'NO_WORKFLOWS_ON_FORK'
include_patterns = [
'.github/**/*.yml',
'.github/**/*.yaml',
]
exclude_patterns = [
'**/fb/**',
]
command = [
'python3',
'tools/linter/adapters/no_workflows_on_fork.py',
'--',
'@{{PATHSFILE}}',
]
init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'PyYAML==6.0.1',
]
# usort + ruff-format
[[linter]]
code = 'PYFMT'
include_patterns = [
'**/*.py',
'**/*.pyi',
]
command = [
'python3',
'tools/linter/adapters/pyfmt_linter.py',
'--',
'@{{PATHSFILE}}'
]
exclude_patterns = [
'tools/gen_vulkan_spv.py',
# We don't care too much about files in this directory, don't enforce
# formatting on them
'caffe2/**/*.py',
'caffe2/**/*.pyi',
'fb/**',
'**/fb/**',
'third_party/**/*.py',
'third_party/**/*.pyi',
'torch/_vendor/**',
'torch/_inductor/fx_passes/serialized_patterns/**',
'torch/_inductor/autoheuristic/artifacts/**',
# These files are all grandfathered in, feel free to remove from this list
# as necessary
'test/_nvfuser/__init__.py',
'test/_nvfuser/test_dynamo.py',
'test/_nvfuser/test_python_frontend.py',
'test/_nvfuser/test_torchscript.py',
'test/delete.py',
'test/expect/__init__.py',
'test/quantization/__init__.py',
'test/quantization/core/__init__.py',
'test/quantization/core/experimental/apot_fx_graph_mode_ptq.py',
'test/quantization/core/experimental/apot_fx_graph_mode_qat.py',
'test/quantization/core/experimental/quantization_util.py',
'test/quantization/core/experimental/test_bits.py',
'test/quantization/core/experimental/test_fake_quantize.py',
'test/quantization/core/experimental/test_linear.py',
'test/quantization/core/experimental/test_nonuniform_observer.py',
'test/quantization/core/experimental/test_quantized_tensor.py',
'test/quantization/core/experimental/test_quantizer.py',
'test/quantization/core/test_backend_config.py',
'test/quantization/core/test_docs.py',
'test/quantization/core/test_quantized_functional.py',
'test/quantization/core/test_quantized_module.py',
'test/quantization/core/test_quantized_op.py',
'test/quantization/core/test_quantized_tensor.py',
'test/quantization/core/test_top_level_apis.py',
'test/quantization/core/test_utils.py',
'test/quantization/core/test_workflow_module.py',
'test/quantization/core/test_workflow_ops.py',
'test/quantization/eager/__init__.py',
'test/quantization/eager/test_bias_correction_eager.py',
'test/quantization/eager/test_equalize_eager.py',
'test/quantization/eager/test_fuse_eager.py',
'test/quantization/eager/test_model_numerics.py',
'test/quantization/eager/test_numeric_suite_eager.py',
'test/quantization/eager/test_quantize_eager_ptq.py',
'test/quantization/eager/test_quantize_eager_qat.py',
'test/quantization/fx/__init__.py',
'test/quantization/fx/test_equalize_fx.py',
'test/quantization/fx/test_model_report_fx.py',
'test/quantization/fx/test_numeric_suite_fx.py',
'test/quantization/fx/test_quantize_fx.py',
'test/quantization/fx/test_subgraph_rewriter.py',
'test/test_fake_tensor.py',
'test/test_flop_counter.py',
'test/test_function_schema.py',
'test/test_functional_autograd_benchmark.py',
'test/test_functional_optim.py',
'test/test_functionalization_of_rng_ops.py',
'test/test_datapipe.py',
'test/test_futures.py',
'test/test_fx.py',
'test/test_fx_experimental.py',
'test/test_fx_passes.py',
'test/test_fx_reinplace_pass.py',
'test/test_import_stats.py',
'test/test_itt.py',
'test/test_jit.py',
'test/test_jit_autocast.py',
'test/test_jit_cuda_fuser.py',
'test/test_jit_disabled.py',
'test/test_jit_fuser.py',
'test/test_jit_fuser_legacy.py',
'test/test_jit_legacy.py',
'test/test_jit_llga_fuser.py',
'test/test_jit_profiling.py',
'test/test_jit_simple.py',
'test/test_jit_string.py',
'test/test_jiterator.py',
'test/test_kernel_launch_checks.py',
'test/test_linalg.py',
'test/test_masked.py',
'test/test_maskedtensor.py',
'test/test_matmul_cuda.py',
'test/test_meta.py',
'test/test_metal.py',
'test/test_mkl_verbose.py',
'test/test_mkldnn.py',
'test/test_mkldnn_fusion.py',
'test/test_mkldnn_verbose.py',
'test/test_mobile_optimizer.py',
'test/test_model_dump.py',
'test/test_modules.py',
'test/test_monitor.py',
'test/test_mps.py',
'test/test_multiprocessing_spawn.py',
'test/test_namedtensor.py',
'test/test_namedtuple_return_api.py',
'test/test_native_functions.py',
'test/test_native_mha.py',
'test/test_nn.py',
'test/test_out_dtype_op.py',
'test/test_overrides.py',
'test/test_prims.py',
'test/test_proxy_tensor.py',
'test/test_pruning_op.py',
'test/test_quantization.py',
'test/test_reductions.py',
'test/test_scatter_gather_ops.py',
'test/test_schema_check.py',
'test/test_segment_reductions.py',
'test/test_serialization.py',
'test/test_set_default_mobile_cpu_allocator.py',
'test/test_sparse.py',
'test/test_sparse_csr.py',
'test/test_sparse_semi_structured.py',
'test/test_spectral_ops.py',
'test/test_stateless.py',
'test/test_static_runtime.py',
'test/test_subclass.py',
'test/test_sympy_utils.py',
'test/test_tensor_creation_ops.py',
'test/test_tensorboard.py',
'test/test_tensorexpr.py',
'test/test_tensorexpr_pybind.py',
'test/test_testing.py',
'test/test_torch.py',
'test/test_transformers.py',
'test/test_type_promotion.py',
'test/test_unary_ufuncs.py',
'test/test_vulkan.py',
'torch/_awaits/__init__.py',
'torch/_custom_op/__init__.py',
'torch/_custom_op/autograd.py',
'torch/_custom_op/functional.py',
'torch/_custom_op/impl.py',
'torch/_export/__init__.py',
'torch/_export/constraints.py',
'torch/_export/db/__init__.py',
'torch/_export/db/case.py',
'torch/_export/db/examples/__init__.py',
'torch/_export/db/examples/assume_constant_result.py',
'torch/_export/db/examples/autograd_function.py',
'torch/_export/db/examples/class_method.py',
'torch/_export/db/examples/cond_branch_class_method.py',
'torch/_export/db/examples/cond_branch_nested_function.py',
'torch/_export/db/examples/cond_branch_nonlocal_variables.py',
'torch/_export/db/examples/cond_closed_over_variable.py',
'torch/_export/db/examples/cond_operands.py',
'torch/_export/db/examples/cond_predicate.py',
'torch/_export/db/examples/decorator.py',
'torch/_export/db/examples/dictionary.py',
'torch/_export/db/examples/dynamic_shape_assert.py',
'torch/_export/db/examples/dynamic_shape_constructor.py',
'torch/_export/db/examples/dynamic_shape_if_guard.py',
'torch/_export/db/examples/dynamic_shape_map.py',
'torch/_export/db/examples/dynamic_shape_round.py',
'torch/_export/db/examples/dynamic_shape_slicing.py',
'torch/_export/db/examples/dynamic_shape_view.py',
'torch/_export/db/examples/fn_with_kwargs.py',
'torch/_export/db/examples/list_contains.py',
'torch/_export/db/examples/list_unpack.py',
'torch/_export/db/examples/nested_function.py',
'torch/_export/db/examples/null_context_manager.py',
'torch/_export/db/examples/pytree_flatten.py',
'torch/_export/db/examples/scalar_output.py',
'torch/_export/db/examples/specialized_attribute.py',
'torch/_export/db/examples/static_for_loop.py',
'torch/_export/db/examples/static_if.py',
'torch/_export/db/examples/tensor_setattr.py',
'torch/_export/db/examples/type_reflection_method.py',
'torch/_export/db/gen_example.py',
'torch/_export/db/logging.py',
'torch/_export/error.py',
'torch/_export/exported_program.py',
'torch/_export/pass_base.py',
'torch/_export/pass_infra/__init__.py',
'torch/_export/pass_infra/node_metadata.py',
'torch/_export/pass_infra/proxy_value.py',
'torch/_export/passes/__init__.py',
'torch/_export/passes/add_runtime_assertions_for_constraints_pass.py',
'torch/_export/passes/const_prop_pass.py',
'torch/_export/passes/functionalize_side_effectful_ops_pass.py',
'torch/_export/passes/replace_sym_size_ops_pass.py',
'torch/_export/passes/replace_view_ops_with_view_copy_ops_pass.py',
'torch/_export/serde/__init__.py',
'torch/_export/serde/schema.py',
'torch/_export/serde/serialize.py',
'torch/_export/serde/upgrade.py',
'torch/_export/trace.py',
'torch/_export/verifier.py',
'torch/testing/_internal/__init__.py',
'torch/testing/_internal/autocast_test_lists.py',
'torch/testing/_internal/autograd_function_db.py',
'torch/testing/_internal/check_kernel_launches.py',
'torch/testing/_internal/codegen/__init__.py',
'torch/testing/_internal/codegen/random_topo_test.py',
'torch/testing/_internal/common_cuda.py',
'torch/testing/_internal/common_distributed.py',
'torch/testing/_internal/common_jit.py',
'torch/testing/_internal/common_methods_invocations.py',
'torch/testing/_internal/common_modules.py',
'torch/testing/_internal/common_nn.py',
'torch/testing/_internal/common_pruning.py',
'torch/testing/_internal/common_quantization.py',
'torch/testing/_internal/common_quantized.py',
'torch/testing/_internal/common_subclass.py',
'torch/testing/_internal/common_utils.py',
'torch/testing/_internal/composite_compliance.py',
'torch/testing/_internal/hop_db.py',
'torch/testing/_internal/custom_op_db.py',
'torch/testing/_internal/data/__init__.py',
'torch/testing/_internal/data/network1.py',
'torch/testing/_internal/data/network2.py',
'torch/testing/_internal/dist_utils.py',
'torch/testing/_internal/distributed/__init__.py',
'torch/testing/_internal/distributed/_shard/__init__.py',
'torch/testing/_internal/distributed/_shard/sharded_tensor/__init__.py',
'torch/testing/_internal/distributed/_shard/sharded_tensor/_test_ops_common.py',
'torch/testing/_internal/distributed/_shard/sharded_tensor/_test_st_common.py',
'torch/testing/_internal/distributed/_shard/test_common.py',
'torch/testing/_internal/distributed/_tensor/__init__.py',
'torch/testing/_internal/distributed/_tensor/common_dtensor.py',
'torch/testing/_internal/distributed/ddp_under_dist_autograd_test.py',
'torch/testing/_internal/distributed/distributed_test.py',
'torch/testing/_internal/distributed/distributed_utils.py',
'torch/testing/_internal/distributed/fake_pg.py',
'torch/testing/_internal/distributed/multi_threaded_pg.py',
'torch/testing/_internal/distributed/nn/__init__.py',
'torch/testing/_internal/distributed/nn/api/__init__.py',
'torch/testing/_internal/distributed/nn/api/remote_module_test.py',
'torch/testing/_internal/distributed/rpc/__init__.py',
'torch/testing/_internal/distributed/rpc/dist_autograd_test.py',
'torch/testing/_internal/distributed/rpc/dist_optimizer_test.py',
'torch/testing/_internal/distributed/rpc/examples/__init__.py',
'torch/testing/_internal/distributed/rpc/examples/parameter_server_test.py',
'torch/testing/_internal/distributed/rpc/examples/reinforcement_learning_rpc_test.py',
'torch/testing/_internal/distributed/rpc/faulty_agent_rpc_test.py',
'torch/testing/_internal/distributed/rpc/faulty_rpc_agent_test_fixture.py',
'torch/testing/_internal/distributed/rpc/jit/__init__.py',
'torch/testing/_internal/distributed/rpc/jit/dist_autograd_test.py',
'torch/testing/_internal/distributed/rpc/jit/rpc_test.py',
'torch/testing/_internal/distributed/rpc/jit/rpc_test_faulty.py',
'torch/testing/_internal/distributed/rpc/rpc_agent_test_fixture.py',
'torch/testing/_internal/distributed/rpc/rpc_test.py',
'torch/testing/_internal/distributed/rpc/tensorpipe_rpc_agent_test_fixture.py',
'torch/testing/_internal/distributed/rpc_utils.py',
'torch/testing/_internal/generated/__init__.py',
'torch/testing/_internal/hypothesis_utils.py',
'torch/testing/_internal/inductor_utils.py',
'torch/testing/_internal/jit_metaprogramming_utils.py',
'torch/testing/_internal/jit_utils.py',
'torch/testing/_internal/logging_tensor.py',
'torch/testing/_internal/logging_utils.py',
'torch/testing/_internal/optests/__init__.py',
'torch/testing/_internal/optests/aot_autograd.py',
'torch/testing/_internal/optests/compile_check.py',
'torch/testing/_internal/optests/fake_tensor.py',
'torch/testing/_internal/optests/make_fx.py',
'torch/testing/_internal/quantization_torch_package_models.py',
'torch/testing/_internal/test_module/__init__.py',
'torch/testing/_internal/test_module/future_div.py',
'torch/testing/_internal/test_module/no_future_div.py',
'torch/utils/_contextlib.py',
'torch/utils/_cpp_extension_versioner.py',
'torch/utils/_crash_handler.py',
'torch/utils/_device.py',
'torch/utils/_foreach_utils.py',
'torch/utils/_freeze.py',
'torch/utils/_mode_utils.py',
'torch/utils/_python_dispatch.py',
'torch/utils/_stats.py',
'torch/utils/_traceback.py',
'torch/utils/_zip.py',
'torch/utils/backcompat/__init__.py',
'torch/utils/backend_registration.py',
'torch/utils/benchmark/__init__.py',
'torch/utils/benchmark/examples/__init__.py',
'torch/utils/benchmark/examples/blas_compare.py',
'torch/utils/benchmark/examples/blas_compare_setup.py',
'torch/utils/benchmark/examples/compare.py',
'torch/utils/benchmark/examples/end_to_end.py',
'torch/utils/benchmark/examples/fuzzer.py',
'torch/utils/benchmark/examples/op_benchmark.py',
'torch/utils/benchmark/examples/simple_timeit.py',
'torch/utils/benchmark/examples/sparse/compare.py',
'torch/utils/benchmark/examples/sparse/fuzzer.py',
'torch/utils/benchmark/examples/sparse/op_benchmark.py',
'torch/utils/benchmark/examples/spectral_ops_fuzz_test.py',
'torch/utils/benchmark/op_fuzzers/__init__.py',
'torch/utils/benchmark/op_fuzzers/binary.py',
'torch/utils/benchmark/op_fuzzers/sparse_binary.py',
'torch/utils/benchmark/op_fuzzers/sparse_unary.py',
'torch/utils/benchmark/op_fuzzers/spectral.py',
'torch/utils/benchmark/op_fuzzers/unary.py',
'torch/utils/benchmark/utils/__init__.py',
'torch/utils/benchmark/utils/_stubs.py',
'torch/utils/benchmark/utils/common.py',
'torch/utils/benchmark/utils/compare.py',
'torch/utils/benchmark/utils/compile.py',
'torch/utils/benchmark/utils/cpp_jit.py',
'torch/utils/benchmark/utils/fuzzer.py',
'torch/utils/benchmark/utils/sparse_fuzzer.py',
'torch/utils/benchmark/utils/timer.py',
'torch/utils/benchmark/utils/valgrind_wrapper/__init__.py',
'torch/utils/benchmark/utils/valgrind_wrapper/timer_interface.py',
'torch/utils/bottleneck/__init__.py',
'torch/utils/bottleneck/__main__.py',
'torch/utils/bundled_inputs.py',
'torch/utils/checkpoint.py',
'torch/utils/collect_env.py',
'torch/utils/cpp_backtrace.py',
'torch/utils/cpp_extension.py',
'torch/utils/dlpack.py',
'torch/utils/file_baton.py',
'torch/utils/flop_counter.py',
'torch/utils/hipify/__init__.py',
'torch/utils/hipify/constants.py',
'torch/utils/hipify/cuda_to_hip_mappings.py',
'torch/utils/hipify/hipify_python.py',
'torch/utils/hipify/version.py',
'torch/utils/hooks.py',
'torch/utils/jit/__init__.py',
'torch/utils/jit/log_extract.py',
'torch/utils/mkldnn.py',
'torch/utils/mobile_optimizer.py',
'torch/utils/model_dump/__init__.py',
'torch/utils/model_dump/__main__.py',
'torch/utils/model_zoo.py',
'torch/utils/show_pickle.py',
'torch/utils/tensorboard/__init__.py',
'torch/utils/tensorboard/_caffe2_graph.py',
'torch/utils/tensorboard/_convert_np.py',
'torch/utils/tensorboard/_embedding.py',
'torch/utils/tensorboard/_onnx_graph.py',
'torch/utils/tensorboard/_proto_graph.py',
'torch/utils/tensorboard/_pytorch_graph.py',
'torch/utils/tensorboard/_utils.py',
'torch/utils/tensorboard/summary.py',
'torch/utils/tensorboard/writer.py',
'torch/utils/throughput_benchmark.py',
'torch/utils/viz/__init__.py',
'torch/utils/viz/_cycles.py',
'torch/utils/weak.py',
]
init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'--no-black-binary',
'black==23.12.1',
'usort==1.0.8.post1',
'isort==5.13.2',
'ruff==0.8.4', # sync with RUFF
]
is_formatter = true
# COPYRIGHT: greps every file (except this config and fb/ trees) for the
# "Confidential and proprietary" marker to block proprietary source code
# from being contributed to the PyTorch codebase.
[[linter]]
code = 'COPYRIGHT'
include_patterns = ['**']
exclude_patterns = [
'.lintrunner.toml',
'fb/**',
'**/fb/**',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=Confidential and proprietary',
'--linter-name=COPYRIGHT',
'--error-name=Confidential Code',
"""--error-description=\
Proprietary and confidential source code\
should not be contributed to PyTorch codebase\
""",
'--',
'@{{PATHSFILE}}'
]
# Note: SHA checksums for archives downloaded from GitHub are not guaranteed
# to stay stable, so BAZEL_LINTER below can flag/remove redundant checksums
# in WORKSPACE (see pytorch/pytorch#95039).
# BAZEL_LINTER: lints the Bazel WORKSPACE file using a prebuilt bazel binary
# fetched from S3 into .lintbin/ at init time. Marked is_formatter, so it can
# also rewrite the file via `lintrunner -a`.
[[linter]]
code = 'BAZEL_LINTER'
include_patterns = ['WORKSPACE']
command = [
'python3',
'tools/linter/adapters/bazel_linter.py',
'--binary=.lintbin/bazel',
'--',
'@{{PATHSFILE}}'
]
init_command = [
'python3',
'tools/linter/adapters/s3_init.py',
'--config-json=tools/linter/adapters/s3_init_config.json',
'--linter=bazel',
'--dry-run={{DRYRUN}}',
'--output-dir=.lintbin',
'--output-name=bazel',
]
is_formatter = true
# LINTRUNNER_VERSION: runs lintrunner_version_linter.py to validate the
# installed lintrunner version itself; note the command takes no path
# arguments even though the linter nominally matches every file.
[[linter]]
code = 'LINTRUNNER_VERSION'
include_patterns = ['**']
exclude_patterns = [
'fb/**',
'**/fb/**',
]
command = [
'python3',
'tools/linter/adapters/lintrunner_version_linter.py'
]
# RUFF subsumes the repo's former flake8 configuration (see
# pytorch/pytorch#99785); its version is pinned in sync with PYFMT above.
[[linter]]
code = 'RUFF'
include_patterns = ['**/*.py', '**/*.pyi']
exclude_patterns = [
'caffe2/**',
'functorch/docs/**',
'functorch/notebooks/**',
'torch/_inductor/fx_passes/serialized_patterns/**',
'torch/_inductor/autoheuristic/artifacts/**',
'scripts/**',
'third_party/**',
'fb/**',
'**/fb/**',
]
command = [
'python3',
'tools/linter/adapters/ruff_linter.py',
'--config=pyproject.toml',
'--show-disable',
'--',
'@{{PATHSFILE}}'
]
init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'ruff==0.8.4', # sync with PYFMT
]
is_formatter = true
# This linter prevents merge conflicts in csv files in pytorch by enforcing
# three lines of whitespace between entries such that unless people are modifying
# the same line, merge conflicts should not arise in git or hg
[[linter]]
code = 'MERGE_CONFLICTLESS_CSV'
include_patterns = ['benchmarks/dynamo/ci_expected_accuracy/*.csv']
command = [
'python3',
'tools/linter/adapters/no_merge_conflict_csv_linter.py',
'--',
'@{{PATHSFILE}}'
]
is_formatter = true
# Forbid `create_unbacked` in meta registrations: data-dependent operators get
# their meta implementation in torch/_subclasses/fake_impls.py instead.
[[linter]]
code = 'META_NO_CREATE_UNBACKED'
include_patterns = [
'torch/_meta_registrations.py'
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=create_unbacked',
'--linter-name=META_NO_CREATE_UNBACKED',
'--error-name=no create_unbacked in meta registrations',
"""--error-description=\
Data-dependent operators should have their meta \
registration in torch/_subclasses/fake_impls.py, \
not torch/_meta_registrations.py
""",
'--',
'@{{PATHSFILE}}'
]
# Forbid `#if ... USE_ROCM` / `#if ... USE_CUDA` compile-time divergence in
# ATen CPU sources and headers, so the same aten-cpu shared object can be
# shared between different GPU backends.
[[linter]]
code = 'ATEN_CPU_GPU_AGNOSTIC'
include_patterns = [
# aten source
"aten/src/ATen/*.cpp",
"aten/src/ATen/cpu/*.cpp",
"aten/src/ATen/functorch/**/*.cpp",
"aten/src/ATen/nnapi/*.cpp",
"aten/src/ATen/quantized/*.cpp",
"aten/src/ATen/vulkan/*.cpp",
"aten/src/ATen/metal/*.cpp",
"aten/src/ATen/detail/CPUGuardImpl.cpp",
"aten/src/ATen/detail/MetaGuardImpl.cpp",
# aten native source
"aten/src/ATen/native/cpu/*.cpp",
"aten/src/ATen/native/ao_sparse/cpu/kernels/*.cpp",
"aten/src/ATen/native/ao_sparse/quantized/cpu/kernels/*.cpp",
"aten/src/ATen/native/quantized/cpu/kernels/*.cpp",
"aten/src/ATen/native/*.cpp",
"aten/src/ATen/native/cpu/**/*.cpp",
"aten/src/ATen/native/ao_sparse/*.cpp",
"aten/src/ATen/native/ao_sparse/**/*.cpp",
"aten/src/ATen/native/ao_sparse/quantized/*.cpp",
"aten/src/ATen/native/ao_sparse/quantized/**/*.cpp",
"aten/src/ATen/native/nested/*.cpp",
"aten/src/ATen/native/quantized/*.cpp",
"aten/src/ATen/native/quantized/**/*.cpp",
"aten/src/ATen/native/sparse/*.cpp",
"aten/src/ATen/native/transformers/*.cpp",
"aten/src/ATen/native/utils/*.cpp",
"aten/src/ATen/native/xnnpack/*.cpp",
"aten/src/ATen/native/metal/MetalPrepackOpRegister.cpp",
# aten headers
"aten/src/ATen/*.h",
"aten/src/ATen/functorch/**/*.h",
"aten/src/ATen/ops/*.h",
"aten/src/ATen/cpu/**/*.h",
"aten/src/ATen/nnapi/*.h",
"aten/src/ATen/quantized/*.h",
"aten/src/ATen/vulkan/*.h",
"aten/src/ATen/metal/*.h",
"aten/src/ATen/mps/*.h",
# aten native headers
"aten/src/ATen/native/*.h",
"aten/src/ATen/native/cpu/**/*.h",
"aten/src/ATen/native/nested/*.h",
"aten/src/ATen/native/sparse/*.h",
"aten/src/ATen/native/ao_sparse/*.h",
"aten/src/ATen/native/ao_sparse/cpu/*.h",
"aten/src/ATen/native/ao_sparse/quantized/*.h",
"aten/src/ATen/native/ao_sparse/quantized/cpu/*.h",
"aten/src/ATen/native/quantized/*.h",
"aten/src/ATen/native/quantized/cpu/*.h",
"aten/src/ATen/native/transformers/*.h",
"aten/src/ATen/native/quantized/cpu/qnnpack/include/*.h",
"aten/src/ATen/native/utils/*.h",
"aten/src/ATen/native/vulkan/ops/*.h",
"aten/src/ATen/native/xnnpack/*.h",
"aten/src/ATen/native/metal/MetalPrepackOpContext.h",
"aten/src/ATen/native/mps/Copy.h",
"aten/src/ATen/native/mkldnn/**/*.h",
]
exclude_patterns = [
"aten/src/ATen/Context.h",
"aten/src/ATen/Context.cpp",
"aten/src/ATen/DLConvertor.cpp",
"aten/src/ATen/core/Array.h",
"aten/src/ATen/native/quantized/ConvUtils.h",
"aten/src/ATen/native/sparse/SparseBlasImpl.cpp", # triton implementation
"aten/src/ATen/native/transformers/attention.cpp",
"aten/src/ATen/native/**/cudnn/**", # cudnn is cuda specific
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=(^#if.*USE_ROCM.*)|(^#if.*USE_CUDA.*)',
# NOTE(review): this name differs from `code` ('ATEN_CPU_GPU_AGNOSTIC')
# above; every other grep linter in this file uses the same string for
# both — confirm the mismatch is intentional.
'--linter-name=ATEN_CPU',
'--error-name=aten-cpu should be gpu agnostic',
"""--error-description=\
We strongly discourage the compile-time divergence \
on ATen-CPU code for different GPU code. This \
disallows sharing the same aten-cpu shared object \
between different GPU backends \
""",
'--',
'@{{PATHSFILE}}'
]
# NOTE(review): no --replace-pattern is passed to grep_linter.py above, so
# this entry does not rewrite files — confirm is_formatter = true is intended.
is_formatter = true
# `set_linter` detects occurrences of built-in `set` in areas of Python code like
# _inductor where the instability of iteration in `set` has proven a problem.
[[linter]]
code = 'SET_LINTER'
include_patterns = [
'torch/_inductor/**/*.py',
'torch/_functorch/partitioners.py',
]
command = [
'python3',
'tools/linter/adapters/set_linter.py',
'--lintrunner',
'--',
'@{{PATHSFILE}}'
]
is_formatter = true
# `docstring_linter` reports on long Python classes, methods, and functions
# whose definitions have very small docstrings or none at all.
#
[[linter]]
code = 'DOCSTRING_LINTER'
# NOTE(review): this pattern matches no real files, which effectively disables
# the linter — presumably a deliberate placeholder; confirm.
include_patterns = [
'torch/**/not-exist.py'
]
command = [
'python3',
'tools/linter/adapters/docstring_linter.py',
'--lintrunner',
'--',
'@{{PATHSFILE}}'
]
is_formatter = false
# `import_linter` reports on importing disallowed third party libraries.
[[linter]]
code = 'IMPORT_LINTER'
include_patterns = [
'torch/_dynamo/**',
]
command = [
'python3',
'tools/linter/adapters/import_linter.py',
'--',
'@{{PATHSFILE}}'
]
is_formatter = false