mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
### Description

- 4-bit QuantizeLinear(21). **Blocked quantization is still missing (i.e., the new `block_size` attribute is not yet supported).**
- 4-bit DequantizeLinear(21). **Blocked dequantization is still missing (i.e., the new `block_size` attribute is not yet supported).**
- 4-bit Transpose(21).
- Update the quantization tool with int4 types.
- Disable QDQ fusions for 4-bit types. See: https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc
- MLAS 4-bit quantization kernels for Intel, NEON, and PowerPC.

##### Notes

To calculate a tensor's storage size, we normally get the number of elements from the shape (i.e., `tensor_shape.Size()`) and multiply by the size of a single element. This does not work directly for sub-byte elements like int4, because each element of a `Tensor<Int4x2>` stores **two** packed int4 values in one byte. `Tensor::CalculateTensorStorageSize` should be called to perform the correct calculation for any tensor element type.

### Motivation and Context

ONNX 1.16 added the int4 and uint4 types. This initial PR adds the int4 type to ORT and adds int4 implementations for the Quant, Dequant, and Transpose ops on the CPU EP. We still need to add int4 support for many ops and execution providers. See the ONNX 1.16 release notes: https://github.com/onnx/onnx/releases.
113 lines
3.6 KiB
YAML
113 lines
3.6 KiB
YAML
# Lint workflow.
#
# Runs on pushes to `main` and `rel-*` branches and on every pull request.
# Jobs:
#   optional-lint       - misspell and shellcheck via reviewdog
#   lint-python-format  - lintrunner over all files, results uploaded as SARIF
#   lint-cpp            - cpplint via reviewdog
#   lint-js             - eslint via reviewdog, run in `js/`
name: Lint

on:
  push:
    branches:
      - main
      - "rel-*"
  pull_request:

jobs:
  optional-lint:
    name: Optional Lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: misspell  # Check spellings as well
        uses: reviewdog/action-misspell@v1
        with:
          github_token: ${{ secrets.github_token }}
          locale: "US"
          reporter: github-pr-check
          level: info
          filter_mode: diff_context
      - name: shellcheck  # Static check shell scripts
        uses: reviewdog/action-shellcheck@v1
        with:
          github_token: ${{ secrets.github_token }}
          reporter: github-pr-check
          level: info
          filter_mode: file

  lint-python-format:
    # Required workflow
    name: Python format
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          # Version range or exact version of Python to use, using SemVer's
          # version range syntax. Reads from .python-version if unset.
          # Quoted so the value stays the string "3.10" and is not read as
          # the float 3.1.
          python-version: "3.10"
      - name: Setup Rust
        # NOTE(review): the actions-rs organisation appears to be archived and
        # unmaintained - confirm and consider a maintained toolchain action.
        uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          components: rustfmt
      - name: Install dependencies
        run: |
          python -m pip install -r requirements-dev.txt
          python -m pip install lintrunner lintrunner-adapters
          lintrunner init
      - name: Run lintrunner on all files
        run: |
          # Turn off exit-on-error so a lintrunner failure reaches the `if`
          # below, which prints the reproduction hint before failing the step.
          set +e
          if ! lintrunner --force-color --all-files --tee-json=lint.json -v; then
              echo ""
              echo -e "\e[1m\e[36mYou can reproduce these results locally by using \`lintrunner\`. To set up lintrunner locally, see https://github.com/microsoft/onnxruntime/blob/main/docs/Coding_Conventions_and_Standards.md#linting .\e[0m"
              exit 1
          fi
      - name: Produce SARIF
        # always(): convert the lint results even when the lint step failed.
        if: always()
        run: |
          python -m lintrunner_adapters to-sarif lint.json lintrunner.sarif
      - name: Upload SARIF file
        if: always()
        # An upload failure must not fail the job.
        continue-on-error: true
        uses: github/codeql-action/upload-sarif@v3
        with:
          # Path to SARIF file relative to the root of the repository
          sarif_file: lintrunner.sarif
          category: lintrunner
          checkout_path: ${{ github.workspace }}

  lint-cpp:
    name: Lint C++
    runs-on: ubuntu-latest
    steps:
      # Pinned to the same release tag as the other jobs instead of the
      # mutable `master` branch.
      - uses: actions/checkout@v4
      - name: Install ninja
        run: python -m pip install --upgrade ninja
      - name: Generate compile_commands.json
        run: |
          python tools/ci_build/build.py \
            --cmake_generator "Ninja" \
            --build_dir build \
            --update \
            --cmake_extra_defines CMAKE_EXPORT_COMPILE_COMMANDS=ON
      - name: Generate ONNX protobuf files
        run: cmake --build build/Debug --config Debug --target onnx_proto
      # NOTE(review): still tracks the `master` branch; pin to a release tag
      # once a suitable tag is confirmed to exist.
      - uses: reviewdog/action-cpplint@master
        with:
          github_token: ${{ secrets.github_token }}
          reporter: github-pr-check
          level: warning
          # Folded scalar: the lines below are joined with single spaces into
          # one flag string.
          flags: >-
            --linelength=120
            --exclude=java/src/main/native/*.c
            --exclude=onnxruntime/core/mlas/inc/*
            --exclude=onnxruntime/core/mlas/lib/*
          filter: "-runtime/references"

  lint-js:
    name: Lint JavaScript
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: reviewdog/action-eslint@v1
        with:
          reporter: github-pr-check
          level: error
          filter_mode: file
          eslint_flags: "--ext .ts --ext .tsx"
          workdir: "js/"