pytorch

mirror of https://github.com/saymrwulf/pytorch.git synced 2026-05-14 20:57:59 +00:00

History

eellison 2268319596 dont attempt to fuse in unaligned accesses to mm (#142435 ) This isn't profitable - we were trying to fuse in a padding of unaligned mm, which defeats padding's purpose. Pull Request resolved: https://github.com/pytorch/pytorch/pull/142435 Approved by: https://github.com/jansel ghstack dependencies: #134532, #142350, #142400, #142401, #142402		2024-12-10 21:35:26 +00:00
..
cpp
extension_backends
__init__.py
CMakeLists.txt
custom_ops.cpp	[AOTI] Fix an issue when fallback op does not return a value (#142339 )	2024-12-09 23:24:29 +00:00
indirect_assert_helper.py
minifier_smoke.py
opinfo_harness.py
s429861_repro.py
test_aot_inductor.py	[AOTI XPU] Support AOT Inductor for Intel GPU. (#140269 )	2024-12-10 05:05:08 +00:00
test_aot_inductor_arrayref.py	Add and use borrow_arrayref_tensor_as_tensor (#142183 )	2024-12-09 22:23:21 +00:00
test_aot_inductor_custom_ops.py	[AOTI] Fix an issue when fallback op does not return a value (#142339 )	2024-12-09 23:24:29 +00:00
test_aot_inductor_package.py	[AOTI] Fix #140546 and support AOTI package load for Intel GPU. (#140664 )	2024-12-10 05:05:08 +00:00
test_aot_inductor_utils.py	[AOTI XPU] Support AOT Inductor for Intel GPU. (#140269 )	2024-12-10 05:05:08 +00:00
test_auto_functionalize.py
test_autoheuristic.py
test_b2b_gemm.py
test_benchmark_fusion.py	[inductor] Refactor is_big_gpu (#142220 )	2024-12-08 18:51:36 +00:00
test_benchmarking.py
test_binary_folding.py	[Inductor] add flag for linear binary folding and turn it off by default (#142108 )	2024-12-06 07:12:29 +00:00
test_ck_backend.py	[inductor] Refactor is_big_gpu (#142220 )	2024-12-08 18:51:36 +00:00
test_codecache.py	Introduce remote cache key prefix to break cache (#142148 )	2024-12-10 00:35:50 +00:00
test_codegen_triton.py
test_combo_kernels.py
test_compile_worker.py
test_compiled_autograd.py	[ca] expose option to collect sizes as dynamic (#141153 )	2024-11-22 19:26:27 +00:00
test_compiled_optimizers.py	Revert "Always unspecialize float in OSS (#138922 )"	2024-11-26 00:03:03 +00:00
test_config.py
test_control_flow.py	[while_loop] change to guard_equals for checking output and carry (#141734 )	2024-12-03 04:00:21 +00:00
test_cooperative_reductions.py	Enable CI on SM89 (#140305 )	2024-12-03 04:49:46 +00:00
test_coordinate_descent_tuner.py	[Inductor] Represent size_hints as a dict (#142249 )	2024-12-09 22:31:53 +00:00
test_cpp_wrapper_hipify.py	Test cpp_wrapper_hipify string comparison (#141353 )	2024-12-03 18:25:32 +00:00
test_cpu_cpp_wrapper.py
test_cpu_repro.py	[inductor] update numbytes_hint for NoneLayout to allow more fusions (#141766 )	2024-12-10 06:45:07 +00:00
test_cpu_select_algorithm.py	[Inductor] add flag for linear binary folding and turn it off by default (#142108 )	2024-12-06 07:12:29 +00:00
test_cuda_repro.py	Add support for bfloat16 atomic adds in fbcode (#141857 )	2024-12-10 11:40:15 +00:00
test_cudacodecache.py
test_cudagraph_trees.py	automatic dynamic unspecialize float (#141647 )	2024-11-29 22:36:53 +00:00
test_cudagraph_trees_expandable_segments.py
test_custom_lowering.py
test_custom_post_grad_passes.py
test_cutlass_backend.py	[inductor] Refactor is_big_gpu (#142220 )	2024-12-08 18:51:36 +00:00
test_debug_trace.py	[inductor] Fix 3d tiling (#141709 )	2024-12-01 19:47:41 +00:00
test_decompose_mem_bound_mm.py
test_dependencies.py
test_distributed_patterns.py	[ca] expose option to collect sizes as dynamic (#141153 )	2024-11-22 19:26:27 +00:00
test_efficient_conv_bn_eval.py	Revert "Always unspecialize float in OSS (#138922 )"	2024-11-26 00:03:03 +00:00
test_extension_backend.py	Run only listed tests on s390x (#140265 )	2024-11-20 22:53:09 +00:00
test_external_callables.py
test_flex_attention.py	[inductor][cpp] Add FlexAttention support for CPU inference (#141453 )	2024-12-10 11:11:09 +00:00
test_flex_decoding.py	[ROCm] Update to AOTriton 0.8b (#140172 )	2024-12-06 21:45:18 +00:00
test_foreach.py
test_fp8.py	Adding lowering to persistent-tma device kernel for _scaled_mm (#142045 )	2024-12-09 01:48:40 +00:00
test_fused_attention.py
test_fx_fusion.py
test_gpu_cpp_wrapper.py	[inductor] Refactor is_big_gpu (#142220 )	2024-12-08 18:51:36 +00:00
test_graph_transform_observer.py
test_group_batch_fusion.py
test_halide.py
test_indexing.py	Fix test_indexing on MacOS (#142440 )	2024-12-10 01:46:28 +00:00
test_inductor_annotations.py	Inductor annotations (#130429 )	2024-12-10 08:53:39 +00:00
test_inductor_freezing.py
test_inductor_utils.py
test_inplacing_pass.py
test_kernel_benchmark.py	Enable CI on SM89 (#140305 )	2024-12-03 04:49:46 +00:00
test_layout_optim.py
test_loop_ordering.py	Enable CI on SM89 (#140305 )	2024-12-03 04:49:46 +00:00
test_max_autotune.py	dont attempt to fuse in unaligned accesses to mm (#142435 )	2024-12-10 21:35:26 +00:00
test_memory.py
test_memory_planning.py	Move Sympy printers to torch/utils/_sympy/printers.py (#140597 )	2024-11-26 18:11:00 +00:00
test_metrics.py
test_minifier.py	Aoti minifier flatten (#141156 )	2024-12-06 07:12:45 +00:00
test_minifier_isolate.py
test_minifier_utils.py	[AOTI Minifier] Save EP instead of graphs (#141159 )	2024-11-22 01:51:10 +00:00
test_mkldnn_pattern_matcher.py	[Quant][Inductor][X86] add fusion pass for linear_dynamic_fp16 with relu (#141556 )	2024-12-09 05:05:11 +00:00
test_mmdecomp.py
test_move_constructors_to_cuda.py
test_multi_kernel.py	[AOTI] Fix multi-kernel codegen when using one-pass (#142333 )	2024-12-09 14:49:10 +00:00
test_op_dtype_prop.py	Update low prec codegen for div/mod (#142350 )	2024-12-10 16:50:28 +00:00
test_ordered_set.py
test_pad_mm.py	[inductor] Refactor is_big_gpu (#142220 )	2024-12-08 18:51:36 +00:00
test_padding.py	Move Sympy printers to torch/utils/_sympy/printers.py (#140597 )	2024-11-26 18:11:00 +00:00
test_pattern_matcher.py	Enable CI on SM89 (#140305 )	2024-12-03 04:49:46 +00:00
test_perf.py	Revert "Always unspecialize float in OSS (#138922 )"	2024-11-26 00:03:03 +00:00
test_profiler.py
test_scatter_optimization.py
test_select_algorithm.py	[inductor] Refactor is_big_gpu (#142220 )	2024-12-08 18:51:36 +00:00
test_smoke.py
test_snode_runtime.py
test_split_cat_fx_passes.py	[PT2][Optimus] Fix a corner case in merge splits (#141194 )	2024-11-22 19:04:40 +00:00
test_standalone_compile.py
test_torchbind.py
test_torchinductor.py	[Inductor] Represent size_hints as a dict (#142249 )	2024-12-09 22:31:53 +00:00
test_torchinductor_codegen_config_overrides.py	Enable concat support through inductor using pointwise kernels (#141966 )	2024-12-06 14:28:07 +00:00
test_torchinductor_codegen_dynamic_shapes.py
test_torchinductor_dynamic_shapes.py	Fix fbcode tests for automatic dynamic unspecialize float (#141975 )	2024-12-03 23:59:06 +00:00
test_torchinductor_opinfo.py	[submodule] Revert "Adds support for accelerated sorting with x86-simd-sort (#127936 ) (#141901 )	2024-12-03 00:16:35 +00:00
test_torchinductor_strided_blocks.py	Refactor test_torchinductor_strided_blocks to also support triton CPU (#141587 )	2024-12-05 09:57:08 +00:00
test_triton_cpu_backend.py	Refactor test_torchinductor_strided_blocks to also support triton CPU (#141587 )	2024-12-05 09:57:08 +00:00
test_triton_extension_backend.py
test_triton_heuristics.py	[Inductor] Represent size_hints as a dict (#142249 )	2024-12-09 22:31:53 +00:00
test_triton_kernels.py	[AOTI] Swith GPU codegen to one-pass (#141980 )	2024-12-09 14:40:34 +00:00
test_triton_wrapper.py
test_unbacked_symints.py	[inductor] Refactor is_big_gpu (#142220 )	2024-12-08 18:51:36 +00:00
test_utils.py
test_xpu_basic.py