transformers

saymrwulf/transformers

Fork 0

mirror of https://github.com/saymrwulf/transformers.git synced 2026-05-14 20:58:08 +00:00

Commit graph

Select branches

Hide pull requests

'delete-delete-doc'

29625_add_prefix_space

29625_prefix_space

30824-spmconverter-user-defined-symbol

31187_depreciate_clean_up_tokenization_spaced

BritneyMuller-housekeeping-patch

LysandreJik-patch-1

_dummy_fix_weight_only_usage

_dummy_fix_weight_only_usage_2

add-deci-lm

add-flash-decoding

add-fp8-llama-script

add-git-lfs-to-amd-image

add-qgalore

add-rwkv5

add-warning-4bit-opti

add_amd_daily_ci

add_back_generative_class

add_deformable_detr

add_fa2_bart

add_important_warning_padding_attention_mask

add_kosmos_2_remote

add_kosmos_2_temp

add_kosmos_2_utm5_attn

add_kosmos_2_utm5_attn_rebased

add_kosmos_2_utm5_attn_rebased_flat_layer_structure

add_num_workers_for_tf

add_pipeline_equivalence_testing

add_prefix_space_29625

add_prefix_space_clean

add_siglip_fast_tokenizer_bis

add_tf_export_doc

add_word_level_timestamp_long

adding_fp8_quantization

agent_callback

agents-count-tokens

agents-make-easier-tags

agents-messages

albertvillanova-patch-1

allow_old_falcon_name

amd-nightly-ci

amdgpu-multi-gpu-tests

another_prepare_dataset_fix

api_big2

arijitx/wav2vec2_alignment

assistant_decoding_batch

attn-implementation-vision-enc-dec

auot-convert-tekken

auto-assign-reviewers

auto_gpt4_conversion

autoformer-test-seq-to-seq-add-atol

best_benchmark

best_benchmark_new

best_benchmark_on_static_cache

best_benchmark_on_static_cache_new

bloom_big

bos_eos_token_fix

bos_eos_token_slow

build-check-deepspeed-image

build-docker-torch-2.2

build_ci_docker_image_amd1

build_ci_docker_image_amd2

build_ci_docker_image_amd3

build_docker_on_kube

build_docker_on_kube_2

build_docker_on_kube_3

build_image_abc

byebye

byebye_py_37

cache_exp

change-ci

chat_template_kwargs

check-deepspeed

check-send-headers-when-converting-safetensors

check-whisper-slow-tests

check_amd_image_build

check_compile

check_compile_if_flaky

check_doc_test

check_docker_i

check_ds

check_example_ci

check_example_job

check_fastspeech

check_fix_fix_fix

check_fix_torch_pip

check_flaky

check_flax_example

check_gated_repo

check_gemma

check_gemma_compile

check_gemma_compile_2

check_gen_2

check_layoutlm

check_layoutlmv2

check_limit

check_mem

check_mem_00c1d87

check_mem_3cefac1d

check_mem_56b64bf

check_mem_838b87a

check_nightly_build

check_nightly_build_build_image

check_nougat

check_past_runner

check_permission

check_quant

check_quantized_param_bnb4

check_report

check_safetensors_rc

check_slow_pr

check_speed_no_empty

check_strange_doctest

check_tiny_creation

check_torch_2.2

check_trigger_4d8427f7

check_trigger_a81cf9ee

check_update_cache_number

check_ved_trocr

check_what_wrong_in_tiny_creation

checkout-layoutlm-tokenizers

chunk_length_ctc

ci-amdgpu-build-docker-images

ci-amdgpu-mi250

ci-amdgpu-nightly

ci-test-huggingface-hub-v0.15.0.rc0

ci-test-huggingface-hub-v0.16.0.rc0

ci-test-huggingface-hub-v0.17.0.rc0

ci-test-huggingface-hub-v0.18.0.rc0

ci-test-huggingface-hub-v0.19.0.rc0

ci-test-huggingface-hub-v0.20.0.rc1

ci-test-huggingface-hub-v0.21.0.rc0

ci-test-huggingface-hub-v0.22.0.rc0

ci-test-huggingface-hub-v0.23.0.rc0

ci-test-huggingface-hub-v0.23.0.rc1

ci-test-huggingface-hub-v0.24.0.rc0

ci-test-huggingface-hub-v0.25.0.rc0

ci-test-huggingface-hub-v0.25.0.rc1

ci-test-huggingface-hub-v0.26.0.rc0

ci-test-huggingface-hub-v0.27.0.rc0

ci-test-huggingface-hub-v0.27.0.rc1

ci-test-huggingface-hub-v0.27.0rc1

ci-test-huggingface-hub-v0.28.0.rc0

ci-test-huggingface-hub-v0.28.0.rc5

circleci_combine_reports

circleci_debug_base

circleci_debug_base_MobileNetV1ModelTest_test_batching_equivalence

circleci_debug_base_timm

circleci_debug_base_timm_3

clean_spmcoverter

cohere-diff

cohere-diff-2

compare_ci_with_torch_2.2

compile-rope

continuous-batching

custom_bloom_kernel

databricks

dduf-compability

dduf-compatibility-with-file-explorer

deberta-xla-fixes

debug_bloom

debug_get_jobs

debug_kosmos_2_output

debug_mem_0b192de1

debug_mem_95b37495

debug_metadata_run

debugdebug

debugdebug-2

debugdebug-5

deepseek_v2_support

deepspeed-amd-pytorch-version-fix

default-auto

default-fast-load

delete_big_tokenizer_block

deprecate_LegacyIndex

disable_multi_gpu

doc-builder

doc-link-one-or-two-papers-lysandre

doc-regactor

doc_builder_rename

doc_pr

docker_change_awq_version

docs-ctrl-lys

dont-use-deprecated-method

drop_py38_build_img_2

ds-fix-resume

ds-ignore_mismatched_sizes

dynamic_length_in_static_cache

dynamic_length_in_static_cache_001

dynamic_length_in_static_cache_002

dynamic_length_in_static_cache_reconstruct_tensors_from_length

dynamic_length_on_0ae789e0

dynamic_length_on_75bbfd5b

dynamic_length_on_95b3c381

dynamic_length_on_b6eb708b

enable_tf_numpy

faster_cache_without_compile

faster_copies

faster_set_initialized_submodules

fire

fix-Parameter-init

fix-Seq2SeqTrainingArguments-doc

fix-autoprocessor-import-order

fix-bnb-warning

fix-ci

fix-ci-setup

fix-copies

fix-device-map

fix-doc-builder-edit

fix-flash-comment

fix-from-pretrained

fix-gemma2-sdpa

fix-gemma2-sliding-window

fix-gpt2-scaled-init

fix-int8-serilation

fix-kwargs-issues

fix-llama-3-gguf

fix-pipeline-predict-transform-methods

fix-pixtral-tests

fix-pytorch-deepspeed-image

fix-quality-2

fix-red-ci-atol

fix-slow-tests-shieh-trigger

fix-task-mappings

fix-word-ids

fix-workflow

fix_TFMarianModelTest_test_xla_generate_slow

fix_aria_ci

fix_auto_test

fix_autoawq_docker

fix_autoawq_test

fix_bamba_test

fix_check_copies

fix_chinese_clip

fix_convert_spm_bpe

fix_deprecation_warnings

fix_dinat_2

fix_dinov2

fix_docker_autoawq

fix_docker_autogptq_from_source

fix_doctest

fix_doctest_based_on_refactor_doctest_2

fix_eetq_test

fix_falcon_processor

fix_flaky_4

fix_flaky_test_assisted_decoding_matches_greedy_search

fix_flaky_test_pt_tf_model_equivalence

fix_fp_32

fix_generate_embeds

fix_gptq_test

fix_gptq_tests

fix_jetmoe

fix_more_input_out

fix_not_init

fix_peft_model_in_pipelines

fix_pipe_tests_001

fix_quanto_llama27b

fix_remote_tool

fix_slow_gen_on_0ae789e0

fix_slow_gen_on_75bbfd5b

fix_slow_gen_on_b6eb708b

fix_st5_docs

fix_stupid_cond

fix_test_encode_decode

fix_test_fetcher_tests

fix_tie

fix_whisper_ci

fix_whisper_tflite_export

fix_zh_quicktour_md

flex_attention_qwen2

flex_attn_example

for_test_run_squad_no_trainer

force-convert

full-bf16-train

full_length_on_468f7cca

full_length_on_68b71c85

full_length_on_862cde4c

gemma_allow_compile

general_test_low_cpu_mem

get_bad_commits_for_daily_ci_11_20

get_bad_commits_for_daily_ci_11_26

gpt-flex-attention

gpt-mqa

gptneo_gpt4_port

gptneo_gpt4_port_new

hardware-auto-setup

hardware-auto-setup-ci

hotfix_ci_222

hqq_serialization

idefics3

ifix_aqlm_modules_to_not_convert

improve_error_message_asr_pipeline

improve_error_message_when_transformers_is_misconfigured

improve_torch_version_check

informative-detr-message

init_round_2

init_round_5

int

inverse_chat_templates

jeffboudier-transformers-docs-ad-copy

jnp_devicearray

keras-core-support

keras3_compatibility_phase_2

keras_3_compatibility

larger_runner

less-constraints

link-to-the-hub

llama-break-fix

llama-pad-side

llama-refactor

load_pretrainedfast_auto

make-cache-traceable

mark_whisper_test_slow

master

measure_all_tests

measure_gen

measure_gen_on_0ae789e0

measure_gen_on_75bbfd5b

measure_gen_on_b6eb708b

merge_text2text_into_text_generation

ministral

mitigate_tf_stride_vulnerability

modular-roberta

modular-stablelm

more_reduced_dummy_memory_usage

more_tf_int_dtypes

moshi-integration

move_jobs_from_daily_ci_channel

move_part_2

muellerzr-accum-plugin

muellerzr-ds-investigation

muellerzr-dummy-pr

muellerzr-enable-quant

muellerzr-enable-torchdata

muellerzr-fix-autocast

muellerzr-fix-integration-tests

muellerzr-fix-reentrant

muellerzr-fix-timeout

muellerzr-free-memory

muellerzr-free-memory-passthrough

muellerzr-jobs

muellerzr-less-fixes

muellerzr-lr-sched-right-version

muellerzr-metrics

muellerzr-modeling

muellerzr-more-ga-tests-fast

muellerzr-more-models-sadface

muellerzr-multinode-save

muellerzr-network-retry

muellerzr-skip-dvc

muellerzr-skip-failing-example

muellerzr-speedup-modular-conversion

muellerzr-trainer-refactor

muellerzr-transformers-should-not-set-env-variables

muellerzr-use-scientific

mymain

new-split

nezha_slow

nit-ga-condition

nit-modular-reame

nit-refactor

nit-remove-irrelevant-comment

nit_cleanup

nits-attention

no_more_shape_list

no_overwrite_test_batching_equivalence

noua/bloom_cugraph

np2

on_predict

onnx_gpt2_io_definition

output_ragged

pin-ffspec

pin-gguf

pin_ds

ping_author

ping_author_6

pipeline-revision-mirror

pipelines_signatures

pixtral_batchmixfeature_fix

pixtral_processor_structure_fix

post-action-build-test-tokenizers-main

processor-template-duplicated-tokens

protobuf-4

push-ci-image

quickfix_generate_tests

reenable_test

refactor-attention-converesion

refactor-from-pretrained

refactor_doctest_2

refactoring-new-version

relative-paths

remove-cache-migration-script

remove-items

remove-torch-pre-releases-amd-image

remove-warnings

repro-bug-pytorch-compile

repro-bug-pytorch-compile-cudagraph

reset_logger_level_2

resnet_with_variants

reverse_templating

revert-17547-update-support-image

revert-17646-skip_repo_not_found

revert-31494-add_dac

revert-33934-patch-1

revert-checkpoint-tmp-dir

revert-commit-30302

revert_hard_error2

robust_config_ckpt_check

run-amd

run-fix-Parameter-init

run-move-integrations

run_add_tts_pip

run_amd_push_ci_caller

run_amd_scheduled_ci_caller

run_amd_scheduled_ci_caller_testing

run_amd_scheduled_ci_caller_testing1

run_better_job_artifact_name

run_better_report

run_bon_courage

run_check_auto_mapping_importable

run_check_natten

run_ci_manually

run_daily_ci

run_daily_ci_11_20

run_daily_ci_11_21

run_deepspeed_ci

run_doctest_after_merge

run_doctest_ci

run_ds_ci

run_fc639143

run_fc639143_001

run_fc639143_002

run_fc639143_003

run_feat/kv_cache_class

run_fix_doc_on_circleci

run_fix_error_not_captured

run_nightly_ci_test_new_runner

run_no_job_name

run_past_ci_2nd

run_refactor_doctest

run_run_all_tests

run_run_amd_scheduled_ci_caller_deepspeed_test

run_scheduled_ci_now

run_scheduled_ci_now_2

run_scheduled_ci_now_3

run_scheduled_ci_now_4

run_scheduled_ci_now_5

run_scheduled_ci_now_6

run_sep_model_and_other_no_model_n_8_all_models

run_show_failure_better

run_split_daily_ci_based_on_no_job_name

run_split_daily_ci_based_on_no_job_name_2

run_tiny_with_fix_tiny_model_creation

run_torch_v_2_1

run_trigger_ci_when_tiny_summary_modified

run_truncate

run_update_tiny_002

run_use_main_in_conversion_script

safe_ci_report

safe_serialization_always_valid

safetensors-0.4.2

safetensors-step-2-2

safetensors_pre_release

safetensors_rc

scale

secure-amd-ci

shieh-length_in_compile

show_failure

simplify

simplify-contributions-init

simplify-contributions-inits

simplify-contributions-main-init

simplify-contributions-model_init

skip-tokenizer-test

skip_2_hub_tests

skip_blip2_torchscript

skip_idefics_doctest

sliding-window

smangrul/fix-auto-batch-finder-trainer-issue

smangrul/integrate-accelerate

smangrul/starcoder-int4-ddp-flash-attn

split_daily_ci

spmconverter_user_symbols

ssh_new_cluster

starcoder-2-fix

stas00-patch-1

state_spaces_call_for_contribution

stop-ci-on-fail-doc

stop-throwing-cache-warning

support-copy

sync-table-question-answering

sync_dqa_pipeline

sync_token_classification_and_zero_shot

sync_vqa_pipeline

t5-fp16-no-nans

temp-disable-scheduled-amd-ci

temp-kosmos25

temporary_pin_torch_2

tensor-cache

test-bin-format

test-build-ci-uv

test-datasets-2.14

test-datasets-2.21

test-datasets-3.0

test-datasets-main

test-datasets-pr

test-deepseek-fp8

test-doctests

test-eetq-dockerfile

test-fa2

test-huggingface_hub-pre-release

test-new-doc-builder-workflow

test-seentok

test-tokenizer-release

test-tokenizers-main

test_bc_tokenizers

test_ci

test_composition_2

test_composition_lysandre

test_composition_remote_tool

test_doc

test_docker_run_quantization

test_if_token

test_release_candidates

test_run_scheduler_ci

test_safetensors

test_safetensors_0.5.0

test_safetensors_abi3

test_tokenizers_0.19.0rc0

test_tokenizers_abi3

tf_forced_logits_xla_compatible

tf_int64_tests

tf_llama_port

tf_new_dummy_building

tf_quicktour_fix

tfconvnext

thomas/accelerate_gpt2

thomas/accelerate_gptj

thomas/add_custom_kernels

thomas/bloom_allow_fp32_lm_head

thomas/dirty_bloom_tp

thomas/fix_bloom

thomas/improve_bloom_generation_speed

thomas/llama

thomas/make_tp_bloom_generate_work

thomas/make_tp_work_with_bloom

tied_weights_load

tied_weights_warning_check

timm_wrapper_kwargs

tok-update

tokenizer-release

tokenizers_rc1

tools-inference-endpoints

torch-2.2-on-daily-ci

torch_versions

tp-loading

tp-support

trad_fixes

trainer-hyperparameter-search-kwargs-docs-update

trigger-amd-image-build

trigger_ci_with_torch_2_4

trigger_daily_ci

trigger_debug

trigger_disable_multi_gpu

trigger_doc

trigger_pt_10_past_ci

trigger_run_amd_scheduled_ci_caller_deepspeed_test

trigger_slow

trigger_test_cached_model_has_minimum_calls_to_head

trigger_upload_artifacts_3

try_comment_bot

try_fix_whisper_slow_test

try_matrix_fail

try_new_natten

try_new_natten_2

try_pydantic_v2_build_images

try_run_amd_push_ci_caller

try_sub

update-add-new-model

update-cooki

update-doc-gpu

update-from-pretrained

update-quantization-docker

update-special-tokens

update-tokenizers-version

update_27265

update_kosmos_2_file

update_llama_template

update_ssh

upgrade_tokenizers2

use-hfh-loading-saving-state-dict-helpers

use-process-retry-on-amd-smi

use-safetensors-from-pr

use-uv-

use_pt_25_image

v3.5.1hotfix

v4.0.0-release

v4.0.1-release

v4.10.0

v4.10.0-release

v4.10.1-release

v4.10.2-release

v4.10.3-release

v4.12.1-release

v4.12.2-release

v4.12.3-release

v4.12.4-release

v4.12.5-release

v4.14.1-release

v4.16.1-release

v4.16.2-release

v4.17.0-release

v4.18-release

v4.19-release

v4.2.1-patch

v4.2.2-patch

v4.20-release

v4.21-release

v4.22-release

v4.23-release

v4.24-release

v4.25-release

v4.26-release

v4.27-release

v4.28-release

v4.29-release

v4.3.0-release

v4.3.1-release

v4.3.2-release

v4.3.3-release

v4.30-release

v4.31-release

v4.32-release

v4.33-release

v4.34-release

v4.35-release

v4.36-release

v4.37-release

v4.38-release

v4.39-release

v4.4.2-release

v4.40-release

v4.41-release

v4.42-release

v4.43-release

v4.44-release

v4.45-release

v4.46-release

v4.47-release

v4.48-release

v4.5.0-release

v4.5.1-release

v4.6.0-release

v4.8.0-release

v4.8.2-release

v4.9.1

v4.9.2-release

warn-pre-allocation

whisper_chunking

whisper_out_of_range

wip_test_safetensors_rc

xenova-patch-1

xla_concrete_fn_dynamic_shape_debug

ydshieh-push-ci-image

ydshieh_check_run_nightly_ci_test_new_runner

yih-dar-try-compile_models

younes-opt-350-m

younes-test-workflow

younesbelkada-patch-1

zach-accelerate-integration

0.1.2

0.5.0

1.0

1.1.0

1.2.0

3.0.1

4.3.0.rc1

list

localattn1

v0.1.2

v0.2.0

v0.3.0

v0.4.0

v0.5.0

v0.5.1

v0.6.0

v0.6.1

v0.6.2

v1.0.0

v2.0.0

v2.1.0

v2.1.1

v2.10.0

v2.11.0

v2.2.0

v2.2.1

v2.2.2

v2.3.0

v2.4.0

v2.4.1

v2.5.0

v2.5.1

v2.6.0

v2.7.0

v2.8.0

v2.9.0

v2.9.1

v3.0.0

v3.0.1

v3.0.2

v3.1.0

v3.2.0

v3.3.0

v3.3.1

v3.4.0

v3.5.0

v3.5.1

v4.0.0

v4.0.0-rc-1

v4.0.1

v4.1.0

v4.1.1

v4.10.0

v4.10.1

v4.10.2

v4.10.3

v4.11.0

v4.11.1

v4.11.2

v4.11.3

v4.12.0

v4.12.1

v4.12.2

v4.12.3

v4.12.4

v4.12.5

v4.13.0

v4.14.0

v4.14.1

v4.15.0

v4.16.0

v4.16.1

v4.16.2

v4.17.0

v4.18.0

v4.19.0

v4.19.1

v4.19.2

v4.19.3

v4.19.4

v4.2.0

v4.2.1

v4.2.2

v4.20.0

v4.20.1

v4.21.0

v4.21.1

v4.21.2

v4.21.3

v4.22.0

v4.22.1

v4.22.2

v4.23.0

v4.23.1

v4.24.0

v4.25.1

v4.26.0

v4.26.1

v4.27.0

v4.27.1

v4.27.2

v4.27.3

v4.27.4

v4.28.0

v4.28.1

v4.29.0

v4.29.1

v4.29.2

v4.3.0

v4.3.0.rc1

v4.3.1

v4.3.2

v4.3.3

v4.30.0

v4.30.1

v4.30.2

v4.31.0

v4.32.0

v4.32.1

v4.33.0

v4.33.1

v4.33.2

v4.33.3

v4.34.0

v4.34.1

v4.35.0

v4.35.1

v4.35.2

v4.36.0

v4.36.1

v4.36.2

v4.37.0

v4.37.1

v4.37.2

v4.38.0

v4.38.1

v4.38.2

v4.39.0

v4.39.1

v4.39.2

v4.39.3

v4.4.0

v4.4.1

v4.4.2

v4.40.0

v4.40.1

v4.40.2

v4.41.0

v4.41.1

v4.41.2

v4.42.0

v4.42.1

v4.42.2

v4.42.3

v4.42.4

v4.43.0

v4.43.1

v4.43.2

v4.43.3

v4.43.4

v4.44.0

v4.44.1

v4.44.2

v4.45.0

v4.45.1

v4.45.2

v4.46.0

v4.46.1

v4.46.2

v4.46.3

v4.47.0

v4.47.1

v4.48.0

v4.48.1

v4.48.2

v4.48.3

v4.5.0

v4.5.1

v4.6.0

v4.6.1

v4.7.0

v4.8.0

v4.8.1

v4.8.2

v4.9.0

v4.9.1

v4.9.2

737ee3af62

Merge branch 'main' into feature/#35425 Minho Ryu 2025-01-28 14:46:31 +0900
704767e05c add deepseekv3 modeling ryan u 2025-01-28 14:42:30 +0900
a16e46b080 Use process_retry on amd-smi Ivar Flakstad 2025-01-27 14:40:36 +0100
4d15ba4458

Merge branch 'main' into remove-torch-pre-releases-amd-image remove-torch-pre-releases-amd-image ivarflakstad 2025-01-27 14:32:46 +0100
37df1312fc Do not use pre-releases for torch libs Ivar Flakstad 2025-01-27 13:18:23 +0100
9d4657c3be fix osme missing atols fix-red-ci-atol Arthur Zucker 2025-01-27 11:04:34 +0100
a33dd6e488

Merge branch 'main' into make-cache-traceable make-cache-traceable Ilyas Moutawwakil 2025-01-26 20:40:50 +0100
5a2ff5dfb0 Set to pull_request_target, testing works! auto-assign-reviewers Matt 2025-01-24 15:42:53 +0000
fdaacaaf03 Set back to pull_request for testing Matt 2025-01-24 15:40:34 +0000
feafbe087e Update the script Matt 2025-01-24 15:40:15 +0000
f124ec012b Use pull-request-target instead Matt 2025-01-24 14:57:26 +0000
090d9c4b2a

Merge branch 'main' into tensor-cache tensor-cache Ilyas Moutawwakil 2025-01-24 12:02:45 +0100
e13e9c1e25 fix gemma that needed kwargs fix-kwargs-issues Arthur Zucker 2025-01-24 11:19:47 +0100
67dd5524d3 simply make cache traceable IlyasMoutawwakil 2025-01-24 11:19:36 +0100
016ae273a2 Add TODO Matt 2025-01-23 17:50:00 +0000
5ccb79c16d fixed dynamic cache IlyasMoutawwakil 2025-01-23 16:45:28 +0100
2d480eccc7 fix copies tp-support Arthur Zucker 2025-01-23 16:40:33 +0100
3a12f71ab9 Request reviews instead of assigning Matt 2025-01-22 20:10:49 +0000
580aa713cf Request reviews instead of assigning Matt 2025-01-22 20:05:56 +0000
8b20315634 Remove prefix Matt 2025-01-22 19:53:16 +0000
adad02848a Strip inline comments Matt 2025-01-22 19:39:14 +0000
27d2961545 Update debug logs Matt 2025-01-22 19:35:34 +0000
3d6105a8d8 Update workflow permissions Matt 2025-01-22 19:29:17 +0000
8dc084682c Update workflow permissions Matt 2025-01-22 19:23:27 +0000
6b0f5b9b24 Correct path for codeowners file Matt 2025-01-22 19:15:51 +0000
ef3df762f3 Temporarily comment out the opened line so we can test the script Matt 2025-01-22 19:13:07 +0000
e96ba83ad4 Don't reassign reviewers if we already have them Matt 2025-01-22 19:08:37 +0000
4333c61971 fix missing import Matt 2025-01-22 19:06:54 +0000
e17ab9831e First draft of github action on PR opening for auto-assigning reviewers Matt 2025-01-22 19:04:36 +0000
80b49d721b rebased IlyasMoutawwakil 2025-01-22 17:31:39 +0100
dc1bd15ba9 Merge branch 'main' into tensor-cache IlyasMoutawwakil 2025-01-22 17:30:23 +0100
338f5954b9 more reverts IlyasMoutawwakil 2025-01-22 17:29:48 +0100
2f4e0bc93e

Update src/transformers/cache_utils.py Ilyas Moutawwakil 2025-01-22 17:18:28 +0100
485f959f85 revert IlyasMoutawwakil 2025-01-22 17:17:17 +0100
2bbbbbcf97 add device and dtype setters IlyasMoutawwakil 2025-01-22 17:15:12 +0100
85c71b004b

Merge branch 'main' into tensor-cache Ilyas Moutawwakil 2025-01-22 15:53:33 +0100
da60604f2c fix test_cache_utils IlyasMoutawwakil 2025-01-22 15:43:14 +0100
6e9799c817 add clone and to IlyasMoutawwakil 2025-01-22 15:42:43 +0100
4950a9e3f0 extract wrapper kwargs from init signature to correctly instantate IlyasMoutawwakil 2025-01-22 13:49:01 +0100
2af7730cb2 More updates to timm image processor, kwarg handling * default to train input size (less surprising) * add properties to mimic .size .crop_size .image_mean .image_std attributes in many Transformers image preproc (works with autotrain now) * try to make key check / inspect code more clear timm_wrapper_kwargs Ross Wightman 2025-01-21 15:57:54 -0800
da30662b81 Exploring use of kwargs for timm model and transforms creation Ross Wightman 2025-01-21 08:47:25 -0800
f2acf5fe34 Expound x2 muellerzr-trainer-refactor [[ -z $EMAIL ]] && read -e -p "Enter your email (for git configuration): " EMAIL 2025-01-21 09:25:18 -0500
59e10153da Document what's happening in the code [[ -z $EMAIL ]] && read -e -p "Enter your email (for git configuration): " EMAIL 2025-01-21 09:24:17 -0500
a0ce95c7dc Readbility muellerzr-more-ga-tests-fast [[ -z $EMAIL ]] && read -e -p "Enter your email (for git configuration): " EMAIL 2025-01-21 09:02:06 -0500
5d1545370e better error message circleci_debug_base_MobileNetV1ModelTest_test_batching_equivalence ydshieh 2025-01-21 12:41:05 +0100
063286f228

Remove cache migration script remove-cache-migration-script Wauplin 2025-01-21 11:37:39 +0100
57c02ccf15 bump rocm image build_ci_docker_image_amd2 Ivar Flakstad 2025-01-20 20:31:16 +0100
c075d2cd62 Fix AutoProcessor import order issue with custom classes fix-autoprocessor-import-order openhands 2025-01-20 18:14:34 +0000
b67b6eb9b2 make cache class exportable and executorch compatible IlyasMoutawwakil 2025-01-20 18:47:30 +0100
78257cac9f skip ydshieh 2025-01-20 18:00:35 +0100
1212cb5eae fix ydshieh 2025-01-20 17:31:57 +0100
53e70d9c69 fix ydshieh 2025-01-20 17:26:13 +0100
d269417aab fix zamba and jamba dynamic cache IlyasMoutawwakil 2025-01-20 17:21:49 +0100
95c1686ee0 style IlyasMoutawwakil 2025-01-20 17:09:21 +0100
8606594ad4 fix boolean evaluation IlyasMoutawwakil 2025-01-20 17:08:37 +0100
2e752ead46 revert my changes v4.48.1 Arthur Zucker 2025-01-20 17:05:34 +0100
45bb39bb80 torch tensor subclassing IlyasMoutawwakil 2025-01-20 17:01:49 +0100
785b5cf444 v4.48.1 Arthur Zucker 2025-01-20 16:20:06 +0100
3b09464364 Patch moonshine (#35731) eustlb 2025-01-20 16:19:29 +0100
b00807fac2 Fix condition when GA loss bug fix is not performed (#35651) kang sheng 2025-01-16 20:59:53 +0800
612bfd0801 [Phi] bias should be True (#35650) Arthur 2025-01-13 13:15:07 +0100
a77a94b209 unproxy cache IlyasMoutawwakil 2025-01-20 14:43:41 +0100
d4b631edd0 use tensor cache instead of module cache IlyasMoutawwakil 2025-01-20 14:17:28 +0100
8a462d13d3

Merge branch 'main' into secure-amd-ci secure-amd-ci ivarflakstad 2025-01-17 20:47:18 +0100
4afffcf9a6 Revert some changes that were deemed no longer required Ivar Flakstad 2025-01-17 20:46:17 +0100
ef0b5e279c add more TP support Arthur Zucker 2025-01-17 11:27:13 +0100
9f6481796d fix the small freeblocks issue continuous-batching Arthur Zucker 2025-01-16 15:13:44 +0100
f56824b0cb update Arthur Zucker 2025-01-16 14:19:25 +0100
cdd1d6e44c finish working example Arthur Zucker 2025-01-16 14:13:15 +0100
fac571ac65 don't loop too much Arthur Zucker 2025-01-16 11:40:07 +0100
aafc48b654 nits and fixes Arthur Zucker 2025-01-16 11:38:17 +0100
74e09dc4e0 works! Arthur Zucker 2025-01-16 11:24:54 +0100
32e7e7b6b1 make style fix_quanto_llama27b MekkCyber 2025-01-15 17:15:49 +0000
76815d1360 fix_quanto MekkCyber 2025-01-15 17:15:38 +0000
517cae97bb up Arthur Zucker 2025-01-15 18:07:25 +0100
c800a2c913 up Arthur Zucker 2025-01-15 17:51:12 +0100
960e176910 small updated Arthur Zucker 2025-01-15 17:47:36 +0100
3fc1e02e3c initial commit Arthur Zucker 2025-01-15 16:55:04 +0100
19c73cb0b1 Remove redundant variable Ivar Flakstad 2025-01-15 13:02:21 +0100
b0a095ba50 Merge branch 'main' into secure-amd-ci Ivar Flakstad 2025-01-15 12:08:50 +0100
526bb303d2 Fix call to get_workflow_id. ruff format Ivar Flakstad 2025-01-15 12:07:47 +0100
cc6f662a54 Testing success, remove debug block faster_set_initialized_submodules Matt 2025-01-14 18:24:57 +0000
edda0c1390 Formatting cleanup Matt 2025-01-14 18:12:04 +0000
dcbc8c9cce Fix the old keys comparison Matt 2025-01-14 18:05:07 +0000
3ec087ed73 make fixup Matt 2025-01-14 17:49:49 +0000
88aac166db Make set_initialized_submodules O(kN + log(N)) instead of O(N^2), where k << N Matt 2025-01-14 17:42:33 +0000
0d90a51f72 Add workflow_id (defaults to Self-hosted runner (scheduled)) Ivar Flakstad 2025-01-14 15:00:18 +0100
da3448dacf handle empty string REPORT_REPO_ID correctly Ivar Flakstad 2025-01-14 14:35:47 +0100
0652d891a7 Actually fix in the modular file fix-gemma2-sliding-window Pedro Cuenca 2025-01-14 12:24:29 +0100
6564e152ed Fix Gemma2 sliding window attention Pedro Cuenca 2025-01-14 12:15:48 +0100
637cadb26b test on transformers-supported revision fix_aria_ci Pablo 2025-01-13 18:23:53 +0100
b0be2eda9b Re-add space [[ -z $EMAIL ]] && read -e -p "Enter your email (for git configuration): " EMAIL 2025-01-13 11:27:52 -0500
7306624f45 Further nits [[ -z $EMAIL ]] && read -e -p "Enter your email (for git configuration): " EMAIL 2025-01-13 11:24:12 -0500
776758b597 Add more rigerous non-slow grad accum tests [[ -z $EMAIL ]] && read -e -p "Enter your email (for git configuration): " EMAIL 2025-01-13 11:18:41 -0500
b73bf1d1bd [run-slow] bamba fix_bamba_test Pablo 2025-01-13 15:59:52 +0100
e2cb0b96d1 make explicit gpu dep Pablo 2025-01-13 15:57:59 +0100
e00858ffd6 stash for now Arthur Zucker 2025-01-13 09:41:19 +0100
6bc0fbcfa7 [WIP] Emu3: add model (#33770) v4.48.0 Raushan Turganbay 2025-01-10 12:23:00 +0100
59e28c30fa Fix flex_attention in training mode (#35605) Cyril Vallez 2025-01-10 11:49:12 +0100
7cf6230e25 push a fix for now Arthur Zucker 2025-01-10 11:34:08 +0100