mirror of
https://github.com/saymrwulf/transformers.git
synced 2026-05-14 20:58:08 +00:00
Commit graph
Select branches
Hide pull requests
'delete-delete-doc'
29625_add_prefix_space
29625_prefix_space
30824-spmconverter-user-defined-symbol
31187_depreciate_clean_up_tokenization_spaced
BritneyMuller-housekeeping-patch
LysandreJik-patch-1
_dummy_fix_weight_only_usage
_dummy_fix_weight_only_usage_2
add-deci-lm
add-flash-decoding
add-fp8-llama-script
add-git-lfs-to-amd-image
add-qgalore
add-rwkv5
add-warning-4bit-opti
add_amd_daily_ci
add_back_generative_class
add_deformable_detr
add_fa2_bart
add_important_warning_padding_attention_mask
add_kosmos_2_remote
add_kosmos_2_temp
add_kosmos_2_utm5_attn
add_kosmos_2_utm5_attn_rebased
add_kosmos_2_utm5_attn_rebased_flat_layer_structure
add_num_workers_for_tf
add_pipeline_equivalence_testing
add_prefix_space_29625
add_prefix_space_clean
add_siglip_fast_tokenizer_bis
add_tf_export_doc
add_word_level_timestamp_long
adding_fp8_quantization
agent_callback
agents-count-tokens
agents-make-easier-tags
agents-messages
albertvillanova-patch-1
allow_old_falcon_name
amd-nightly-ci
amdgpu-multi-gpu-tests
another_prepare_dataset_fix
api_big2
arijitx/wav2vec2_alignment
assistant_decoding_batch
attn-implementation-vision-enc-dec
auot-convert-tekken
auto-assign-reviewers
auto_gpt4_conversion
autoformer-test-seq-to-seq-add-atol
best_benchmark
best_benchmark_new
best_benchmark_on_static_cache
best_benchmark_on_static_cache_new
bloom_big
bos_eos_token_fix
bos_eos_token_slow
build-check-deepspeed-image
build-docker-torch-2.2
build_ci_docker_image_amd1
build_ci_docker_image_amd2
build_ci_docker_image_amd3
build_docker_on_kube
build_docker_on_kube_2
build_docker_on_kube_3
build_image_abc
byebye
byebye_py_37
cache_exp
change-ci
chat_template_kwargs
check-deepspeed
check-send-headers-when-converting-safetensors
check-whisper-slow-tests
check_amd_image_build
check_compile
check_compile_if_flaky
check_doc_test
check_docker_i
check_ds
check_example_ci
check_example_job
check_fastspeech
check_fix_fix_fix
check_fix_torch_pip
check_flaky
check_flax_example
check_gated_repo
check_gemma
check_gemma_compile
check_gemma_compile_2
check_gen_2
check_layoutlm
check_layoutlmv2
check_limit
check_mem
check_mem_00c1d87
check_mem_3cefac1d
check_mem_56b64bf
check_mem_838b87a
check_nightly_build
check_nightly_build_build_image
check_nougat
check_past_runner
check_permission
check_quant
check_quantized_param_bnb4
check_report
check_safetensors_rc
check_slow_pr
check_speed_no_empty
check_strange_doctest
check_tiny_creation
check_torch_2.2
check_trigger_4d8427f7
check_trigger_a81cf9ee
check_update_cache_number
check_ved_trocr
check_what_wrong_in_tiny_creation
checkout-layoutlm-tokenizers
chunk_length_ctc
ci-amdgpu-build-docker-images
ci-amdgpu-mi250
ci-amdgpu-nightly
ci-test-huggingface-hub-v0.15.0.rc0
ci-test-huggingface-hub-v0.16.0.rc0
ci-test-huggingface-hub-v0.17.0.rc0
ci-test-huggingface-hub-v0.18.0.rc0
ci-test-huggingface-hub-v0.19.0.rc0
ci-test-huggingface-hub-v0.20.0.rc1
ci-test-huggingface-hub-v0.21.0.rc0
ci-test-huggingface-hub-v0.22.0.rc0
ci-test-huggingface-hub-v0.23.0.rc0
ci-test-huggingface-hub-v0.23.0.rc1
ci-test-huggingface-hub-v0.24.0.rc0
ci-test-huggingface-hub-v0.25.0.rc0
ci-test-huggingface-hub-v0.25.0.rc1
ci-test-huggingface-hub-v0.26.0.rc0
ci-test-huggingface-hub-v0.27.0.rc0
ci-test-huggingface-hub-v0.27.0.rc1
ci-test-huggingface-hub-v0.27.0rc1
ci-test-huggingface-hub-v0.28.0.rc0
ci-test-huggingface-hub-v0.28.0.rc5
circleci_combine_reports
circleci_debug_base
circleci_debug_base_MobileNetV1ModelTest_test_batching_equivalence
circleci_debug_base_timm
circleci_debug_base_timm_3
clean_spmcoverter
cohere-diff
cohere-diff-2
compare_ci_with_torch_2.2
compile-rope
continuous-batching
custom_bloom_kernel
databricks
dduf-compability
dduf-compatibility-with-file-explorer
deberta-xla-fixes
debug_bloom
debug_get_jobs
debug_kosmos_2_output
debug_mem_0b192de1
debug_mem_95b37495
debug_metadata_run
debugdebug
debugdebug-2
debugdebug-5
deepseek_v2_support
deepspeed-amd-pytorch-version-fix
default-auto
default-fast-load
delete_big_tokenizer_block
deprecate_LegacyIndex
disable_multi_gpu
doc-builder
doc-link-one-or-two-papers-lysandre
doc-regactor
doc_builder_rename
doc_pr
docker_change_awq_version
docs-ctrl-lys
dont-use-deprecated-method
drop_py38_build_img_2
ds-fix-resume
ds-ignore_mismatched_sizes
dynamic_length_in_static_cache
dynamic_length_in_static_cache_001
dynamic_length_in_static_cache_002
dynamic_length_in_static_cache_reconstruct_tensors_from_length
dynamic_length_on_0ae789e0
dynamic_length_on_75bbfd5b
dynamic_length_on_95b3c381
dynamic_length_on_b6eb708b
enable_tf_numpy
faster_cache_without_compile
faster_copies
faster_set_initialized_submodules
fire
fix-Parameter-init
fix-Seq2SeqTrainingArguments-doc
fix-autoprocessor-import-order
fix-bnb-warning
fix-ci
fix-ci-setup
fix-copies
fix-device-map
fix-doc-builder-edit
fix-flash-comment
fix-from-pretrained
fix-gemma2-sdpa
fix-gemma2-sliding-window
fix-gpt2-scaled-init
fix-int8-serilation
fix-kwargs-issues
fix-llama-3-gguf
fix-pipeline-predict-transform-methods
fix-pixtral-tests
fix-pytorch-deepspeed-image
fix-quality-2
fix-red-ci-atol
fix-slow-tests-shieh-trigger
fix-task-mappings
fix-word-ids
fix-workflow
fix_TFMarianModelTest_test_xla_generate_slow
fix_aria_ci
fix_auto_test
fix_autoawq_docker
fix_autoawq_test
fix_bamba_test
fix_check_copies
fix_chinese_clip
fix_convert_spm_bpe
fix_deprecation_warnings
fix_dinat_2
fix_dinov2
fix_docker_autoawq
fix_docker_autogptq_from_source
fix_doctest
fix_doctest_based_on_refactor_doctest_2
fix_eetq_test
fix_falcon_processor
fix_flaky_4
fix_flaky_test_assisted_decoding_matches_greedy_search
fix_flaky_test_pt_tf_model_equivalence
fix_fp_32
fix_generate_embeds
fix_gptq_test
fix_gptq_tests
fix_jetmoe
fix_more_input_out
fix_not_init
fix_peft_model_in_pipelines
fix_pipe_tests_001
fix_quanto_llama27b
fix_remote_tool
fix_slow_gen_on_0ae789e0
fix_slow_gen_on_75bbfd5b
fix_slow_gen_on_b6eb708b
fix_st5_docs
fix_stupid_cond
fix_test_encode_decode
fix_test_fetcher_tests
fix_tie
fix_whisper_ci
fix_whisper_tflite_export
fix_zh_quicktour_md
flex_attention_qwen2
flex_attn_example
for_test_run_squad_no_trainer
force-convert
full-bf16-train
full_length_on_468f7cca
full_length_on_68b71c85
full_length_on_862cde4c
gemma_allow_compile
general_test_low_cpu_mem
get_bad_commits_for_daily_ci_11_20
get_bad_commits_for_daily_ci_11_26
gpt-flex-attention
gpt-mqa
gptneo_gpt4_port
gptneo_gpt4_port_new
hardware-auto-setup
hardware-auto-setup-ci
hotfix_ci_222
hqq_serialization
idefics3
ifix_aqlm_modules_to_not_convert
improve_error_message_asr_pipeline
improve_error_message_when_transformers_is_misconfigured
improve_torch_version_check
informative-detr-message
init_round_2
init_round_5
int
inverse_chat_templates
jeffboudier-transformers-docs-ad-copy
jnp_devicearray
keras-core-support
keras3_compatibility_phase_2
keras_3_compatibility
larger_runner
less-constraints
link-to-the-hub
llama-break-fix
llama-pad-side
llama-refactor
load_pretrainedfast_auto
make-cache-traceable
mark_whisper_test_slow
master
measure_all_tests
measure_gen
measure_gen_on_0ae789e0
measure_gen_on_75bbfd5b
measure_gen_on_b6eb708b
merge_text2text_into_text_generation
ministral
mitigate_tf_stride_vulnerability
modular-roberta
modular-stablelm
more_reduced_dummy_memory_usage
more_tf_int_dtypes
moshi-integration
move_jobs_from_daily_ci_channel
move_part_2
muellerzr-accum-plugin
muellerzr-ds-investigation
muellerzr-dummy-pr
muellerzr-enable-quant
muellerzr-enable-torchdata
muellerzr-fix-autocast
muellerzr-fix-integration-tests
muellerzr-fix-reentrant
muellerzr-fix-timeout
muellerzr-free-memory
muellerzr-free-memory-passthrough
muellerzr-jobs
muellerzr-less-fixes
muellerzr-lr-sched-right-version
muellerzr-metrics
muellerzr-modeling
muellerzr-more-ga-tests-fast
muellerzr-more-models-sadface
muellerzr-multinode-save
muellerzr-network-retry
muellerzr-skip-dvc
muellerzr-skip-failing-example
muellerzr-speedup-modular-conversion
muellerzr-trainer-refactor
muellerzr-transformers-should-not-set-env-variables
muellerzr-use-scientific
mymain
new-split
nezha_slow
nit-ga-condition
nit-modular-reame
nit-refactor
nit-remove-irrelevant-comment
nit_cleanup
nits-attention
no_more_shape_list
no_overwrite_test_batching_equivalence
noua/bloom_cugraph
np2
on_predict
onnx_gpt2_io_definition
output_ragged
pin-ffspec
pin-gguf
pin_ds
ping_author
ping_author_6
pipeline-revision-mirror
pipelines_signatures
pixtral_batchmixfeature_fix
pixtral_processor_structure_fix
post-action-build-test-tokenizers-main
processor-template-duplicated-tokens
protobuf-4
push-ci-image
quickfix_generate_tests
reenable_test
refactor-attention-converesion
refactor-from-pretrained
refactor_doctest_2
refactoring-new-version
relative-paths
remove-cache-migration-script
remove-items
remove-torch-pre-releases-amd-image
remove-warnings
repro-bug-pytorch-compile
repro-bug-pytorch-compile-cudagraph
reset_logger_level_2
resnet_with_variants
reverse_templating
revert-17547-update-support-image
revert-17646-skip_repo_not_found
revert-31494-add_dac
revert-33934-patch-1
revert-checkpoint-tmp-dir
revert-commit-30302
revert_hard_error2
robust_config_ckpt_check
run-amd
run-fix-Parameter-init
run-move-integrations
run_add_tts_pip
run_amd_push_ci_caller
run_amd_scheduled_ci_caller
run_amd_scheduled_ci_caller_testing
run_amd_scheduled_ci_caller_testing1
run_better_job_artifact_name
run_better_report
run_bon_courage
run_check_auto_mapping_importable
run_check_natten
run_ci_manually
run_daily_ci
run_daily_ci_11_20
run_daily_ci_11_21
run_deepspeed_ci
run_doctest_after_merge
run_doctest_ci
run_ds_ci
run_fc639143
run_fc639143_001
run_fc639143_002
run_fc639143_003
run_feat/kv_cache_class
run_fix_doc_on_circleci
run_fix_error_not_captured
run_nightly_ci_test_new_runner
run_no_job_name
run_past_ci_2nd
run_refactor_doctest
run_run_all_tests
run_run_amd_scheduled_ci_caller_deepspeed_test
run_scheduled_ci_now
run_scheduled_ci_now_2
run_scheduled_ci_now_3
run_scheduled_ci_now_4
run_scheduled_ci_now_5
run_scheduled_ci_now_6
run_sep_model_and_other_no_model_n_8_all_models
run_show_failure_better
run_split_daily_ci_based_on_no_job_name
run_split_daily_ci_based_on_no_job_name_2
run_tiny_with_fix_tiny_model_creation
run_torch_v_2_1
run_trigger_ci_when_tiny_summary_modified
run_truncate
run_update_tiny_002
run_use_main_in_conversion_script
safe_ci_report
safe_serialization_always_valid
safetensors-0.4.2
safetensors-step-2-2
safetensors_pre_release
safetensors_rc
scale
secure-amd-ci
shieh-length_in_compile
show_failure
simplify
simplify-contributions-init
simplify-contributions-inits
simplify-contributions-main-init
simplify-contributions-model_init
skip-tokenizer-test
skip_2_hub_tests
skip_blip2_torchscript
skip_idefics_doctest
sliding-window
smangrul/fix-auto-batch-finder-trainer-issue
smangrul/integrate-accelerate
smangrul/starcoder-int4-ddp-flash-attn
split_daily_ci
spmconverter_user_symbols
ssh_new_cluster
starcoder-2-fix
stas00-patch-1
state_spaces_call_for_contribution
stop-ci-on-fail-doc
stop-throwing-cache-warning
support-copy
sync-table-question-answering
sync_dqa_pipeline
sync_token_classification_and_zero_shot
sync_vqa_pipeline
t5-fp16-no-nans
temp-disable-scheduled-amd-ci
temp-kosmos25
temporary_pin_torch_2
tensor-cache
test-bin-format
test-build-ci-uv
test-datasets-2.14
test-datasets-2.21
test-datasets-3.0
test-datasets-main
test-datasets-pr
test-deepseek-fp8
test-doctests
test-eetq-dockerfile
test-fa2
test-huggingface_hub-pre-release
test-new-doc-builder-workflow
test-seentok
test-tokenizer-release
test-tokenizers-main
test_bc_tokenizers
test_ci
test_composition_2
test_composition_lysandre
test_composition_remote_tool
test_doc
test_docker_run_quantization
test_if_token
test_release_candidates
test_run_scheduler_ci
test_safetensors
test_safetensors_0.5.0
test_safetensors_abi3
test_tokenizers_0.19.0rc0
test_tokenizers_abi3
tf_forced_logits_xla_compatible
tf_int64_tests
tf_llama_port
tf_new_dummy_building
tf_quicktour_fix
tfconvnext
thomas/accelerate_gpt2
thomas/accelerate_gptj
thomas/add_custom_kernels
thomas/bloom_allow_fp32_lm_head
thomas/dirty_bloom_tp
thomas/fix_bloom
thomas/improve_bloom_generation_speed
thomas/llama
thomas/make_tp_bloom_generate_work
thomas/make_tp_work_with_bloom
tied_weights_load
tied_weights_warning_check
timm_wrapper_kwargs
tok-update
tokenizer-release
tokenizers_rc1
tools-inference-endpoints
torch-2.2-on-daily-ci
torch_versions
tp-loading
tp-support
trad_fixes
trainer-hyperparameter-search-kwargs-docs-update
trigger-amd-image-build
trigger_ci_with_torch_2_4
trigger_daily_ci
trigger_debug
trigger_disable_multi_gpu
trigger_doc
trigger_pt_10_past_ci
trigger_run_amd_scheduled_ci_caller_deepspeed_test
trigger_slow
trigger_test_cached_model_has_minimum_calls_to_head
trigger_upload_artifacts_3
try_comment_bot
try_fix_whisper_slow_test
try_matrix_fail
try_new_natten
try_new_natten_2
try_pydantic_v2_build_images
try_run_amd_push_ci_caller
try_sub
update-add-new-model
update-cooki
update-doc-gpu
update-from-pretrained
update-quantization-docker
update-special-tokens
update-tokenizers-version
update_27265
update_kosmos_2_file
update_llama_template
update_ssh
upgrade_tokenizers2
use-hfh-loading-saving-state-dict-helpers
use-process-retry-on-amd-smi
use-safetensors-from-pr
use-uv-
use_pt_25_image
v3.5.1hotfix
v4.0.0-release
v4.0.1-release
v4.10.0
v4.10.0-release
v4.10.1-release
v4.10.2-release
v4.10.3-release
v4.12.1-release
v4.12.2-release
v4.12.3-release
v4.12.4-release
v4.12.5-release
v4.14.1-release
v4.16.1-release
v4.16.2-release
v4.17.0-release
v4.18-release
v4.19-release
v4.2.1-patch
v4.2.2-patch
v4.20-release
v4.21-release
v4.22-release
v4.23-release
v4.24-release
v4.25-release
v4.26-release
v4.27-release
v4.28-release
v4.29-release
v4.3.0-release
v4.3.1-release
v4.3.2-release
v4.3.3-release
v4.30-release
v4.31-release
v4.32-release
v4.33-release
v4.34-release
v4.35-release
v4.36-release
v4.37-release
v4.38-release
v4.39-release
v4.4.2-release
v4.40-release
v4.41-release
v4.42-release
v4.43-release
v4.44-release
v4.45-release
v4.46-release
v4.47-release
v4.48-release
v4.5.0-release
v4.5.1-release
v4.6.0-release
v4.8.0-release
v4.8.2-release
v4.9.1
v4.9.2-release
warn-pre-allocation
whisper_chunking
whisper_out_of_range
wip_test_safetensors_rc
xenova-patch-1
xla_concrete_fn_dynamic_shape_debug
ydshieh-push-ci-image
ydshieh_check_run_nightly_ci_test_new_runner
yih-dar-try-compile_models
younes-opt-350-m
younes-test-workflow
younesbelkada-patch-1
zach-accelerate-integration
0.1.2
0.5.0
1.0
1.1.0
1.2.0
3.0.1
4.3.0.rc1
list
localattn1
v0.1.2
v0.2.0
v0.3.0
v0.4.0
v0.5.0
v0.5.1
v0.6.0
v0.6.1
v0.6.2
v1.0.0
v2.0.0
v2.1.0
v2.1.1
v2.10.0
v2.11.0
v2.2.0
v2.2.1
v2.2.2
v2.3.0
v2.4.0
v2.4.1
v2.5.0
v2.5.1
v2.6.0
v2.7.0
v2.8.0
v2.9.0
v2.9.1
v3.0.0
v3.0.1
v3.0.2
v3.1.0
v3.2.0
v3.3.0
v3.3.1
v3.4.0
v3.5.0
v3.5.1
v4.0.0
v4.0.0-rc-1
v4.0.1
v4.1.0
v4.1.1
v4.10.0
v4.10.1
v4.10.2
v4.10.3
v4.11.0
v4.11.1
v4.11.2
v4.11.3
v4.12.0
v4.12.1
v4.12.2
v4.12.3
v4.12.4
v4.12.5
v4.13.0
v4.14.0
v4.14.1
v4.15.0
v4.16.0
v4.16.1
v4.16.2
v4.17.0
v4.18.0
v4.19.0
v4.19.1
v4.19.2
v4.19.3
v4.19.4
v4.2.0
v4.2.1
v4.2.2
v4.20.0
v4.20.1
v4.21.0
v4.21.1
v4.21.2
v4.21.3
v4.22.0
v4.22.1
v4.22.2
v4.23.0
v4.23.1
v4.24.0
v4.25.1
v4.26.0
v4.26.1
v4.27.0
v4.27.1
v4.27.2
v4.27.3
v4.27.4
v4.28.0
v4.28.1
v4.29.0
v4.29.1
v4.29.2
v4.3.0
v4.3.0.rc1
v4.3.1
v4.3.2
v4.3.3
v4.30.0
v4.30.1
v4.30.2
v4.31.0
v4.32.0
v4.32.1
v4.33.0
v4.33.1
v4.33.2
v4.33.3
v4.34.0
v4.34.1
v4.35.0
v4.35.1
v4.35.2
v4.36.0
v4.36.1
v4.36.2
v4.37.0
v4.37.1
v4.37.2
v4.38.0
v4.38.1
v4.38.2
v4.39.0
v4.39.1
v4.39.2
v4.39.3
v4.4.0
v4.4.1
v4.4.2
v4.40.0
v4.40.1
v4.40.2
v4.41.0
v4.41.1
v4.41.2
v4.42.0
v4.42.1
v4.42.2
v4.42.3
v4.42.4
v4.43.0
v4.43.1
v4.43.2
v4.43.3
v4.43.4
v4.44.0
v4.44.1
v4.44.2
v4.45.0
v4.45.1
v4.45.2
v4.46.0
v4.46.1
v4.46.2
v4.46.3
v4.47.0
v4.47.1
v4.48.0
v4.48.1
v4.48.2
v4.48.3
v4.5.0
v4.5.1
v4.6.0
v4.6.1
v4.7.0
v4.8.0
v4.8.1
v4.8.2
v4.9.0
v4.9.1
v4.9.2
-
cb5de19d8fadd key_mapping keyword refactor-from-pretrained
Cyril Vallez
2025-02-10 15:09:45 +0100 -
65d0cbc495Update modeling_utils.py
Cyril Vallez
2025-02-10 13:58:13 +0100 -
8971d57339
fix
fix-bnb-warning
Marc Sun
2025-02-10 13:41:38 +0100 -
0bd85ed634Merge branch 'main' into skip_blip2_torchscript skip_blip2_torchscript
Yih-Dar
2025-02-10 13:37:04 +0100 -
3b465d3472add forgotten check
Cyril Vallez
2025-02-10 13:04:30 +0100 -
bea683cf18style
Cyril Vallez
2025-02-10 13:01:03 +0100 -
bd3b5fa50ffinalize sound renaming logic
Cyril Vallez
2025-02-10 13:00:36 +0100 -
b6b0981483Merge branch 'main' into revert-checkpoint-tmp-dir revert-checkpoint-tmp-dir
Marc Sun
2025-02-10 12:54:22 +0100 -
f11085d6eaUpdate modeling_utils.py
Cyril Vallez
2025-02-10 12:35:57 +0100 -
7a86256c89add sanity check back (a test depends on it)
Cyril Vallez
2025-02-10 12:23:55 +0100 -
6bbe016c78remove unused check
Cyril Vallez
2025-02-10 12:10:39 +0100 -
89c740d07asimplify renaming logic
Cyril Vallez
2025-02-10 11:52:55 +0100 -
1c0c5cf7b4simplify
Cyril Vallez
2025-02-10 11:15:18 +0100 -
c9d2b88ffe
Revert "Save checkpoint to temporary directory to handle partial saves during failures (#35580)"
Marc Sun
2025-02-10 10:58:06 +0100 -
b3d1068951
Revert "Fix OS err (#36094)"
Marc Sun
2025-02-10 10:57:34 +0100 -
f7f2c896c2Update modeling_utils.py
Cyril Vallez
2025-02-10 10:45:46 +0100 -
dd4207c3edUpdate hub.py
Cyril Vallez
2025-02-10 10:41:13 +0100 -
820ae396a4
update
adding_fp8_quantization
MekkCyber
2025-02-10 07:06:53 +0000 -
f22ff5c2e1CIs
Cyril Vallez
2025-02-08 22:39:27 +0100 -
c7b175ec95simplify
Cyril Vallez
2025-02-08 20:15:48 +0100 -
e6e75102e8fix tp plan registry
Cyril Vallez
2025-02-08 19:14:34 +0100 -
7a87812f3eRemove unused function and move back _get_tp_registry
Cyril Vallez
2025-02-08 17:12:48 +0100 -
aca9b22c25remove old functions
Cyril Vallez
2025-02-08 16:44:50 +0100 -
1a5bfc1de5
fix style
MekkCyber
2025-02-08 14:52:28 +0000 -
d41e11b3f1
update
MekkCyber
2025-02-08 14:37:19 +0000 -
98aa2bdad6remove old function
Cyril Vallez
2025-02-08 15:30:23 +0100 -
68a3234aa8
fix consistency
MekkCyber
2025-02-08 12:25:38 +0000 -
8d637f15de
fix imports
MekkCyber
2025-02-08 11:36:54 +0000 -
3ead98b2f6
fix style
MekkCyber
2025-02-08 11:23:12 +0000 -
33f73712dc
end2end
MekkCyber
2025-02-08 11:18:52 +0000 -
83912834cc
fix quantization logic
MekkCyber
2025-02-05 09:44:31 +0000 -
70749dfd9b
fix create_quantized_param
MekkCyber
2025-02-04 17:09:58 +0000 -
3700bbc09f
adding kernels
MekkCyber
2025-02-04 14:57:18 +0000 -
b0c3641f56
first commit
MekkCyber
2025-02-04 09:58:42 +0000 -
33f9e49a5bfast download first prototype
Cyril Vallez
2025-02-08 00:10:30 +0100 -
0642700ecdfix
Cyril Vallez
2025-02-07 23:18:10 +0100 -
9f66405c2e
add sync
test-deepseek-fp8
MekkCyber
2025-02-07 21:04:53 +0000 -
f8a963c116
Loss_function
muellerzr-more-models-sadface
[[ -z $EMAIL ]] && read -e -p "Enter your email (for git configuration): " EMAIL
2025-02-07 12:35:22 -0500 -
f839aa20fe
Style
[[ -z $EMAIL ]] && read -e -p "Enter your email (for git configuration): " EMAIL
2025-02-07 12:23:57 -0500 -
f4c3dadfb1Update modeling_utils.py
Cyril Vallez
2025-02-07 18:21:47 +0100 -
919bcbeca7
Moar bronked
[[ -z $EMAIL ]] && read -e -p "Enter your email (for git configuration): " EMAIL
2025-02-07 12:20:52 -0500 -
f459bf8177Update modeling_utils.py
Cyril Vallez
2025-02-07 17:19:48 +0100 -
87e97ea54cUpdate modeling_utils.py
Cyril Vallez
2025-02-07 16:24:42 +0100 -
8aa45e177e
Fix bug in apply_rotary_pos_emb_flashatt: in Qwen2-5-VL (#36065)
DeepWave
2025-02-07 17:43:45 +0800 -
128b840247
Adding RT-DETRv2 for object detection (#34773)
Jade Choghari
2025-02-06 11:28:45 -0800 -
4d1480d567
[docs] fix outdated example code in
trainer.md(#36066)
Fanli Lin
2025-02-07 02:54:22 +0800 -
9c3009a391
Fix StopStringCriteria to handle tokens above len(tokenizer) (#35797)
Matt
2025-02-06 16:53:28 +0000 -
e173ffd3ba
Fix model kwargs (#35875)
Zach Mueller
2025-02-06 11:35:25 -0500 -
b0cb3d0cc5
Fix words typos in ggml test. (#36060)
湛露先生
2025-02-06 23:32:40 +0800 -
c3e530a184
Nail in edge case of torch dtype being overriden permantly in the case of an error (#35845)
Zach Mueller
2025-02-06 09:05:23 -0500 -
20d17358c4
Save checkpoint to temporary directory to handle partial saves during failures (#35580)
SilverSoldier
2025-02-06 19:18:05 +0530 -
987e09e549
Paligemma: fix generation with Gemma2 (#36044)
Raushan Turganbay
2025-02-06 14:31:32 +0100 -
4886cb79c1
Update
test_flash_attn_2_can_dispatch_composite_models(#36050)
Yih-Dar
2025-02-06 12:09:49 +0100 -
8201506d28
Fix repo consistency (#36063)
Yih-Dar
2025-02-06 11:53:15 +0100 -
1bfcddbba7
Fix usage of unpad_input function (#35925)
Pavel Gein
2025-02-06 15:33:42 +0500 -
d68779b358
Iterative generation using Input embeds and
past_key_values(#35890)
Yaswanth Gali
2025-02-06 15:36:05 +0530 -
b44d36e10f
Add
Qwen2VLImageProcessorFastintoQwen2VLProcessor(#35987)
Ye Liu
2025-02-06 17:03:09 +0800 -
5f597b7460
Fix Audio Classification Pipeline top_k Documentation Mismatch and Bug #35736 (#35771)
Sambhav Dixit
2025-02-05 21:55:08 +0530 -
31bbef04b9
Fix how we compute the final non-padding token for ForSequenceClassification models (#35911)
Matt
2025-02-05 16:23:33 +0000 -
ac4acde462
[docs] no hard-coding cuda (#36043)
Fanli Lin
2025-02-06 00:22:33 +0800 -
8e499a203c
[docs] fix bugs in the bitsandbytes documentation (#35868)
Fanli Lin
2025-02-06 00:21:20 +0800 -
741865eebf
[docs] no hard coding cuda as bnb has multi-backend support (#35867)
Fanli Lin
2025-02-06 00:20:02 +0800 -
1e929f357f
DeepSpeed github repo move sync (#36021)
Stas Bekman
2025-02-05 08:19:31 -0800 -
c14c2cc006
add support for empty list as input to create_model_card (#36042)
ROZBEH
2025-02-05 04:29:17 -0800 -
c21f13fe21
Add XPU type for work-around -inf mask causing sdpa NaN issue in modeling files (#35647)
Liangliang Ma
2025-02-05 20:28:31 +0800 -
dc3a523e9d
Fix synced multi-GPU generation with LLMs and VLMs (#35893)
ManukyanD
2025-02-05 14:15:11 +0400 -
99f9afb079
commit
MekkCyber
2025-02-07 10:41:53 +0000 -
c341f8c6fbstyle
Cyril Vallez
2025-02-07 11:41:07 +0100 -
c95dc4ebe7update
Cyril Vallez
2025-02-07 11:39:28 +0100 -
f88bb46428update
Cyril Vallez
2025-02-07 11:07:53 +0100 -
298b3f1930
v4.48.3
v4.48.3
v4.48-release
Arthur Zucker
2025-02-07 10:32:49 +0100 -
d28f0207d5
GPTNeoX needs kwargs
Arthur Zucker
2025-02-07 10:14:53 +0100 -
8025c92c7btrigger CIs
Cyril Vallez
2025-02-07 02:03:21 +0100 -
a17393187fUpdate test_modeling_common.py
Cyril Vallez
2025-02-07 01:54:55 +0100 -
42f02f6ee1update
Cyril Vallez
2025-02-07 01:47:44 +0100 -
3d6e55c7e7
Fix model kwargs (#35875)
Zach Mueller
2025-02-06 11:35:25 -0500 -
45c858a5d8fix
Cyril Vallez
2025-02-06 18:38:30 +0100 -
3b56ad06c4much clearer renaming of keys
Cyril Vallez
2025-02-06 18:32:25 +0100 -
c631f36cca
quality
muellerzr-speedup-modular-conversion
[[ -z $EMAIL ]] && read -e -p "Enter your email (for git configuration): " EMAIL
2025-02-06 12:26:38 -0500 -
139f2cae03
Brr
[[ -z $EMAIL ]] && read -e -p "Enter your email (for git configuration): " EMAIL
2025-02-06 12:22:44 -0500 -
421bf8611a
fix 2
nit_cleanup
ydshieh
2025-02-06 15:58:29 +0100 -
99f9e044d7
commit
MekkCyber
2025-02-06 14:56:12 +0000 -
f0adc01768
add special tokens
update-special-tokens
Arthur Zucker
2025-02-06 15:14:03 +0100 -
093bebcdd9
Paligemma: fix generation with Gemma2 (#36044)
Raushan Turganbay
2025-02-06 14:31:32 +0100 -
97a6cf9072
Fix device in rope module when using dynamic updates (#35608)
Cyril Vallez
2025-01-13 10:11:17 +0100 -
8aca12c774Update modeling_utils.py
Cyril Vallez
2025-02-06 11:19:55 +0100 -
c4cbed8081
Processor: prevent duplicated tokens
processor-template-duplicated-tokens
Pedro Cuenca
2025-02-06 10:41:05 +0100 -
b149b1f6fetrigger CIs
Cyril Vallez
2025-02-05 23:36:06 +0100 -
11c0bde616fix weird in-place op
Cyril Vallez
2025-02-05 23:28:26 +0100 -
27e1615466style
Cyril Vallez
2025-02-05 22:12:55 +0100 -
ff1078387enew first tp loading version
Cyril Vallez
2025-02-05 21:44:46 +0100 -
c3e818561eUpdate modeling_utils.py
Cyril Vallez
2025-02-05 15:35:10 +0100 -
1bdb7bba52Update modeling_utils.py
Cyril Vallez
2025-02-05 15:27:08 +0100 -
a3401c3e23keep improving
Cyril Vallez
2025-02-05 15:14:50 +0100 -
574e3f76c9remove _fast_init
Cyril Vallez
2025-02-05 13:57:47 +0100 -
a6ffd7b4d2Update modeling_utils.py
Cyril Vallez
2025-02-05 13:53:54 +0100 -
11e378024dadd type hints/docstring
Cyril Vallez
2025-02-05 13:52:57 +0100 -
6ba13f577b
update
ydshieh
2025-02-05 13:44:09 +0100 -
82ca6920c6
update
ydshieh
2025-02-05 13:40:59 +0100 -
11e31ec24f
Add future import for Py < 3.10 (#35666)
Matt
2025-01-15 12:45:43 +0000