DeepSpeed
8998da04
- Merge branch 'master' into tohtana/log_run_tests
Go
Login via GitHub
Home
Pricing
FAQ
Install
Login
via GitHub
Commit
View On
GitHub
Hide Minimap (CTRL+M)
Commit
98 days ago
Merge branch 'master' into tohtana/log_run_tests
Author
loadams
Parents
bb4c5b6f
8ec1af5f
Files
352
.github
ISSUE_TEMPLATE
deepspeed_chat_bug_report.md
inference_bug_report.md
workflows
amd-mi200.yml
cpu-torch-latest.yml
hpu-gaudi2-nightly.yml
hpu-gaudi2.yml
no-torch.yml
nv-a6000.yml
nv-accelerate-v100.yml
nv-ds-chat.yml
nv-flash-attn.yml
nv-human-eval.yml
nv-inference.yml
nv-lightning-v100.yml
nv-mii.yml
nv-nightly.yml
nv-torch-latest-v100.yml
nv-torch-nightly-v100.yml
nv-torch110-p40.yml
nv-transformers-v100.yml
python.yml
release.yml
xpu-compile.yml
xpu-max1100.yml
.pre-commit-config.yaml
CODEOWNERS
COMMITTERS.md
CONTRIBUTING.md
GOVERNANCE.md
README.md
SECURITY.md
accelerator
cpu_accelerator.py
hpu_accelerator.py
real_accelerator.py
sdaa_accelerator.py
benchmarks
README.md
blogs
deepspeed-chat
README.md
chinese
README.md
ds-chat-release-8-31
README.md
japanese
README.md
deepspeed-domino
README.md
images
design-base.png
design-column.png
design-hybrid.png
design-row.png
domino-hero.png
domino-logo.png
gpt3-scale.png
implement-bwd.png
implement-fwd.png
tp-ar.png
tp-comm-overhead.png
deepspeed-fastgen
2024-01-19
README.md
README.md
chinese
README.md
japanese
README.md
deepspeed-fp6/03-05-2024
README-Chinese.md
README.md
deepspeed-gds
README.md
chinese
README.md
japanese
README.md
deepspeed-offloadpp
README.md
deepspeed-triton
README.md
deepspeed-ucp
README.md
chinese
README.md
japanese
README.md
deepspeed-ulysses
README.md
chinese
README.md
japanese
README.md
deepspeed-visualchat/10-03-2023
README-Chinese.md
README-Japanese.md
README.md
deepspeed4science
chinese
README.md
japanese
README.md
intel-inference
README.md
ulysses-offload
README.md
media
image1.png
image2.png
image3.png
image4.png
image5.png
windows/08-2024
README.md
chinese
README.md
japanese
README.md
zeropp
chinese
README.md
japanese
README.md
build_win.bat
csrc
adagrad
cpu_adagrad.cpp
adam
cpu_adam_impl.cpp
aio
common
deepspeed_aio_common.cpp
deepspeed_aio_utils.cpp
deepspeed_aio_utils.h
py_lib
deepspeed_aio_op_desc.cpp
deepspeed_aio_op_desc.h
deepspeed_cpu_op.cpp
deepspeed_cpu_op.h
deepspeed_py_aio.cpp
deepspeed_py_io_handle.cpp
deepspeed_py_io_handle.h
py_ds_aio.cpp
py_test
ds_aio_handle.py
fp_quantizer
fp_quantize.cpp
gds/py_lib
deepspeed_gds_op.cpp
deepspeed_gds_op.h
deepspeed_py_gds_handle.cpp
deepspeed_py_gds_handle.h
py_ds_gds.cpp
includes
cpu_adagrad.h
cpu_adam.h
cpu_lion.h
quantization.h
quantization_utils.h
lion
cpu_lion_impl.cpp
quantization
pt_binding.cpp
quant_reduce.cu
swizzled_quantize.cu
xpu/adam
multi_tensor_apply.dp.hpp
deepspeed
__init__.py
autotuning
README.md
autotuner.py
checkpoint
deepspeed_checkpoint.py
ds_to_universal.py
universal_checkpoint.py
utils.py
zero_checkpoint.py
comm
comm.py
torch.py
inference
config.py
engine.py
v2
checkpoint
huggingface_engine.py
engine_factory.py
model_implementations
inference_policy_base.py
layer_container_base.py
launcher
multinode_runner.py
runner.py
linear
config.py
quantization.py
model_implementations/transformers
ds_transformer.py
module_inject
__init__.py
auto_tp.py
containers
bloom.py
features
meta_tensor.py
layers.py
load_checkpoint.py
replace_module.py
tp_shard.py
moe
sharded_moe.py
ops
fp_quantizer
__init__.py
fp8_gemm.py
fp8_gemm_triton.py
quantize.py
sparse_attention
bert_sparse_self_attention.py
transformer/inference/triton
matmul_ext.py
profiling/flops_profiler
README.md
profiler.py
runtime
activation_checkpointing
checkpointing.py
base_optimizer.py
bf16_optimizer.py
checkpoint_engine
nebula_checkpoint_engine.py
torch_checkpoint_engine.py
comm
coalesced_collectives.py
compressed.py
hccl.py
compiler.py
compression
cupy.py
config.py
domino
__init__.py
transformer.py
engine.py
fp16/onebit
lamb.py
zoadam.py
hybrid_engine.py
lr_schedules.py
pipe
module.py
swap_tensor
aio_config.py
constants.py
optimizer_utils.py
partitioned_optimizer_swapper.py
partitioned_param_swapper.py
pipelined_optimizer_swapper.py
utils.py
tensor_parallel
__init__.py
config.py
tp_manager.py
utils.py
zero
config.py
mics.py
parameter_offload.py
partition_parameters.py
partitioned_param_coordinator.py
stage3.py
stage_1_and_2.py
sequence
fpdt_layer.py
layer.py
utils
__init__.py
groups.py
logging.py
numa.py
nvtx.py
torch.py
z3_leaf_module.py
zero_to_fp32.py
docker
Dockerfile
docs
CNAME
_data
navigation.yml
_pages
config-json.md
deepspeed4science.md
inference.md
_posts
2020-05-19-bert-record.md
2020-05-28-fastest-bert-training.md
2020-09-08-sparse-attention-news.md
2020-09-09-ZeRO-Offload.md
2020-09-09-onebit-adam-news.md
2020-09-09-pipeline-parallelism.md
2020-09-09-sparse-attention.md
2020-10-28-progressive-layer-dropping-news.md
2021-11-15-autotuning.md
2021-12-09-deepspeed-moe-nlg.md
2022-07-26-deepspeed-azure.md
2022-09-10-zero-inference.md
2022-10-11-mii.md
2022-12-12-data-efficiency.md
2023-03-31-multi-modal.md
2023-04-24-deepspeed-chat-chinese.md
2023-04-24-deepspeed-chat-japanese.md
2023-04-24-deepspeed-chat.md
2023-06-22-zeropp-chinese.md
2023-06-22-zeropp-japanese.md
2023-08-24-ulysses-chinese.md
2023-08-24-ulysses-japanese.md
2023-08-24-ulysses.md
2023-09-12-ZeRO-Inference.md
2023-09-19-deepspeed4science-chinese.md
2023-09-19-deepspeed4science-japanese.md
2023-10-04-deepspeed-visualchat-chinese.md
2023-10-04-deepspeed-visualchat-japanese.md
2023-10-04-deepspeed-visualchat.md
2023-11-06-deepspeed-fastgen-chinese.md
2023-11-06-deepspeed-fastgen-japanese.md
2023-11-06-deepspeed-fastgen.md
_tutorials
accelerator-abstraction-interface.md
accelerator-setup-guide.md
advanced-install.md
automatic-tensor-parallelism.md
autotuning.md
azure.md
bert-finetuning.md
bert-pretraining.md
cifar-10.md
comms-logging.md
curriculum-learning.md
data-efficiency.md
deepnvme.md
domino.md
ds-sequence.md
flops-profiler.md
gan.md
getting-started.md
inference-tutorial.md
large-models-w-deepspeed.md
megatron.md
mixed_precision_zeropp.md
mixture-of-experts-inference.md
mixture-of-experts-nlg.md
mixture-of-experts.md
model-compression.md
monitor.md
onebit-adam.md
onebit-lamb.md
sparse-attention.md
ulysses-offload.md
universal-checkpointing.md
zero-offload.md
zero-one-adam.md
zero.md
zeropp.md
code-docs/source
index.rst
inference-engine.rst
initialize.rst
model-checkpointing.rst
moe.rst
monitor.rst
schedulers.rst
zero3.rst
contributing.md
index.md
examples
README.md
install.sh
op_builder
async_io.py
builder.py
fp_quantizer.py
hpu
builder.py
fp_quantizer.py
inference_core_ops.py
inference_cutlass_builder.py
ragged_ops.py
ragged_utils.py
sdaa
__init__.py
builder.py
cpu_adam.py
fused_adam.py
no_impl.py
sparse_attn.py
transformer_inference.py
release
release.sh
requirements
requirements-dev.txt
requirements.txt
setup.py
tests
model/BingBertSquad
run_BingBertSquad.sh
run_BingBertSquad_sanity.sh
run_tests.sh
test_e2e_squad.py
unit
alexnet_model.py
checkpoint
common.py
test_convert_checkpoint.py
test_universal_checkpoint.py
test_zero_optimizer.py
inference
test_inference.py
launcher
test_multinode_runner.py
linear
test_linear.py
test_quant_param.py
model_parallelism
test_autotp_training.py
test_configurable_parallel_mp.py
test_configurable_parallel_pp.py
moe
test_moe.py
ops
aio
test_aio.py
test_gds.py
fp_quantizer
test_fp8_gemm.py
test_fp_quant.py
transformer/inference
inference_test_utils.py
test_attention.py
test_bias_add.py
test_bias_geglu.py
test_bias_gelu.py
test_bias_relu.py
test_gelu.py
test_layer_norm.py
test_matmul.py
test_residual_add.py
test_softmax.py
pipe
test_pipe_module.py
runtime
activation_checkpointing
test_activation_checkpointing.py
comm
test_coalesced_collectives.py
test_no_sync_ctxt.py
zero
test_nvme_checkpointing.py
test_offload_states.py
test_zero.py
test_zero_context_ancestry.py
test_zero_leaf_module.py
test_zero_multiple_run.py
sequence_parallelism
test_ulysses.py
version.txt
Loading