DeepSpeed
acab9d63
- Merge branch 'master' into loadams/update-container-a6000
Go
Login via GitHub
Home
Pricing
FAQ
Install
Login
via GitHub
Commit
View On
GitHub
Hide Minimap (CTRL+M)
Commit
1 year ago
Merge branch 'master' into loadams/update-container-a6000
References
loadams/update-container-a6000
Author
loadams
Parents
2408b5df
731fd682
Files
157
.github/workflows
cpu-inference.yml
cpu-torch-latest.yml
hpu-gaudi2.yml
nv-a6000.yml
nv-accelerate-v100.yml
nv-ds-chat.yml
nv-inference.yml
nv-mii.yml
nv-nightly.yml
nv-pre-compile-ops.yml
nv-torch-latest-v100.yml
nv-torch-nightly-v100.yml
nv-transformers-v100.yml
README.md
accelerator
cpu_accelerator.py
real_accelerator.py
blogs
deepspeed-fp6/03-05-2024
README-Chinese.md
README.md
assets
fp6-design.png
hero-figure.png
servingllm
100-1000.png
100-250.png
100-500.png
deepspeed-ulysses
README.md
csrc
cpu/comm
ccl.cpp
shm.cpp
shm.h
fp_quantizer
includes
context.h
quantize.h
quantize.cpp
quantize.cu
includes
memory_access_utils.h
reduction_utils.h
deepspeed
__init__.py
autotuning
utils.py
checkpoint
constants.py
ds_to_universal.py
reshape_utils.py
universal_checkpoint.py
zero_checkpoint.py
elasticity
elastic_agent.py
env_report.py
git_version_info.py
inference/v2
config_v2.py
engine_factory.py
kernels/core_ops
__init__.py
blas_kernels
blas_utils.h
core_ops.cpp
cuda_linear
__init__.py
cuda_linear.py
cuda_linear_kernels.cpp
cuda_linear_kernels.h
fp6_linear.cu
fp6_linear.cuh
include
configs.h
kernel_matmul.cuh
kernel_reduction.cuh
ptx_cp.async.cuh
ptx_mma.cuh
utils_core.cuh
utils_gmem.cuh
utils_paralleldequant.cuh
weight_prepacking.h
gated_activations
gated_activation_kernels_cuda.cu
model_implementations
__init__.py
flat_model_helpers.py
qwen_v2
__init__.py
container.py
model.py
policy.py
modules
heuristics.py
implementations/linear
__init__.py
quantized_linear.py
ragged/csrc
ragged_ops.cpp
launcher
multinode_runner.py
module_inject
auto_tp.py
containers
bloom.py
fusedqkv_utils.py
moe
layer.py
sharded_moe.py
utils.py
ops
__init__.py
fp_quantizer
__init__.py
quantize.py
transformer/inference
config.py
ds_attention.py
triton
matmul_ext.py
runtime
__init__.py
base_optimizer.py
bf16_optimizer.py
comm
coalesced_collectives.py
config.py
engine.py
fp16
fused_optimizer.py
unfused_optimizer.py
utils.py
zero
partition_parameters.py
stage3.py
stage_1_and_2.py
utils
__init__.py
tensor_fragment.py
docs
_tutorials
accelerator-abstraction-interface.md
index.md
op_builder
all_ops.py
builder.py
cpu
comm.py
fp_quantizer.py
inference_core_ops.py
xpu
builder.py
requirements
requirements-dev.txt
requirements-inf.txt
setup.py
tests/unit
checkpoint
common.py
test_latest_checkpoint.py
test_lr_scheduler.py
test_moe_checkpoint.py
test_other_optimizer.py
test_pipeline.py
test_universal_checkpoint.py
test_zero_optimizer.py
common.py
compression
test_dequantization.py
elasticity
test_elastic.py
inference
test_inference.py
v2
modules
test_quantized_linear_module.py
ragged
test_manager_configs.py
launcher
test_user_args.py
moe
test_moe.py
multi_output_model.py
ops
accelerators
test_accelerator_backward.py
fp_quantizer
test_fp_quant.py
runtime
activation_checkpointing
test_activation_checkpointing.py
comm
test_coalesced_collectives.py
compile
test_compile_wrapper.py
test_compile_zero.py
test_load_config.py
half_precision
onebit
test_onebit.py
test_bf16.py
test_dynamic_loss_scale.py
test_fp16.py
sparse_tensor
test_averaging_sparse_gradients.py
test_sparse_grads.py
test_data_efficiency.py
test_ds_config_dict.py
test_ds_initialize.py
test_multi_output_model.py
test_mup_optimizers.py
test_pld.py
zero
test_ignore_unused_parameters.py
test_zero.py
test_zero_context.py
test_zero_context_return.py
test_zero_leaf_module.py
test_zero_tensor_fragment.py
simple_model.py
util.py
version.txt
Loading