DeepSpeed
acab9d63 - Merge branch 'master' into loadams/update-container-a6000

Commit

View On GitHub

Commit

1 year ago

Merge branch 'master' into loadams/update-container-a6000

References

loadams/update-container-a6000

Author

loadams

Parents

2408b5df

731fd682

Files (157)

.github/workflows
- cpu-inference.yml
- cpu-torch-latest.yml
- hpu-gaudi2.yml
- nv-a6000.yml
- nv-accelerate-v100.yml
- nv-ds-chat.yml
- nv-inference.yml
- nv-mii.yml
- nv-nightly.yml
- nv-pre-compile-ops.yml
- nv-torch-latest-v100.yml
- nv-torch-nightly-v100.yml
- nv-transformers-v100.yml
README.md
accelerator
- cpu_accelerator.py
- real_accelerator.py
blogs
- deepspeed-fp6/03-05-2024
  - README-Chinese.md
  - README.md
  - assets
    - fp6-design.png
    - hero-figure.png
    - servingllm
      - 100-1000.png
      - 100-250.png
      - 100-500.png
- deepspeed-ulysses
  - README.md
csrc
- cpu/comm
  - ccl.cpp
  - shm.cpp
  - shm.h
- fp_quantizer
  - includes
    - context.h
    - quantize.h
  - quantize.cpp
  - quantize.cu
- includes
  - memory_access_utils.h
  - reduction_utils.h
deepspeed
- __init__.py
- autotuning
  - utils.py
- checkpoint
  - constants.py
  - ds_to_universal.py
  - reshape_utils.py
  - universal_checkpoint.py
  - zero_checkpoint.py
- elasticity
  - elastic_agent.py
- env_report.py
- git_version_info.py
- inference/v2
  - config_v2.py
  - engine_factory.py
  - kernels/core_ops
    - __init__.py
    - blas_kernels
      - blas_utils.h
    - core_ops.cpp
    - cuda_linear
      - __init__.py
      - cuda_linear.py
      - cuda_linear_kernels.cpp
      - cuda_linear_kernels.h
      - fp6_linear.cu
      - fp6_linear.cuh
      - include
        - configs.h
        - kernel_matmul.cuh
        - kernel_reduction.cuh
        - ptx_cp.async.cuh
        - ptx_mma.cuh
        - utils_core.cuh
        - utils_gmem.cuh
        - utils_paralleldequant.cuh
        - weight_prepacking.h
    - gated_activations
      - gated_activation_kernels_cuda.cu
  - model_implementations
    - __init__.py
    - flat_model_helpers.py
    - qwen_v2
      - __init__.py
      - container.py
      - model.py
      - policy.py
  - modules
    - heuristics.py
    - implementations/linear
      - __init__.py
      - quantized_linear.py
  - ragged/csrc
    - ragged_ops.cpp
- launcher
  - multinode_runner.py
- module_inject
  - auto_tp.py
  - containers
    - bloom.py
  - fusedqkv_utils.py
- moe
  - layer.py
  - sharded_moe.py
  - utils.py
- ops
  - __init__.py
  - fp_quantizer
    - __init__.py
    - quantize.py
  - transformer/inference
    - config.py
    - ds_attention.py
    - triton
      - matmul_ext.py
- runtime
  - __init__.py
  - base_optimizer.py
  - bf16_optimizer.py
  - comm
    - coalesced_collectives.py
  - config.py
  - engine.py
  - fp16
    - fused_optimizer.py
    - unfused_optimizer.py
  - utils.py
  - zero
    - partition_parameters.py
    - stage3.py
    - stage_1_and_2.py
- utils
  - __init__.py
  - tensor_fragment.py
docs
- _tutorials
  - accelerator-abstraction-interface.md
- index.md
op_builder
- all_ops.py
- builder.py
- cpu
  - comm.py
- fp_quantizer.py
- inference_core_ops.py
- xpu
  - builder.py
requirements
- requirements-dev.txt
- requirements-inf.txt
setup.py
tests/unit
- checkpoint
  - common.py
  - test_latest_checkpoint.py
  - test_lr_scheduler.py
  - test_moe_checkpoint.py
  - test_other_optimizer.py
  - test_pipeline.py
  - test_universal_checkpoint.py
  - test_zero_optimizer.py
- common.py
- compression
  - test_dequantization.py
- elasticity
  - test_elastic.py
- inference
  - test_inference.py
  - v2
    - modules
      - test_quantized_linear_module.py
    - ragged
      - test_manager_configs.py
- launcher
  - test_user_args.py
- moe
  - test_moe.py
- multi_output_model.py
- ops
  - accelerators
    - test_accelerator_backward.py
  - fp_quantizer
    - test_fp_quant.py
- runtime
  - activation_checkpointing
    - test_activation_checkpointing.py
  - comm
    - test_coalesced_collectives.py
  - compile
    - test_compile_wrapper.py
    - test_compile_zero.py
    - test_load_config.py
  - half_precision
    - onebit
      - test_onebit.py
    - test_bf16.py
    - test_dynamic_loss_scale.py
    - test_fp16.py
  - sparse_tensor
    - test_averaging_sparse_gradients.py
    - test_sparse_grads.py
  - test_data_efficiency.py
  - test_ds_config_dict.py
  - test_ds_initialize.py
  - test_multi_output_model.py
  - test_mup_optimizers.py
  - test_pld.py
  - zero
    - test_ignore_unused_parameters.py
    - test_zero.py
    - test_zero_context.py
    - test_zero_context_return.py
    - test_zero_leaf_module.py
    - test_zero_tensor_fragment.py
- simple_model.py
- util.py
version.txt

DeepSpeed acab9d63 - Merge branch 'master' into loadams/update-container-a6000

DeepSpeed
acab9d63 - Merge branch 'master' into loadams/update-container-a6000