DeepSpeed
8998da04 - Merge branch 'master' into tohtana/log_run_tests

Commit
98 days ago
Merge branch 'master' into tohtana/log_run_tests
Author
  • .github
    • ISSUE_TEMPLATE
      • File
        deepspeed_chat_bug_report.md
      • File
        inference_bug_report.md
    • workflows
      • File
        amd-mi200.yml
      • File
        cpu-torch-latest.yml
      • File
        hpu-gaudi2-nightly.yml
      • File
        hpu-gaudi2.yml
      • File
        no-torch.yml
      • File
        nv-a6000.yml
      • File
        nv-accelerate-v100.yml
      • File
        nv-ds-chat.yml
      • File
        nv-flash-attn.yml
      • File
        nv-human-eval.yml
      • File
        nv-inference.yml
      • File
        nv-lightning-v100.yml
      • File
        nv-mii.yml
      • File
        nv-nightly.yml
      • File
        nv-torch-latest-v100.yml
      • File
        nv-torch-nightly-v100.yml
      • File
        nv-torch110-p40.yml
      • File
        nv-transformers-v100.yml
      • File
        python.yml
      • File
        release.yml
      • File
        xpu-compile.yml
      • File
        xpu-max1100.yml
  • File
    .pre-commit-config.yaml
  • File
    CODEOWNERS
  • File
    COMMITTERS.md
  • File
    CONTRIBUTING.md
  • File
    GOVERNANCE.md
  • File
    README.md
  • File
    SECURITY.md
  • accelerator
    • File
      cpu_accelerator.py
    • File
      hpu_accelerator.py
    • File
      real_accelerator.py
    • File
      sdaa_accelerator.py
  • benchmarks
    • File
      README.md
  • blogs
    • deepspeed-chat
      • File
        README.md
      • chinese
        • File
          README.md
      • ds-chat-release-8-31
        • File
          README.md
      • japanese
        • File
          README.md
    • deepspeed-domino
      • File
        README.md
      • images
        • File
          design-base.png
        • File
          design-column.png
        • File
          design-hybrid.png
        • File
          design-row.png
        • File
          domino-hero.png
        • File
          domino-logo.png
        • File
          gpt3-scale.png
        • File
          implement-bwd.png
        • File
          implement-fwd.png
        • File
          tp-ar.png
        • File
          tp-comm-overhead.png
    • deepspeed-fastgen
      • 2024-01-19
        • File
          README.md
      • File
        README.md
      • chinese
        • File
          README.md
      • japanese
        • File
          README.md
    • deepspeed-fp6/03-05-2024
      • File
        README-Chinese.md
      • File
        README.md
    • deepspeed-gds
      • File
        README.md
      • chinese
        • File
          README.md
      • japanese
        • File
          README.md
    • deepspeed-offloadpp
      • File
        README.md
    • deepspeed-triton
      • File
        README.md
    • deepspeed-ucp
      • File
        README.md
      • chinese
        • File
          README.md
      • japanese
        • File
          README.md
    • deepspeed-ulysses
      • File
        README.md
      • chinese
        • File
          README.md
      • japanese
        • File
          README.md
    • deepspeed-visualchat/10-03-2023
      • File
        README-Chinese.md
      • File
        README-Japanese.md
      • File
        README.md
    • deepspeed4science
      • chinese
        • File
          README.md
      • japanese
        • File
          README.md
    • intel-inference
      • File
        README.md
    • ulysses-offload
      • File
        README.md
      • media
        • File
          image1.png
        • File
          image2.png
        • File
          image3.png
        • File
          image4.png
        • File
          image5.png
    • windows/08-2024
      • File
        README.md
      • chinese
        • File
          README.md
      • japanese
        • File
          README.md
    • zeropp
      • chinese
        • File
          README.md
      • japanese
        • File
          README.md
  • File
    build_win.bat
  • csrc
    • adagrad
      • File
        cpu_adagrad.cpp
    • adam
      • File
        cpu_adam_impl.cpp
    • aio
      • common
        • File
          deepspeed_aio_common.cpp
        • File
          deepspeed_aio_utils.cpp
        • File
          deepspeed_aio_utils.h
      • py_lib
        • File
          deepspeed_aio_op_desc.cpp
        • File
          deepspeed_aio_op_desc.h
        • File
          deepspeed_cpu_op.cpp
        • File
          deepspeed_cpu_op.h
        • File
          deepspeed_py_aio.cpp
        • File
          deepspeed_py_io_handle.cpp
        • File
          deepspeed_py_io_handle.h
        • File
          py_ds_aio.cpp
      • py_test
        • File
          ds_aio_handle.py
    • fp_quantizer
      • File
        fp_quantize.cpp
    • gds/py_lib
      • File
        deepspeed_gds_op.cpp
      • File
        deepspeed_gds_op.h
      • File
        deepspeed_py_gds_handle.cpp
      • File
        deepspeed_py_gds_handle.h
      • File
        py_ds_gds.cpp
    • includes
      • File
        cpu_adagrad.h
      • File
        cpu_adam.h
      • File
        cpu_lion.h
      • File
        quantization.h
      • File
        quantization_utils.h
    • lion
      • File
        cpu_lion_impl.cpp
    • quantization
      • File
        pt_binding.cpp
      • File
        quant_reduce.cu
      • File
        swizzled_quantize.cu
    • xpu/adam
      • File
        multi_tensor_apply.dp.hpp
  • deepspeed
    • File
      __init__.py
    • autotuning
      • File
        README.md
      • File
        autotuner.py
    • checkpoint
      • File
        deepspeed_checkpoint.py
      • File
        ds_to_universal.py
      • File
        universal_checkpoint.py
      • File
        utils.py
      • File
        zero_checkpoint.py
    • comm
      • File
        comm.py
      • File
        torch.py
    • inference
      • File
        config.py
      • File
        engine.py
      • v2
        • checkpoint
          • File
            huggingface_engine.py
        • File
          engine_factory.py
        • model_implementations
          • File
            inference_policy_base.py
          • File
            layer_container_base.py
    • launcher
      • File
        multinode_runner.py
      • File
        runner.py
    • linear
      • File
        config.py
      • File
        quantization.py
    • model_implementations/transformers
      • File
        ds_transformer.py
    • module_inject
      • File
        __init__.py
      • File
        auto_tp.py
      • containers
        • File
          bloom.py
        • features
          • File
            meta_tensor.py
      • File
        layers.py
      • File
        load_checkpoint.py
      • File
        replace_module.py
      • File
        tp_shard.py
    • moe
      • File
        sharded_moe.py
    • ops
      • fp_quantizer
        • File
          __init__.py
        • File
          fp8_gemm.py
        • File
          fp8_gemm_triton.py
        • File
          quantize.py
      • sparse_attention
        • File
          bert_sparse_self_attention.py
      • transformer/inference/triton
        • File
          matmul_ext.py
    • profiling/flops_profiler
      • File
        README.md
      • File
        profiler.py
    • runtime
      • activation_checkpointing
        • File
          checkpointing.py
      • File
        base_optimizer.py
      • File
        bf16_optimizer.py
      • checkpoint_engine
        • File
          nebula_checkpoint_engine.py
        • File
          torch_checkpoint_engine.py
      • comm
        • File
          coalesced_collectives.py
        • File
          compressed.py
        • File
          hccl.py
      • File
        compiler.py
      • compression
        • File
          cupy.py
      • File
        config.py
      • domino
        • File
          __init__.py
        • File
          transformer.py
      • File
        engine.py
      • fp16/onebit
        • File
          lamb.py
        • File
          zoadam.py
      • File
        hybrid_engine.py
      • File
        lr_schedules.py
      • pipe
        • File
          module.py
      • swap_tensor
        • File
          aio_config.py
        • File
          constants.py
        • File
          optimizer_utils.py
        • File
          partitioned_optimizer_swapper.py
        • File
          partitioned_param_swapper.py
        • File
          pipelined_optimizer_swapper.py
        • File
          utils.py
      • tensor_parallel
        • File
          __init__.py
        • File
          config.py
        • File
          tp_manager.py
      • File
        utils.py
      • zero
        • File
          config.py
        • File
          mics.py
        • File
          parameter_offload.py
        • File
          partition_parameters.py
        • File
          partitioned_param_coordinator.py
        • File
          stage3.py
        • File
          stage_1_and_2.py
    • sequence
      • File
        fpdt_layer.py
      • File
        layer.py
    • utils
      • File
        __init__.py
      • File
        groups.py
      • File
        logging.py
      • File
        numa.py
      • File
        nvtx.py
      • File
        torch.py
      • File
        z3_leaf_module.py
      • File
        zero_to_fp32.py
  • docker
    • File
      Dockerfile
  • docs
    • File
      CNAME
    • _data
      • File
        navigation.yml
    • _pages
      • File
        config-json.md
      • File
        deepspeed4science.md
      • File
        inference.md
    • _posts
      • File
        2020-05-19-bert-record.md
      • File
        2020-05-28-fastest-bert-training.md
      • File
        2020-09-08-sparse-attention-news.md
      • File
        2020-09-09-ZeRO-Offload.md
      • File
        2020-09-09-onebit-adam-news.md
      • File
        2020-09-09-pipeline-parallelism.md
      • File
        2020-09-09-sparse-attention.md
      • File
        2020-10-28-progressive-layer-dropping-news.md
      • File
        2021-11-15-autotuning.md
      • File
        2021-12-09-deepspeed-moe-nlg.md
      • File
        2022-07-26-deepspeed-azure.md
      • File
        2022-09-10-zero-inference.md
      • File
        2022-10-11-mii.md
      • File
        2022-12-12-data-efficiency.md
      • File
        2023-03-31-multi-modal.md
      • File
        2023-04-24-deepspeed-chat-chinese.md
      • File
        2023-04-24-deepspeed-chat-japanese.md
      • File
        2023-04-24-deepspeed-chat.md
      • File
        2023-06-22-zeropp-chinese.md
      • File
        2023-06-22-zeropp-japanese.md
      • File
        2023-08-24-ulysses-chinese.md
      • File
        2023-08-24-ulysses-japanese.md
      • File
        2023-08-24-ulysses.md
      • File
        2023-09-12-ZeRO-Inference.md
      • File
        2023-09-19-deepspeed4science-chinese.md
      • File
        2023-09-19-deepspeed4science-japanese.md
      • File
        2023-10-04-deepspeed-visualchat-chinese.md
      • File
        2023-10-04-deepspeed-visualchat-japanese.md
      • File
        2023-10-04-deepspeed-visualchat.md
      • File
        2023-11-06-deepspeed-fastgen-chinese.md
      • File
        2023-11-06-deepspeed-fastgen-japanese.md
      • File
        2023-11-06-deepspeed-fastgen.md
    • _tutorials
      • File
        accelerator-abstraction-interface.md
      • File
        accelerator-setup-guide.md
      • File
        advanced-install.md
      • File
        automatic-tensor-parallelism.md
      • File
        autotuning.md
      • File
        azure.md
      • File
        bert-finetuning.md
      • File
        bert-pretraining.md
      • File
        cifar-10.md
      • File
        comms-logging.md
      • File
        curriculum-learning.md
      • File
        data-efficiency.md
      • File
        deepnvme.md
      • File
        domino.md
      • File
        ds-sequence.md
      • File
        flops-profiler.md
      • File
        gan.md
      • File
        getting-started.md
      • File
        inference-tutorial.md
      • File
        large-models-w-deepspeed.md
      • File
        megatron.md
      • File
        mixed_precision_zeropp.md
      • File
        mixture-of-experts-inference.md
      • File
        mixture-of-experts-nlg.md
      • File
        mixture-of-experts.md
      • File
        model-compression.md
      • File
        monitor.md
      • File
        onebit-adam.md
      • File
        onebit-lamb.md
      • File
        sparse-attention.md
      • File
        ulysses-offload.md
      • File
        universal-checkpointing.md
      • File
        zero-offload.md
      • File
        zero-one-adam.md
      • File
        zero.md
      • File
        zeropp.md
    • code-docs/source
      • File
        index.rst
      • File
        inference-engine.rst
      • File
        initialize.rst
      • File
        model-checkpointing.rst
      • File
        moe.rst
      • File
        monitor.rst
      • File
        schedulers.rst
      • File
        zero3.rst
    • File
      contributing.md
    • File
      index.md
  • examples
    • File
      README.md
  • File
    install.sh
  • op_builder
    • File
      async_io.py
    • File
      builder.py
    • File
      fp_quantizer.py
    • hpu
      • File
        builder.py
      • File
        fp_quantizer.py
    • File
      inference_core_ops.py
    • File
      inference_cutlass_builder.py
    • File
      ragged_ops.py
    • File
      ragged_utils.py
    • sdaa
      • File
        __init__.py
      • File
        builder.py
      • File
        cpu_adam.py
      • File
        fused_adam.py
      • File
        no_impl.py
    • File
      sparse_attn.py
    • File
      transformer_inference.py
  • release
    • File
      release.sh
  • requirements
    • File
      requirements-dev.txt
    • File
      requirements.txt
  • File
    setup.py
  • tests
    • model/BingBertSquad
      • File
        run_BingBertSquad.sh
      • File
        run_BingBertSquad_sanity.sh
      • File
        run_tests.sh
      • File
        test_e2e_squad.py
    • unit
      • File
        alexnet_model.py
      • checkpoint
        • File
          common.py
        • File
          test_convert_checkpoint.py
        • File
          test_universal_checkpoint.py
        • File
          test_zero_optimizer.py
      • inference
        • File
          test_inference.py
      • launcher
        • File
          test_multinode_runner.py
      • linear
        • File
          test_linear.py
        • File
          test_quant_param.py
      • model_parallelism
        • File
          test_autotp_training.py
        • File
          test_configurable_parallel_mp.py
        • File
          test_configurable_parallel_pp.py
      • moe
        • File
          test_moe.py
      • ops
        • aio
          • File
            test_aio.py
          • File
            test_gds.py
        • fp_quantizer
          • File
            test_fp8_gemm.py
          • File
            test_fp_quant.py
        • transformer/inference
          • File
            inference_test_utils.py
          • File
            test_attention.py
          • File
            test_bias_add.py
          • File
            test_bias_geglu.py
          • File
            test_bias_gelu.py
          • File
            test_bias_relu.py
          • File
            test_gelu.py
          • File
            test_layer_norm.py
          • File
            test_matmul.py
          • File
            test_residual_add.py
          • File
            test_softmax.py
      • pipe
        • File
          test_pipe_module.py
      • runtime
        • activation_checkpointing
          • File
            test_activation_checkpointing.py
        • comm
          • File
            test_coalesced_collectives.py
        • File
          test_no_sync_ctxt.py
        • zero
          • File
            test_nvme_checkpointing.py
          • File
            test_offload_states.py
          • File
            test_zero.py
          • File
            test_zero_context_ancestry.py
          • File
            test_zero_leaf_module.py
          • File
            test_zero_multiple_run.py
      • sequence_parallelism
        • File
          test_ulysses.py
  • File
    version.txt