DeepSpeed
acab9d63 - Merge branch 'master' into loadams/update-container-a6000

Comment changes are shown / Comment changes are hidden
Commit
1 year ago
Merge branch 'master' into loadams/update-container-a6000
Author
  • .github/workflows
    • File
      cpu-inference.yml
    • File
      cpu-torch-latest.yml
    • File
      hpu-gaudi2.yml
    • File
      nv-a6000.yml
    • File
      nv-accelerate-v100.yml
    • File
      nv-ds-chat.yml
    • File
      nv-inference.yml
    • File
      nv-mii.yml
    • File
      nv-nightly.yml
    • File
      nv-pre-compile-ops.yml
    • File
      nv-torch-latest-v100.yml
    • File
      nv-torch-nightly-v100.yml
    • File
      nv-transformers-v100.yml
  • File
    README.md
  • accelerator
    • File
      cpu_accelerator.py
    • File
      real_accelerator.py
  • blogs
    • deepspeed-fp6/03-05-2024
      • File
        README-Chinese.md
      • File
        README.md
      • assets
        • File
          fp6-design.png
        • File
          hero-figure.png
        • servingllm
          • File
            100-1000.png
          • File
            100-250.png
          • File
            100-500.png
    • deepspeed-ulysses
      • File
        README.md
  • csrc
    • cpu/comm
      • File
        ccl.cpp
      • File
        shm.cpp
      • File
        shm.h
    • fp_quantizer
      • includes
        • File
          context.h
        • File
          quantize.h
      • File
        quantize.cpp
      • File
        quantize.cu
    • includes
      • File
        memory_access_utils.h
      • File
        reduction_utils.h
  • deepspeed
    • File
      __init__.py
    • autotuning
      • File
        utils.py
    • checkpoint
      • File
        constants.py
      • File
        ds_to_universal.py
      • File
        reshape_utils.py
      • File
        universal_checkpoint.py
      • File
        zero_checkpoint.py
    • elasticity
      • File
        elastic_agent.py
    • File
      env_report.py
    • File
      git_version_info.py
    • inference/v2
      • File
        config_v2.py
      • File
        engine_factory.py
      • kernels/core_ops
        • File
          __init__.py
        • blas_kernels
          • File
            blas_utils.h
        • File
          core_ops.cpp
        • cuda_linear
          • File
            __init__.py
          • File
            cuda_linear.py
          • File
            cuda_linear_kernels.cpp
          • File
            cuda_linear_kernels.h
          • File
            fp6_linear.cu
          • File
            fp6_linear.cuh
          • include
            • File
              configs.h
            • File
              kernel_matmul.cuh
            • File
              kernel_reduction.cuh
            • File
              ptx_cp.async.cuh
            • File
              ptx_mma.cuh
            • File
              utils_core.cuh
            • File
              utils_gmem.cuh
            • File
              utils_paralleldequant.cuh
            • File
              weight_prepacking.h
        • gated_activations
          • File
            gated_activation_kernels_cuda.cu
      • model_implementations
        • File
          __init__.py
        • File
          flat_model_helpers.py
        • qwen_v2
          • File
            __init__.py
          • File
            container.py
          • File
            model.py
          • File
            policy.py
      • modules
        • File
          heuristics.py
        • implementations/linear
          • File
            __init__.py
          • File
            quantized_linear.py
      • ragged/csrc
        • File
          ragged_ops.cpp
    • launcher
      • File
        multinode_runner.py
    • module_inject
      • File
        auto_tp.py
      • containers
        • File
          bloom.py
      • File
        fusedqkv_utils.py
    • moe
      • File
        layer.py
      • File
        sharded_moe.py
      • File
        utils.py
    • ops
      • File
        __init__.py
      • fp_quantizer
        • File
          __init__.py
        • File
          quantize.py
      • transformer/inference
        • File
          config.py
        • File
          ds_attention.py
        • triton
          • File
            matmul_ext.py
    • runtime
      • File
        __init__.py
      • File
        base_optimizer.py
      • File
        bf16_optimizer.py
      • comm
        • File
          coalesced_collectives.py
      • File
        config.py
      • File
        engine.py
      • fp16
        • File
          fused_optimizer.py
        • File
          unfused_optimizer.py
      • File
        utils.py
      • zero
        • File
          partition_parameters.py
        • File
          stage3.py
        • File
          stage_1_and_2.py
    • utils
      • File
        __init__.py
      • File
        tensor_fragment.py
  • docs
    • _tutorials
      • File
        accelerator-abstraction-interface.md
    • File
      index.md
  • op_builder
    • File
      all_ops.py
    • File
      builder.py
    • cpu
      • File
        comm.py
    • File
      fp_quantizer.py
    • File
      inference_core_ops.py
    • xpu
      • File
        builder.py
  • requirements
    • File
      requirements-dev.txt
    • File
      requirements-inf.txt
  • File
    setup.py
  • tests/unit
    • checkpoint
      • File
        common.py
      • File
        test_latest_checkpoint.py
      • File
        test_lr_scheduler.py
      • File
        test_moe_checkpoint.py
      • File
        test_other_optimizer.py
      • File
        test_pipeline.py
      • File
        test_universal_checkpoint.py
      • File
        test_zero_optimizer.py
    • File
      common.py
    • compression
      • File
        test_dequantization.py
    • elasticity
      • File
        test_elastic.py
    • inference
      • File
        test_inference.py
      • v2
        • modules
          • File
            test_quantized_linear_module.py
        • ragged
          • File
            test_manager_configs.py
    • launcher
      • File
        test_user_args.py
    • moe
      • File
        test_moe.py
    • File
      multi_output_model.py
    • ops
      • accelerators
        • File
          test_accelerator_backward.py
      • fp_quantizer
        • File
          test_fp_quant.py
    • runtime
      • activation_checkpointing
        • File
          test_activation_checkpointing.py
      • comm
        • File
          test_coalesced_collectives.py
      • compile
        • File
          test_compile_wrapper.py
        • File
          test_compile_zero.py
        • File
          test_load_config.py
      • half_precision
        • onebit
          • File
            test_onebit.py
        • File
          test_bf16.py
        • File
          test_dynamic_loss_scale.py
        • File
          test_fp16.py
      • sparse_tensor
        • File
          test_averaging_sparse_gradients.py
        • File
          test_sparse_grads.py
      • File
        test_data_efficiency.py
      • File
        test_ds_config_dict.py
      • File
        test_ds_initialize.py
      • File
        test_multi_output_model.py
      • File
        test_mup_optimizers.py
      • File
        test_pld.py
      • zero
        • File
          test_ignore_unused_parameters.py
        • File
          test_zero.py
        • File
          test_zero_context.py
        • File
          test_zero_context_return.py
        • File
          test_zero_leaf_module.py
        • File
          test_zero_tensor_fragment.py
    • File
      simple_model.py
    • File
      util.py
  • File
    version.txt