DeepSpeed
a2984d0a - Merge pull request #1 from microsoft/master

Commit
4 years ago
Merge pull request #1 from microsoft/master Pulling upstream
Author
  • File
    .clang-format
  • File
    .gitignore
  • File
    .pre-commit-config.yaml
  • File
    CODEOWNERS
  • File
    DeepSpeedExamples
  • File
    Dockerfile
  • File
    README.md
  • File
    azure-pipelines.yml
  • File
    basic_install_test.py
  • bin
    • ds
  • csrc
    • File
      fused_lamb_cuda.cpp
    • File
      fused_lamb_cuda_kernel.cu
    • includes
      • File
        StopWatch.h
      • File
        Timer.h
      • File
        context.h
      • File
        cublas_wrappers.h
      • File
        custom_cuda_layers.h
      • File
        dropout.h
      • File
        ds_transformer_cuda.h
      • File
        feed_forward.h
      • File
        gelu.h
      • File
        gemm_test.h
      • File
        general_kernels.h
      • File
        normalize_layer.h
      • File
        softmax.h
      • File
        strided_batch_gemm.h
      • File
        type_shim.h
    • lamb
      • File
        fused_lamb_cuda.cpp
      • File
        fused_lamb_cuda_kernel.cu
    • sparse_attention
      • File
        utils.cpp
    • transformer
      • File
        cublas_wrappers.cu
      • File
        dropout_kernels.cu
      • File
        ds_transformer_cuda.cpp
      • File
        gelu_kernels.cu
      • File
        general_kernels.cu
      • File
        normalize_kernels.cu
      • File
        softmax_kernels.cu
      • File
        transform_kernels.cu
    • File
      type_shim.h
  • deepspeed
    • File
      __init__.py
    • launcher
      • File
        __init__.py
      • File
        launch.py
      • File
        runner.py
    • ops
      • File
        __init__.py
      • lamb
        • File
          __init__.py
        • File
          fused_lamb.py
      • sparse_attention
        • File
          __init__.py
        • File
          bert_sparse_self_attention.py
        • File
          matmul.py
        • File
          softmax.py
        • File
          sparse_attention_utils.py
        • File
          sparse_self_attention.py
        • File
          sparsity_config.py
        • trsrc
          • File
            __init__.py
          • matmul.tr
          • softmax_bwd.tr
          • softmax_fwd.tr
      • transformer
        • File
          __init__.py
        • File
          transformer.py
    • pt
      • File
        deepspeed_zero_optimizer.py
    • runtime
      • File
        __init__.py
      • activation_checkpointing
        • File
          __init__.py
        • File
          checkpointing.py
        • File
          config.py
      • File
        config.py
      • File
        config_utils.py
      • File
        constants.py
      • File
        csr_tensor.py
      • File
        dataloader.py
      • File
        engine.py
      • fp16
        • File
          __init__.py
        • File
          fused_optimizer.py
        • File
          loss_scaler.py
        • File
          unfused_optimizer.py
      • File
        lr_schedules.py
      • File
        utils.py
      • zero
        • File
          __init__.py
        • File
          config.py
        • File
          stage1.py
        • File
          stage2.py
        • File
          utils.py
    • utils
      • File
        __init__.py
      • File
        logging.py
      • File
        timer.py
  • docs
    • File
      Gemfile.lock
    • File
      README.md
    • File
      _config.yml
    • _data
      • File
        navigation.yml
    • _layouts
      • File
        news-home.html
    • _pages
      • File
        config-json.md
      • File
        features.md
    • _posts
      • File
        2020-03-17-zero-stage2.md
      • File
        2020-05-19-bert-record.md
      • File
        2020-05-19-press-release.md
      • File
        2020-05-19-zero-stage2.md
      • File
        2020-05-28-fastest-bert-training.md
      • File
        2020-07-24-deepspeed-webinar.md
      • File
        2020-08-07-webinar-on-demand.md
      • File
        2020-09-09-sparse-attention.md
    • _tutorials
      • File
        azure.md
      • File
        bert-finetuning.md
      • File
        bert-pretraining.md
      • File
        cifar-10.md
      • File
        getting-started.md
      • File
        megatron.md
      • File
        sparse_attention.md
      • File
        transformer_kernel.md
    • assets/images
      • File
        deepspeed-speedup.png
      • File
        deepspeed-throughput-seq128.png
      • File
        deepspeed-throughput-seq512.png
      • File
        end-to-end-bert-training.PNG
      • File
        layernorm_animation.gif
      • File
        layernorm_deepspeed.gif
      • File
        layernorm_ds.png
      • File
        layernorm_pytorch.gif
      • File
        layernorm_torch.png
      • File
        qkv_fusion.png
      • File
        sa_backward_pass.png
      • File
        sa_bert_base_time_result.png
      • File
        sa_bert_large_time_result.png
      • File
        sa_fixed_sparsity_structure.png
      • File
        sa_forward_pass.png
      • File
        sa_gpt2_time_result.png
      • File
        sa_long_document_comprehension_result.png
      • File
        sa_maximum_sequence_runnable_on_bert.png
      • File
        sa_variable_sparsity_structure.png
      • File
        softmax_animation.gif
      • File
        softmax_deepspeed.gif
      • File
        softmax_ds.png
      • File
        softmax_pytorch.gif
      • File
        softmax_torch.png
      • File
        transformer_kernel_perf.png
      • File
        transformer_kernel_perf_seq128.PNG
      • File
        transformer_kernel_perf_seq512.PNG
      • File
        transformer_preln_arch.png
      • File
        variable_sparsity_pattern.png
      • File
        webinar-aug2020.png
      • File
        zero-full.png
    • code-docs
      • File
        requirements.readthedocs.txt
      • source
        • File
          activation-checkpointing.rst
        • File
          conf.py
        • File
          index.rst
        • File
          initialize.rst
        • File
          kernel.rst
        • File
          model-checkpointing.rst
        • File
          training.rst
    • File
      contributing.md
    • File
      index.md
  • File
    install.sh
  • File
    requirements.txt
  • requirements
    • File
      requirements-dev.txt
    • File
      requirements-sparse-attn.txt
    • File
      requirements.txt
  • File
    setup.py
  • tests
    • model
      • BingBertSquad
        • File
          BingBertSquad_run_func_test.py
        • File
          deepspeed_bsz24_fp16_config.json
        • File
          deepspeed_bsz24_fp16_zero2_config.json
        • File
          run_BingBertSquad.sh
      • Megatron_GPT2
        • File
          ds_config_func_bs4_zero1.json
        • File
          ds_config_func_bs4_zero2.json
        • File
          ds_config_func_bs8_no_zero.json
        • File
          ds_config_func_bs8_zero0_gas3.json
        • File
          ds_config_func_bs8_zero1.json
        • File
          ds_config_func_bs8_zero2.json
        • File
          ds_config_func_bs8_zero2_gas3.json
        • File
          ds_config_func_scheduler.json
        • File
          ds_config_perf_bs16.json
        • File
          ds_config_perf_bs32.json
        • File
          ds_config_perf_bs8.json
        • File
          ds_gpt2_test.sh
        • File
          run_checkpoint_test.py
        • File
          run_func_test.py
        • File
          test_common.py
      • File
        run_sanity_check.py
    • small_model_debugging
      • File
        test_model.py
    • unit
      • File
        common.py
      • File
        ds_batch_config.json
      • File
        modeling.py
      • File
        modelingpreln.py
      • File
        simple_model.py
      • File
        test_checkpointing.py
      • File
        test_config.py
      • File
        test_csr.py
      • File
        test_cuda_backward.py
      • File
        test_cuda_forward.py
      • File
        test_ds_config.py
      • File
        test_fp16.py
      • File
        test_lr_schedulers.py
      • File
        test_multi_output_model.py
      • File
        test_run.py
      • File
        test_sparse_attention.py