DeepSpeed
e2ef102f - Merge branch 'add-llama2-support' into quantization-refresh

Comment changes are shown / Comment changes are hidden
Commit
1 year ago
  • .github/workflows
    • File
      nv-pre-compile-ops.yml
  • File
    README.md
  • csrc
    • deepspeed4science/evoformer_attn
      • File
        attention.cpp
      • File
        attention.cu
      • File
        attention_back.cu
      • epilogue
        • File
          epilogue_grad_bias.h
        • File
          epilogue_pipelined.h
        • File
          epilogue_rescale_output.h
        • File
          epilogue_thread_apply_logsumexp.h
      • gemm
        • File
          custom_mma.h
        • File
          custom_mma_base.h
        • File
          custom_mma_multistage.h
        • File
          custom_mma_pipelined.h
        • File
          find_default_mma.h
        • File
          mma_accum_lambda_iterator.h
        • File
          mma_from_smem.h
      • File
        gemm_kernel_utils.h
      • iterators
        • File
          epilogue_predicated_tile_iterator.h
        • File
          make_residual_last.h
        • File
          predicated_tile_access_iterator_residual_last.h
        • File
          predicated_tile_iterator_atomic.h
        • File
          predicated_tile_iterator_residual_last.h
        • File
          transpose_warp_iterator.h
        • File
          warp_iterator_from_smem.h
      • File
        kernel_backward.h
      • File
        kernel_forward.h
      • transform
        • File
          bias_broadcast.h
        • File
          tile_smem_loader.h
    • transformer/inference/csrc
      • File
        pt_binding.cpp
      • File
        transform.cu
  • deepspeed
    • module_inject
      • containers
        • File
          __init__.py
        • File
          internlm.py
      • File
        replace_module.py
      • File
        replace_policy.py
      • File
        utils.py
    • ops
      • deepspeed4science
        • File
          __init__.py
        • File
          evoformer_attn.py
      • transformer/inference
        • File
          ds_attention.py
        • op_binding
          • File
            mlp_gemm.py
          • File
            qkv_gemm.py
  • docs
    • File
      _config.yml
    • _data
      • File
        navigation.yml
    • _pages
      • File
        deepspeed4science.md
    • _tutorials
      • File
        ds4sci_evoformerattention.md
    • assets/images
      • File
        3pillars.png
      • File
        DeepSpeed-pillars.png
      • File
        evoformer.png
      • File
        new-megatron-ds.png
    • File
      index.md
  • op_builder
    • File
      evoformer_attn.py
  • tests
    • benchmarks
      • File
        DS4Sci_EvoformerAttention_bench.py
    • unit/ops/deepspeed4science
      • File
        test_DS4Sci_EvoformerAttention.py