vllm
1d0c9d6b - [Kernel] some optimizations for dense marlin and moe marlin (#16850)

Commit
14 days ago
[Kernel] some optimizations for dense marlin and moe marlin (#16850)
Signed-off-by: Jinzhen Lin <linjinzhen@hotmail.com>
Author
Parents
  • File
    CMakeLists.txt
  • csrc
    • moe/marlin_moe_wna16
      • File
        .gitignore
      • File
        generate_kernels.py
      • File
        kernel.h
      • File
        marlin_template.h
      • File
        ops.cu
    • quantization/gptq_marlin
      • File
        .gitignore
      • File
        dequant.h
      • File
        generate_kernels.py
      • File
        gptq_marlin.cu
      • File
        kernel.h
      • File
        marlin_template.h
    • File
      torch_bindings.cpp
  • tests/kernels
    • moe
      • File
        test_moe.py
    • quantization
      • File
        test_awq_marlin.py
      • File
        test_marlin_gemm.py
  • vllm
    • File
      _custom_ops.py
    • model_executor/layers
      • fused_moe
        • File
          fused_marlin_moe.py
      • quantization
        • File
          awq_marlin.py
        • compressed_tensors/schemes
          • File
            compressed_tensors_w8a16_fp8.py
        • File
          fp8.py
        • File
          gptq_marlin.py
        • kernels/mixed_precision
          • File
            marlin.py
        • utils
          • File
            marlin_utils.py
          • File
            marlin_utils_fp8.py
    • File
      scalar_type.py