onnxruntime
Mlas int4 int8 with avx2/512
#20687
Merged

  • cmake
    • File
      onnxruntime_mlas.cmake
  • onnxruntime
    • contrib_ops/cpu/quantization
      • File
        matmul_nbits.cc
    • core/mlas
      • inc
        • File
          mlas_qnbit.h
      • lib
        • File
          mlasi.h
        • File
          platform.cpp
        • File
          sqnbitgemm.cpp
        • File
          sqnbitgemm.h
        • File
          sqnbitgemm_kernel_avx2.cpp
        • File
          sqnbitgemm_kernel_avx2_int8_blklen16.h
        • File
          sqnbitgemm_kernel_avx2_int8_blklen32.h
        • File
          sqnbitgemm_kernel_avx2_int8_blklen64.h
        • File
          sqnbitgemm_kernel_avx512.cpp
        • File
          sqnbitgemm_kernel_avx512_int8.h
        • File
          sqnbitgemm_kernel_avx512_int8_blklen128.h
        • File
          sqnbitgemm_kernel_avx512_int8_blklen16.h
        • File
          sqnbitgemm_kernel_avx512_int8_blklen32.h
        • File
          sqnbitgemm_kernel_avx512_int8_blklen64.h
        • File
          sqnbitgemm_kernel_avx512vnni.cpp
        • File
          sqnbitgemm_kernel_avx_common.h
        • File
          sqnbitgemm_kernel_avx_common_int8.h
        • File
          sqnbitgemm_m1_sym_kernel_avx2_int8_blklen32.h
        • File
          sqnbitgemm_m1_sym_kernel_avx2_int8_blklen64.h
    • test
      • contrib_ops
        • File
          matmul_4bits_test.cc
      • mlas
        • bench
          • File
            bench_q4dq.cpp
          • File
            bench_sqnbitgemm.cpp
        • unittest
          • File
            test_sqnbitgemm.cpp

Loading comments...