onnxruntime
Mlas int4 int8 with avx2/512
#20687
Merged
Go
Login via GitHub
Home
Pricing
FAQ
Install
Login
via GitHub
Overview
Commits
48
Changes
View On
GitHub
Hide Minimap (CTRL+M)
Files
26
Threads
cmake
onnxruntime_mlas.cmake
onnxruntime
contrib_ops/cpu/quantization
matmul_nbits.cc
core/mlas
inc
mlas_qnbit.h
lib
mlasi.h
platform.cpp
sqnbitgemm.cpp
sqnbitgemm.h
sqnbitgemm_kernel_avx2.cpp
sqnbitgemm_kernel_avx2_int8_blklen16.h
sqnbitgemm_kernel_avx2_int8_blklen32.h
sqnbitgemm_kernel_avx2_int8_blklen64.h
sqnbitgemm_kernel_avx512.cpp
sqnbitgemm_kernel_avx512_int8.h
sqnbitgemm_kernel_avx512_int8_blklen128.h
sqnbitgemm_kernel_avx512_int8_blklen16.h
sqnbitgemm_kernel_avx512_int8_blklen32.h
sqnbitgemm_kernel_avx512_int8_blklen64.h
sqnbitgemm_kernel_avx512vnni.cpp
sqnbitgemm_kernel_avx_common.h
sqnbitgemm_kernel_avx_common_int8.h
sqnbitgemm_m1_sym_kernel_avx2_int8_blklen32.h
sqnbitgemm_m1_sym_kernel_avx2_int8_blklen64.h
test
contrib_ops
matmul_4bits_test.cc
mlas
bench
bench_q4dq.cpp
bench_sqnbitgemm.cpp
unittest
test_sqnbitgemm.cpp
Loading comments...
Loading