llama.cpp
f3f65429 - llama : reorganize source code + improve CMake (#8006)

Commit
1 year ago
llama : reorganize source code + improve CMake (#8006)

* scripts : update sync [no ci]
* files : relocate [no ci]
* ci : disable kompute build [no ci]
* cmake : fixes [no ci]
* server : fix mingw build

ggml-ci

* cmake : minor [no ci]
* cmake : link math library [no ci]
* cmake : build normal ggml library (not object library) [no ci]
* cmake : fix kompute build

ggml-ci

* make,cmake : fix LLAMA_CUDA + replace GGML_CDEF_PRIVATE

ggml-ci

* move public backend headers to the public include directory (#8122)

* move public backend headers to the public include directory
* nix test
* spm : fix metal header

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* scripts : fix sync paths [no ci]
* scripts : sync ggml-blas.h [no ci]

---------

Co-authored-by: slaren <slarengh@gmail.com>
Author
Parents
  • .devops/nix
    • File
      package.nix
  • .github
    • File
      labeler.yml
    • workflows
      • File
        bench.yml
      • File
        build.yml
      • File
        server.yml
  • File
    .gitignore
  • File
    .gitmodules
  • File
    CMakeLists.txt
  • File
    CMakePresets.json
  • File
    Makefile
  • File
    Package.swift
  • File
    README-sycl.md
  • File
    README.md
  • ci
    • File
      run.sh
  • cmake
    • File
      build-info.cmake
    • File
      git-vars.cmake
    • llama-config.cmake.in
  • common
    • File
      CMakeLists.txt
    • cmake
      • File
        build-info-gen-cpp.cmake
  • docs
    • File
      BLIS.md
  • examples
    • File
      CMakeLists.txt
    • imatrix
      • File
        README.md
    • llava
      • File
        MobileVLM-README.md
    • rpc
      • File
        README.md
    • server
      • File
        CMakeLists.txt
    • sycl
      • File
        build.sh
      • File
        win-build-sycl.bat
  • ggml
    • File
      CMakeLists.txt
    • cmake
      • File
        FindSIMD.cmake
    • File
      ggml_vk_generate_shaders.py
    • include
      • File
        ggml-alloc.h
      • File
        ggml-backend.h
      • File
        ggml-blas.h
      • File
        ggml-cuda.h
      • File
        ggml-kompute.h
      • File
        ggml-metal.h
      • File
        ggml-rpc.h
      • File
        ggml-sycl.h
      • File
        ggml-vulkan.h
      • File
        ggml.h
    • src
      • File
        CMakeLists.txt
      • File
        ggml-alloc.c
      • File
        ggml-backend-impl.h
      • File
        ggml-backend.c
      • File
        ggml-blas.cpp
      • File
        ggml-common.h
      • File
        ggml-cuda.cu
      • ggml-cuda
        • File
          acc.cu
        • File
          acc.cuh
        • File
          arange.cu
        • File
          arange.cuh
        • File
          argsort.cu
        • File
          argsort.cuh
        • File
          binbcast.cu
        • File
          binbcast.cuh
        • File
          clamp.cu
        • File
          clamp.cuh
        • File
          common.cuh
        • File
          concat.cu
        • File
          concat.cuh
        • File
          convert.cu
        • File
          convert.cuh
        • File
          cpy.cu
        • File
          cpy.cuh
        • File
          dequantize.cuh
        • File
          diagmask.cu
        • File
          diagmask.cuh
        • File
          dmmv.cu
        • File
          dmmv.cuh
        • File
          fattn-common.cuh
        • File
          fattn-tile-f16.cu
        • File
          fattn-tile-f16.cuh
        • File
          fattn-tile-f32.cu
        • File
          fattn-tile-f32.cuh
        • File
          fattn-vec-f16.cuh
        • File
          fattn-vec-f32.cuh
        • File
          fattn-wmma-f16.cuh
        • File
          fattn.cu
        • File
          fattn.cuh
        • File
          getrows.cu
        • File
          getrows.cuh
        • File
          im2col.cu
        • File
          im2col.cuh
        • File
          mma.cuh
        • File
          mmq.cu
        • File
          mmq.cuh
        • File
          mmvq.cu
        • File
          mmvq.cuh
        • File
          norm.cu
        • File
          norm.cuh
        • File
          pad.cu
        • File
          pad.cuh
        • File
          pool2d.cu
        • File
          pool2d.cuh
        • File
          quantize.cu
        • File
          quantize.cuh
        • File
          rope.cu
        • File
          rope.cuh
        • File
          scale.cu
        • File
          scale.cuh
        • File
          softmax.cu
        • File
          softmax.cuh
        • File
          sumrows.cu
        • File
          sumrows.cuh
        • template-instances
          • File
            fattn-vec-f16-instance-hs128-f16-f16.cu
          • File
            fattn-vec-f16-instance-hs128-f16-q4_0.cu
          • File
            fattn-vec-f16-instance-hs128-f16-q4_1.cu
          • File
            fattn-vec-f16-instance-hs128-f16-q5_0.cu
          • File
            fattn-vec-f16-instance-hs128-f16-q5_1.cu
          • File
            fattn-vec-f16-instance-hs128-f16-q8_0.cu
          • File
            fattn-vec-f16-instance-hs128-q4_0-f16.cu
          • File
            fattn-vec-f16-instance-hs128-q4_0-q4_0.cu
          • File
            fattn-vec-f16-instance-hs128-q4_0-q4_1.cu
          • File
            fattn-vec-f16-instance-hs128-q4_0-q5_0.cu
          • File
            fattn-vec-f16-instance-hs128-q4_0-q5_1.cu
          • File
            fattn-vec-f16-instance-hs128-q4_0-q8_0.cu
          • File
            fattn-vec-f16-instance-hs128-q4_1-f16.cu
          • File
            fattn-vec-f16-instance-hs128-q4_1-q4_0.cu
          • File
            fattn-vec-f16-instance-hs128-q4_1-q4_1.cu
          • File
            fattn-vec-f16-instance-hs128-q4_1-q5_0.cu
          • File
            fattn-vec-f16-instance-hs128-q4_1-q5_1.cu
          • File
            fattn-vec-f16-instance-hs128-q4_1-q8_0.cu
          • File
            fattn-vec-f16-instance-hs128-q5_0-f16.cu
          • File
            fattn-vec-f16-instance-hs128-q5_0-q4_0.cu
          • File
            fattn-vec-f16-instance-hs128-q5_0-q4_1.cu
          • File
            fattn-vec-f16-instance-hs128-q5_0-q5_0.cu
          • File
            fattn-vec-f16-instance-hs128-q5_0-q5_1.cu
          • File
            fattn-vec-f16-instance-hs128-q5_0-q8_0.cu
          • File
            fattn-vec-f16-instance-hs128-q5_1-f16.cu
          • File
            fattn-vec-f16-instance-hs128-q5_1-q4_0.cu
          • File
            fattn-vec-f16-instance-hs128-q5_1-q4_1.cu
          • File
            fattn-vec-f16-instance-hs128-q5_1-q5_0.cu
          • File
            fattn-vec-f16-instance-hs128-q5_1-q5_1.cu
          • File
            fattn-vec-f16-instance-hs128-q5_1-q8_0.cu
          • File
            fattn-vec-f16-instance-hs128-q8_0-f16.cu
          • File
            fattn-vec-f16-instance-hs128-q8_0-q4_0.cu
          • File
            fattn-vec-f16-instance-hs128-q8_0-q4_1.cu
          • File
            fattn-vec-f16-instance-hs128-q8_0-q5_0.cu
          • File
            fattn-vec-f16-instance-hs128-q8_0-q5_1.cu
          • File
            fattn-vec-f16-instance-hs128-q8_0-q8_0.cu
          • File
            fattn-vec-f16-instance-hs256-f16-f16.cu
          • File
            fattn-vec-f16-instance-hs64-f16-f16.cu
          • File
            fattn-vec-f16-instance-hs64-f16-q4_0.cu
          • File
            fattn-vec-f16-instance-hs64-f16-q4_1.cu
          • File
            fattn-vec-f16-instance-hs64-f16-q5_0.cu
          • File
            fattn-vec-f16-instance-hs64-f16-q5_1.cu
          • File
            fattn-vec-f16-instance-hs64-f16-q8_0.cu
          • File
            fattn-vec-f32-instance-hs128-f16-f16.cu
          • File
            fattn-vec-f32-instance-hs128-f16-q4_0.cu
          • File
            fattn-vec-f32-instance-hs128-f16-q4_1.cu
          • File
            fattn-vec-f32-instance-hs128-f16-q5_0.cu
          • File
            fattn-vec-f32-instance-hs128-f16-q5_1.cu
          • File
            fattn-vec-f32-instance-hs128-f16-q8_0.cu
          • File
            fattn-vec-f32-instance-hs128-q4_0-f16.cu
          • File
            fattn-vec-f32-instance-hs128-q4_0-q4_0.cu
          • File
            fattn-vec-f32-instance-hs128-q4_0-q4_1.cu
          • File
            fattn-vec-f32-instance-hs128-q4_0-q5_0.cu
          • File
            fattn-vec-f32-instance-hs128-q4_0-q5_1.cu
          • File
            fattn-vec-f32-instance-hs128-q4_0-q8_0.cu
          • File
            fattn-vec-f32-instance-hs128-q4_1-f16.cu
          • File
            fattn-vec-f32-instance-hs128-q4_1-q4_0.cu
          • File
            fattn-vec-f32-instance-hs128-q4_1-q4_1.cu
          • File
            fattn-vec-f32-instance-hs128-q4_1-q5_0.cu
          • File
            fattn-vec-f32-instance-hs128-q4_1-q5_1.cu
          • File
            fattn-vec-f32-instance-hs128-q4_1-q8_0.cu
          • File
            fattn-vec-f32-instance-hs128-q5_0-f16.cu
          • File
            fattn-vec-f32-instance-hs128-q5_0-q4_0.cu
          • File
            fattn-vec-f32-instance-hs128-q5_0-q4_1.cu
          • File
            fattn-vec-f32-instance-hs128-q5_0-q5_0.cu
          • File
            fattn-vec-f32-instance-hs128-q5_0-q5_1.cu
          • File
            fattn-vec-f32-instance-hs128-q5_0-q8_0.cu
          • File
            fattn-vec-f32-instance-hs128-q5_1-f16.cu
          • File
            fattn-vec-f32-instance-hs128-q5_1-q4_0.cu
          • File
            fattn-vec-f32-instance-hs128-q5_1-q4_1.cu
          • File
            fattn-vec-f32-instance-hs128-q5_1-q5_0.cu
          • File
            fattn-vec-f32-instance-hs128-q5_1-q5_1.cu
          • File
            fattn-vec-f32-instance-hs128-q5_1-q8_0.cu
          • File
            fattn-vec-f32-instance-hs128-q8_0-f16.cu
          • File
            fattn-vec-f32-instance-hs128-q8_0-q4_0.cu
          • File
            fattn-vec-f32-instance-hs128-q8_0-q4_1.cu
          • File
            fattn-vec-f32-instance-hs128-q8_0-q5_0.cu
          • File
            fattn-vec-f32-instance-hs128-q8_0-q5_1.cu
          • File
            fattn-vec-f32-instance-hs128-q8_0-q8_0.cu
          • File
            fattn-vec-f32-instance-hs256-f16-f16.cu
          • File
            fattn-vec-f32-instance-hs64-f16-f16.cu
          • File
            fattn-vec-f32-instance-hs64-f16-q4_0.cu
          • File
            fattn-vec-f32-instance-hs64-f16-q4_1.cu
          • File
            fattn-vec-f32-instance-hs64-f16-q5_0.cu
          • File
            fattn-vec-f32-instance-hs64-f16-q5_1.cu
          • File
            fattn-vec-f32-instance-hs64-f16-q8_0.cu
          • File
            fattn-wmma-f16-instance-kqfloat-cpb16.cu
          • File
            fattn-wmma-f16-instance-kqfloat-cpb32.cu
          • File
            fattn-wmma-f16-instance-kqhalf-cpb16.cu
          • File
            fattn-wmma-f16-instance-kqhalf-cpb32.cu
          • File
            fattn-wmma-f16-instance-kqhalf-cpb8.cu
          • File
            generate_cu_files.py
          • File
            mmq-instance-q2_k.cu
          • File
            mmq-instance-q3_k.cu
          • File
            mmq-instance-q4_0.cu
          • File
            mmq-instance-q4_1.cu
          • File
            mmq-instance-q4_k.cu
          • File
            mmq-instance-q5_0.cu
          • File
            mmq-instance-q5_1.cu
          • File
            mmq-instance-q5_k.cu
          • File
            mmq-instance-q6_k.cu
          • File
            mmq-instance-q8_0.cu
        • File
          tsembd.cu
        • File
          tsembd.cuh
        • File
          unary.cu
        • File
          unary.cuh
        • File
          upscale.cu
        • File
          upscale.cuh
        • File
          vecdotq.cuh
      • File
        ggml-impl.h
      • File
        ggml-kompute.cpp
      • File
        ggml-metal.m
      • ggml-metal.metal
      • File
        ggml-quants.c
      • File
        ggml-quants.h
      • File
        ggml-rpc.cpp
      • File
        ggml-sycl.cpp
      • ggml-sycl
        • File
          backend.hpp
        • File
          common.cpp
        • File
          common.hpp
        • File
          convert.cpp
        • File
          convert.hpp
        • File
          dequantize.hpp
        • File
          dmmv.cpp
        • File
          dmmv.hpp
        • dpct
          • File
            helper.hpp
        • File
          mmq.cpp
        • File
          mmq.hpp
        • File
          mmvq.cpp
        • File
          mmvq.hpp
        • File
          presets.hpp
        • File
          vecdotq.hpp
      • File
        ggml-vulkan-shaders.hpp
      • File
        ggml-vulkan.cpp
      • File
        ggml.c
      • File
        kompute
      • kompute-shaders
        • File
          common.comp
        • File
          op_add.comp
        • File
          op_addrow.comp
        • File
          op_cpy_f16_f16.comp
        • File
          op_cpy_f16_f32.comp
        • File
          op_cpy_f32_f16.comp
        • File
          op_cpy_f32_f32.comp
        • File
          op_diagmask.comp
        • File
          op_gelu.comp
        • File
          op_getrows.comp
        • File
          op_getrows_f16.comp
        • File
          op_getrows_f32.comp
        • File
          op_getrows_q4_0.comp
        • File
          op_getrows_q4_1.comp
        • File
          op_getrows_q6_k.comp
        • File
          op_mul.comp
        • File
          op_mul_mat_f16.comp
        • File
          op_mul_mat_mat_f32.comp
        • File
          op_mul_mat_q4_0.comp
        • File
          op_mul_mat_q4_1.comp
        • File
          op_mul_mat_q6_k.comp
        • File
          op_mul_mat_q8_0.comp
        • File
          op_mul_mv_q_n.comp
        • File
          op_mul_mv_q_n_pre.comp
        • File
          op_norm.comp
        • File
          op_relu.comp
        • File
          op_rmsnorm.comp
        • File
          op_rope_f16.comp
        • File
          op_rope_f32.comp
        • File
          op_scale.comp
        • File
          op_scale_8.comp
        • File
          op_silu.comp
        • File
          op_softmax.comp
        • File
          rope_common.comp
      • File
        sgemm.cpp
      • File
        sgemm.h
      • vulkan-shaders
        • File
          add.comp
        • File
          argsort.comp
        • File
          clamp.comp
        • File
          copy.comp
        • File
          dequant_f32.comp
        • File
          dequant_funcs.comp
        • File
          dequant_head.comp
        • File
          dequant_q2_k.comp
        • File
          dequant_q3_k.comp
        • File
          dequant_q4_0.comp
        • File
          dequant_q4_1.comp
        • File
          dequant_q4_k.comp
        • File
          dequant_q5_0.comp
        • File
          dequant_q5_1.comp
        • File
          dequant_q5_k.comp
        • File
          dequant_q6_k.comp
        • File
          dequant_q8_0.comp
        • File
          diag_mask_inf.comp
        • File
          div.comp
        • File
          gelu.comp
        • File
          generic_binary_head.comp
        • File
          generic_head.comp
        • File
          generic_unary_head.comp
        • File
          get_rows.comp
        • File
          get_rows_quant.comp
        • File
          mul.comp
        • File
          mul_mat_split_k_reduce.comp
        • File
          mul_mat_vec.comp
        • File
          mul_mat_vec_base.comp
        • File
          mul_mat_vec_nc.comp
        • File
          mul_mat_vec_p021.comp
        • File
          mul_mat_vec_q2_k.comp
        • File
          mul_mat_vec_q3_k.comp
        • File
          mul_mat_vec_q4_k.comp
        • File
          mul_mat_vec_q5_k.comp
        • File
          mul_mat_vec_q6_k.comp
        • File
          mul_mm.comp
        • File
          norm.comp
        • File
          relu.comp
        • File
          rms_norm.comp
        • File
          rope_head.comp
        • File
          rope_neox.comp
        • File
          rope_norm.comp
        • File
          scale.comp
        • File
          silu.comp
        • File
          soft_max.comp
        • File
          square.comp
        • File
          sum_rows.comp
        • File
          types.comp
  • include
    • File
      llama.h
  • scripts
    • File
      build-info.sh
    • File
      compare-commits.sh
    • File
      debug-test.sh
    • File
      pod-llama.sh
    • File
      server-llm.sh
    • File
      sync-ggml-am.sh
    • File
      sync-ggml.sh
  • spm-headers
    • ggml-alloc.h
    • ggml-backend.h
    • ggml-metal.h
    • ggml.h
    • llama.h
  • src
    • File
      CMakeLists.txt
    • File
      llama.cpp
    • File
      unicode-data.cpp
    • File
      unicode-data.h
    • File
      unicode.cpp
    • File
      unicode.h
  • tests
    • File
      test-backend-ops.cpp