llama.cpp
bc320ef6 - Merge branch 'master' into compilade/refactor-kv-cache

Commit
284 days ago
Merge branch 'master' into compilade/refactor-kv-cache
Author
Committer
  • .devops
    • File
      full-cuda.Dockerfile
    • File
      full-rocm.Dockerfile
    • File
      full.Dockerfile
    • File
      llama-cli-cann.Dockerfile
    • File
      llama-cli-cuda.Dockerfile
    • File
      llama-cli-intel.Dockerfile
    • File
      llama-cli-rocm.Dockerfile
    • File
      llama-cli-vulkan.Dockerfile
    • File
      llama-cli.Dockerfile
    • File
      llama-server-cuda.Dockerfile
    • File
      llama-server-intel.Dockerfile
    • File
      llama-server-rocm.Dockerfile
    • File
      llama-server-vulkan.Dockerfile
    • File
      llama-server.Dockerfile
    • nix
      • File
        apps.nix
      • File
        package.nix
    • File
      tools.sh
  • .ecrc
  • .github
    • File
      labeler.yml
    • workflows
      • bench.yml.disabled
      • File
        build.yml
      • File
        docker.yml
      • File
        python-check-requirements.yml
      • File
        python-type-check.yml
  • File
    .gitignore
  • File
    CMakeLists.txt
  • File
    CMakePresets.json
  • File
    CONTRIBUTING.md
  • File
    Makefile
  • File
    Package.swift
  • File
    README.md
  • ci
    • File
      run.sh
  • cmake
    • llama-config.cmake.in
  • common
    • File
      common.cpp
    • File
      common.h
    • File
      grammar-parser.cpp
    • File
      log.h
    • File
      ngram-cache.h
    • File
      sampling.cpp
    • File
      stb_image.h
  • File
    convert_hf_to_gguf.py
  • File
    convert_hf_to_gguf_update.py
  • File
    convert_llama_ggml_to_gguf.py
  • File
    convert_lora_to_gguf.py
  • docs
    • File
      android.md
    • backend
      • File
        BLIS.md
      • File
        CANN.md
      • File
        SYCL.md
    • File
      build.md
    • development
      • File
        HOWTO-add-model.md
      • File
        debugging-tests.md
      • llama-star
        • File
          idea-arch.key
        • File
          idea-arch.pdf
      • File
        token_generation_performance_tips.md
    • File
      docker.md
    • File
      install.md
  • examples
    • File
      CMakeLists.txt
    • baby-llama
      • File
        baby-llama.cpp
    • batched-bench
      • File
        batched-bench.cpp
    • batched.swift/Sources
      • File
        main.swift
    • batched
      • File
        batched.cpp
    • benchmark
      • File
        benchmark-matmult.cpp
    • File
      convert_legacy_llama.py
    • cvector-generator
      • File
        cvector-generator.cpp
    • deprecation-warning
      • File
        README.md
      • File
        deprecation-warning.cpp
    • embedding
      • File
        README.md
      • File
        embedding.cpp
    • eval-callback
      • File
        eval-callback.cpp
    • export-lora
      • File
        README.md
      • File
        export-lora.cpp
    • finetune
      • File
        CMakeLists.txt
      • File
        README.md
      • File
        convert-finetune-checkpoint-to-gguf.py
      • File
        finetune.cpp
      • File
        finetune.sh
    • gbnf-validator
      • File
        gbnf-validator.cpp
    • gguf-hash
      • File
        CMakeLists.txt
      • File
        README.md
      • deps
        • rotate-bits
          • File
            package.json
          • File
            rotate-bits.h
        • sha1
          • File
            package.json
          • File
            sha1.c
          • File
            sha1.h
        • sha256
          • File
            package.json
          • File
            sha256.c
          • File
            sha256.h
        • xxhash
          • File
            clib.json
          • File
            xxhash.c
          • File
            xxhash.h
      • File
        gguf-hash.cpp
    • gguf
      • File
        gguf.cpp
    • imatrix
      • File
        README.md
      • File
        imatrix.cpp
    • infill
      • File
        infill.cpp
    • File
      json_schema_pydantic_example.py
    • File
      json_schema_to_grammar.py
    • llama-bench
      • File
        llama-bench.cpp
    • llama.android/llama/src/main/cpp
      • File
        llama-android.cpp
    • llama.swiftui
      • llama.cpp.swift
        • File
          LibLlama.swift
      • llama.swiftui/Models
        • File
          LlamaState.swift
    • llava
      • File
        CMakeLists.txt
      • File
        MobileVLM-README.md
      • File
        README-minicpmv2.5.md
      • File
        README-minicpmv2.6.md
      • File
        README.md
      • File
        clip.cpp
      • File
        clip.h
      • File
        convert_image_encoder_to_gguf.py
      • File
        llava-cli.cpp
      • File
        llava.cpp
      • File
        llava.h
      • File
        llava_surgery.py
      • File
        llava_surgery_v2.py
      • File
        minicpmv-cli.cpp
      • File
        minicpmv-convert-image-encoder-to-gguf.py
      • File
        minicpmv-surgery.py
      • File
        requirements.txt
    • lookahead
      • File
        lookahead.cpp
    • lookup
      • File
        lookup-create.cpp
      • File
        lookup-stats.cpp
      • File
        lookup.cpp
    • main
      • File
        README.md
      • File
        main.cpp
    • parallel
      • File
        parallel.cpp
    • passkey
      • File
        README.md
    • perplexity
      • File
        perplexity.cpp
    • File
      pydantic-models-to-grammar-examples.py
    • File
      pydantic_models_to_grammar.py
    • File
      pydantic_models_to_grammar_examples.py
    • quantize-stats
      • File
        quantize-stats.cpp
    • quantize
      • File
        README.md
      • File
        quantize.cpp
    • File
      regex_to_grammar.py
    • retrieval
      • File
        retrieval.cpp
    • rpc
      • File
        README.md
      • File
        rpc-server.cpp
    • save-load-state
      • File
        save-load-state.cpp
    • server
      • File
        README.md
      • bench
        • File
          bench.py
      • public
        • File
          completion.js
        • File
          index-new.html
        • File
          index.html
        • File
          index.js
      • File
        server.cpp
      • tests
        • features
          • File
            lora.feature
          • steps
            • File
              steps.py
        • File
          requirements.txt
      • File
        utils.hpp
    • File
      server_embd.py
    • simple
      • File
        README.md
    • speculative
      • File
        speculative.cpp
    • sycl
      • File
        README.md
      • File
        win-run-llama2.bat
    • tokenize
      • File
        tokenize.cpp
    • train-text-from-scratch
      • File
        CMakeLists.txt
      • File
        README.md
      • File
        convert-train-checkpoint-to-gguf.py
      • File
        train-text-from-scratch.cpp
  • File
    flake.lock
  • ggml
    • File
      .gitignore
    • File
      CMakeLists.txt
    • File
      ggml_vk_generate_shaders.py
    • include
      • File
        ggml-alloc.h
      • File
        ggml-backend.h
      • File
        ggml-cann.h
      • File
        ggml-cuda.h
      • File
        ggml-metal.h
      • File
        ggml.h
    • src
      • File
        CMakeLists.txt
      • File
        ggml-aarch64.c
      • File
        ggml-aarch64.h
      • File
        ggml-alloc.c
      • File
        ggml-backend.c
      • File
        ggml-blas.cpp
      • File
        ggml-cann.cpp
      • ggml-cann
        • File
          .clang-format
        • Doxyfile
        • File
          acl_tensor.cpp
        • File
          acl_tensor.h
        • File
          aclnn_ops.cpp
        • File
          aclnn_ops.h
        • File
          common.h
        • kernels
          • File
            CMakeLists.txt
          • File
            ascendc_kernels.h
          • File
            dup.cpp
          • File
            get_row_f16.cpp
          • File
            get_row_f32.cpp
          • File
            get_row_q4_0.cpp
          • File
            get_row_q8_0.cpp
          • File
            quantize_f16_q8_0.cpp
          • File
            quantize_f32_q8_0.cpp
          • File
            quantize_float_to_q4_0.cpp
      • File
        ggml-common.h
      • File
        ggml-cuda.cu
      • ggml-cuda
        • File
          argsort.cu
        • File
          binbcast.cu
        • File
          binbcast.cuh
        • File
          common.cuh
        • File
          conv-transpose-1d.cu
        • File
          conv-transpose-1d.cuh
        • File
          cpy.cu
        • File
          cross-entropy-loss.cu
        • File
          cross-entropy-loss.cuh
        • File
          dmmv.cu
        • File
          dmmv.cuh
        • File
          fattn-common.cuh
        • File
          fattn-tile-f16.cu
        • File
          fattn-tile-f32.cu
        • File
          fattn-vec-f16.cuh
        • File
          fattn-vec-f32.cuh
        • File
          fattn-wmma-f16.cuh
        • File
          fattn.cu
        • File
          getrows.cu
        • File
          mma.cuh
        • File
          mmq.cu
        • File
          mmq.cuh
        • File
          mmvq.cu
        • File
          norm.cu
        • File
          quantize.cu
        • File
          quantize.cuh
        • File
          rope.cu
        • File
          sumrows.cu
        • File
          sumrows.cuh
        • template-instances
          • File
            generate_cu_files.py
          • File
            mmq-instance-iq1_s.cu
          • File
            mmq-instance-iq2_s.cu
          • File
            mmq-instance-iq2_xs.cu
          • File
            mmq-instance-iq2_xxs.cu
          • File
            mmq-instance-iq3_s.cu
          • File
            mmq-instance-iq3_xxs.cu
          • File
            mmq-instance-iq4_nl.cu
          • File
            mmq-instance-iq4_xs.cu
        • File
          unary.cu
        • File
          unary.cuh
        • File
          vecdotq.cuh
        • vendors
          • File
            cuda.h
          • File
            hip.h
          • File
            musa.h
      • File
        ggml-impl.h
      • File
        ggml-kompute.cpp
      • File
        ggml-metal.m
      • ggml-metal.metal
      • File
        ggml-quants.c
      • File
        ggml-quants.h
      • File
        ggml-rpc.cpp
      • File
        ggml-sycl.cpp
      • ggml-sycl
        • File
          backend.hpp
        • File
          common.cpp
        • File
          common.hpp
        • File
          concat.cpp
        • File
          concat.hpp
        • File
          conv.cpp
        • File
          conv.hpp
        • File
          convert.cpp
        • File
          convert.hpp
        • File
          dequantize.hpp
        • File
          dmmv.cpp
        • dpct
          • File
            helper.hpp
        • File
          gemm.hpp
        • File
          im2col.cpp
        • File
          im2col.hpp
        • File
          mmq.cpp
        • File
          mmvq.cpp
        • File
          norm.cpp
        • File
          presets.hpp
        • File
          rope.cpp
        • File
          softmax.cpp
        • File
          softmax.hpp
        • File
          tsembd.cpp
        • File
          tsembd.hpp
      • File
        ggml-vulkan-shaders.hpp
      • File
        ggml-vulkan.cpp
      • File
        ggml.c
      • kompute-shaders
        • File
          op_rope_f16.comp
        • File
          op_rope_f32.comp
        • File
          rope_common.comp
      • llamafile
        • File
          sgemm.cpp
        • File
          sgemm.h
      • vulkan-shaders
        • File
          CMakeLists.txt
        • File
          acc.comp
        • File
          add.comp
        • File
          clamp.comp
        • File
          concat.comp
        • File
          copy.comp
        • File
          cos.comp
        • File
          dequant_funcs.comp
        • File
          dequant_iq4_nl.comp
        • File
          dequant_q4_0.comp
        • File
          div.comp
        • File
          gelu.comp
        • File
          gelu_quick.comp
        • File
          generic_binary_head.comp
        • File
          generic_unary_head.comp
        • File
          group_norm.comp
        • File
          im2col.comp
        • File
          leaky_relu.comp
        • File
          mul.comp
        • File
          mul_mat_vec.comp
        • File
          mul_mat_vec_nc.comp
        • File
          mul_mat_vec_p021.comp
        • File
          mul_mat_vec_q2_k.comp
        • File
          mul_mat_vec_q3_k.comp
        • File
          mul_mat_vec_q4_k.comp
        • File
          mul_mat_vec_q5_k.comp
        • File
          mul_mat_vec_q6_k.comp
        • File
          mul_mm.comp
        • File
          norm.comp
        • File
          pad.comp
        • File
          relu.comp
        • File
          repeat.comp
        • File
          rms_norm.comp
        • File
          scale.comp
        • File
          silu.comp
        • File
          sin.comp
        • File
          soft_max.comp
        • File
          square.comp
        • File
          sum_rows.comp
        • File
          tanh.comp
        • File
          timestep_embedding.comp
        • File
          types.comp
        • File
          upscale.comp
        • File
          vulkan-shaders-gen.cpp
  • gguf-py
    • File
      README.md
    • examples
      • File
        writer.py
    • gguf
      • File
        __init__.py
      • File
        constants.py
      • File
        gguf_reader.py
      • File
        gguf_writer.py
      • File
        lazy.py
      • File
        metadata.py
      • File
        quants.py
      • File
        tensor_mapping.py
      • File
        utility.py
    • File
      pyproject.toml
    • scripts
      • File
        __init__.py
      • File
        gguf_convert_endian.py
      • File
        gguf_dump.py
      • File
        gguf_hash.py
      • File
        gguf_new_metadata.py
      • File
        gguf_set_metadata.py
    • tests
      • File
        __init__.py
      • File
        test_gguf.py
      • File
        test_metadata.py
      • File
        test_quants.py
  • grammars
    • File
      README.md
  • include
    • File
      llama.h
  • models
    • ggml-vocab-gpt2.gguf
    • ggml-vocab-stablelm.gguf
  • File
    pyrightconfig.json
  • File
    requirements.txt
  • requirements
    • File
      requirements-all.txt
    • File
      requirements-compare-llama-bench.txt
    • File
      requirements-convert_hf_to_gguf.txt
    • File
      requirements-convert_hf_to_gguf_update.txt
    • File
      requirements-convert_legacy_llama.txt
    • File
      requirements-convert_llama_ggml_to_gguf.txt
    • File
      requirements-convert_lora_to_gguf.txt
    • File
      requirements-pydantic.txt
    • File
      requirements-test-tokenizer-random.txt
  • scripts
    • File
      check-requirements.sh
    • File
      compare-llama-bench.py
    • File
      convert-gg.sh
    • File
      gen-unicode-data.py
    • File
      pod-llama.sh
    • File
      sync-ggml-am.sh
    • sync-ggml.last
    • File
      sync-ggml.sh
  • src
    • File
      CMakeLists.txt
    • File
      llama-grammar.cpp
    • File
      llama-grammar.h
    • File
      llama-impl.h
    • File
      llama-sampling.cpp
    • File
      llama-sampling.h
    • File
      llama-vocab.cpp
    • File
      llama-vocab.h
    • File
      llama.cpp
    • File
      unicode.cpp
    • File
      unicode.h
  • tests
    • File
      CMakeLists.txt
    • File
      test-backend-ops.cpp
    • File
      test-chat-template.cpp
    • File
      test-double-float.cpp
    • File
      test-grad0.cpp
    • File
      test-grammar-integration.cpp
    • File
      test-llama-grammar.cpp
    • File
      test-lora-conversion-inference.sh
    • File
      test-quantize-fns.cpp
    • File
      test-quantize-perf.cpp
    • File
      test-rope.cpp
    • File
      test-sampling.cpp
    • File
      test-tokenizer-0.cpp
    • File
      test-tokenizer-1-bpe.cpp
    • File
      test-tokenizer-1-spm.cpp
    • File
      test-tokenizer-random.py