llama.cpp
Commit bc320ef6 - Merge branch 'master' into compilade/refactor-kv-cache
Committed 284 days ago
References: #7531 - llama : support Jamba hybrid Transformer-Mamba models
Author: compilade
Committer: compilade
Parents: 9b38f8bf, a47667cf
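
The referenced PR (#7531) adds Jamba support, which is what motivates the refactor-kv-cache branch: Jamba interleaves attention layers with Mamba (SSM) layers, and the two need different per-layer cache shapes. The following is a rough sketch of that idea only, not the code in this commit; every type and field name below is made up for illustration. Attention layers keep a per-token K/V cache that grows with the sequence, while recurrent layers keep a fixed-size state that is updated in place.

    // Illustrative sketch only -- not the llama.cpp implementation.
    #include <cstdio>
    #include <vector>

    // Attention layers cache one K/V entry per decoded token,
    // so their memory grows with the sequence length.
    struct attn_kv_cell {
        std::vector<float> k;
        std::vector<float> v;
    };

    // Recurrent (Mamba/SSM) layers keep a fixed-size state per sequence,
    // overwritten in place at every decoding step.
    struct recurrent_state {
        std::vector<float> conv_state;
        std::vector<float> ssm_state;
    };

    // A hybrid model mixes both layer types, so each layer's cache slot
    // holds one representation or the other.
    struct hybrid_layer_cache {
        bool is_recurrent = false;
        std::vector<attn_kv_cell> kv;   // used when !is_recurrent
        recurrent_state state;          // used when  is_recurrent
    };

    int main() {
        // Toy 4-layer model that alternates attention and Mamba layers.
        std::vector<hybrid_layer_cache> cache(4);
        for (size_t il = 0; il < cache.size(); ++il) {
            cache[il].is_recurrent = (il % 2 == 1);
            if (cache[il].is_recurrent) {
                cache[il].state.conv_state.assign(16, 0.0f);
                cache[il].state.ssm_state.assign(64, 0.0f);
            }
        }

        // Decode a few tokens: attention layers append, recurrent layers update in place.
        for (int t = 0; t < 8; ++t) {
            for (auto & l : cache) {
                if (l.is_recurrent) {
                    l.state.ssm_state[0] += 1.0f;  // stand-in for the real SSM update
                } else {
                    l.kv.push_back({std::vector<float>(32, 0.0f),
                                    std::vector<float>(32, 0.0f)});
                }
            }
        }

        std::printf("layer 0 (attention): %zu cached K/V cells\n", cache[0].kv.size());
        std::printf("layer 1 (recurrent): %zu state values, constant over time\n",
                    cache[1].state.ssm_state.size());
        return 0;
    }

Presumably the refactor in this branch generalizes llama.cpp's cache handling to cover both cases; the sketch only illustrates why a single homogeneous KV buffer no longer fits a hybrid model.
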
Files (395)
.devops
full-cuda.Dockerfile
full-rocm.Dockerfile
full.Dockerfile
llama-cli-cann.Dockerfile
llama-cli-cuda.Dockerfile
llama-cli-intel.Dockerfile
llama-cli-rocm.Dockerfile
llama-cli-vulkan.Dockerfile
llama-cli.Dockerfile
llama-server-cuda.Dockerfile
llama-server-intel.Dockerfile
llama-server-rocm.Dockerfile
llama-server-vulkan.Dockerfile
llama-server.Dockerfile
nix
apps.nix
package.nix
tools.sh
.ecrc
.github
labeler.yml
workflows
bench.yml.disabled
build.yml
docker.yml
python-check-requirements.yml
python-type-check.yml
.gitignore
CMakeLists.txt
CMakePresets.json
CONTRIBUTING.md
Makefile
Package.swift
README.md
ci
run.sh
cmake
llama-config.cmake.in
common
common.cpp
common.h
grammar-parser.cpp
log.h
ngram-cache.h
sampling.cpp
stb_image.h
convert_hf_to_gguf.py
convert_hf_to_gguf_update.py
convert_llama_ggml_to_gguf.py
convert_lora_to_gguf.py
docs
android.md
backend
BLIS.md
CANN.md
SYCL.md
build.md
development
HOWTO-add-model.md
debugging-tests.md
llama-star
idea-arch.key
idea-arch.pdf
token_generation_performance_tips.md
docker.md
install.md
examples
CMakeLists.txt
baby-llama
baby-llama.cpp
batched-bench
batched-bench.cpp
batched.swift/Sources
main.swift
batched
batched.cpp
benchmark
benchmark-matmult.cpp
convert_legacy_llama.py
cvector-generator
cvector-generator.cpp
deprecation-warning
README.md
deprecation-warning.cpp
embedding
README.md
embedding.cpp
eval-callback
eval-callback.cpp
export-lora
README.md
export-lora.cpp
finetune
CMakeLists.txt
README.md
convert-finetune-checkpoint-to-gguf.py
finetune.cpp
finetune.sh
gbnf-validator
gbnf-validator.cpp
gguf-hash
CMakeLists.txt
README.md
deps
rotate-bits
package.json
rotate-bits.h
sha1
package.json
sha1.c
sha1.h
sha256
package.json
sha256.c
sha256.h
xxhash
clib.json
xxhash.c
xxhash.h
gguf-hash.cpp
gguf
gguf.cpp
imatrix
README.md
imatrix.cpp
infill
infill.cpp
json_schema_pydantic_example.py
json_schema_to_grammar.py
llama-bench
llama-bench.cpp
llama.android/llama/src/main/cpp
llama-android.cpp
llama.swiftui
llama.cpp.swift
LibLlama.swift
llama.swiftui/Models
LlamaState.swift
llava
CMakeLists.txt
MobileVLM-README.md
README-minicpmv2.5.md
README-minicpmv2.6.md
README.md
clip.cpp
clip.h
convert_image_encoder_to_gguf.py
llava-cli.cpp
llava.cpp
llava.h
llava_surgery.py
llava_surgery_v2.py
minicpmv-cli.cpp
minicpmv-convert-image-encoder-to-gguf.py
minicpmv-surgery.py
requirements.txt
lookahead
lookahead.cpp
lookup
lookup-create.cpp
lookup-stats.cpp
lookup.cpp
main
README.md
main.cpp
parallel
parallel.cpp
passkey
README.md
perplexity
perplexity.cpp
pydantic-models-to-grammar-examples.py
pydantic_models_to_grammar.py
pydantic_models_to_grammar_examples.py
quantize-stats
quantize-stats.cpp
quantize
README.md
quantize.cpp
regex_to_grammar.py
retrieval
retrieval.cpp
rpc
README.md
rpc-server.cpp
save-load-state
save-load-state.cpp
server
README.md
bench
bench.py
public
completion.js
index-new.html
index.html
index.js
server.cpp
tests
features
lora.feature
steps
steps.py
requirements.txt
utils.hpp
server_embd.py
simple
README.md
speculative
speculative.cpp
sycl
README.md
win-run-llama2.bat
tokenize
tokenize.cpp
train-text-from-scratch
CMakeLists.txt
README.md
convert-train-checkpoint-to-gguf.py
train-text-from-scratch.cpp
flake.lock
ggml
.gitignore
CMakeLists.txt
ggml_vk_generate_shaders.py
include
ggml-alloc.h
ggml-backend.h
ggml-cann.h
ggml-cuda.h
ggml-metal.h
ggml.h
src
CMakeLists.txt
ggml-aarch64.c
ggml-aarch64.h
ggml-alloc.c
ggml-backend.c
ggml-blas.cpp
ggml-cann.cpp
ggml-cann
.clang-format
Doxyfile
acl_tensor.cpp
acl_tensor.h
aclnn_ops.cpp
aclnn_ops.h
common.h
kernels
CMakeLists.txt
ascendc_kernels.h
dup.cpp
get_row_f16.cpp
get_row_f32.cpp
get_row_q4_0.cpp
get_row_q8_0.cpp
quantize_f16_q8_0.cpp
quantize_f32_q8_0.cpp
quantize_float_to_q4_0.cpp
ggml-common.h
ggml-cuda.cu
ggml-cuda
argsort.cu
binbcast.cu
binbcast.cuh
common.cuh
conv-transpose-1d.cu
conv-transpose-1d.cuh
cpy.cu
cross-entropy-loss.cu
cross-entropy-loss.cuh
dmmv.cu
dmmv.cuh
fattn-common.cuh
fattn-tile-f16.cu
fattn-tile-f32.cu
fattn-vec-f16.cuh
fattn-vec-f32.cuh
fattn-wmma-f16.cuh
fattn.cu
getrows.cu
mma.cuh
mmq.cu
mmq.cuh
mmvq.cu
norm.cu
quantize.cu
quantize.cuh
rope.cu
sumrows.cu
sumrows.cuh
template-instances
generate_cu_files.py
mmq-instance-iq1_s.cu
mmq-instance-iq2_s.cu
mmq-instance-iq2_xs.cu
mmq-instance-iq2_xxs.cu
mmq-instance-iq3_s.cu
mmq-instance-iq3_xxs.cu
mmq-instance-iq4_nl.cu
mmq-instance-iq4_xs.cu
unary.cu
unary.cuh
vecdotq.cuh
vendors
cuda.h
hip.h
musa.h
ggml-impl.h
ggml-kompute.cpp
ggml-metal.m
ggml-metal.metal
ggml-quants.c
ggml-quants.h
ggml-rpc.cpp
ggml-sycl.cpp
ggml-sycl
backend.hpp
common.cpp
common.hpp
concat.cpp
concat.hpp
conv.cpp
conv.hpp
convert.cpp
convert.hpp
dequantize.hpp
dmmv.cpp
dpct
helper.hpp
gemm.hpp
im2col.cpp
im2col.hpp
mmq.cpp
mmvq.cpp
norm.cpp
presets.hpp
rope.cpp
softmax.cpp
softmax.hpp
tsembd.cpp
tsembd.hpp
ggml-vulkan-shaders.hpp
ggml-vulkan.cpp
ggml.c
kompute-shaders
op_rope_f16.comp
op_rope_f32.comp
rope_common.comp
llamafile
sgemm.cpp
sgemm.h
vulkan-shaders
CMakeLists.txt
acc.comp
add.comp
clamp.comp
concat.comp
copy.comp
cos.comp
dequant_funcs.comp
dequant_iq4_nl.comp
dequant_q4_0.comp
div.comp
gelu.comp
gelu_quick.comp
generic_binary_head.comp
generic_unary_head.comp
group_norm.comp
im2col.comp
leaky_relu.comp
mul.comp
mul_mat_vec.comp
mul_mat_vec_nc.comp
mul_mat_vec_p021.comp
mul_mat_vec_q2_k.comp
mul_mat_vec_q3_k.comp
mul_mat_vec_q4_k.comp
mul_mat_vec_q5_k.comp
mul_mat_vec_q6_k.comp
mul_mm.comp
norm.comp
pad.comp
relu.comp
repeat.comp
rms_norm.comp
scale.comp
silu.comp
sin.comp
soft_max.comp
square.comp
sum_rows.comp
tanh.comp
timestep_embedding.comp
types.comp
upscale.comp
vulkan-shaders-gen.cpp
gguf-py
README.md
examples
writer.py
gguf
__init__.py
constants.py
gguf_reader.py
gguf_writer.py
lazy.py
metadata.py
quants.py
tensor_mapping.py
utility.py
pyproject.toml
scripts
__init__.py
gguf_convert_endian.py
gguf_dump.py
gguf_hash.py
gguf_new_metadata.py
gguf_set_metadata.py
tests
__init__.py
test_gguf.py
test_metadata.py
test_quants.py
grammars
README.md
include
llama.h
models
ggml-vocab-gpt2.gguf
ggml-vocab-stablelm.gguf
pyrightconfig.json
requirements.txt
requirements
requirements-all.txt
requirements-compare-llama-bench.txt
requirements-convert_hf_to_gguf.txt
requirements-convert_hf_to_gguf_update.txt
requirements-convert_legacy_llama.txt
requirements-convert_llama_ggml_to_gguf.txt
requirements-convert_lora_to_gguf.txt
requirements-pydantic.txt
requirements-test-tokenizer-random.txt
scripts
check-requirements.sh
compare-llama-bench.py
convert-gg.sh
gen-unicode-data.py
pod-llama.sh
sync-ggml-am.sh
sync-ggml.last
sync-ggml.sh
src
CMakeLists.txt
llama-grammar.cpp
llama-grammar.h
llama-impl.h
llama-sampling.cpp
llama-sampling.h
llama-vocab.cpp
llama-vocab.h
llama.cpp
unicode.cpp
unicode.h
tests
CMakeLists.txt
test-backend-ops.cpp
test-chat-template.cpp
test-double-float.cpp
test-grad0.cpp
test-grammar-integration.cpp
test-llama-grammar.cpp
test-lora-conversion-inference.sh
test-quantize-fns.cpp
test-quantize-perf.cpp
test-rope.cpp
test-sampling.cpp
test-tokenizer-0.cpp
test-tokenizer-1-bpe.cpp
test-tokenizer-1-spm.cpp
test-tokenizer-random.py