vllm
bcf3c823
- Merge branch 'main' into woosuk-jf
Go
Login via GitHub
Home
Pricing
FAQ
Install
Login
via GitHub
Commit
View On
GitHub
Hide Minimap (CTRL+M)
Commit
37 days ago
Merge branch 'main' into woosuk-jf
References
woosuk-jf
Author
WoosukKwon
Parents
a01af39a
2858830c
Files
102
.buildkite
test-pipeline.yaml
.gitignore
docs
Makefile
source
api
engine
async_llm_engine.md
index.md
llm_engine.md
inference_params.md
model
adapters.md
index.md
interfaces.md
interfaces_base.md
multimodal
index.md
inputs.md
parse.md
processing.md
profiling.md
registry.md
offline_inference
index.md
llm.md
llm_inputs.md
summary.md
autodoc2_docstring_parser.py
conf.py
design
arch_overview.md
features
compatibility_matrix.md
index.md
models
generative_models.md
pooling_models.md
serving
offline_inference.md
examples/offline_inference
profiling.py
requirements
docs.txt
tests
conftest.py
tokenization
test_get_eos.py
utils.py
v1/core
test_scheduler.py
vllm
attention/backends
mla
common.py
utils.py
compilation
compiler_interface.py
config.py
connections.py
distributed/kv_transfer
__init__.py
engine
arg_utils.py
async_llm_engine.py
llm_engine.py
multiprocessing
client.py
engine.py
output_processor
multi_step.py
single_step.py
entrypoints
llm.py
openai
protocol.py
serving_engine.py
executor
executor_base.py
inputs
__init__.py
data.py
preprocess.py
registry.py
logger.py
lora/ops/triton_ops
__init__.py
lora_expand_op.py
lora_shrink_op.py
model_executor
layers
rejection_sampler.py
sampler.py
typical_acceptance_sampler.py
models
blip2.py
interfaces.py
llava.py
llava_next.py
mistral3.py
molmo.py
phi4mm_utils.py
pixtral.py
qwen_vl.py
registry.py
utils.py
multimodal
__init__.py
base.py
inputs.py
parse.py
processing.py
profiling.py
registry.py
utils.py
platforms
cpu.py
cuda.py
interface.py
profiler
__init__.py
sequence.py
spec_decode
smaller_tp_proposer_worker.py
transformers_utils
configs
dbrx.py
exaone.py
tokenizer.py
utils.py
v1
attention/backends/mla
common.py
core
kv_cache_manager.py
engine
output_processor.py
sample
rejection_sampler.py
worker
gpu_worker.py
utils.py
worker
hpu_worker.py
multi_step_model_runner.py
worker.py
xpu_worker.py
Loading