vllm
bcf3c823 - Merge branch 'main' into woosuk-jf

Comment changes are shown / Comment changes are hidden
Commit
37 days ago
Merge branch 'main' into woosuk-jf
References
Author
  • .buildkite
    • File
      test-pipeline.yaml
  • File
    .gitignore
  • docs
    • File
      Makefile
    • source
      • api
        • engine
          • File
            async_llm_engine.md
          • File
            index.md
          • File
            llm_engine.md
        • File
          inference_params.md
        • model
          • File
            adapters.md
          • File
            index.md
          • File
            interfaces.md
          • File
            interfaces_base.md
        • multimodal
          • File
            index.md
          • File
            inputs.md
          • File
            parse.md
          • File
            processing.md
          • File
            profiling.md
          • File
            registry.md
        • offline_inference
          • File
            index.md
          • File
            llm.md
          • File
            llm_inputs.md
        • File
          summary.md
      • File
        autodoc2_docstring_parser.py
      • File
        conf.py
      • design
        • File
          arch_overview.md
      • features
        • File
          compatibility_matrix.md
      • File
        index.md
      • models
        • File
          generative_models.md
        • File
          pooling_models.md
      • serving
        • File
          offline_inference.md
  • examples/offline_inference
    • File
      profiling.py
  • requirements
    • File
      docs.txt
  • tests
    • File
      conftest.py
    • tokenization
      • File
        test_get_eos.py
    • File
      utils.py
    • v1/core
      • File
        test_scheduler.py
  • vllm
    • attention/backends
      • mla
        • File
          common.py
      • File
        utils.py
    • compilation
      • File
        compiler_interface.py
    • File
      config.py
    • File
      connections.py
    • distributed/kv_transfer
      • File
        __init__.py
    • engine
      • File
        arg_utils.py
      • File
        async_llm_engine.py
      • File
        llm_engine.py
      • multiprocessing
        • File
          client.py
        • File
          engine.py
      • output_processor
        • File
          multi_step.py
        • File
          single_step.py
    • entrypoints
      • File
        llm.py
      • openai
        • File
          protocol.py
        • File
          serving_engine.py
    • executor
      • File
        executor_base.py
    • inputs
      • File
        __init__.py
      • File
        data.py
      • File
        preprocess.py
      • File
        registry.py
    • File
      logger.py
    • lora/ops/triton_ops
      • File
        __init__.py
      • File
        lora_expand_op.py
      • File
        lora_shrink_op.py
    • model_executor
      • layers
        • File
          rejection_sampler.py
        • File
          sampler.py
        • File
          typical_acceptance_sampler.py
      • models
        • File
          blip2.py
        • File
          interfaces.py
        • File
          llava.py
        • File
          llava_next.py
        • File
          mistral3.py
        • File
          molmo.py
        • File
          phi4mm_utils.py
        • File
          pixtral.py
        • File
          qwen_vl.py
        • File
          registry.py
        • File
          utils.py
    • multimodal
      • File
        __init__.py
      • File
        base.py
      • File
        inputs.py
      • File
        parse.py
      • File
        processing.py
      • File
        profiling.py
      • File
        registry.py
      • File
        utils.py
    • platforms
      • File
        cpu.py
      • File
        cuda.py
      • File
        interface.py
    • profiler
      • File
        __init__.py
    • File
      sequence.py
    • spec_decode
      • File
        smaller_tp_proposer_worker.py
    • transformers_utils
      • configs
        • File
          dbrx.py
        • File
          exaone.py
      • File
        tokenizer.py
    • File
      utils.py
    • v1
      • attention/backends/mla
        • File
          common.py
      • core
        • File
          kv_cache_manager.py
      • engine
        • File
          output_processor.py
      • sample
        • File
          rejection_sampler.py
      • worker
        • File
          gpu_worker.py
        • File
          utils.py
    • worker
      • File
        hpu_worker.py
      • File
        multi_step_model_runner.py
      • File
        worker.py
      • File
        xpu_worker.py