
Commit 9f43e3d

Merge branch 'main' into pd_disagg/feat

2 parents: c7e389d + 42e843f

File tree: 12 files changed, +382 −8 lines

README.md

Lines changed: 2 additions & 2 deletions

@@ -62,10 +62,10 @@ kubectl apply -k config/default
 Install stable distribution
 ```shell
 # Install component dependencies
-kubectl apply -f "https://github.com/vllm-project/aibrix/releases/download/v0.4.0/aibrix-dependency-v0.4.0.yaml" --server-side
+kubectl apply -f "https://github.com/vllm-project/aibrix/releases/download/v0.5.0/aibrix-dependency-v0.5.0.yaml" --server-side

 # Install aibrix components
-kubectl apply -f "https://github.com/vllm-project/aibrix/releases/download/v0.4.0/aibrix-core-v0.4.0.yaml"
+kubectl apply -f "https://github.com/vllm-project/aibrix/releases/download/v0.5.0/aibrix-core-v0.5.0.yaml"
 ```

 ## Documentation
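After applying the updated v0.5.0 manifests, a quick way to confirm the components came up is to list the control-plane pods — a minimal check, assuming the default `aibrix-system` namespace:

```bash
# Verify the AIBrix control-plane components after installation
# (namespace assumed to be the default aibrix-system).
kubectl get pods -n aibrix-system
```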

build/container/Dockerfile.sglang (new file)

Lines changed: 47 additions & 0 deletions

ARG SGLANG_VERSION=v0.5.5.post3

# Extract torch version from the SGLang base image
FROM lmsysorg/sglang:${SGLANG_VERSION} AS torch-version
ARG PYTHON_BIN=python3
RUN ${PYTHON_BIN} -m pip show torch | awk '/Version/{print $2}' | sed 's/+.*$//' > /torch_version.txt

# Builder stage
FROM docker.io/pytorch/manylinux2_28-builder:cuda12.1 AS builder

ARG AIBRIX_REPO=https://github.com/vllm-project/aibrix
ARG AIBRIX_BRANCH=v0.5.0
ARG PYTHON_BIN=python

ENV PATH="/opt/python/cp312-cp312/bin:${PATH}"

# checkout codebase
RUN git clone ${AIBRIX_REPO} /tmp/aibrix && cd /tmp/aibrix && git checkout ${AIBRIX_BRANCH}

# install dependencies
# use the same torch version as sglang
COPY --from=torch-version /torch_version.txt /tmp/torch_version.txt
RUN --mount=type=cache,target=/root/.cache/pip ${PYTHON_BIN} -m pip install torch==$(cat /tmp/torch_version.txt) && rm /tmp/torch_version.txt
RUN --mount=type=cache,target=/root/.cache/pip cd /tmp/aibrix && \
    ${PYTHON_BIN} -m pip install -r python/aibrix_kvcache/requirements/build.txt -r python/aibrix_kvcache/requirements/core.txt

# build aibrix_kvcache
RUN cd /tmp/aibrix && \
    ${PYTHON_BIN} -m build python/aibrix_kvcache --wheel --outdir=python/aibrix_kvcache/dist --no-isolation

# Runtime stage
FROM lmsysorg/sglang:${SGLANG_VERSION} AS sglang

ARG PYTHON_BIN=python3
ARG NIXL_VERSION=0.7.1

COPY --from=builder /tmp/aibrix /tmp/aibrix

RUN ${PYTHON_BIN} -m pip uninstall -y aibrix_kvcache && \
    ${PYTHON_BIN} -m pip install /tmp/aibrix/python/aibrix_kvcache/dist/*.whl

RUN rm -rf /tmp/aibrix

RUN pip install nixl==${NIXL_VERSION} nixl-cu12==${NIXL_VERSION}
RUN apt install iproute2 perftest ucx-utils iputils-ping net-tools -y

WORKDIR /sgl-workspace/sglang
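Because the builder stage compiles `aibrix_kvcache` against the exact torch release shipped in the SGLang base image, it can help to probe that version up front — a minimal sketch reusing the same pipeline as the `torch-version` stage (local Docker access to the upstream image is assumed):

```bash
# Print the torch version baked into the SGLang base image,
# stripped of any local build suffix (e.g. "+cu129").
docker run --rm --entrypoint python3 lmsysorg/sglang:v0.5.5.post3 \
  -m pip show torch | awk '/Version/{print $2}' | sed 's/+.*$//'
# For this release line the compatibility table below expects a 2.9.x result.
```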

build/container/Dockerfile.vllm (new file)

Lines changed: 53 additions & 0 deletions

ARG VLLM_VERSION=v0.10.2

# Extract torch version from the vLLM base image
FROM vllm/vllm-openai:${VLLM_VERSION} AS torch-version
ARG PYTHON_BIN=python3
RUN ${PYTHON_BIN} -m pip show torch | awk '/Version/{print $2}' | sed 's/+.*$//' > /torch_version.txt

# Builder stage
FROM docker.io/pytorch/manylinux2_28-builder:cuda12.1 AS builder

ARG AIBRIX_REPO=https://github.com/vllm-project/aibrix
ARG AIBRIX_BRANCH=v0.5.0
ARG PYTHON_BIN=python

ENV PATH="/opt/python/cp312-cp312/bin:${PATH}"

# checkout codebase
RUN git clone ${AIBRIX_REPO} /tmp/aibrix && cd /tmp/aibrix && git checkout ${AIBRIX_BRANCH}

# install dependencies
# use the same torch version as vllm
COPY --from=torch-version /torch_version.txt /tmp/torch_version.txt
RUN --mount=type=cache,target=/root/.cache/pip ${PYTHON_BIN} -m pip install torch==$(cat /tmp/torch_version.txt) && rm /tmp/torch_version.txt
RUN --mount=type=cache,target=/root/.cache/pip cd /tmp/aibrix && \
    ${PYTHON_BIN} -m pip install -r python/aibrix_kvcache/requirements/build.txt -r python/aibrix_kvcache/requirements/core.txt

# build aibrix_kvcache
RUN cd /tmp/aibrix && \
    ${PYTHON_BIN} -m build python/aibrix_kvcache --wheel --outdir=python/aibrix_kvcache/dist --no-isolation

# Runtime stage
FROM vllm/vllm-openai:${VLLM_VERSION} AS vllm-openai

ARG PYTHON_BIN=python3
ARG VLLM_VERSION=v0.10.2
ARG NIXL_VERSION=0.7.1

COPY --from=builder /tmp/aibrix /tmp/aibrix

RUN ${PYTHON_BIN} -m pip uninstall -y aibrix_kvcache && \
    ${PYTHON_BIN} -m pip install /tmp/aibrix/python/aibrix_kvcache/dist/*.whl

# apply patch to vLLM
RUN DIST_DIR=$(${PYTHON_BIN} -m pip show vllm | grep "Location:" | awk '{print $2}') && \
    cd $DIST_DIR && \
    patch -p 1 -l -i /tmp/aibrix/python/aibrix_kvcache/integration/vllm/patches/vllm_${VLLM_VERSION}-aibrix-kvcache.patch

RUN rm -rf /tmp/aibrix

RUN pip install nixl==${NIXL_VERSION} nixl-cu12==${NIXL_VERSION}
RUN apt install iproute2 perftest ucx-utils iputils-ping net-tools -y

ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
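Since the runtime stage keeps the upstream `vllm.entrypoints.openai.api_server` entrypoint, container arguments are passed straight to the API server. A rough smoke-test sketch of the resulting image — the model name, port mapping, and tag date are illustrative assumptions, not part of this commit:

```bash
# Launch the patched image; any flags after the image name go to the API server.
docker run --rm --gpus all -p 8000:8000 \
  aibrix/vllm-openai:v0.10.2-aibrix-v0.5.0-nixl-0.7.1-20251123 \
  --model Qwen/Qwen2.5-1.5B-Instruct

# From another shell: the OpenAI-compatible endpoint should list the served model.
curl -s http://localhost:8000/v1/models
```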

build/container/README.md (new file)

Lines changed: 96 additions & 0 deletions

# AIBrix KVCache-Enabled vLLM & SGLang Images

This directory contains Dockerfiles that build **vLLM** and **SGLang** images
enhanced with AIBrix capabilities:

- **aibrix_kvcache** - Built from source for KV cache disaggregation
- **nixl + nixl-cu12** - UCX-based high-performance networking libraries
- **UCX tooling** - Pre-installed debugging and performance testing utilities

## Image Naming Convention

**Upstream vs. AIBrix Images:**

| Upstream Image | AIBrix Enhanced Image | Description |
|----------------|-----------------------|-------------|
| `vllm/vllm-openai:v0.10.2` | `aibrix/vllm-openai:v0.10.2-aibrix-v0.5.0-nixl-0.7.1-20251123` | vLLM base + AIBrix KVCache + UCX/NIXL networking |
| `lmsysorg/sglang:v0.5.5.post3` | `aibrix/sglang:v0.5.5.post3-aibrix-v0.5.0-nixl-0.7.1-20251123` | SGLang base + AIBrix KVCache + UCX/NIXL networking |

**AIBrix images** extend upstream inference engines with:

- Distributed KV cache support via `aibrix_kvcache`
- RDMA-capable networking through NIXL/UCX for disaggregated inference
- Compatible torch versions automatically derived from base images

## Compatibility Matrix

Default build arguments produce the following component versions:

| Component | vLLM Image | SGLang Image |
|-----------|------------|--------------|
| Engine version | v0.10.2 | v0.5.5.post3 |
| Torch version | 2.8 | 2.9 |
| aibrix_kvcache | v0.5.0 | v0.5.0 |
| NIXL / CUDA plugin | 0.7.1 | 0.7.1 |
| UCX | 1.19.0 | 1.19.0 |

**Version Compatibility:**

- Torch version is automatically extracted from the upstream base image to ensure compatibility
- AIBrix KVCache is built against the exact torch version from the base image
- NIXL and UCX versions are pinned for stable RDMA networking
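To confirm that a built image really carries the versions listed above, the pinned components can be inspected inside the container — a minimal sketch, with the image tag taken from the naming convention above:

```bash
IMAGE=aibrix/vllm-openai:v0.10.2-aibrix-v0.5.0-nixl-0.7.1-20251123

# Report the torch, aibrix_kvcache, and NIXL versions installed in the image.
docker run --rm --entrypoint python3 "$IMAGE" \
  -m pip show torch aibrix_kvcache nixl | grep -E '^(Name|Version)'

# Report the bundled UCX version.
docker run --rm --entrypoint ucx_info "$IMAGE" -v
```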
## Building the Images

### vLLM Image

```bash
docker build \
  -f Dockerfile.vllm \
  --build-arg VLLM_VERSION=v0.10.2 \
  --build-arg AIBRIX_BRANCH=v0.5.0 \
  --build-arg NIXL_VERSION=0.7.1 \
  -t aibrix/vllm-openai:v0.10.2-aibrix-v0.5.0-nixl-0.7.1-$(date +'%Y%m%d') \
  .
```

### SGLang Image

```bash
docker build \
  -f Dockerfile.sglang \
  --build-arg SGLANG_VERSION=v0.5.5.post3 \
  --build-arg AIBRIX_BRANCH=v0.5.0 \
  --build-arg NIXL_VERSION=0.7.1 \
  -t aibrix/sglang:v0.5.5.post3-aibrix-v0.5.0-nixl-0.7.1-$(date +'%Y%m%d') \
  .
```

### Build Arguments

All build arguments are optional and have sensible defaults:

| Argument | Default | Description |
|----------|---------|-------------|
| `VLLM_VERSION` | `v0.10.2` | vLLM upstream version to use as base |
| `SGLANG_VERSION` | `v0.5.5.post3` | SGLang upstream version to use as base |
| `AIBRIX_BRANCH` | `v0.5.0` | AIBrix release tag or branch to build from |
| `NIXL_VERSION` | `0.7.1` | NIXL networking library version |
| `AIBRIX_REPO` | `https://github.com/vllm-project/aibrix` | AIBrix repository URL |
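Because `AIBRIX_REPO` and `AIBRIX_BRANCH` are ordinary build arguments, the same Dockerfiles can also build images from a fork or an in-development branch — a sketch in which the fork URL, registry, and tag are placeholders:

```bash
# Build against a hypothetical fork and its main branch instead of the release tag.
docker build \
  -f Dockerfile.vllm \
  --build-arg AIBRIX_REPO=https://github.com/<your-fork>/aibrix \
  --build-arg AIBRIX_BRANCH=main \
  -t <your-registry>/vllm-openai:v0.10.2-aibrix-dev-$(date +'%Y%m%d') \
  .
```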
## Release History

AIBrix maintains stable image releases with tested component combinations:

### v0.5.0 (Current)

| Component | vLLM | SGLang | Notes |
|----------------|---------|--------------|---------------------------------|
| Engine | v0.10.2 | v0.5.5.post3 | Stable inference engines |
| CUDA | 12.8 | 12.9 | CUDA Version |
| Torch | 2.8 | 2.9 | PyTorch Version |
| AIBrix KVCache | v0.5.0 | v0.5.0 | KV cache disaggregation support |
| NIXL | 0.7.1 | 0.7.1 | UCX-based RDMA networking |
| UCX | 1.19.0 | 1.19.0 | Pre-installed for debugging |

**Recommended Tags:**

- `aibrix/vllm-openai:v0.10.2-aibrix-v0.5.0-nixl-0.7.1-20251123`
- `aibrix/sglang:v0.5.5.post3-aibrix-v0.5.0-nixl-0.7.1-20251123`
docs/source/getting_started/container-images.rst (new file)

Lines changed: 149 additions & 0 deletions

.. _container-images:

=======================
AIBrix Container Images
=======================

Overview
--------

AIBrix provides enhanced container images for **vLLM** and **SGLang** that include additional capabilities for distributed inference and KV cache disaggregation:

- **aibrix_kvcache** - Built from source for KV cache disaggregation support
- **nixl + nixl-cu12** - UCX-based high-performance networking libraries for RDMA
- **UCX tooling** - Pre-installed debugging and performance testing utilities

Image Naming Convention
-----------------------

AIBrix images extend upstream inference engines with additional capabilities:

.. list-table:: Upstream vs. AIBrix Images
   :header-rows: 1
   :widths: 40 40 20

   * - Upstream Image
     - AIBrix Enhanced Image
     - Use Case
   * - ``vllm/vllm-openai:v0.10.2``
     - ``aibrix/vllm-openai:v0.10.2-aibrix-v0.5.0-nixl-0.7.1-20251123``
     - vLLM + KVCache + RDMA
   * - ``lmsysorg/sglang:v0.5.5.post3``
     - ``aibrix/sglang:v0.5.5.post3-aibrix-v0.5.0-nixl-0.7.1-20251123``
     - SGLang + KVCache + RDMA

When to Use AIBrix Images
-------------------------

Use **AIBrix-enhanced images** when you need:

- **KV Cache Offloading**: offload KV cache to host memory or remote storage
- **Prefill-Decode Disaggregation**: separate prefill and decode workloads via NIXL

Use **upstream images** for:

- Standard single-node inference without disaggregation
- Development and testing without specialized networking

Compatibility Matrix
--------------------

The following table shows tested component versions for AIBrix v0.5.0:

.. list-table:: Component Versions
   :header-rows: 1
   :widths: 25 20 20 35

   * - Component
     - vLLM Image
     - SGLang Image
     - Notes
   * - Engine Version
     - v0.10.2
     - v0.5.5.post3
     - Stable inference engines
   * - CUDA Version
     - 12.8
     - 12.9
     - CUDA toolkit version
   * - PyTorch Version
     - 2.8
     - 2.9
     - Auto-detected from base image
   * - AIBrix KVCache
     - v0.5.0
     - v0.5.0
     - KV cache disaggregation support
   * - NIXL Version
     - 0.7.1
     - 0.7.1
     - UCX-based RDMA networking
   * - UCX Version
     - 1.19.0
     - 1.19.0
     - Unified Communication X

.. note::
   PyTorch version is automatically extracted from the upstream base image to ensure compatibility.
   AIBrix KVCache is built against the exact PyTorch version from the base image.

Released Images (v0.5.0)
------------------------

The following pre-built images are available for immediate use:

**vLLM Image:**

.. code-block:: bash

   docker pull aibrix/vllm-openai:v0.10.2-aibrix-v0.5.0-nixl-0.7.1-20251123

**SGLang Image:**

.. code-block:: bash

   docker pull aibrix/sglang:v0.5.5.post3-aibrix-v0.5.0-nixl-0.7.1-20251123

Building Custom Images
----------------------

For detailed build instructions and troubleshooting, see `build/container/README.md <https://github.com/vllm-project/aibrix/blob/main/build/container/README.md>`_.

Version History
---------------

v0.5.0
~~~~~~

- **vLLM**: v0.10.2 with CUDA 12.8, PyTorch 2.8
- **SGLang**: v0.5.5.post3 with CUDA 12.9, PyTorch 2.9
- **AIBrix KVCache**: v0.5.0
- **NIXL**: 0.7.1
- **UCX**: 1.19.0

Features:

- Full KV cache offloading support
- RDMA networking for distributed inference
- Prefill-Decode disaggregation support

Troubleshooting
---------------

Performance Issues
~~~~~~~~~~~~~~~~~~

For RDMA networking issues:

1. Verify RDMA devices are available: ``ibv_devices``
2. Check UCX configuration: ``ucx_info -d``
3. Test RDMA bandwidth: ``ib_write_bw``
4. Ensure security policies allow RDMA access

For debugging utilities included in the image, run:

.. code-block:: bash

   kubectl exec -it <pod-name> -- ucx_info -d
   kubectl exec -it <pod-name> -- ibv_devices
   kubectl exec -it <pod-name> -- ib_write_bw
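Since `perftest` is installed in these images, the `ib_write_bw` check from step 3 can also be run pairwise between two pods to validate the actual RDMA path used for disaggregation — a rough sketch in which the pod names and server IP are placeholders:

```bash
# Hypothetical pairwise bandwidth test between a prefill pod and a decode pod.
# Start the server side first, then (from a second shell) point the client at it.
kubectl exec -it <prefill-pod> -- ib_write_bw
kubectl exec -it <decode-pod>  -- ib_write_bw <prefill-pod-ip>
```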

docs/source/getting_started/quickstart.rst

Lines changed: 1 addition & 1 deletion

@@ -57,7 +57,7 @@ Save yaml as `pd-model.yaml` and run `kubectl apply -f pd-model.yaml`.

 .. note::
-   We use a custom vLLM image with NIXL support. For detailed information about the image build process, see: [aibrix vllm pd-image build](https://github.com/vllm-project/aibrix/blob/main/samples/disaggregation/vllm/README.md)
+   We use an AIBrix-enhanced vLLM image with KVCache and NIXL support for disaggregated inference. For detailed information about available images, compatibility, and build instructions, see :ref:`container-images`.

 Invoke the model endpoint using gateway API
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

docs/source/index.rst

Lines changed: 1 addition & 1 deletion

@@ -29,7 +29,7 @@ Documentation
    :caption: Getting Started

    getting_started/quickstart.rst
-   getting_started/advanced-k8s-examples.rst
+   getting_started/container-images.rst
    getting_started/installation/installation.rst
    getting_started/faq.rst

pkg/controller/podset/podset_controller.go

Lines changed: 1 addition & 1 deletion

@@ -359,7 +359,7 @@ func (r *PodSetReconciler) createPodFromTemplate(podSet *orchestrationv1alpha1.P
 	}

 	// Set pod name
-	pod.Name = fmt.Sprintf("%s-%d", podSet.Name, podIndex)
+	pod.Name = utils.Shorten(fmt.Sprintf("%s-%d", podSet.Name, podIndex), false, false)
 	pod.Namespace = podSet.Namespace

 	// Add labels
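The switch to `utils.Shorten` presumably keeps the generated `<podset>-<index>` names within Kubernetes object-name limits (pod names are capped at 253 characters, with 63 recommended for hostname compatibility). A rough illustration of the constraint being guarded against — not the controller's actual logic:

```bash
# Hypothetical check: the API server rejects pod names longer than 253 characters.
LONG_NAME=$(printf 'a%.0s' {1..260})
kubectl run "${LONG_NAME}" --image=busybox --dry-run=server
# Expected: an "Invalid value" error stating the name must be no more than 253 characters.
```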
