
Commit 9f43e3d

Merge branch 'main' into pd_disagg/feat

2 parents: c7e389d + 42e843f

File tree: 12 files changed, +382 −8 lines

README.md

Lines changed: 2 additions & 2 deletions

@@ -62,10 +62,10 @@ kubectl apply -k config/default
 Install stable distribution
 ```shell
 # Install component dependencies
-kubectl apply -f "https://github.com/vllm-project/aibrix/releases/download/v0.4.0/aibrix-dependency-v0.4.0.yaml" --server-side
+kubectl apply -f "https://github.com/vllm-project/aibrix/releases/download/v0.5.0/aibrix-dependency-v0.5.0.yaml" --server-side

 # Install aibrix components
-kubectl apply -f "https://github.com/vllm-project/aibrix/releases/download/v0.4.0/aibrix-core-v0.4.0.yaml"
+kubectl apply -f "https://github.com/vllm-project/aibrix/releases/download/v0.5.0/aibrix-core-v0.5.0.yaml"
 ```

 ## Documentation
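After applying the updated v0.5.0 manifests, a quick way to confirm the components came up is to list the control-plane pods — a minimal check, assuming the default `aibrix-system` namespace:

```bash
# Verify the AIBrix control-plane components after installation
# (namespace assumed to be the default aibrix-system).
kubectl get pods -n aibrix-system
```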

build/container/Dockerfile.sglang (new file)

Lines changed: 47 additions & 0 deletions

ARG SGLANG_VERSION=v0.5.5.post3

# Extract torch version from the SGLang base image
FROM lmsysorg/sglang:${SGLANG_VERSION} AS torch-version
ARG PYTHON_BIN=python3
RUN ${PYTHON_BIN} -m pip show torch | awk '/Version/{print $2}' | sed 's/+.*$//' > /torch_version.txt

# Builder stage
FROM docker.io/pytorch/manylinux2_28-builder:cuda12.1 AS builder

ARG AIBRIX_REPO=https://github.com/vllm-project/aibrix
ARG AIBRIX_BRANCH=v0.5.0
ARG PYTHON_BIN=python

ENV PATH="/opt/python/cp312-cp312/bin:${PATH}"

# checkout codebase
RUN git clone ${AIBRIX_REPO} /tmp/aibrix && cd /tmp/aibrix && git checkout ${AIBRIX_BRANCH}

# install dependencies
# use the same torch version as sglang
COPY --from=torch-version /torch_version.txt /tmp/torch_version.txt
RUN --mount=type=cache,target=/root/.cache/pip ${PYTHON_BIN} -m pip install torch==$(cat /tmp/torch_version.txt) && rm /tmp/torch_version.txt
RUN --mount=type=cache,target=/root/.cache/pip cd /tmp/aibrix && \
    ${PYTHON_BIN} -m pip install -r python/aibrix_kvcache/requirements/build.txt -r python/aibrix_kvcache/requirements/core.txt

# build aibrix_kvcache
RUN cd /tmp/aibrix && \
    ${PYTHON_BIN} -m build python/aibrix_kvcache --wheel --outdir=python/aibrix_kvcache/dist --no-isolation

# Runtime stage
FROM lmsysorg/sglang:${SGLANG_VERSION} AS sglang

ARG PYTHON_BIN=python3
ARG NIXL_VERSION=0.7.1

COPY --from=builder /tmp/aibrix /tmp/aibrix

RUN ${PYTHON_BIN} -m pip uninstall -y aibrix_kvcache && \
    ${PYTHON_BIN} -m pip install /tmp/aibrix/python/aibrix_kvcache/dist/*.whl

RUN rm -rf /tmp/aibrix

RUN pip install nixl==${NIXL_VERSION} nixl-cu12==${NIXL_VERSION}
RUN apt install iproute2 perftest ucx-utils iputils-ping net-tools -y

WORKDIR /sgl-workspace/sglang
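Because the builder stage compiles `aibrix_kvcache` against the exact torch release shipped in the SGLang base image, it can help to probe that version up front — a minimal sketch reusing the same pipeline as the `torch-version` stage (local Docker access to the upstream image is assumed):

```bash
# Print the torch version baked into the SGLang base image,
# stripped of any local build suffix (e.g. "+cu129").
docker run --rm --entrypoint python3 lmsysorg/sglang:v0.5.5.post3 \
  -m pip show torch | awk '/Version/{print $2}' | sed 's/+.*$//'
# For this release line the compatibility table below expects a 2.9.x result.
```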

build/container/Dockerfile.vllm (new file)

Lines changed: 53 additions & 0 deletions

ARG VLLM_VERSION=v0.10.2

# Extract torch version from the vLLM base image
FROM vllm/vllm-openai:${VLLM_VERSION} AS torch-version
ARG PYTHON_BIN=python3
RUN ${PYTHON_BIN} -m pip show torch | awk '/Version/{print $2}' | sed 's/+.*$//' > /torch_version.txt

# Builder stage
FROM docker.io/pytorch/manylinux2_28-builder:cuda12.1 AS builder

ARG AIBRIX_REPO=https://github.com/vllm-project/aibrix
ARG AIBRIX_BRANCH=v0.5.0
ARG PYTHON_BIN=python

ENV PATH="/opt/python/cp312-cp312/bin:${PATH}"

# checkout codebase
RUN git clone ${AIBRIX_REPO} /tmp/aibrix && cd /tmp/aibrix && git checkout ${AIBRIX_BRANCH}

# install dependencies
# use the same torch version as vllm
COPY --from=torch-version /torch_version.txt /tmp/torch_version.txt
RUN --mount=type=cache,target=/root/.cache/pip ${PYTHON_BIN} -m pip install torch==$(cat /tmp/torch_version.txt) && rm /tmp/torch_version.txt
RUN --mount=type=cache,target=/root/.cache/pip cd /tmp/aibrix && \
    ${PYTHON_BIN} -m pip install -r python/aibrix_kvcache/requirements/build.txt -r python/aibrix_kvcache/requirements/core.txt

# build aibrix_kvcache
RUN cd /tmp/aibrix && \
    ${PYTHON_BIN} -m build python/aibrix_kvcache --wheel --outdir=python/aibrix_kvcache/dist --no-isolation

# Runtime stage
FROM vllm/vllm-openai:${VLLM_VERSION} AS vllm-openai

ARG PYTHON_BIN=python3
ARG VLLM_VERSION=v0.10.2
ARG NIXL_VERSION=0.7.1

COPY --from=builder /tmp/aibrix /tmp/aibrix

RUN ${PYTHON_BIN} -m pip uninstall -y aibrix_kvcache && \
    ${PYTHON_BIN} -m pip install /tmp/aibrix/python/aibrix_kvcache/dist/*.whl

# apply patch to vLLM
RUN DIST_DIR=$(${PYTHON_BIN} -m pip show vllm | grep "Location:" | awk '{print $2}') && \
    cd $DIST_DIR && \
    patch -p 1 -l -i /tmp/aibrix/python/aibrix_kvcache/integration/vllm/patches/vllm_${VLLM_VERSION}-aibrix-kvcache.patch

RUN rm -rf /tmp/aibrix

RUN pip install nixl==${NIXL_VERSION} nixl-cu12==${NIXL_VERSION}
RUN apt install iproute2 perftest ucx-utils iputils-ping net-tools -y

ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
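Since the runtime stage keeps the upstream `vllm.entrypoints.openai.api_server` entrypoint, container arguments are passed straight to the API server. A rough smoke-test sketch of the resulting image — the model name, port mapping, and tag date are illustrative assumptions, not part of this commit:

```bash
# Launch the patched image; any flags after the image name go to the API server.
docker run --rm --gpus all -p 8000:8000 \
  aibrix/vllm-openai:v0.10.2-aibrix-v0.5.0-nixl-0.7.1-20251123 \
  --model Qwen/Qwen2.5-1.5B-Instruct

# From another shell: the OpenAI-compatible endpoint should list the served model.
curl -s http://localhost:8000/v1/models
```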

build/container/README.md (new file)

Lines changed: 96 additions & 0 deletions

# AIBrix KVCache-Enabled vLLM & SGLang Images

This directory contains Dockerfiles that build **vLLM** and **SGLang** images
enhanced with AIBrix capabilities:

- **aibrix_kvcache** - Built from source for KV cache disaggregation
- **nixl + nixl-cu12** - UCX-based high-performance networking libraries
- **UCX tooling** - Pre-installed debugging and performance testing utilities

## Image Naming Convention

**Upstream vs. AIBrix Images:**

| Upstream Image | AIBrix Enhanced Image | Description |
|----------------|-----------------------|-------------|
| `vllm/vllm-openai:v0.10.2` | `aibrix/vllm-openai:v0.10.2-aibrix-v0.5.0-nixl-0.7.1-20251123` | vLLM base + AIBrix KVCache + UCX/NIXL networking |
| `lmsysorg/sglang:v0.5.5.post3` | `aibrix/sglang:v0.5.5.post3-aibrix-v0.5.0-nixl-0.7.1-20251123` | SGLang base + AIBrix KVCache + UCX/NIXL networking |

**AIBrix images** extend upstream inference engines with:

- Distributed KV cache support via `aibrix_kvcache`
- RDMA-capable networking through NIXL/UCX for disaggregated inference
- Compatible torch versions automatically derived from base images

## Compatibility Matrix

Default build arguments produce the following component versions:

| Component | vLLM Image | SGLang Image |
|-----------|------------|--------------|
| Engine version | v0.10.2 | v0.5.5.post3 |
| Torch version | 2.8 | 2.9 |
| aibrix_kvcache | v0.5.0 | v0.5.0 |
| NIXL / CUDA plugin | 0.7.1 | 0.7.1 |
| UCX | 1.19.0 | 1.19.0 |

**Version Compatibility:**

- Torch version is automatically extracted from the upstream base image to ensure compatibility
- AIBrix KVCache is built against the exact torch version from the base image
- NIXL and UCX versions are pinned for stable RDMA networking
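To confirm that a built image really carries the versions listed above, the pinned components can be inspected inside the container — a minimal sketch, with the image tag taken from the naming convention above:

```bash
IMAGE=aibrix/vllm-openai:v0.10.2-aibrix-v0.5.0-nixl-0.7.1-20251123

# Report the torch, aibrix_kvcache, and NIXL versions installed in the image.
docker run --rm --entrypoint python3 "$IMAGE" \
  -m pip show torch aibrix_kvcache nixl | grep -E '^(Name|Version)'

# Report the bundled UCX version.
docker run --rm --entrypoint ucx_info "$IMAGE" -v
```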
## Building the Images

### vLLM Image

```bash
docker build \
  -f Dockerfile.vllm \
  --build-arg VLLM_VERSION=v0.10.2 \
  --build-arg AIBRIX_BRANCH=v0.5.0 \
  --build-arg NIXL_VERSION=0.7.1 \
  -t aibrix/vllm-openai:v0.10.2-aibrix-v0.5.0-nixl-0.7.1-$(date +'%Y%m%d') \
  .
```

### SGLang Image

```bash
docker build \
  -f Dockerfile.sglang \
  --build-arg SGLANG_VERSION=v0.5.5.post3 \
  --build-arg AIBRIX_BRANCH=v0.5.0 \
  --build-arg NIXL_VERSION=0.7.1 \
  -t aibrix/sglang:v0.5.5.post3-aibrix-v0.5.0-nixl-0.7.1-$(date +'%Y%m%d') \
  .
```

### Build Arguments

All build arguments are optional and have sensible defaults:

| Argument | Default | Description |
|----------|---------|-------------|
| `VLLM_VERSION` | `v0.10.2` | vLLM upstream version to use as base |
| `SGLANG_VERSION` | `v0.5.5.post3` | SGLang upstream version to use as base |
| `AIBRIX_BRANCH` | `v0.5.0` | AIBrix release tag or branch to build from |
| `NIXL_VERSION` | `0.7.1` | NIXL networking library version |
| `AIBRIX_REPO` | `https://github.com/vllm-project/aibrix` | AIBrix repository URL |
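Because `AIBRIX_REPO` and `AIBRIX_BRANCH` are ordinary build arguments, the same Dockerfiles can also build images from a fork or an in-development branch — a sketch in which the fork URL, registry, and tag are placeholders:

```bash
# Build against a hypothetical fork and its main branch instead of the release tag.
docker build \
  -f Dockerfile.vllm \
  --build-arg AIBRIX_REPO=https://github.com/<your-fork>/aibrix \
  --build-arg AIBRIX_BRANCH=main \
  -t <your-registry>/vllm-openai:v0.10.2-aibrix-dev-$(date +'%Y%m%d') \
  .
```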
## Release History

AIBrix maintains stable image releases with tested component combinations:

### v0.5.0 (Current)

| Component | vLLM | SGLang | Notes |
|----------------|---------|--------------|---------------------------------|
| Engine | v0.10.2 | v0.5.5.post3 | Stable inference engines |
| CUDA | 12.8 | 12.9 | CUDA Version |
| Torch | 2.8 | 2.9 | PyTorch Version |
| AIBrix KVCache | v0.5.0 | v0.5.0 | KV cache disaggregation support |
| NIXL | 0.7.1 | 0.7.1 | UCX-based RDMA networking |
| UCX | 1.19.0 | 1.19.0 | Pre-installed for debugging |

**Recommended Tags:**

- `aibrix/vllm-openai:v0.10.2-aibrix-v0.5.0-nixl-0.7.1-20251123`
- `aibrix/sglang:v0.5.5.post3-aibrix-v0.5.0-nixl-0.7.1-20251123`
docs/source/getting_started/container-images.rst (new file)

Lines changed: 149 additions & 0 deletions

.. _container-images:

=======================
AIBrix Container Images
=======================

Overview
--------

AIBrix provides enhanced container images for **vLLM** and **SGLang** that include additional capabilities for distributed inference and KV cache disaggregation:

- **aibrix_kvcache** - Built from source for KV cache disaggregation support
- **nixl + nixl-cu12** - UCX-based high-performance networking libraries for RDMA
- **UCX tooling** - Pre-installed debugging and performance testing utilities

Image Naming Convention
-----------------------

AIBrix images extend upstream inference engines with additional capabilities:

.. list-table:: Upstream vs. AIBrix Images
   :header-rows: 1
   :widths: 40 40 20

   * - Upstream Image
     - AIBrix Enhanced Image
     - Use Case
   * - ``vllm/vllm-openai:v0.10.2``
     - ``aibrix/vllm-openai:v0.10.2-aibrix-v0.5.0-nixl-0.7.1-20251123``
     - vLLM + KVCache + RDMA
   * - ``lmsysorg/sglang:v0.5.5.post3``
     - ``aibrix/sglang:v0.5.5.post3-aibrix-v0.5.0-nixl-0.7.1-20251123``
     - SGLang + KVCache + RDMA

When to Use AIBrix Images
-------------------------

Use **AIBrix-enhanced images** when you need:

- **KV Cache Offloading**: offload KV cache to host memory or remote storage
- **Prefill-Decode Disaggregation**: separate prefill and decode workloads via NIXL

Use **upstream images** for:

- Standard single-node inference without disaggregation
- Development and testing without specialized networking

Compatibility Matrix
--------------------

The following table shows tested component versions for AIBrix v0.5.0:

.. list-table:: Component Versions
   :header-rows: 1
   :widths: 25 20 20 35

   * - Component
     - vLLM Image
     - SGLang Image
     - Notes
   * - Engine Version
     - v0.10.2
     - v0.5.5.post3
     - Stable inference engines
   * - CUDA Version
     - 12.8
     - 12.9
     - CUDA toolkit version
   * - PyTorch Version
     - 2.8
     - 2.9
     - Auto-detected from base image
   * - AIBrix KVCache
     - v0.5.0
     - v0.5.0
     - KV cache disaggregation support
   * - NIXL Version
     - 0.7.1
     - 0.7.1
     - UCX-based RDMA networking
   * - UCX Version
     - 1.19.0
     - 1.19.0
     - Unified Communication X

.. note::
   PyTorch version is automatically extracted from the upstream base image to ensure compatibility.
   AIBrix KVCache is built against the exact PyTorch version from the base image.

Released Images (v0.5.0)
------------------------

The following pre-built images are available for immediate use:

**vLLM Image:**

.. code-block:: bash

   docker pull aibrix/vllm-openai:v0.10.2-aibrix-v0.5.0-nixl-0.7.1-20251123

**SGLang Image:**

.. code-block:: bash

   docker pull aibrix/sglang:v0.5.5.post3-aibrix-v0.5.0-nixl-0.7.1-20251123

Building Custom Images
----------------------

For detailed build instructions and troubleshooting, see `build/container/README.md <https://github.com/vllm-project/aibrix/blob/main/build/container/README.md>`_.

Version History
---------------

v0.5.0
~~~~~~

- **vLLM**: v0.10.2 with CUDA 12.8, PyTorch 2.8
- **SGLang**: v0.5.5.post3 with CUDA 12.9, PyTorch 2.9
- **AIBrix KVCache**: v0.5.0
- **NIXL**: 0.7.1
- **UCX**: 1.19.0

Features:

- Full KV cache offloading support
- RDMA networking for distributed inference
- Prefill-Decode disaggregation support

Troubleshooting
---------------

Performance Issues
~~~~~~~~~~~~~~~~~~

For RDMA networking issues:

1. Verify RDMA devices are available: ``ibv_devices``
2. Check UCX configuration: ``ucx_info -d``
3. Test RDMA bandwidth: ``ib_write_bw``
4. Ensure security policies allow RDMA access

For debugging utilities included in the image, run:

.. code-block:: bash

   kubectl exec -it <pod-name> -- ucx_info -d
   kubectl exec -it <pod-name> -- ibv_devices
   kubectl exec -it <pod-name> -- ib_write_bw
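Since `perftest` is installed in these images, the `ib_write_bw` check from step 3 can also be run pairwise between two pods to validate the actual RDMA path used for disaggregation — a rough sketch in which the pod names and server IP are placeholders:

```bash
# Hypothetical pairwise bandwidth test between a prefill pod and a decode pod.
# Start the server side first, then (from a second shell) point the client at it.
kubectl exec -it <prefill-pod> -- ib_write_bw
kubectl exec -it <decode-pod>  -- ib_write_bw <prefill-pod-ip>
```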

docs/source/getting_started/quickstart.rst

Lines changed: 1 addition & 1 deletion

@@ -57,7 +57,7 @@ Save yaml as `pd-model.yaml` and run `kubectl apply -f pd-model.yaml`.

 .. note::
-   We use a custom vLLM image with NIXL support. For detailed information about the image build process, see: [aibrix vllm pd-image build](https://github.com/vllm-project/aibrix/blob/main/samples/disaggregation/vllm/README.md)
+   We use an AIBrix-enhanced vLLM image with KVCache and NIXL support for disaggregated inference. For detailed information about available images, compatibility, and build instructions, see :ref:`container-images`.

 Invoke the model endpoint using gateway API
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

docs/source/index.rst

Lines changed: 1 addition & 1 deletion

@@ -29,7 +29,7 @@ Documentation
    :caption: Getting Started

    getting_started/quickstart.rst
-   getting_started/advanced-k8s-examples.rst
+   getting_started/container-images.rst
    getting_started/installation/installation.rst
    getting_started/faq.rst

pkg/controller/podset/podset_controller.go

Lines changed: 1 addition & 1 deletion

@@ -359,7 +359,7 @@ func (r *PodSetReconciler) createPodFromTemplate(podSet *orchestrationv1alpha1.P
 	}

 	// Set pod name
-	pod.Name = fmt.Sprintf("%s-%d", podSet.Name, podIndex)
+	pod.Name = utils.Shorten(fmt.Sprintf("%s-%d", podSet.Name, podIndex), false, false)
 	pod.Namespace = podSet.Namespace

 	// Add labels
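The switch to `utils.Shorten` presumably keeps the generated `<podset>-<index>` names within Kubernetes object-name limits (pod names are capped at 253 characters, with 63 recommended for hostname compatibility). A rough illustration of the constraint being guarded against — not the controller's actual logic:

```bash
# Hypothetical check: the API server rejects pod names longer than 253 characters.
LONG_NAME=$(printf 'a%.0s' {1..260})
kubectl run "${LONG_NAME}" --image=busybox --dry-run=server
# Expected: an "Invalid value" error stating the name must be no more than 253 characters.
```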
