Skip to content

Commit 273da1d

Browse files
rootfs and Xunzhuo authored
feat: support ROCM with onnx binding (#1427)
* feat: support ROCM with onnx binding
  Signed-off-by: Huamin Chen <hchen@redhat.com>
* lint
  Signed-off-by: Huamin Chen <hchen@redhat.com>
* lint
  Signed-off-by: Huamin Chen <hchen@redhat.com>
* lint
  Signed-off-by: Huamin Chen <hchen@redhat.com>
* review feedback
  Signed-off-by: Huamin Chen <hchen@redhat.com>
* use sdpa fp16 optimization
  Signed-off-by: Huamin Chen <hchen@redhat.com>
* fix paper build ci
  Signed-off-by: Huamin Chen <hchen@redhat.com>
* ignore ci cache
  Signed-off-by: Huamin Chen <hchen@redhat.com>
---------
Signed-off-by: Huamin Chen <hchen@redhat.com>
Co-authored-by: Xunzhuo <bitliu@tencent.com>
1 parent c978938 commit 273da1d

File tree

25 files changed

+2312 -124 lines changed

25 files changed

+2312
-124
lines changed

.github/workflows/docker-publish.yml

Lines changed: 117 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ on:
2525
- ".github/workflows/docker-publish.yml"
2626
- "tools/docker/Dockerfile*"
2727
- "candle-binding/**"
28+
- "onnx-binding/**"
2829
- "src/**"
2930
- "e2e/testing/llm-katan/**"
3031
- "dashboard/**"
@@ -34,6 +35,7 @@ on:
3435
- ".github/workflows/docker-publish.yml"
3536
- "tools/docker/Dockerfile*"
3637
- "candle-binding/**"
38+
- "onnx-binding/**"
3739
- "src/**"
3840
- "e2e/testing/llm-katan/**"
3941
- "dashboard/**"
@@ -59,7 +61,7 @@ jobs:
5961
packages: write
6062
strategy:
6163
matrix:
62-
image: [extproc, llm-katan, vllm-sr]
64+
image: [extproc, extproc-rocm, llm-katan, vllm-sr]
6365
fail-fast: false
6466

6567
steps:
@@ -98,6 +100,9 @@ jobs:
98100
if [ "${{ matrix.image }}" = "extproc" ]; then
99101
echo "context=." >> $GITHUB_OUTPUT
100102
echo "dockerfile=./tools/docker/Dockerfile.extproc" >> $GITHUB_OUTPUT
103+
elif [ "${{ matrix.image }}" = "extproc-rocm" ]; then
104+
echo "context=." >> $GITHUB_OUTPUT
105+
echo "dockerfile=./tools/docker/Dockerfile.extproc-rocm" >> $GITHUB_OUTPUT
101106
elif [ "${{ matrix.image }}" = "llm-katan" ]; then
102107
echo "context=./e2e/testing/llm-katan" >> $GITHUB_OUTPUT
103108
echo "dockerfile=./e2e/testing/llm-katan/Dockerfile" >> $GITHUB_OUTPUT
@@ -130,7 +135,7 @@ jobs:
130135
load: true
131136
tags: ${{ steps.tags.outputs.tags }}
132137
cache-from: type=gha,scope=${{ matrix.image }}-amd64
133-
cache-to: type=gha,mode=max,scope=${{ matrix.image }}-amd64
138+
cache-to: type=gha,mode=max,scope=${{ matrix.image }}-amd64,ignore-error=true
134139
build-args: |
135140
BUILDKIT_INLINE_CACHE=1
136141
CARGO_BUILD_JOBS=8
@@ -228,8 +233,6 @@ jobs:
228233
PLATFORM_SUFFIX=$(echo "${{ matrix.platform }}" | tr '/' '-')
229234
echo "platform_suffix=${PLATFORM_SUFFIX}" >> $GITHUB_OUTPUT
230235
231-
# For multi-arch push builds, always use cross-compilation Dockerfiles
232-
# for images that have them (extproc, vllm-sr). This avoids slow QEMU emulation.
233236
if [ "${{ matrix.image }}" = "extproc" ]; then
234237
echo "context=." >> $GITHUB_OUTPUT
235238
echo "dockerfile=./tools/docker/Dockerfile.extproc" >> $GITHUB_OUTPUT
@@ -282,7 +285,7 @@ jobs:
282285
push: true
283286
tags: ${{ steps.tags.outputs.tags }}
284287
cache-from: type=gha,scope=${{ matrix.image }}-${{ steps.build-params.outputs.platform_suffix }}
285-
cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ steps.build-params.outputs.platform_suffix }}
288+
cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ steps.build-params.outputs.platform_suffix }},ignore-error=true
286289
build-args: |
287290
BUILDKIT_INLINE_CACHE=1
288291
CARGO_BUILD_JOBS=20
@@ -312,6 +315,115 @@ jobs:
312315
echo "- **Elapsed**: ${DURATION_MIN}m ${DURATION_SEC}s (${DURATION}s)" >> $GITHUB_STEP_SUMMARY
313316
fi
314317
318+
# extproc-rocm: amd64-only (ROCm does not support arm64), no manifest needed
319+
build_extproc_rocm:
320+
if: >-
321+
github.event_name != 'pull_request'
322+
&& github.repository == 'vllm-project/semantic-router'
323+
runs-on: ubuntu-latest
324+
timeout-minutes: 120
325+
permissions:
326+
contents: read
327+
packages: write
328+
329+
steps:
330+
- name: Free up disk space
331+
run: |
332+
echo "Before cleanup:"
333+
df -h
334+
sudo rm -rf /usr/share/dotnet
335+
sudo rm -rf /usr/local/lib/android
336+
sudo rm -rf /opt/ghc
337+
sudo rm -rf /opt/hostedtoolcache/CodeQL
338+
sudo docker image prune --all --force
339+
echo "After cleanup:"
340+
df -h
341+
342+
- name: Check out the repo
343+
uses: actions/checkout@v4
344+
345+
- name: Set up Docker Buildx
346+
uses: docker/setup-buildx-action@v3
347+
with:
348+
driver-opts: |
349+
image=moby/buildkit:latest
350+
network=host
351+
352+
- name: Log in to GitHub Container Registry
353+
uses: docker/login-action@v3
354+
with:
355+
registry: ghcr.io
356+
username: ${{ github.actor }}
357+
password: ${{ secrets.GITHUB_TOKEN }}
358+
359+
- name: Set lowercase repository owner
360+
run: echo "REPOSITORY_OWNER_LOWER=$(echo $GITHUB_REPOSITORY_OWNER | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV
361+
362+
- name: Generate date tag for nightly builds
363+
id: date
364+
if: inputs.is_nightly == true
365+
run: echo "date_tag=$(date +'%Y%m%d')" >> $GITHUB_OUTPUT
366+
367+
- name: Generate tags
368+
id: tags
369+
run: |
370+
REPO_LOWER="${{ env.REPOSITORY_OWNER_LOWER }}"
371+
IMAGE="ghcr.io/${REPO_LOWER}/semantic-router/extproc-rocm"
372+
373+
if [ "${{ inputs.is_nightly }}" = "true" ]; then
374+
TAG="nightly-${{ steps.date.outputs.date_tag }}"
375+
else
376+
TAG="${{ github.sha }}"
377+
fi
378+
379+
TAGS="${IMAGE}:${TAG}"
380+
if [ "${{ inputs.is_nightly }}" != "true" ]; then
381+
TAGS="${TAGS},${IMAGE}:latest"
382+
fi
383+
echo "tags=${TAGS}" >> $GITHUB_OUTPUT
384+
385+
- name: Record build start time
386+
id: build-start
387+
run: echo "start=$(date +%s)" >> $GITHUB_OUTPUT
388+
389+
- name: Build and push extproc-rocm (amd64 only)
390+
uses: docker/build-push-action@v5
391+
with:
392+
context: .
393+
file: ./tools/docker/Dockerfile.extproc-rocm
394+
platforms: linux/amd64
395+
push: true
396+
tags: ${{ steps.tags.outputs.tags }}
397+
cache-from: type=gha,scope=extproc-rocm-amd64
398+
cache-to: type=gha,mode=max,scope=extproc-rocm-amd64,ignore-error=true
399+
build-args: |
400+
BUILDKIT_INLINE_CACHE=1
401+
CARGO_BUILD_JOBS=20
402+
CARGO_INCREMENTAL=1
403+
RUSTC_WRAPPER=""
404+
CARGO_NET_GIT_FETCH_WITH_CLI=true
405+
provenance: false
406+
407+
- name: Build summary and timing
408+
if: always()
409+
run: |
410+
END=$(date +%s)
411+
START="${{ steps.build-start.outputs.start }}"
412+
DURATION=$((END - START))
413+
DURATION_MIN=$((DURATION / 60))
414+
DURATION_SEC=$((DURATION % 60))
415+
if [ "${{ job.status }}" = "success" ]; then
416+
echo "::notice title=Build Success::extproc-rocm (amd64) built in ${DURATION_MIN}m ${DURATION_SEC}s"
417+
echo "### Build Summary for extproc-rocm" >> $GITHUB_STEP_SUMMARY
418+
echo "- **Platform**: linux/amd64 (ROCm, no arm64)" >> $GITHUB_STEP_SUMMARY
419+
echo "- **Build time**: ${DURATION_MIN}m ${DURATION_SEC}s (${DURATION}s)" >> $GITHUB_STEP_SUMMARY
420+
echo "- **Tags**: ${{ steps.tags.outputs.tags }}" >> $GITHUB_STEP_SUMMARY
421+
else
422+
echo "::error title=Build Failed::extproc-rocm build failed after ${DURATION_MIN}m ${DURATION_SEC}s"
423+
echo "### Build failed for extproc-rocm" >> $GITHUB_STEP_SUMMARY
424+
echo "- **Elapsed**: ${DURATION_MIN}m ${DURATION_SEC}s (${DURATION}s)" >> $GITHUB_STEP_SUMMARY
425+
fi
426+
315427
# Step 2: Create multi-arch manifest lists from per-platform images
316428
create_manifest:
317429
if: >-

.github/workflows/docker-release.yml

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,51 @@ jobs:
5252
ghcr.io/${{ env.REPOSITORY_OWNER_LOWER }}/semantic-router/extproc:${{ steps.extract_tag.outputs.tag }}
5353
ghcr.io/${{ env.REPOSITORY_OWNER_LOWER }}/semantic-router/extproc:latest
5454
55+
build_and_push_extproc_rocm:
56+
if: github.repository == 'vllm-project/semantic-router'
57+
runs-on: ubuntu-latest
58+
timeout-minutes: 120
59+
permissions:
60+
contents: read
61+
packages: write
62+
63+
steps:
64+
- name: Free up disk space
65+
run: |
66+
sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL
67+
sudo docker image prune --all --force
68+
69+
- name: Check out the repo
70+
uses: actions/checkout@v4
71+
72+
- name: Set up Docker Buildx
73+
uses: docker/setup-buildx-action@v3
74+
75+
- name: Extract tag name
76+
id: extract_tag
77+
run: echo "tag=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
78+
79+
- name: Set lowercase repository owner
80+
run: echo "REPOSITORY_OWNER_LOWER=$(echo $GITHUB_REPOSITORY_OWNER | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV
81+
82+
- name: Log in to GitHub Container Registry
83+
uses: docker/login-action@v3
84+
with:
85+
registry: ghcr.io
86+
username: ${{ github.actor }}
87+
password: ${{ secrets.GITHUB_TOKEN }}
88+
89+
- name: Build and push extproc-rocm Docker image
90+
uses: docker/build-push-action@v5
91+
with:
92+
context: .
93+
file: ./tools/docker/Dockerfile.extproc-rocm
94+
platforms: linux/amd64
95+
push: true
96+
tags: |
97+
ghcr.io/${{ env.REPOSITORY_OWNER_LOWER }}/semantic-router/extproc-rocm:${{ steps.extract_tag.outputs.tag }}
98+
ghcr.io/${{ env.REPOSITORY_OWNER_LOWER }}/semantic-router/extproc-rocm:latest
99+
55100
build_and_push_llm_katan:
56101
if: github.repository == 'vllm-project/semantic-router'
57102
runs-on: ubuntu-latest

.github/workflows/operator-ci.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ jobs:
176176
load: true
177177
tags: ${{ steps.local-tags.outputs.operator }}
178178
cache-from: type=gha,scope=operator
179-
cache-to: type=gha,mode=max,scope=operator
179+
cache-to: type=gha,mode=max,scope=operator,ignore-error=true
180180

181181
- name: Build and push operator image (multi-arch to registry)
182182
if: github.event_name != 'pull_request'
@@ -199,7 +199,7 @@ jobs:
199199
load: true
200200
tags: ${{ steps.local-tags.outputs.bundle }}
201201
cache-from: type=gha,scope=operator-bundle
202-
cache-to: type=gha,mode=max,scope=operator-bundle
202+
cache-to: type=gha,mode=max,scope=operator-bundle,ignore-error=true
203203

204204
- name: Build and push bundle image (multi-arch to registry)
205205
if: github.event_name != 'pull_request'
@@ -222,7 +222,7 @@ jobs:
222222
load: true
223223
tags: ${{ steps.local-tags.outputs.extproc }}
224224
cache-from: type=gha,scope=extproc
225-
cache-to: type=gha,mode=max,scope=extproc
225+
cache-to: type=gha,mode=max,scope=extproc,ignore-error=true
226226

227227
- name: Save operator image for integration tests
228228
run: docker save -o /tmp/operator-image.tar ${{ steps.local-tags.outputs.operator }}

.github/workflows/paper-build.yml

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -62,29 +62,15 @@ jobs:
6262
path: paper/white-paper.pdf
6363
retention-days: 30
6464

65-
- name: Comment PDF link on PR
65+
- name: Save PR metadata
6666
if: github.event_name == 'pull_request'
67-
uses: actions/github-script@v7
67+
run: |
68+
mkdir -p pr-metadata
69+
echo "${{ github.event.pull_request.number }}" > pr-metadata/pr-number
70+
echo "${{ github.sha }}" > pr-metadata/sha
71+
- name: Upload PR metadata
72+
if: github.event_name == 'pull_request'
73+
uses: actions/upload-artifact@v4
6874
with:
69-
script: |
70-
const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
71-
const sha = context.sha.substring(0, 7);
72-
const now = new Date().toISOString().replace('T', ' ').substring(0, 19) + ' UTC';
73-
const body = [
74-
`### 📄 White Paper Build`,
75-
``,
76-
`The paper PDF has been rebuilt from the latest changes.`,
77-
``,
78-
`| | |`,
79-
`|---|---|`,
80-
`| **Commit** | \`${sha}\` |`,
81-
`| **Built at** | ${now} |`,
82-
`| **Download** | **[white-paper.pdf](${runUrl}#artifacts)** |`,
83-
].join('\n');
84-
85-
await github.rest.issues.createComment({
86-
owner: context.repo.owner,
87-
repo: context.repo.repo,
88-
issue_number: context.issue.number,
89-
body,
90-
});
75+
name: pr-metadata
76+
path: pr-metadata/

0 commit comments

Comments
 (0)