Skip to content

Commit b156257

Browse files
committed
feat(ci): add AST-based security scanner
Signed-off-by: Huamin Chen <hchen@redhat.com>
1 parent c9816e0 commit b156257

File tree

8 files changed

+2251
-0
lines changed

8 files changed

+2251
-0
lines changed

.github/workflows/pre-commit.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,27 @@ jobs:
9999
restore-keys: |
100100
${{ runner.os }}-precommit-
101101
102+
- name: Install tree-sitter for security scan
  # Versions are pinned to the same releases as tools/docker/Dockerfile.precommit
  # so the scanner that guards the supply chain does not itself pull
  # unreviewed, floating dependencies. Bump both places together.
  run: |
    pip install \
      tree-sitter==0.25.2 \
      tree-sitter-python==0.25.0 \
      tree-sitter-javascript==0.25.0 \
      tree-sitter-typescript==0.23.2 \
      tree-sitter-go==0.25.0 \
      tree-sitter-rust==0.24.0

- name: Run AST supply chain security scan
  # Scans the whole checkout; the scanner is invoked with --fail-on HIGH.
  run: |
    python3 tools/security/ast_security_scanner.py \
      scan . --fail-on HIGH

- name: Run AST PR diff security scan
  if: github.event_name == 'pull_request'
  env:
    # Passed through the environment rather than interpolated into the
    # script text, so the ref name is never parsed as shell syntax.
    BASE_REF: ${{ github.base_ref }}
  run: |
    python3 tools/security/ast_security_scanner.py \
      diff "origin/${BASE_REF}" --fail-on HIGH
122+
102123
- name: Run agent CI lint on changed files
103124
run: |
104125
if [ "${{ github.event_name }}" = "pull_request" ]; then
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
# Runs the AST-based and regex-based supply chain scanners on pushes and pull
# requests targeting main, posts (or updates) a single summary comment on pull
# requests, and fails the job when any scanner reports a failure.
name: Supply Chain Security Scan

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:

concurrency:
  group: security-${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  ast-security-scan:
    if: github.repository == 'vllm-project/semantic-router'
    runs-on: ubuntu-latest
    name: AST supply chain security scan
    permissions:
      contents: read
      pull-requests: write
      issues: write

    steps:
      - name: Check out the repo
        uses: actions/checkout@v4
        with:
          # Full history so the diff scan can compare against origin/<base>.
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install tree-sitter dependencies
        # Pinned to the same releases as tools/docker/Dockerfile.precommit so
        # the security scanner does not depend on floating package versions.
        run: |
          pip install \
            tree-sitter==0.25.2 \
            tree-sitter-python==0.25.0 \
            tree-sitter-javascript==0.25.0 \
            tree-sitter-typescript==0.23.2 \
            tree-sitter-go==0.25.0 \
            tree-sitter-rust==0.24.0

      # The three scan steps use continue-on-error so that every scanner runs
      # and the report step can summarise all of them; the final step turns
      # any recorded failure into a job failure.
      - name: Run AST codebase scan
        id: ast_scan
        continue-on-error: true
        run: |
          python3 tools/security/ast_security_scanner.py \
            scan . --fail-on HIGH --json > /tmp/ast_scan.json

      - name: Run AST PR diff scan
        id: diff_scan
        if: github.event_name == 'pull_request'
        continue-on-error: true
        env:
          # Passed via the environment instead of being interpolated into the
          # script text, so the ref name is never parsed as shell syntax.
          BASE_REF: ${{ github.base_ref }}
        run: |
          python3 tools/security/ast_security_scanner.py \
            diff "origin/${BASE_REF}" --fail-on HIGH --json \
            > /tmp/diff_scan.json

      - name: Run regex fallback scan
        id: regex_scan
        continue-on-error: true
        run: |
          # Without pipefail the step would take tee's exit status (always 0)
          # and a failing scan would be recorded as "success".
          set -o pipefail
          python3 tools/security/scan_malicious_code.py \
            . --fail-on HIGH 2>&1 \
            | tee /tmp/regex_report.txt

      - name: Post security report on PR
        if: github.event_name == 'pull_request' && !cancelled()
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');

            // --- Read structured scanner outputs ---
            // A missing, empty or malformed file yields an empty result so the
            // report can still be rendered from the step outcomes.
            function readJson(path, label) {
              try {
                const parsed = JSON.parse(fs.readFileSync(path, 'utf8'));
                if (parsed && typeof parsed === 'object') return parsed;
              } catch (e) {
                console.log(`No ${label} JSON:`, e.message);
              }
              return { findings: [], counts: {} };
            }

            const astResult = readJson('/tmp/ast_scan.json', 'AST scan');
            const diffResult = readJson('/tmp/diff_scan.json', 'diff scan');
            const astFindings = Array.isArray(astResult.findings) ? astResult.findings : [];
            const diffFindings = Array.isArray(diffResult.findings) ? diffResult.findings : [];

            // --- Step outcomes (actual result, ignoring continue-on-error) ---
            const astOutcome = '${{ steps.ast_scan.outcome }}';
            const diffOutcome = '${{ steps.diff_scan.outcome }}' || 'skipped';
            const regexOutcome = '${{ steps.regex_scan.outcome }}';

            const anyFailed = [astOutcome, diffOutcome, regexOutcome]
              .some(o => o === 'failure');
            // The codebase and regex scans have no `if:` guard, so any outcome
            // other than success/failure means an earlier step (checkout,
            // Python setup, dependency install) failed and they never ran.
            // That must not be reported as "All Clear".
            const anyIncomplete = [astOutcome, regexOutcome]
              .some(o => o !== 'success' && o !== 'failure');

            // --- Helpers ---
            function severityCounts(counts) {
              const total = Object.values(counts).reduce((a, b) => a + b, 0);
              if (total === 0) return { total: 0, text: 'No issues detected' };
              const parts = [];
              for (const sev of ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW']) {
                if (counts[sev]) parts.push(`**${sev}**: ${counts[sev]}`);
              }
              return { total, text: `${total} finding(s) — ${parts.join(' · ')}` };
            }

            function icon(outcome) {
              return outcome === 'success' ? '✅' : outcome === 'skipped' ? '⏭️' : '🚨';
            }

            // Scanner output is derived from repository content, so neutralise
            // the characters that would break a Markdown table row.
            function cell(value) {
              return String(value ?? '')
                .replace(/\|/g, '\\|')
                .replace(/\s*\r?\n\s*/g, ' ');
            }

            const SEVERITY_LABELS = {
              CRITICAL: '🔴 CRITICAL',
              HIGH: '🟠 HIGH',
              MEDIUM: '🟡 MEDIUM',
            };

            // Renders at most `cap` findings as a Markdown table, followed by
            // a "...and N more" line when the list was truncated.
            function findingsTable(findings, moreSuffix) {
              const cap = 25;
              let out = `| Severity | File | Line | Description |\n`;
              out += `|----------|------|------|-------------|\n`;
              for (const f of findings.slice(0, cap)) {
                const sev = SEVERITY_LABELS[f.severity] || '🔵 LOW';
                const path = String(f.file ?? '');
                // Keep the tail of long paths: the file name is the useful part.
                const file = path.length > 50 ? '…' + path.slice(-49) : path;
                out += `| ${sev} | \`${cell(file)}\` | ${cell(f.line)} | ${cell(f.message)} |\n`;
              }
              if (findings.length > cap) {
                out += `\n_...and ${findings.length - cap} more${moreSuffix}_\n`;
              }
              return out;
            }

            // --- Build summary table ---
            const astCounts = severityCounts(astResult.counts || {});
            const diffCounts = severityCounts(diffResult.counts || {});

            const statusIcon = anyFailed ? '🚨' : anyIncomplete ? '⚠️' : '✅';
            const statusText = anyFailed ? 'Issues Found'
              : anyIncomplete ? 'Scan Incomplete' : 'All Clear';

            const astText = astOutcome === 'success' || astOutcome === 'failure'
              ? astCounts.text : 'Did not run — see logs';
            // This step only runs for pull requests, so a skipped diff scan
            // means an earlier step failed, not that the event was a push.
            const diffText = diffOutcome === 'skipped'
              ? 'Skipped (did not run)' : diffCounts.text;
            const regexText = regexOutcome === 'success' ? 'No issues detected'
              : regexOutcome === 'failure' ? 'Issues found — see logs'
              : 'Did not run — see logs';

            let body = `## ${statusIcon} Supply Chain Security Report — ${statusText}\n\n`;
            body += `| Scanner | Status | Findings |\n`;
            body += `|---------|--------|----------|\n`;
            body += `| AST Codebase Scan (Py, Go, JS/TS, Rust) | ${icon(astOutcome)} | ${astText} |\n`;
            body += `| AST PR Diff Scan | ${icon(diffOutcome)} | ${diffText} |\n`;
            body += `| Regex Fallback Scan | ${icon(regexOutcome)} | ${regexText} |\n`;
            body += `\n`;

            // --- PR diff findings detail (most actionable for PR authors) ---
            if (diffFindings.length > 0) {
              body += `### Findings in this PR's diff\n\n`;
              body += `<details><summary>${diffCounts.total} finding(s) — click to expand</summary>\n\n`;
              body += findingsTable(diffFindings, ' (see workflow logs)');
              body += `\n</details>\n\n`;
            }

            // --- Codebase scan findings (collapsed, informational) ---
            const critHigh = astFindings.filter(
              f => f.severity === 'CRITICAL' || f.severity === 'HIGH');
            if (critHigh.length > 0) {
              body += `### CRITICAL / HIGH findings in codebase\n\n`;
              body += `<details><summary>${critHigh.length} finding(s) — click to expand</summary>\n\n`;
              body += findingsTable(critHigh, '');
              body += `\n</details>\n\n`;
            }

            if (anyFailed) {
              body += `> **Action required:** CRITICAL and HIGH severity findings must be resolved before merge.\n\n`;
            }

            body += `---\n`;
            body += `_Scanned at \`${new Date().toISOString()}\` · `;
            body += `[View full workflow logs](${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID})_\n`;

            // --- Create or update existing bot comment ---
            // Posting is best-effort: with a read-only token (e.g. pull
            // requests from forks) the API call is rejected, and that must not
            // hide the scan result, which the final step enforces on its own.
            try {
              // Paginate: listComments returns a single page by default, so on
              // a busy PR the earlier report would not be found and a
              // duplicate comment would be posted on every run.
              const comments = await github.paginate(github.rest.issues.listComments, {
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                per_page: 100,
              });

              const existing = comments.find(c =>
                c.user?.login === 'github-actions[bot]' &&
                (c.body || '').includes('Supply Chain Security Report')
              );

              if (existing) {
                await github.rest.issues.updateComment({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  comment_id: existing.id,
                  body: body,
                });
                console.log('Updated existing security report comment');
              } else {
                await github.rest.issues.createComment({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  issue_number: context.issue.number,
                  body: body,
                });
                console.log('Created new security report comment');
              }
            } catch (e) {
              core.warning(`Could not post security report comment: ${e.message}`);
            }

      - name: Fail if security issues found
        if: "!cancelled()"
        run: |
          if [ "${{ steps.ast_scan.outcome }}" = "failure" ] || \
             [ "${{ steps.diff_scan.outcome }}" = "failure" ] || \
             [ "${{ steps.regex_scan.outcome }}" = "failure" ]; then
            echo "::error::Supply chain security scan detected issues — see PR comment for details."
            exit 1
          fi

.pre-commit-config.yaml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,17 @@ repos:
9797
files: \.py$
9898
exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site-packages/)
9999

100+
# Supply chain security scan (AST-based, tree-sitter)
# Scans the whole repository (`scan .`) rather than the staged files, so no
# file names are passed to the scanner.
- repo: local
  hooks:
    - id: supply-chain-security-scan
      name: supply chain security scan (AST)
      entry: python3 tools/security/ast_security_scanner.py scan . --fail-on HIGH
      language: system
      pass_filenames: false
      # always_run makes the hook fire on every commit whether or not any
      # file matches, and no file names are passed to the entry; a `files:`
      # filter therefore has no effect here and is intentionally omitted.
      always_run: true
110+
100111
# Commented out flake8 - only reports issues, doesn't auto-fix
101112
# - repo: https://github.com/PyCQA/flake8
102113
# rev: 7.3.0

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ _run:
1818
-f tools/make/llama-stack.mk \
1919
-f tools/make/models.mk \
2020
-f tools/make/pre-commit.mk \
21+
-f tools/make/security.mk \
2122
-f tools/make/docker.mk \
2223
-f tools/make/agent.mk \
2324
-f tools/make/dockerless.mk \

tools/docker/Dockerfile.precommit

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,10 @@ RUN pip install --break-system-packages \
3838
ruff==0.15.5 \
3939
tree-sitter==0.25.2 \
4040
tree-sitter-go==0.25.0 \
41+
tree-sitter-javascript==0.25.0 \
4142
tree-sitter-python==0.25.0 \
4243
tree-sitter-rust==0.24.0 \
44+
tree-sitter-typescript==0.23.2 \
4345
yamllint==1.38.0
4446

4547
# Golangci-lint v2.5.0 — matches CI golangci-lint-action version

tools/make/security.mk

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
##@ Supply Chain Security

# User-tunable knobs; override on the command line, e.g.
#   make security-scan SECURITY_PYTHON=python3.11 SECURITY_FAIL_ON=CRITICAL
SECURITY_PYTHON ?= python3
SECURITY_FAIL_ON ?= HIGH

SECURITY_DIR := tools/security
AST_SCANNER := $(SECURITY_DIR)/ast_security_scanner.py
REGEX_SCANNER := $(SECURITY_DIR)/scan_malicious_code.py

.PHONY: security-scan security-scan-diff security-scan-ci

# Both scanners run in a single shell so that a failing AST scan does not
# prevent the regex scan from running; the target still exits non-zero when
# either scanner fails.
security-scan: ## Run full AST + regex supply chain security scan
	@status=0; \
	echo "=== AST Supply Chain Security Scan ==="; \
	$(SECURITY_PYTHON) $(AST_SCANNER) scan . --fail-on $(SECURITY_FAIL_ON) || status=1; \
	echo ""; \
	echo "=== Regex Supply Chain Security Scan ==="; \
	$(SECURITY_PYTHON) $(REGEX_SCANNER) . --fail-on $(SECURITY_FAIL_ON) || status=1; \
	exit $$status

# The base ref comes from AGENT_BASE_REF, then GITHUB_BASE_REF, then "main".
# GITHUB_BASE_REF is a bare branch name; when no local ref of that name exists
# but origin/<name> does, the remote-tracking ref is used instead.
# NOTE(review): assumes the scanner's `diff` argument is a git ref — confirm
# against tools/security/ast_security_scanner.py.
security-scan-diff: ## Scan PR diff for supply chain attacks (AST-based)
	@echo "=== AST PR Diff Security Scan ==="
	@BASE=$${AGENT_BASE_REF:-$${GITHUB_BASE_REF:-main}}; \
	if ! git rev-parse --verify --quiet "$$BASE^{commit}" >/dev/null 2>&1 && \
	   git rev-parse --verify --quiet "origin/$$BASE^{commit}" >/dev/null 2>&1; then \
		BASE="origin/$$BASE"; \
	fi; \
	$(SECURITY_PYTHON) $(AST_SCANNER) diff "$$BASE" --fail-on $(SECURITY_FAIL_ON)

security-scan-ci: ## CI gate: AST scan (for GitHub Actions)
	@echo "=== CI Security Gate ==="
	@$(SECURITY_PYTHON) $(AST_SCANNER) scan . --fail-on $(SECURITY_FAIL_ON)

0 commit comments

Comments
 (0)