feat(ci): add AST-based security scanner

rootfs · rootfs · commit b1562570c976 · 2026-03-24T20:58:32.000Z
Signed-off-by: Huamin Chen <hchen@redhat.com>
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
@@ -99,6 +99,32 @@ jobs:
           restore-keys: |
             ${{ runner.os }}-precommit-
 
+      # tree-sitter runtime and grammars for the AST security scanner. Pinned to
+      # the releases installed in tools/docker/Dockerfile.precommit so CI and
+      # the pre-commit image parse code with the same versions.
+      - name: Install tree-sitter for security scan
+        run: |
+          pip install \
+            tree-sitter==0.25.2 \
+            tree-sitter-python==0.25.0 \
+            tree-sitter-javascript==0.25.0 \
+            tree-sitter-typescript==0.23.2 \
+            tree-sitter-go==0.25.0 \
+            tree-sitter-rust==0.24.0
+
+      # Scans the whole checkout with the scanner's --fail-on HIGH threshold.
+      - name: Run AST supply chain security scan
+        run: |
+          python3 tools/security/ast_security_scanner.py \
+            scan . --fail-on HIGH
+
+      # Pull requests only: runs the scanner's diff mode against the base branch.
+      - name: Run AST PR diff security scan
+        if: github.event_name == 'pull_request'
+        run: |
+          python3 tools/security/ast_security_scanner.py \
+            diff "origin/${{ github.base_ref }}" --fail-on HIGH
+
       - name: Run agent CI lint on changed files
         run: |
           if [ "${{ github.event_name }}" = "pull_request" ]; then
diff --git a/.github/workflows/security-scan.yml b/.github/workflows/security-scan.yml
@@ -0,0 +1,251 @@
+# Supply chain security gate.
+#
+# Runs an AST codebase scan, an AST diff scan (pull requests only) and a regex
+# fallback scan, posts a summary comment on pull requests, and fails the job
+# when any scanner step failed.
+name: Supply Chain Security Scan
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+  workflow_dispatch:
+
+# Runs are grouped per workflow and ref; a newer run cancels one in progress.
+concurrency:
+  group: security-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  ast-security-scan:
+    if: github.repository == 'vllm-project/semantic-router'
+    runs-on: ubuntu-latest
+    name: AST supply chain security scan
+    permissions:
+      contents: read
+      pull-requests: write
+      issues: write
+
+    steps:
+      - name: Check out the repo
+        uses: actions/checkout@v4
+        with:
+          # Full history so the diff scan can compare against origin/<base>.
+          fetch-depth: 0
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      # Pinned to the releases installed in tools/docker/Dockerfile.precommit
+      # so CI and the pre-commit image run the scanner on the same parsers.
+      - name: Install tree-sitter dependencies
+        run: |
+          pip install \
+            tree-sitter==0.25.2 \
+            tree-sitter-python==0.25.0 \
+            tree-sitter-javascript==0.25.0 \
+            tree-sitter-typescript==0.23.2 \
+            tree-sitter-go==0.25.0 \
+            tree-sitter-rust==0.24.0
+
+      # Each scanner step sets continue-on-error so the remaining scanners and
+      # the PR report still run; the last step fails the job based on the
+      # recorded step outcomes.
+      - name: Run AST codebase scan
+        id: ast_scan
+        continue-on-error: true
+        run: |
+          python3 tools/security/ast_security_scanner.py \
+            scan . --fail-on HIGH --json > /tmp/ast_scan.json
+
+      - name: Run AST PR diff scan
+        id: diff_scan
+        if: github.event_name == 'pull_request'
+        continue-on-error: true
+        run: |
+          python3 tools/security/ast_security_scanner.py \
+            diff "origin/${{ github.base_ref }}" --fail-on HIGH --json \
+            > /tmp/diff_scan.json
+
+      - name: Run regex fallback scan
+        id: regex_scan
+        continue-on-error: true
+        run: |
+          # The default shell for run steps does not enable pipefail, so the
+          # pipeline would otherwise return tee's status and hide a scanner
+          # failure from steps.regex_scan.outcome.
+          set -o pipefail
+          python3 tools/security/scan_malicious_code.py \
+            . --fail-on HIGH 2>&1 \
+            | tee /tmp/regex_report.txt
+
+      - name: Post security report on PR
+        if: github.event_name == 'pull_request' && !cancelled()
+        # Reporting is best effort: pull requests from forks get a read-only
+        # token, so commenting can fail. The final step remains the gate.
+        continue-on-error: true
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+
+            // --- Read structured scanner outputs ---
+            let astResult = { findings: [], counts: {} };
+            try {
+              astResult = JSON.parse(fs.readFileSync('/tmp/ast_scan.json', 'utf8'));
+            } catch (e) { console.log('No AST scan JSON:', e.message); }
+
+            let diffResult = { findings: [], counts: {} };
+            try {
+              diffResult = JSON.parse(fs.readFileSync('/tmp/diff_scan.json', 'utf8'));
+            } catch (e) { console.log('No diff scan JSON:', e.message); }
+
+            // --- Step outcomes (actual result, ignoring continue-on-error) ---
+            const astOutcome   = '${{ steps.ast_scan.outcome }}';
+            const diffOutcome  = '${{ steps.diff_scan.outcome }}' || 'skipped';
+            const regexOutcome = '${{ steps.regex_scan.outcome }}';
+
+            const anyFailed = [astOutcome, diffOutcome, regexOutcome]
+              .some(o => o === 'failure');
+
+            // --- Helpers ---
+            function severityCounts(counts) {
+              const total = Object.values(counts).reduce((a, b) => a + b, 0);
+              if (total === 0) return { total: 0, text: 'No issues detected' };
+              const parts = [];
+              for (const sev of ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW']) {
+                if (counts[sev]) parts.push(`**${sev}**: ${counts[sev]}`);
+              }
+              return { total, text: `${total} finding(s) — ${parts.join(' · ')}` };
+            }
+
+            function icon(outcome) {
+              return outcome === 'success' ? '✅' : outcome === 'skipped' ? '⏭️' : '🚨';
+            }
+
+            // Escape a value for use inside a Markdown table cell; a raw pipe or
+            // line break would otherwise split the row.
+            function cell(value) {
+              return String(value ?? '')
+                .replace(/\|/g, '\\|')
+                .replace(/\r?\n/g, ' ');
+            }
+
+            // Show only the tail of paths longer than 50 characters.
+            function shortPath(path) {
+              const p = String(path ?? '');
+              return cell(p.length > 50 ? '…' + p.slice(-49) : p);
+            }
+
+            // --- Build summary table ---
+            const astCounts  = severityCounts(astResult.counts  || {});
+            const diffCounts = severityCounts(diffResult.counts || {});
+
+            const statusIcon = anyFailed ? '🚨' : '✅';
+            const statusText = anyFailed ? 'Issues Found' : 'All Clear';
+
+            let body = `## ${statusIcon} Supply Chain Security Report — ${statusText}\n\n`;
+            body += `| Scanner | Status | Findings |\n`;
+            body += `|---------|--------|----------|\n`;
+            body += `| AST Codebase Scan (Py, Go, JS/TS, Rust) | ${icon(astOutcome)} | ${astCounts.text} |\n`;
+            body += `| AST PR Diff Scan | ${icon(diffOutcome)} | ${diffOutcome === 'skipped' ? 'Skipped (push event)' : diffCounts.text} |\n`;
+            body += `| Regex Fallback Scan | ${icon(regexOutcome)} | ${regexOutcome === 'success' ? 'No issues detected' : 'Issues found — see logs'} |\n`;
+            body += `\n`;
+
+            // --- PR diff findings detail (most actionable for PR authors) ---
+            if (diffResult.findings && diffResult.findings.length > 0) {
+              body += `### Findings in this PR's diff\n\n`;
+              body += `<details><summary>${diffCounts.total} finding(s) — click to expand</summary>\n\n`;
+              body += `| Severity | File | Line | Description |\n`;
+              body += `|----------|------|------|-------------|\n`;
+              const cap = 25;
+              for (const f of diffResult.findings.slice(0, cap)) {
+                const sev = f.severity === 'CRITICAL' ? '🔴 CRITICAL'
+                          : f.severity === 'HIGH'     ? '🟠 HIGH'
+                          : f.severity === 'MEDIUM'   ? '🟡 MEDIUM' : '🔵 LOW';
+                body += `| ${sev} | \`${shortPath(f.file)}\` | ${cell(f.line)} | ${cell(f.message)} |\n`;
+              }
+              if (diffResult.findings.length > cap) {
+                body += `\n_...and ${diffResult.findings.length - cap} more (see workflow logs)_\n`;
+              }
+              body += `\n</details>\n\n`;
+            }
+
+            // --- Codebase scan findings (collapsed, informational) ---
+            if (astResult.findings && astResult.findings.length > 0) {
+              const critHigh = astResult.findings.filter(
+                f => f.severity === 'CRITICAL' || f.severity === 'HIGH');
+              if (critHigh.length > 0) {
+                body += `### CRITICAL / HIGH findings in codebase\n\n`;
+                body += `<details><summary>${critHigh.length} finding(s) — click to expand</summary>\n\n`;
+                body += `| Severity | File | Line | Description |\n`;
+                body += `|----------|------|------|-------------|\n`;
+                const cap = 25;
+                for (const f of critHigh.slice(0, cap)) {
+                  const sev = f.severity === 'CRITICAL' ? '🔴 CRITICAL' : '🟠 HIGH';
+                  body += `| ${sev} | \`${shortPath(f.file)}\` | ${cell(f.line)} | ${cell(f.message)} |\n`;
+                }
+                if (critHigh.length > cap) {
+                  body += `\n_...and ${critHigh.length - cap} more_\n`;
+                }
+                body += `\n</details>\n\n`;
+              }
+            }
+
+            if (anyFailed) {
+              body += `> **Action required:** CRITICAL and HIGH severity findings must be resolved before merge.\n\n`;
+            }
+
+            body += `---\n`;
+            body += `_Scanned at \`${new Date().toISOString()}\` · `;
+            body += `[View full workflow logs](${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID})_\n`;
+
+            // --- Create or update existing bot comment ---
+            // Paginate: a single listComments call returns only the first page,
+            // so on busy PRs the earlier report would be missed and duplicated.
+            const comments = await github.paginate(
+              github.rest.issues.listComments,
+              {
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                per_page: 100,
+              });
+
+            const existing = comments.find(c =>
+              c.user && c.user.login === 'github-actions[bot]' &&
+              (c.body || '').includes('Supply Chain Security Report')
+            );
+
+            if (existing) {
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: existing.id,
+                body: body,
+              });
+              console.log('Updated existing security report comment');
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                body: body,
+              });
+              console.log('Created new security report comment');
+            }
+
+      # Gate: scanner steps never fail the job themselves (continue-on-error),
+      # so their recorded outcomes are checked here.
+      - name: Fail if security issues found
+        if: "!cancelled()"
+        run: |
+          if [ "${{ steps.ast_scan.outcome }}" = "failure" ] || \
+             [ "${{ steps.diff_scan.outcome }}" = "failure" ] || \
+             [ "${{ steps.regex_scan.outcome }}" = "failure" ]; then
+            echo "::error::Supply chain security scan detected issues — see PR comment for details."
+            exit 1
+          fi
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -97,6 +97,19 @@ repos:
         files: \.py$
         exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site-packages/)
 
+  # Supply chain security scan (AST-based, tree-sitter)
+  # The entry scans the whole repository ("scan ."), so no filenames are passed.
+  # always_run makes the hook run on every invocation; a `files` filter would
+  # have no effect in combination with it and is therefore not set.
+  - repo: local
+    hooks:
+      - id: supply-chain-security-scan
+        name: supply chain security scan (AST)
+        entry: python3 tools/security/ast_security_scanner.py scan . --fail-on HIGH
+        language: system
+        pass_filenames: false
+        always_run: true
+
 # Commented out flake8 - only reports issues, doesn't auto-fix
 # -   repo: https://github.com/PyCQA/flake8
 #     rev: 7.3.0
diff --git a/Makefile b/Makefile
@@ -18,6 +18,7 @@ _run:
 		-f tools/make/llama-stack.mk \
 		-f tools/make/models.mk \
 		-f tools/make/pre-commit.mk \
+		-f tools/make/security.mk \
 		-f tools/make/docker.mk \
 		-f tools/make/agent.mk \
 		-f tools/make/dockerless.mk \
diff --git a/tools/docker/Dockerfile.precommit b/tools/docker/Dockerfile.precommit
@@ -38,8 +38,10 @@ RUN pip install --break-system-packages \
     ruff==0.15.5 \
     tree-sitter==0.25.2 \
     tree-sitter-go==0.25.0 \
+    tree-sitter-javascript==0.25.0 \
     tree-sitter-python==0.25.0 \
     tree-sitter-rust==0.24.0 \
+    tree-sitter-typescript==0.23.2 \
     yamllint==1.38.0
 
 # Golangci-lint v2.5.0 — matches CI golangci-lint-action version
diff --git a/tools/make/security.mk b/tools/make/security.mk
@@ -0,0 +1,28 @@
+##@ Supply Chain Security
+
+# Scanner entry points, relative to the repository root.
+SECURITY_DIR := tools/security
+AST_SCANNER := $(SECURITY_DIR)/ast_security_scanner.py
+REGEX_SCANNER := $(SECURITY_DIR)/scan_malicious_code.py
+
+# Runs the AST scanner, then the regex scanner; by default make stops at the
+# first command that exits non-zero.
+.PHONY: security-scan
+security-scan: ## Run full AST + regex supply chain security scan
+	@echo "=== AST Supply Chain Security Scan ==="
+	@python3 $(AST_SCANNER) scan . --fail-on HIGH
+	@echo ""; echo "=== Regex Supply Chain Security Scan ==="
+	@python3 $(REGEX_SCANNER) . --fail-on HIGH
+
+# Base ref precedence: AGENT_BASE_REF, then GITHUB_BASE_REF, then "main".
+.PHONY: security-scan-diff
+security-scan-diff: ## Scan PR diff for supply chain attacks (AST-based)
+	@echo "=== AST PR Diff Security Scan ==="
+	@base_ref=$${AGENT_BASE_REF:-$${GITHUB_BASE_REF:-main}}; \
+	python3 $(AST_SCANNER) diff "$$base_ref" --fail-on HIGH
+
+# Runs the same AST scan command as security-scan, under a CI banner.
+.PHONY: security-scan-ci
+security-scan-ci: ## CI gate: AST scan (for GitHub Actions)
+	@echo "=== CI Security Gate ==="
+	@python3 $(AST_SCANNER) scan . --fail-on HIGH
diff --git a/tools/security/ast_security_scanner.py b/tools/security/ast_security_scanner.py
diff --git a/tools/security/scan_malicious_code.py b/tools/security/scan_malicious_code.py