-
Notifications
You must be signed in to change notification settings - Fork 1
462 lines (400 loc) · 19.1 KB
/
ai-config-validation.yml
File metadata and controls
462 lines (400 loc) · 19.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
name: AI Config Validation

# Uses the HTTP evaluator for integration testing of the multi-agent system.
# Tests the full request flow: API → Supervisor → Security/Support agent routing.
# Validates that LaunchDarkly AI configs are properly selected and used.
#
# Security notes:
# - pull_request_target is used to prevent external PRs from accessing secrets
#   without approval
# - External PRs require manual environment approval before secrets are exposed
# - The .env file is cleaned up after each run to prevent secret leakage
# - Artifacts are sanitized before upload
on:
  # Temporarily using pull_request for testing (will change back to
  # pull_request_target).
  pull_request:
    branches: [main]
    types: [opened, synchronize, reopened, labeled]
  push:
    branches: [main]
  workflow_dispatch:
    inputs:
      environment:
        description: 'LaunchDarkly environment to validate against'
        required: true
        default: 'production'
        type: choice
        options:
          - production
          - staging
          - development
jobs:
  validate-configs:
    name: Validate AI Configs
    runs-on: ubuntu-latest
    # Require manual approval for external PRs to prevent secret exposure.
    environment:
      name: ci
    steps:
      - name: Checkout code
        # v4 for consistency with upload-artifact@v4 below (v3 runs on
        # deprecated Node 16).
        uses: actions/checkout@v4
        with:
          # Check out the PR head commit so the validated code matches the PR.
          ref: ${{ github.event.pull_request.head.sha || github.ref }}

      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          version: "latest"
          enable-cache: false  # Disable cache to ensure fresh test runs

      - name: Set up Python
        run: uv python install

      - name: Install dependencies
        run: |
          uv venv
          # Install dependencies without installing the package itself
          # (avoids dev-dependency issues).
          uv pip install langchain langgraph langchain-anthropic fastapi "uvicorn[standard]" pydantic launchdarkly-server-sdk launchdarkly-server-sdk-ai numpy openai faiss-cpu PyMuPDF tiktoken streamlit requests python-dotenv PyYAML langchain-openai langchain-mcp-adapters beautifulsoup4 mcp semanticscholar rank-bm25 langchain-mistralai httpx boto3 langchain-aws
          # Force a fresh clone by clearing all caches.
          rm -rf ~/.cache/uv ~/.cache/pip /tmp/pip-* || true
          uv pip install --no-cache --force-reinstall git+https://x-access-token:${{ secrets.GH_PAT }}@github.com/launchdarkly-labs/scarlett_ai_configs_ci_cd-.git@02cb9b1

      - name: Validate required secrets
        # Pass secrets through env rather than interpolating ${{ secrets.* }}
        # directly into the script body: interpolation can inject into the
        # shell and risks leaking values into logs.
        env:
          LD_SDK_KEY: ${{ secrets.LD_SDK_KEY }}
          LD_API_KEY: ${{ secrets.LD_API_KEY }}
        run: |
          if [ -z "$LD_SDK_KEY" ]; then
            echo "::error::Missing required secret: LD_SDK_KEY"
            exit 1
          fi
          if [ -z "$LD_API_KEY" ]; then
            echo "::error::Missing required secret: LD_API_KEY"
            exit 1
          fi
          echo "✅ Required secrets are configured"

      - name: Run AI Config validation
        env:
          LD_SDK_KEY: ${{ secrets.LD_SDK_KEY }}
          LD_API_KEY: ${{ secrets.LD_API_KEY }}
          LD_PROJECT_KEY: ${{ secrets.LD_PROJECT_KEY }}
        run: |
          # Use the ld-aic-cicd framework to validate our AI configs.
          .venv/bin/ld-aic validate \
            --environment ${{ github.event.inputs.environment || 'production' }} \
            --config-keys "supervisor-agent,support-agent,security-agent" \
            --report validation-report.json \
            --fail-on-error

      - name: Upload validation report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: validation-report
          path: validation-report.json
          # The report is absent when validation fails before writing it;
          # don't fail the upload step in that case.
          if-no-files-found: warn

      - name: Comment PR with results
        if: github.event_name == 'pull_request' && always()
        # v7 (Node 20); v6 runs on deprecated Node 16.
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            // The report may not exist if validation crashed before writing
            // it — skip commenting instead of throwing an unhandled error.
            if (!fs.existsSync('validation-report.json')) {
              core.warning('validation-report.json not found; skipping PR comment');
              return;
            }
            const report = JSON.parse(fs.readFileSync('validation-report.json', 'utf8'));
            let comment = '## 🔍 AI Config Validation Results\n\n';
            comment += `**Environment:** ${report.environment}\n`;
            comment += `**Total Configs:** ${report.total_configs}\n\n`;
            // Count statuses
            const configs = Object.values(report.configs);
            const valid = configs.filter(c => c.valid).length;
            const errors = configs.filter(c => c.errors.length > 0).length;
            const warnings = configs.filter(c => c.warnings.length > 0).length;
            // Summary
            comment += '### Summary\n';
            comment += `✅ Valid: ${valid}\n`;
            comment += `❌ Errors: ${errors}\n`;
            comment += `⚠️ Warnings: ${warnings}\n\n`;
            // Details for problematic configs
            if (errors > 0 || warnings > 0) {
              comment += '### Issues Found\n';
              for (const [key, config] of Object.entries(report.configs)) {
                if (config.errors.length > 0 || config.warnings.length > 0) {
                  comment += `\n**${key}**\n`;
                  config.errors.forEach(e => comment += `- ❌ ${e}\n`);
                  config.warnings.forEach(w => comment += `- ⚠️ ${w}\n`);
                }
              }
            }
            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: comment
            });
evaluate-configs:
name: Evaluate AI Configs with Judge
runs-on: ubuntu-latest
if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
# Require manual approval for external PRs to prevent secret exposure
environment:
name: ci
steps:
- name: Checkout code
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.sha || github.ref }}
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "latest"
enable-cache: false # Disable cache to ensure fresh test runs
- name: Set up Python
run: uv python install
- name: Install dependencies
run: |
uv venv
# Install dependencies without installing the package itself (to avoid dev dependency issues)
uv pip install langchain langgraph langchain-anthropic fastapi "uvicorn[standard]" pydantic launchdarkly-server-sdk launchdarkly-server-sdk-ai numpy openai faiss-cpu PyMuPDF tiktoken streamlit requests python-dotenv PyYAML langchain-openai langchain-mcp-adapters beautifulsoup4 mcp semanticscholar rank-bm25 langchain-mistralai httpx boto3 langchain-aws
# Force fresh clone by clearing all caches
rm -rf ~/.cache/uv ~/.cache/pip /tmp/pip-* || true
uv pip install --no-cache --force-reinstall git+https://x-access-token:${{ secrets.GH_PAT }}@github.com/launchdarkly-labs/scarlett_ai_configs_ci_cd-.git@02cb9b1
- name: Validate required secrets
run: |
# Check LaunchDarkly secrets
if [ -z "${{ secrets.LD_SDK_KEY }}" ]; then
echo "::error::Missing required secret: LD_SDK_KEY"
exit 1
fi
# Check at least one AI provider API key is set
if [ -z "${{ secrets.OPENAI_API_KEY }}" ] && [ -z "${{ secrets.ANTHROPIC_API_KEY }}" ]; then
echo "::error::At least one AI provider API key required: OPENAI_API_KEY or ANTHROPIC_API_KEY"
exit 1
fi
echo "✅ Required secrets are configured"
- name: Initialize vector embeddings
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
echo "📚 Initializing vector embeddings for search tools..."
.venv/bin/python initialize_embeddings.py
- name: Install MCP servers for research-enhanced variation
run: |
echo "📦 Installing MCP servers (ArXiv and Semantic Scholar)..."
# Install ArXiv MCP server
uv tool install arxiv-mcp-server
# Clone and set up Semantic Scholar MCP server
git clone https://github.com/JackKuo666/semanticscholar-MCP-Server.git
echo "✅ MCP servers installed (available for research-enhanced variation)"
- name: Start API server
env:
LD_SDK_KEY: ${{ secrets.LD_SDK_KEY }}
LD_API_KEY: ${{ secrets.LD_API_KEY }}
LD_PROJECT_KEY: ${{ secrets.LD_PROJECT_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
run: |
# Create .env file for search tools and API access
# Strip any trailing whitespace/newlines from secrets
echo "📝 Creating .env file for search tools and API access..."
OPENAI_KEY=$(echo "$OPENAI_API_KEY" | tr -d '\n\r')
ANTHROPIC_KEY=$(echo "$ANTHROPIC_API_KEY" | tr -d '\n\r')
MISTRAL_KEY=$(echo "$MISTRAL_API_KEY" | tr -d '\n\r')
LD_SDK=$(echo "$LD_SDK_KEY" | tr -d '\n\r')
LD_API=$(echo "$LD_API_KEY" | tr -d '\n\r')
LD_PROJECT=$(echo "$LD_PROJECT_KEY" | tr -d '\n\r')
{
echo "OPENAI_API_KEY=$OPENAI_KEY"
echo "ANTHROPIC_API_KEY=$ANTHROPIC_KEY"
echo "MISTRAL_API_KEY=$MISTRAL_KEY"
echo "LD_SDK_KEY=$LD_SDK"
echo "LD_API_KEY=$LD_API"
echo "LD_PROJECT_KEY=$LD_PROJECT"
} > .env
echo "✅ Environment file created"
# Verify API keys are set (show first 10 chars only for security)
echo "🔍 Verifying API keys..."
if [ -n "$OPENAI_KEY" ]; then
echo " OPENAI_API_KEY: ${OPENAI_KEY:0:10}... (${#OPENAI_KEY} chars)"
else
echo " ⚠️ OPENAI_API_KEY is empty!"
fi
if [ -n "$ANTHROPIC_KEY" ]; then
echo " ANTHROPIC_API_KEY: ${ANTHROPIC_KEY:0:10}... (${#ANTHROPIC_KEY} chars)"
else
echo " ⚠️ ANTHROPIC_API_KEY is empty!"
fi
echo "🚀 Starting FastAPI server in background..."
# Export cleaned environment variables for the server process
export OPENAI_API_KEY="$OPENAI_KEY"
export ANTHROPIC_API_KEY="$ANTHROPIC_KEY"
export MISTRAL_API_KEY="$MISTRAL_KEY"
export LD_SDK_KEY="$LD_SDK"
export LD_API_KEY="$LD_API"
export LD_PROJECT_KEY="$LD_PROJECT"
.venv/bin/uvicorn api.main:app --host 0.0.0.0 --port 8000 > /tmp/agents-demo-api.log 2>&1 &
API_PID=$!
echo $API_PID > api.pid
echo "API server started with PID: $API_PID"
# Wait for server to be ready
echo "⏳ Waiting for API server to be ready..."
for i in {1..30}; do
if curl -s http://localhost:8000/health > /dev/null 2>&1; then
echo "✅ API server is ready!"
break
fi
if [ $i -eq 30 ]; then
echo "❌ API server failed to start within 30 seconds"
cat /tmp/agents-demo-api.log
exit 1
fi
sleep 1
done
- name: Run tests with HTTP evaluator
id: run-tests
env:
LD_CICD_SDK_KEY: ${{ secrets.LD_CICD_SDK_KEY }}
LD_CICD_PROJECT_KEY: ${{ secrets.LD_CICD_PROJECT_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
PYTHONPATH: ${{ github.workspace }}
run: |
echo "🧪 Running AI Config test suite with HTTP evaluator (tests full multi-agent routing)..."
echo "API server URL: http://localhost:8000"
echo "Test data file: test_data/ai_config_evaluation.yaml"
# PROOF OF EXECUTION: Show current timestamp
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "🕐 TEST EXECUTION START TIME: $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
echo "🔍 Workflow Run ID: ${{ github.run_id }}"
echo "📝 Commit SHA: ${{ github.sha }}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
# Delete any cached logs/reports from previous runs
rm -rf logs/judge_evaluations/*.log logs/judge_evaluations/*.jsonl test-report.json
echo "🧹 Cleared old test artifacts to ensure fresh run"
# Run tests with HTTP evaluator (calls API endpoint to test full system)
.venv/bin/ld-aic test \
--config-keys "supervisor-agent,support-agent,security-agent" \
--environment production \
--evaluation-dataset test_data/ai_config_evaluation.yaml \
--evaluator http \
--api-url http://localhost:8000 \
--endpoint /chat \
--minimal-payload \
--report test-report.json
TEST_EXIT_CODE=$?
# PROOF OF EXECUTION: Show completion timestamp and verify new logs were created
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "🕐 TEST EXECUTION END TIME: $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
echo "📊 Verifying new test artifacts were created:"
ls -lh test-report.json logs/judge_evaluations/ 2>/dev/null || echo "⚠️ Warning: Expected test artifacts not found!"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
exit $TEST_EXIT_CODE
- name: Stop API server
if: always()
run: |
if [ -f api.pid ]; then
API_PID=$(cat api.pid)
echo "🛑 Stopping API server (PID: $API_PID)..."
kill $API_PID || true
rm api.pid
fi
- name: Summarize test failures
if: failure()
run: |
echo "📊 Generating human-readable failure summary..."
.venv/bin/python tools/summarize_test_failures.py || true
echo ""
echo "═══════════════════════════════════════════════════════════════════"
echo "📋 API SERVER LOGS (last 100 lines)"
echo "═══════════════════════════════════════════════════════════════════"
if [ -f /tmp/agents-demo-api.log ]; then
tail -n 100 /tmp/agents-demo-api.log
else
echo "⚠️ API log file not found at /tmp/agents-demo-api.log"
fi
- name: Cleanup secrets
if: always()
run: |
# Remove .env file containing secrets
rm -f .env
echo "🧹 Cleaned up .env file"
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
name: test-report
path: test-report.json
- name: Upload judge evaluation logs
if: always()
uses: actions/upload-artifact@v4
with:
name: judge-evaluation-logs
path: logs/judge_evaluations/**
- name: Upload API server logs
if: always()
uses: actions/upload-artifact@v4
with:
name: api-server-logs
path: /tmp/agents-demo-api.log
if-no-files-found: warn
sync-production:
name: Sync Production Configs
runs-on: ubuntu-latest
# CONFIGURATION: Change 'main' to your production branch name (e.g., 'production', 'master', 'release')
# This job creates drift detection PRs when LaunchDarkly production configs change
# Only runs on pushes to your canonical production branch
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "latest"
enable-cache: false # Disable cache to ensure fresh test runs
- name: Set up Python
run: uv python install
- name: Install dependencies
run: |
uv venv
# Install dependencies without installing the package itself (to avoid dev dependency issues)
uv pip install langchain langgraph langchain-anthropic fastapi "uvicorn[standard]" pydantic launchdarkly-server-sdk launchdarkly-server-sdk-ai numpy openai faiss-cpu PyMuPDF tiktoken streamlit requests python-dotenv PyYAML langchain-openai langchain-mcp-adapters beautifulsoup4 mcp semanticscholar rank-bm25 langchain-mistralai httpx boto3 langchain-aws
# Force fresh clone by clearing all caches
rm -rf ~/.cache/uv ~/.cache/pip /tmp/pip-* || true
uv pip install --no-cache --force-reinstall git+https://x-access-token:${{ secrets.GH_PAT }}@github.com/launchdarkly-labs/scarlett_ai_configs_ci_cd-.git@02cb9b1
- name: Validate required secrets
run: |
if [ -z "${{ secrets.LD_API_KEY }}" ]; then
echo "::error::Missing required secret: LD_API_KEY"
exit 1
fi
if [ -z "${{ secrets.LD_PROJECT_KEY }}" ]; then
echo "::error::Missing required secret: LD_PROJECT_KEY"
exit 1
fi
echo "✅ Required secrets are configured"
- name: Sync production configs
env:
LD_SDK_KEY: ${{ secrets.LD_SDK_KEY }}
LD_API_KEY: ${{ secrets.LD_API_KEY }}
LD_PROJECT_KEY: ${{ secrets.LD_PROJECT_KEY }}
run: |
echo "🔄 Syncing AI configs from LaunchDarkly production..."
mkdir -p configs
.venv/bin/ld-aic sync \
--environment production \
--output-dir configs \
--format json \
--generate-module
- name: Check for drift
run: |
# Check if there are changes to commit
git diff --exit-code configs/ || echo "DRIFT_DETECTED=true" >> $GITHUB_ENV
- name: Create PR for config updates
if: env.DRIFT_DETECTED == 'true'
uses: peter-evans/create-pull-request@v5
with:
token: ${{ secrets.GITHUB_TOKEN }}
commit-message: 'Sync AI configs from production'
title: 'Sync AI Configs from Production'
body: |
## 🔄 Production Config Sync
This PR updates the local AI config defaults to match production.
### AI Configs Synced
- `supervisor-agent` - Multi-agent workflow orchestration
- `support-agent` - RAG + MCP research capabilities
- `security-agent` - PII detection and compliance
### Changes
- Updated config snapshots in `configs/`
- Regenerated `configs/production_defaults.py`
Please review the changes to ensure they are expected.
branch: sync/production-configs
delete-branch: true