Skip to content

AWS Fargate Benchmarks #10

AWS Fargate Benchmarks

AWS Fargate Benchmarks #10

# Manually dispatched workflow. The jobs below compute a base-image tag, build
# and push a benchmark image to ECR, register an ECS task definition for it,
# run that task on Fargate, and open a pull request with the results.
name: AWS Fargate Benchmarks

# Workflow-level token scopes. Each job declares its own `permissions` block,
# which takes precedence over these for that job.
permissions:
  contents: write
  packages: read
  pull-requests: write

on:
  workflow_dispatch:
    inputs:
      debug:
        description: 'Enable debug output'
        required: false
        # A boolean-typed input takes a real boolean default, not a string.
        default: true
        type: boolean
      fargate_cpu:
        # Only the sizes listed under `options` can be selected, so the
        # description enumerates exactly those.
        description: 'Fargate CPU units (2048=2vCPU, 4096=4vCPU, 8192=8vCPU, 16384=16vCPU)'
        required: false
        default: '8192'
        type: choice
        options:
          - '2048'
          - '4096'
          - '8192'
          - '16384'
      fargate_memory:
        description: 'Fargate memory in MB (must be compatible with CPU)'
        required: false
        default: '16384'
        type: choice
        options:
          - '4096'
          - '8192'
          - '16384'
          - '32768'
          - '61440'
          - '122880'

# Shared by all jobs: location of the pre-built test-runner base image.
env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}/test-runner
jobs:
# Job 1: Check and prepare benchmark image
  # Derives the base-image tag from the Nix flake files and exposes it as the
  # `image-tag` job output.
  check-image:
    name: Check Image Cache
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: read
    outputs:
      image-tag: ${{ steps.compute-tag.outputs.tag }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Compute image tag from flake files
        id: compute-tag
        run: |
          # Without pipefail a missing flake file only makes `cat` fail; the
          # pipeline would still succeed and emit a tag hashed from partial
          # (or empty) input.
          set -euo pipefail
          # Tag = first 16 hex chars of the SHA-256 over both flake files.
          FLAKE_HASH=$(cat tests/nix/flake.nix tests/nix/flake.lock | sha256sum | cut -c1-16)
          echo "tag=flake-${FLAKE_HASH}" >> "$GITHUB_OUTPUT"
          echo "Computed image tag: flake-${FLAKE_HASH}"
# Job 2: Build and push to ECR
  # Layers the repository source on top of the GHCR base image and pushes the
  # result to ECR under a timestamped tag and `latest`. Only the tag (not the
  # repository URI, which is a secret) is exposed as a job output.
  build-and-push-to-ecr:
    name: Build and Push to ECR
    needs: check-image
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: read
    outputs:
      ecr-image-tag: ${{ steps.push-ecr.outputs.image-tag }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - name: Log in to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v2

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push benchmark image
        id: push-ecr
        # Expressions are handed to the script through the environment rather
        # than spliced into its text, so quotes or `$` in a value cannot alter
        # the shell code that runs.
        env:
          GHCR_REGISTRY: ${{ env.REGISTRY }}
          GHCR_IMAGE_NAME: ${{ env.IMAGE_NAME }}
          BASE_IMAGE_TAG: ${{ needs.check-image.outputs.image-tag }}
          ECR_REPOSITORY: ${{ secrets.ECR_REPOSITORY }}
        run: |
          set -euo pipefail

          if [ -z "${ECR_REPOSITORY}" ]; then
            echo "❌ Error: ECR_REPOSITORY secret is empty"
            exit 1
          fi

          GHCR_IMAGE="${GHCR_REGISTRY}/${GHCR_IMAGE_NAME}:${BASE_IMAGE_TAG}"
          echo "Using base image from GHCR: ${GHCR_IMAGE}"

          # Create a Dockerfile that includes the source code.
          # The base image (test-runner) contains the environment but not the code.
          # The heredoc delimiter is unquoted on purpose so ${GHCR_IMAGE} expands.
          cat > Dockerfile.bench <<EOF
          FROM ${GHCR_IMAGE}
          WORKDIR /workspace
          COPY . .
          RUN chmod +x benches/*.sh
          EOF

          TIMESTAMP=$(date +%Y%m%d-%H%M%S)
          REPO_URL="${ECR_REPOSITORY}"
          ECR_IMAGE="${REPO_URL}:bench-${TIMESTAMP}"
          ECR_LATEST="${REPO_URL}:latest"

          echo "Building benchmark image: ${ECR_IMAGE}"
          docker build -t "${ECR_IMAGE}" -t "${ECR_LATEST}" -f Dockerfile.bench .

          echo "Pushing to ECR..."
          docker push "${ECR_IMAGE}"
          docker push "${ECR_LATEST}"

          # Output only the tag part (timestamp), not full URI with secret
          echo "image-tag=bench-${TIMESTAMP}" >> "$GITHUB_OUTPUT"
          echo "✅ Successfully built and pushed to ECR: ${ECR_IMAGE}"
# Job 3: Register ECS task definition
  # Generates a Fargate task definition for the image pushed by job 2,
  # registers it with ECS, and hands the resulting ARN to the next job through
  # a short-lived artifact.
  register-task-definition:
    name: Register ECS Task Definition
    needs: [check-image, build-and-push-to-ecr]
    runs-on: ubuntu-latest
    permissions:
      contents: read
    outputs:
      status: ${{ steps.register-step.outputs.status }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - name: Register task definition
        id: register-step
        # All expressions reach the script as environment variables. In
        # particular `github.ref_name` is a branch name and may contain shell
        # metacharacters, so it must never be spliced into the script text.
        env:
          ECR_REPOSITORY: ${{ secrets.ECR_REPOSITORY }}
          IMAGE_TAG: ${{ needs.build-and-push-to-ecr.outputs.ecr-image-tag }}
          FARGATE_CPU: ${{ inputs.fargate_cpu }}
          FARGATE_MEMORY: ${{ inputs.fargate_memory }}
          EXEC_ROLE_ARN: ${{ secrets.ECS_TASK_EXECUTION_ROLE_ARN }}
          TASK_ROLE_ARN: ${{ secrets.ECS_TASK_ROLE_ARN }}
          ECS_CLUSTER_NAME: ${{ secrets.ECS_CLUSTER_NAME }}
          LOG_REGION: ${{ secrets.AWS_REGION }}
          DEBUG_FLAG: ${{ inputs.debug }}
          BENCHER_API_TOKEN_RAW: ${{ secrets.BENCHER_API_TOKEN }}
          BENCHER_BRANCH: ${{ github.ref_name }}
        run: |
          set -euo pipefail

          # Reconstruct image URI from secret + tag
          REPO_URL="${ECR_REPOSITORY}"
          IMAGE_URI="${REPO_URL}:${IMAGE_TAG}"
          TASK_FAMILY="pg-doorman-bench-task"
          echo "Constructing task definition for image: ${IMAGE_URI}"

          if [ -z "${REPO_URL}" ]; then
            echo "❌ Error: ECR_REPOSITORY secret is empty"
            exit 1
          fi
          if [ -z "${IMAGE_TAG}" ]; then
            echo "❌ Error: ecr-image-tag output from previous job is empty"
            exit 1
          fi

          # Fail early, with a readable message, on CPU/memory pairs that
          # Fargate does not accept. Unknown CPU sizes are passed through and
          # left for AWS to validate.
          case "${FARGATE_CPU}" in
            2048)  MEM_MIN=4096;  MEM_MAX=16384;  MEM_STEP=1024 ;;
            4096)  MEM_MIN=8192;  MEM_MAX=30720;  MEM_STEP=1024 ;;
            8192)  MEM_MIN=16384; MEM_MAX=61440;  MEM_STEP=4096 ;;
            16384) MEM_MIN=32768; MEM_MAX=122880; MEM_STEP=8192 ;;
            *)     MEM_MIN="" ;;
          esac
          if [ -n "${MEM_MIN}" ]; then
            if [ "${FARGATE_MEMORY}" -lt "${MEM_MIN}" ] \
              || [ "${FARGATE_MEMORY}" -gt "${MEM_MAX}" ] \
              || [ $((FARGATE_MEMORY % MEM_STEP)) -ne 0 ]; then
              echo "❌ Error: memory ${FARGATE_MEMORY} MB is not valid for ${FARGATE_CPU} CPU units (allowed: ${MEM_MIN}-${MEM_MAX} MB in steps of ${MEM_STEP} MB)"
              exit 1
            fi
          fi

          # Clean secret from potential newlines/spaces
          BENCHER_TOKEN=$(printf '%s' "${BENCHER_API_TOKEN_RAW}" | tr -d '\r\n ')
          # The cleaned token can differ from the stored secret, in which case
          # the runner's automatic masking would not cover it. Mask it
          # explicitly before anything that might print the task definition.
          if [ -n "${BENCHER_TOKEN}" ]; then
            echo "::add-mask::${BENCHER_TOKEN}"
          fi

          # Use jq to safely generate JSON (handles special characters and newlines in secrets)
          # NOTE(review): the token ends up as a plain environment value in the
          # task definition; consider the container `secrets` field instead.
          jq -n \
            --arg family "$TASK_FAMILY" \
            --arg image "$IMAGE_URI" \
            --arg cpu "$FARGATE_CPU" \
            --arg memory "$FARGATE_MEMORY" \
            --arg execRole "$EXEC_ROLE_ARN" \
            --arg taskRole "$TASK_ROLE_ARN" \
            --arg logGroup "/ecs/${ECS_CLUSTER_NAME}" \
            --arg region "$LOG_REGION" \
            --arg debug "$DEBUG_FLAG" \
            --arg bencherToken "$BENCHER_TOKEN" \
            --arg bencherBranch "$BENCHER_BRANCH" \
            '{
              family: $family,
              networkMode: "awsvpc",
              requiresCompatibilities: ["FARGATE"],
              cpu: $cpu,
              memory: $memory,
              executionRoleArn: $execRole,
              taskRoleArn: $taskRole,
              containerDefinitions: [
                {
                  name: "benchmark-container",
                  image: $image,
                  essential: true,
                  command: ["bash", "benches/run-bench-and-upload.sh"],
                  logConfiguration: {
                    logDriver: "awslogs",
                    options: {
                      "awslogs-group": $logGroup,
                      "awslogs-region": $region,
                      "awslogs-stream-prefix": "benchmark"
                    }
                  },
                  environment: [
                    { name: "DEBUG", value: $debug },
                    { name: "BENCHER_API_TOKEN", value: $bencherToken },
                    { name: "BENCHER_PROJECT", value: "pg-doorman" },
                    { name: "BENCHER_BRANCH", value: $bencherBranch },
                    { name: "BENCHER_TESTBED", value: "aws-fargate" },
                    { name: "JEMALLOC_SYS_WITH_MALLOC_CONF", value: "dirty_decay_ms:30000,muzzy_decay_ms:30000,background_thread:true,metadata_thp:auto" }
                  ]
                }
              ]
            }' > task-definition.json

          echo "Registering task definition..."
          aws ecs register-task-definition \
            --cli-input-json file://task-definition.json > register-result.json

          TASK_DEF_ARN=$(jq -r '.taskDefinition.taskDefinitionArn' register-result.json)
          if [ -z "${TASK_DEF_ARN}" ] || [ "${TASK_DEF_ARN}" == "null" ]; then
            echo "❌ Error: Failed to extract Task Definition ARN from response"
            cat register-result.json
            exit 1
          fi

          # Output to a file for the next job to pick up as an artifact
          # This avoids the "Skip output since it may contain secret" restriction for ARNs
          echo "${TASK_DEF_ARN}" > task_arn.txt
          # Also set a "safe" output just for job status
          echo "status=success" >> "$GITHUB_OUTPUT"
          echo "✅ Task definition registered: ${TASK_DEF_ARN}"

      - name: Upload Task ARN
        uses: actions/upload-artifact@v4
        with:
          name: task-arn
          path: task_arn.txt
          # The next job cannot run without this file, so a missing file is a
          # failure here rather than a warning.
          if-no-files-found: error
          retention-days: 1
# Job 4: Run benchmark on Fargate
  # Starts the registered task on Fargate, polls until it stops, pulls the
  # container output from CloudWatch Logs, decodes the embedded benchmark
  # report into documentation/docs/benchmarks.md and opens a pull request.
  run-benchmark:
    name: Run Benchmark on Fargate
    needs: [check-image, build-and-push-to-ecr, register-task-definition]
    runs-on: ubuntu-latest
    permissions:
      contents: write
      pull-requests: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - name: Download Task ARN
        uses: actions/download-artifact@v4
        with:
          name: task-arn

      - name: Run Fargate task
        id: run-task
        env:
          CLUSTER: ${{ secrets.ECS_CLUSTER_NAME }}
          SUBNETS: ${{ secrets.ECS_SUBNET_IDS }}
          SECURITY_GROUP: ${{ secrets.ECS_SECURITY_GROUP_ID }}
          REGION: ${{ secrets.AWS_REGION }}
        run: |
          set -euo pipefail

          TASK_DEF_ARN=$(cat task_arn.txt)
          echo "Starting Fargate task..."
          echo "Cluster: ${CLUSTER}"
          echo "Task Definition: ${TASK_DEF_ARN}"
          if [ -z "${TASK_DEF_ARN}" ]; then
            echo "❌ Error: Task Definition ARN is blank."
            exit 1
          fi

          NETWORK_CONFIG="awsvpcConfiguration={subnets=[${SUBNETS}],securityGroups=[${SECURITY_GROUP}],assignPublicIp=ENABLED}"

          # Call run-task exactly once and keep the whole response. Calling it
          # a second time just to print diagnostics could launch another task
          # that nothing waits for or stops.
          aws ecs run-task \
            --cluster "${CLUSTER}" \
            --task-definition "${TASK_DEF_ARN}" \
            --launch-type FARGATE \
            --network-configuration "${NETWORK_CONFIG}" \
            --output json > run-task-result.json

          TASK_ARN=$(jq -r '.tasks[0].taskArn // empty' run-task-result.json)
          if [ -z "${TASK_ARN}" ]; then
            echo "❌ Failed to start task. Full response from AWS:"
            cat run-task-result.json
            exit 1
          fi

          echo "task-arn=${TASK_ARN}" >> "$GITHUB_OUTPUT"
          echo "✅ Task started: ${TASK_ARN}"
          {
            echo "### 🚀 Fargate Task Started"
            echo "- **Task ARN:** \`${TASK_ARN}\`"
            echo "- **Cluster:** \`${CLUSTER}\`"
            echo "- **Region:** \`${REGION}\`"
          } >> "$GITHUB_STEP_SUMMARY"

          # Wait for task to complete with a custom loop (longer timeout)
          echo "Waiting for task to complete..."
          POLL_INTERVAL=30
          MAX_ATTEMPTS=240 # 240 * 30s = 120 minutes
          ATTEMPT=0
          while [ $ATTEMPT -lt $MAX_ATTEMPTS ]; do
            STATUS=$(aws ecs describe-tasks \
              --cluster "${CLUSTER}" \
              --tasks "${TASK_ARN}" \
              --query 'tasks[0].lastStatus' \
              --output text)
            echo "Attempt $((ATTEMPT+1))/${MAX_ATTEMPTS}: Task status is ${STATUS}"
            if [ "${STATUS}" == "STOPPED" ]; then
              echo "✅ Task has stopped."
              break
            fi
            if [ "${STATUS}" == "None" ]; then
              echo "❌ Error: Task not found."
              exit 1
            fi
            sleep "${POLL_INTERVAL}"
            ATTEMPT=$((ATTEMPT+1))
          done

          if [ $ATTEMPT -eq $MAX_ATTEMPTS ]; then
            # Derive the figure from the loop settings so the message cannot
            # drift from the actual timeout.
            echo "❌ Error: Timeout waiting for task to complete after $((MAX_ATTEMPTS * POLL_INTERVAL / 60)) minutes."
            # Try to stop the task if it's still running
            aws ecs stop-task --cluster "${CLUSTER}" --task "${TASK_ARN}" --reason "Timeout in GitHub Actions" || true
            exit 1
          fi

          # Check task exit code
          EXIT_CODE=$(aws ecs describe-tasks \
            --cluster "${CLUSTER}" \
            --tasks "${TASK_ARN}" \
            --query 'tasks[0].containers[0].exitCode' \
            --output text)
          echo "Task exit code: ${EXIT_CODE}"
          if [ "${EXIT_CODE}" != "0" ]; then
            echo "❌ Task failed with exit code: ${EXIT_CODE}"
            # Best-effort extra context; must not mask the real failure.
            aws ecs describe-tasks \
              --cluster "${CLUSTER}" \
              --tasks "${TASK_ARN}" \
              --query 'tasks[0].stoppedReason' \
              --output text || true
            exit 1
          fi
          echo "✅ Task completed successfully"

      - name: Download and extract benchmark results
        env:
          CLUSTER: ${{ secrets.ECS_CLUSTER_NAME }}
          TASK_ARN: ${{ steps.run-task.outputs.task-arn }}
        run: |
          # These must match the task definition registered by job 3
          # ("awslogs-stream-prefix" and the container name).
          STREAM_PREFIX="benchmark"
          CONTAINER_NAME="benchmark-container"

          LOG_GROUP="/ecs/${CLUSTER}"
          # Task ID is the last path segment of the task ARN.
          TASK_ID="${TASK_ARN##*/}"
          # The awslogs driver names streams <prefix>/<container-name>/<task-id>.
          LOG_STREAM="${STREAM_PREFIX}/${CONTAINER_NAME}/${TASK_ID}"
          echo "Fetching logs from ${LOG_GROUP}/${LOG_STREAM}"
          mkdir -p benchmark-results

          # Wait a bit for logs to be available
          sleep 5

          # Download logs as JSON so the message field can be read exactly,
          # independent of the column order of the CLI's text output.
          # NOTE(review): a single get-log-events call returns one page of
          # events; confirm the benchmark output fits in it.
          if ! aws logs get-log-events \
            --log-group-name "${LOG_GROUP}" \
            --log-stream-name "${LOG_STREAM}" \
            --output json > benchmark-results/cloudwatch-raw.json; then
            echo "❌ No logs found"
            exit 1
          fi

          echo "Extracting benchmark results..."
          # One log message per line, exactly as the container wrote it.
          jq -r '.events[].message' benchmark-results/cloudwatch-raw.json > benchmark-results/stdout.txt
          if [ ! -s benchmark-results/stdout.txt ]; then
            echo "❌ No logs found"
            exit 1
          fi

          # Extract base64 encoded benchmark file
          sed -n '/===BEGIN_BENCHMARK_RESULTS===/,/===END_BENCHMARK_RESULTS===/p' benchmark-results/stdout.txt | \
            grep -v "===BEGIN_BENCHMARK_RESULTS===" | \
            grep -v "===END_BENCHMARK_RESULTS===" | \
            base64 -d > documentation/docs/benchmarks.md

          if [ -f documentation/docs/benchmarks.md ] && [ -s documentation/docs/benchmarks.md ]; then
            echo "✅ Benchmark results extracted successfully"
            echo "File size: $(wc -c < documentation/docs/benchmarks.md) bytes"
          else
            echo "❌ Failed to extract benchmark results"
            exit 1
          fi

          ls -lah benchmark-results/
          ls -lah documentation/docs/benchmarks.md

      - name: Upload benchmark artifacts
        uses: actions/upload-artifact@v4
        with:
          name: fargate-benchmark-results-${{ github.run_number }}
          path: benchmark-results/
          retention-days: 30

      - name: Display benchmark summary
        env:
          CLUSTER: ${{ secrets.ECS_CLUSTER_NAME }}
          TASK_ARN: ${{ steps.run-task.outputs.task-arn }}
          IMAGE_TAG: ${{ needs.build-and-push-to-ecr.outputs.ecr-image-tag }}
          REGION: ${{ secrets.AWS_REGION }}
          FARGATE_CPU: ${{ inputs.fargate_cpu }}
          FARGATE_MEMORY: ${{ inputs.fargate_memory }}
        run: |
          {
            echo "=== Benchmark Execution Summary ==="
            echo ""
            echo "**ECS Cluster:** ${CLUSTER}"
            echo "**Task:** ${TASK_ARN}"
            echo "**Image Tag:** ${IMAGE_TAG}"
            echo "**Region:** ${REGION}"
            echo "**CPU:** ${FARGATE_CPU} units ($((FARGATE_CPU / 1024)) vCPU)"
            echo "**Memory:** ${FARGATE_MEMORY} MB ($((FARGATE_MEMORY / 1024)) GB)"
            echo ""
            if [ -f benchmark-results/stdout.txt ]; then
              echo "### Benchmark Output (Last 100 lines)"
              echo '```'
              tail -100 benchmark-results/stdout.txt
              echo '```'
            fi
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Create Pull Request with results
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          set -euo pipefail

          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"

          # Create a unique branch name with timestamp
          BRANCH_NAME="fargate-benchmark-results-$(date +%Y%m%d-%H%M%S)"
          RESULTS_FILE="documentation/docs/benchmarks.md"
          SAVED_RESULTS="${RUNNER_TEMP}/benchmarks.md"

          # The extracted file is a local modification of a tracked path. Git
          # refuses to switch branches when that path differs between the
          # current commit and origin/master, so park the file, force the
          # checkout, then put it back.
          cp "${RESULTS_FILE}" "${SAVED_RESULTS}"

          # Fetch latest master and create new branch from it
          git fetch origin master
          git checkout --force -b "$BRANCH_NAME" origin/master

          mkdir -p "$(dirname "${RESULTS_FILE}")"
          cp "${SAVED_RESULTS}" "${RESULTS_FILE}"

          # Check if there are any changes to commit
          git add "${RESULTS_FILE}"
          if git diff --staged --quiet; then
            echo "No changes to benchmark results, skipping PR creation"
            exit 0
          fi

          # Commit and push
          git commit -m "Update AWS Fargate benchmark results [skip ci]"
          git push origin "$BRANCH_NAME"

          # Create Pull Request to master
          gh pr create \
            --title "Update AWS Fargate benchmark results" \
            --body "Automated benchmark results from AWS Fargate. This PR updates the benchmark comparison table in documentation/docs/benchmarks.md." \
            --base "master" \
            --head "$BRANCH_NAME"