diff --git a/cloudbuild.yaml b/cloudbuild.yaml
index cceb631..c652546 100644
--- a/cloudbuild.yaml
+++ b/cloudbuild.yaml
@@ -18,7 +18,9 @@ options:
 
 steps:
   # --- Evaluation Step ---
-  - name: 'us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:latest'
+  # NOTE(review): image tag changed from `:latest` to `:test` — confirm this is
+  # intended for merge and not a leftover from local testing.
+  - name: 'us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:test'
     entrypoint: 'bash'
     # Decrypts the secret from Secret Manager into the DB_PASSWORD environment variable
     secretEnv: ['DB_PASSWORD', 'GITHUB_TOKEN']
diff --git a/evals/agy_cli_model.yaml b/evals/agy_cli_model.yaml
new file mode 100644
index 0000000..8582b95
--- /dev/null
+++ b/evals/agy_cli_model.yaml
@@ -0,0 +1,49 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Model config selecting the `agy_cli` generator.
+agy_cli_version: "agy"
+generator: agy_cli
+
+model: "Gemini 3.1 Pro (High)"
+
+# agy is OAuth-only. The harness seeds these auth files into the sandbox from
+# Secret Manager (needs ADC + secretAccessor on the build/runtime SA), so no
+# interactive login or entrypoint seeding is required. Values are Secret
+# Manager resource paths; `latest` is fine since OAuth tokens rotate.
+agy_oauth_token_secret: "projects/${GOOGLE_CLOUD_PROJECT}/secrets/AGY_OAUTH_TOKEN/versions/latest"
+agy_installation_id_secret: "projects/${GOOGLE_CLOUD_PROJECT}/secrets/AGY_INSTALLATION_ID/versions/latest"
+
+# Environment variables, presumably exported into the agent sandbox. `${VAR}`
+# values are placeholders, presumably expanded by the harness (TODO confirm).
+# NOTE(review): confirm whether `${GOOGLE_CLOUD_PROJECT}` in the secret paths
+# above resolves to the value set here or to the harness's own environment.
+env:
+  GOOGLE_CLOUD_PROJECT: "ext-test-cloud-sql-postgres"
+  GOOGLE_CLOUD_LOCATION: "global"
+  GOOGLE_GENAI_USE_VERTEXAI: "true"
+
+  CLOUD_SQL_POSTGRES_PROJECT: "${CLOUD_SQL_POSTGRES_PROJECT}"
+  CLOUD_SQL_POSTGRES_INSTANCE: "${CLOUD_SQL_POSTGRES_INSTANCE}"
+  CLOUD_SQL_POSTGRES_REGION: "${CLOUD_SQL_POSTGRES_REGION}"
+  CLOUD_SQL_POSTGRES_DATABASE: "${CLOUD_SQL_POSTGRES_DATABASE}"
+  CLOUD_SQL_POSTGRES_USER: "${CLOUD_SQL_POSTGRES_USER}"
+  CLOUD_SQL_POSTGRES_PASSWORD: '${CLOUD_SQL_POSTGRES_PASSWORD}'
+  CLOUD_SQL_POSTGRES_IP_TYPE: "${CLOUD_SQL_POSTGRES_IP_TYPE}"
+
+# Skill directories provided to the agent at setup; presumably a path inside
+# the sandbox (TODO confirm).
+setup:
+  skills:
+    - "/workspace/cloud-sql-postgresql"
diff --git a/evals/agy_run_config.yaml b/evals/agy_run_config.yaml
new file mode 100644
index 0000000..c06f9d4
--- /dev/null
+++ b/evals/agy_run_config.yaml
@@ -0,0 +1,44 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Run config for the `agent` orchestrator using the agy CLI model config.
+dataset_config: /workspace/evals/gemini_dataset.json
+dataset_format: agent-format
+
+orchestrator: agent
+model_config: /workspace/evals/agy_cli_model.yaml
+simulated_user_model_config: /workspace/evals/gemini_2.5_pro_model.yaml
+
+# Scorers to run. `{}` entries pass an empty options mapping; the others name
+# the model config they use.
+# NOTE(review): these `model_config` paths are relative, unlike the absolute
+# /workspace/evals paths above — confirm the directory they resolve against.
+scorers:
+  trajectory_matcher: {}
+  goal_completion:
+    model_config: datasets/model_configs/gemini_2.5_pro_model.yaml
+  behavioral_metrics:
+    model_config: datasets/model_configs/gemini_2.5_pro_model.yaml
+  parameter_analysis:
+    model_config: datasets/model_configs/gemini_2.5_pro_model.yaml
+  turn_count: {}
+  end_to_end_latency: {}
+  tool_call_latency: {}
+  token_consumption: {}
+
+# BigQuery reporting; the project id comes from the `${EVAL_REPORTING_PROJECT}`
+# placeholder.
+reporting:
+  bigquery:
+    gcp_project_id: "${EVAL_REPORTING_PROJECT}"