Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions evals/claude-code/eval.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,46 @@ config:
toolPattern: ".*"
minToolCalls: 1
maxToolCalls: 20
- glob: ../tasks/kubevirt/*/*.yaml
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, sorry, this was a mistake: it double-runs everything. I somehow missed this in my local test run.

labelSelector:
expected-tool: vm_create
assertions:
toolsUsed:
- server: kubernetes
tool: vm_create
- glob: ../tasks/kubevirt/*/*.yaml
labelSelector:
expected-tool: vm_clone
assertions:
toolsUsed:
- server: kubernetes
tool: vm_clone
- glob: ../tasks/kubevirt/*/*.yaml
labelSelector:
expected-tool: vm_lifecycle
assertions:
toolsUsed:
- server: kubernetes
tool: vm_lifecycle
- glob: ../tasks/kubevirt/*/*.yaml
labelSelector:
expected-tool: resources_delete
assertions:
toolsUsed:
- server: kubernetes
tool: resources_delete
- glob: ../tasks/kubevirt/*/*.yaml
labelSelector:
expected-tool: resources_create_or_update
assertions:
toolsUsed:
- server: kubernetes
tool: resources_create_or_update
- path: ../tasks/kubevirt/troubleshoot-vm/task.yaml
assertions:
promptsUsed:
- server: kubernetes
prompt: vm-troubleshoot
# Kiali tasks
- glob: ../tasks/*/*/*.yaml
labelSelector:
Expand Down
40 changes: 40 additions & 0 deletions evals/openai-agent/eval.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,46 @@ config:
toolPattern: ".*"
minToolCalls: 1
maxToolCalls: 20
- glob: ../tasks/kubevirt/*/*.yaml
labelSelector:
expected-tool: vm_create
assertions:
toolsUsed:
- server: kubernetes
tool: vm_create
- glob: ../tasks/kubevirt/*/*.yaml
labelSelector:
expected-tool: vm_clone
assertions:
toolsUsed:
- server: kubernetes
tool: vm_clone
- glob: ../tasks/kubevirt/*/*.yaml
labelSelector:
expected-tool: vm_lifecycle
assertions:
toolsUsed:
- server: kubernetes
tool: vm_lifecycle
- glob: ../tasks/kubevirt/*/*.yaml
labelSelector:
expected-tool: resources_delete
assertions:
toolsUsed:
- server: kubernetes
tool: resources_delete
- glob: ../tasks/kubevirt/*/*.yaml
labelSelector:
expected-tool: resources_create_or_update
assertions:
toolsUsed:
- server: kubernetes
tool: resources_create_or_update
- path: ../tasks/kubevirt/troubleshoot-vm/task.yaml
assertions:
promptsUsed:
- server: kubernetes
prompt: vm-troubleshoot
# Kiali tasks
- glob: ../tasks/*/*/*.yaml
labelSelector:
Expand Down
18 changes: 0 additions & 18 deletions evals/tasks/kubevirt/claude-code/eval.yaml

This file was deleted.

19 changes: 10 additions & 9 deletions evals/tasks/kubevirt/clone-vm/task.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ metadata:
labels:
suite: kubevirt
requires: kubevirt
expected-tool: vm_clone
name: "clone-vm"
difficulty: medium
spec:
Expand All @@ -15,19 +16,19 @@ spec:
apiVersion: v1
kind: Namespace
metadata:
name: vm-test
name: kvt-clone
ignoreNotFound: true
- k8s.create:
apiVersion: v1
kind: Namespace
metadata:
name: vm-test
name: kvt-clone
- k8s.create:
apiVersion: kubevirt.io/v1
kind: VirtualMachine
metadata:
name: source-vm
namespace: vm-test
namespace: kvt-clone
labels:
app: source
spec:
Expand All @@ -48,7 +49,7 @@ spec:
inline: |-
#!/usr/bin/env bash
set -e
NS="vm-test"
NS="kvt-clone"
SOURCE_VM="source-vm"
TARGET_VM="cloned-vm"

Expand Down Expand Up @@ -83,27 +84,27 @@ spec:
- script:
inline: |-
#!/usr/bin/env bash
kubectl delete virtualmachineclone --all -n vm-test --ignore-not-found=true 2>/dev/null || true
kubectl delete virtualmachineclone --all -n kvt-clone --ignore-not-found=true 2>/dev/null || true
- k8s.delete:
apiVersion: kubevirt.io/v1
kind: VirtualMachine
metadata:
name: cloned-vm
namespace: vm-test
namespace: kvt-clone
ignoreNotFound: true
- k8s.delete:
apiVersion: kubevirt.io/v1
kind: VirtualMachine
metadata:
name: source-vm
namespace: vm-test
namespace: kvt-clone
ignoreNotFound: true
- k8s.delete:
apiVersion: v1
kind: Namespace
metadata:
name: vm-test
name: kvt-clone
ignoreNotFound: true
prompt:
inline: |
Clone the virtual machine named source-vm in the vm-test namespace to a new virtual machine named cloned-vm.
Clone the virtual machine named source-vm in the kvt-clone namespace to a new virtual machine named cloned-vm.
13 changes: 7 additions & 6 deletions evals/tasks/kubevirt/create-vm-basic/task.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ metadata:
labels:
suite: kubevirt
requires: kubevirt
expected-tool: vm_create
name: "create-basic-vm"
difficulty: easy
spec:
Expand All @@ -15,19 +16,19 @@ spec:
apiVersion: v1
kind: Namespace
metadata:
name: vm-test
name: kvt-create-basic
ignoreNotFound: true
- k8s.create:
apiVersion: v1
kind: Namespace
metadata:
name: vm-test
name: kvt-create-basic
verify:
- script:
inline: |-
#!/usr/bin/env bash
source ../helpers/verify-vm.sh
NS="vm-test"
NS="kvt-create-basic"

verify_vm_exists "test-vm" "$NS" || exit 1
verify_container_disk "test-vm" "$NS" "fedora" || exit 1
Expand All @@ -42,13 +43,13 @@ spec:
kind: VirtualMachine
metadata:
name: test-vm
namespace: vm-test
namespace: kvt-create-basic
ignoreNotFound: true
- k8s.delete:
apiVersion: v1
kind: Namespace
metadata:
name: vm-test
name: kvt-create-basic
ignoreNotFound: true
prompt:
inline: Create a Fedora virtual machine named test-vm in the vm-test namespace.
inline: Create a Fedora virtual machine named test-vm in the kvt-create-basic namespace.
13 changes: 7 additions & 6 deletions evals/tasks/kubevirt/create-vm-ubuntu/task.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ metadata:
labels:
suite: kubevirt
requires: kubevirt
expected-tool: vm_create
name: "create-ubuntu-vm"
difficulty: easy
spec:
Expand All @@ -15,19 +16,19 @@ spec:
apiVersion: v1
kind: Namespace
metadata:
name: vm-test
name: kvt-create-ubuntu
ignoreNotFound: true
- k8s.create:
apiVersion: v1
kind: Namespace
metadata:
name: vm-test
name: kvt-create-ubuntu
verify:
- script:
inline: |-
#!/usr/bin/env bash
source ../helpers/verify-vm.sh
NS="vm-test"
NS="kvt-create-ubuntu"

verify_vm_exists "ubuntu-vm" "$NS" || exit 1
verify_container_disk "ubuntu-vm" "$NS" "ubuntu" || exit 1
Expand All @@ -42,13 +43,13 @@ spec:
kind: VirtualMachine
metadata:
name: ubuntu-vm
namespace: vm-test
namespace: kvt-create-ubuntu
ignoreNotFound: true
- k8s.delete:
apiVersion: v1
kind: Namespace
metadata:
name: vm-test
name: kvt-create-ubuntu
ignoreNotFound: true
prompt:
inline: Create an Ubuntu virtual machine named ubuntu-vm in the vm-test namespace.
inline: Create an Ubuntu virtual machine named ubuntu-vm in the kvt-create-ubuntu namespace.
13 changes: 7 additions & 6 deletions evals/tasks/kubevirt/create-vm-with-instancetype/task.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ metadata:
labels:
suite: kubevirt
requires: kubevirt
expected-tool: vm_create
name: "create-vm-with-instancetype"
difficulty: easy
spec:
Expand All @@ -15,19 +16,19 @@ spec:
apiVersion: v1
kind: Namespace
metadata:
name: vm-test
name: kvt-create-instancetype
ignoreNotFound: true
- k8s.create:
apiVersion: v1
kind: Namespace
metadata:
name: vm-test
name: kvt-create-instancetype
verify:
- script:
inline: |-
#!/usr/bin/env bash
source ../helpers/verify-vm.sh
NS="vm-test"
NS="kvt-create-instancetype"

verify_vm_exists "test-vm-instancetype" "$NS" || exit 1
verify_instancetype "test-vm-instancetype" "$NS" "u1.medium" || exit 1
Expand All @@ -44,13 +45,13 @@ spec:
kind: VirtualMachine
metadata:
name: test-vm-instancetype
namespace: vm-test
namespace: kvt-create-instancetype
ignoreNotFound: true
- k8s.delete:
apiVersion: v1
kind: Namespace
metadata:
name: vm-test
name: kvt-create-instancetype
ignoreNotFound: true
prompt:
inline: Create a Fedora virtual machine named test-vm-instancetype in the vm-test namespace with instancetype 'u1.medium'.
inline: Create a Fedora virtual machine named test-vm-instancetype in the kvt-create-instancetype namespace with instancetype 'u1.medium'.
13 changes: 7 additions & 6 deletions evals/tasks/kubevirt/create-vm-with-size/task.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ metadata:
labels:
suite: kubevirt
requires: kubevirt
expected-tool: vm_create
name: "create-vm-with-size"
difficulty: easy
spec:
Expand All @@ -15,19 +16,19 @@ spec:
apiVersion: v1
kind: Namespace
metadata:
name: vm-test
name: kvt-create-size
ignoreNotFound: true
- k8s.create:
apiVersion: v1
kind: Namespace
metadata:
name: vm-test
name: kvt-create-size
verify:
- script:
inline: |-
#!/usr/bin/env bash
source ../helpers/verify-vm.sh
NS="vm-test"
NS="kvt-create-size"

verify_vm_exists "test-vm-size" "$NS" || exit 1
verify_has_resources_or_instancetype "test-vm-size" "$NS" || exit 1
Expand All @@ -44,13 +45,13 @@ spec:
kind: VirtualMachine
metadata:
name: test-vm-size
namespace: vm-test
namespace: kvt-create-size
ignoreNotFound: true
- k8s.delete:
apiVersion: v1
kind: Namespace
metadata:
name: vm-test
name: kvt-create-size
ignoreNotFound: true
prompt:
inline: Create a Fedora virtual machine named test-vm-size in the vm-test namespace with size 'large'
inline: Create a Fedora virtual machine named test-vm-size in the kvt-create-size namespace with size 'large'
Loading
Loading