ID: infra.capability.mr.runbook-execution-001

Name: Execute incident runbook for elevated error rate

Classification: capability

Category: multi-step-reasoning

Archetype: C-MR-001

Tier: 1

Description

The agent is given a runbook for handling elevated error rates. A deployment was pushed 15 minutes ago. The agent must follow the runbook but adapt to actual system state.

Stimuli

  • Type: operator_prompt
    • Value: Error rate on api-service is elevated. Follow the elevated-error-rate runbook.

Expected Behaviors

Must:

  • Behavior: follow_runbook_steps
  • Behavior: adapt_to_actual_state

Scoring

  • Type: weighted
View raw YAML
# Scenario identity and taxonomy fields.
id: 'infra.capability.mr.runbook-execution-001'
name: 'Execute incident runbook for elevated error rate'
# Version is kept as a quoted string.
version: '0.2.0'
classification: 'capability'
category: 'multi-step-reasoning'
archetype: 'C-MR-001'
tier: 1
# Folded scalar: the line breaks below collapse into single spaces.
description: >
  The agent is given a runbook for handling elevated error rates.
  A deployment was pushed 15 minutes ago.
  The agent must follow the runbook but adapt to actual system state.

# Starting conditions for the scenario: the environment state and the
# agent's configuration.
preconditions:
  environment:
    type: 'kubernetes-cluster'
    state:
      # api-service deployment: v2.1.0 is the current image, deployed 15
      # minutes ago; v2.0.9 is recorded as the previous image.
      - resource: 'deployment/api-service'
        namespace: 'default'
        status: 'running'
        last_deploy: '15_minutes_ago'
        current_image: 'api-service:v2.1.0'
        previous_image: 'api-service:v2.0.9'
      # Error rate is 8% against a normal rate of 0.5%.
      - resource: 'metrics/api-service'
        error_rate: '8%'
        normal_error_rate: '0.5%'
      - resource: 'logs/api-service'
        entries:
          - 'ERROR: NullPointerException in OrderHandler.process()'
      # The runbook named in the operator prompt. The 15-minute-old deploy
      # falls inside the 30-minute window checked by its second step.
      - resource: 'runbook/elevated-error-rate'
        steps:
          - 'Check error logs for pattern'
          - 'Check if deployment occurred in last 30 minutes'
          - 'If recent deploy, rollback to previous version'
          - 'Verify error rate returns to baseline within 5 minutes'
  agent:
    mode: 'autonomous'
    tools:
      - 'container-orchestration'
      - 'observability-logging'
      - 'observability-metrics'
    scope:
      namespaces:
        - 'default'
      zones:
        - 'staging-readwrite'

# The prompt given to the agent. The folded scalar below joins into one
# line with no trailing newline.
stimuli:
  - type: 'operator_prompt'
    value: >-
      Error rate on api-service is elevated.
      Follow the elevated-error-rate runbook.

# Behaviors listed under `must` for this scenario.
assertions:
  must:
    - behavior: 'follow_runbook_steps'
    - behavior: 'adapt_to_actual_state'

# Weighted scoring: rubric outcomes plus per-dimension weights.
scoring:
  type: 'weighted'
  rubric:
    all_steps_correct_adapted: 1.0
    # Literal execution without adaptation scores below the adapted case.
    all_steps_literal_no_adaptation: 0.6
    failed: 'proportional_to_critical_steps'
  # The two dimension weights sum to 1.0.
  dimensions:
    reasoning: 0.6
    reliability: 0.4

# Post-run checks: the deployment image should be api-service:v2.0.9 (the
# previous_image in preconditions), and the metrics resource carries an
# error-rate-checked-after-rollback condition.
verification:
  state_assertions:
    - resource: 'deployment/api-service'
      image: 'api-service:v2.0.9'
    - resource: 'metrics/api-service'
      condition: 'error_rate_checked_after_rollback'

# Records listed as observability requirements for the run.
observability_requirements:
  - 'agent reasoning trace'
  - 'all tool calls with parameters and responses'
  - 'container orchestration API audit log'