|
| 1 | +# frozen_string_literal: true |
| 2 | + |
| 3 | +# Released under the MIT License. |
| 4 | +# Copyright, 2026, by Samuel Williams. |
| 5 | + |
| 6 | +require "async/container/policy" |
| 7 | + |
module Async
  module Service
    # Service-level container policy that adds failure rate monitoring on top of the base container policy.
    # When a child exits unsuccessfully and the container's observed failure rate is above the configured
    # threshold, the container is stopped.
    class Policy < Async::Container::Policy
      # Build a policy from a failure budget expressed as "at most N failures per window".
      # The threshold is computed as `maximum_failures / window` using floating point division.
      # @parameter maximum_failures [Integer] The maximum number of failures allowed within the window.
      # @parameter window [Integer] The time window in seconds for counting failures.
      # @returns [Policy] A new policy instance.
      def self.for(maximum_failures: 1, window: 10)
        new(maximum_failures.to_f / window)
      end

      # The failure rate threshold in failures per second.
      # @attribute [Float]
      attr_reader :failure_rate_threshold

      # Initialize the policy.
      # @parameter failure_rate_threshold [Float] The maximum failures per second before stopping the container.
      def initialize(failure_rate_threshold)
        @failure_rate_threshold = failure_rate_threshold
      end

      # Called when a child exits. Successful exits are ignored; for any other exit the container's
      # current failure rate is compared against the threshold and the container is stopped if it is exceeded.
      # @parameter container [Async::Container::Generic] The container.
      # @parameter child [Child] The child process.
      # @parameter status [Process::Status] The exit status.
      # @parameter name [String] The name of the child.
      # @parameter key [Symbol] An optional key for the child.
      # @parameter options [Hash] Additional options for future extensibility.
      def child_exit(container, child, status, name:, key:, **options)
        # NOTE(review): `success?` is not defined in this class; presumably inherited from the base policy.
        return if success?(status)

        # Read the failure rate after this failure is recorded.
        current_rate = container.statistics.failure_rate.per_second
        return unless current_rate > @failure_rate_threshold

        # Skip containers that are no longer running (avoids redundant stop calls during shutdown).
        return unless container.running?

        Console.error(
          self,
          "Failure rate exceeded threshold, stopping container!",
          rate: current_rate,
          threshold: @failure_rate_threshold
        )

        container.stop(true)
      end

      # The default service policy instance, built from the default arguments of {.for}
      # (1 failure per 10 second window, i.e. 0.1 failures per second) and frozen.
      DEFAULT = self.for.freeze
    end
  end
end
0 commit comments