|
1 | 1 | use std::future::Future; |
2 | 2 | use std::time::Duration; |
3 | 3 |
|
| 4 | +/// Retry/backoff behavior summary: |
| 5 | +/// |
| 6 | +/// This module retries transient failures using exponential backoff, but permanent failures stop immediately. |
| 7 | +/// |
| 8 | +/// Backoff algorithm: |
| 9 | +/// - Starts with `delay = ETHEREUM_CALL_MIN_RETRY_DELAY` |
| 10 | +/// - After each transient failure, sleeps for `delay` and then updates it using the following formula: |
| 11 | +/// delay = min(delay * ETHEREUM_CALL_BACKOFF_FACTOR, ETHEREUM_CALL_MAX_RETRY_DELAY) |
| 12 | +/// - Stops retrying when the number of attempts exceeds the `ETHEREUM_CALL_MAX_RETRIES` constant |
| 13 | +/// |
| 14 | +/// About the retry limit: in the current implementation `attempt` starts at 0 and we stop when |
| 15 | +/// `attempt >= max_times`, incrementing `attempt` after sleeping. That means the code can perform |
| 16 | +/// max_times + 1 sleeps/retries. With the current constant value (10), that is 11 backoff intervals. |
| 17 | +/// |
| 18 | +/// Delay schedule with current config |
| 19 | +/// (start = 500ms, factor = 2.0, max delay = 60s, max_times = 10): |
| 20 | +/// |
| 21 | +/// retry 1: 0.5s |
| 22 | +/// retry 2: 1.0s |
| 23 | +/// retry 3: 2.0s |
| 24 | +/// retry 4: 4.0s |
| 25 | +/// retry 5: 8.0s |
| 26 | +/// retry 6: 16.0s |
| 27 | +/// retry 7: 32.0s |
| 28 | +/// retry 8: 60.0s (capped) |
| 29 | +/// retry 9: 60.0s |
| 30 | +/// retry 10: 60.0s |
| 31 | +/// retry 11: 60.0s (due to the max_times + 1 behavior described above) |
| 32 | +/// |
| 33 | +/// Worst-case total sleep time across all retries: |
| 34 | +/// 0.5 + 1 + 2 + 4 + 8 + 16 + 32 + 60 + 60 + 60 + 60 |
| 35 | +/// = 303.5 seconds (~5m 3.5s), |
| 36 | +/// plus the execution time of each Ethereum call attempt. |
| 37 | +
|
| 38 | +/// Minimum retry delay in milliseconds (the delay used before the first retry). |
| 39 | +pub const ETHEREUM_CALL_MIN_RETRY_DELAY: u64 = 500; // milliseconds |
| 40 | + |
| 41 | +/// Maximum number of retry attempts. |
| 42 | +/// |
| 43 | +/// Note: With the current retry loop logic this behaves as "max_times + 1" |
| 44 | +/// backoff intervals. |
| 45 | +pub const ETHEREUM_CALL_MAX_RETRIES: usize = 10; |
| 46 | + |
| 47 | +/// Exponential backoff multiplier applied to the delay after each transient failure. |
| 48 | +/// |
| 49 | +/// Note: This value should be at least 1.0; otherwise it will be clamped so the backoff never shrinks. |
| 50 | +pub const ETHEREUM_CALL_BACKOFF_FACTOR: f32 = 2.0; |
| 51 | + |
| 52 | +/// Maximum delay between retries (seconds). Delays are capped to this value. |
| 53 | +pub const ETHEREUM_CALL_MAX_RETRY_DELAY: u64 = 60; // seconds |
| 54 | + |
4 | 55 | #[derive(Debug)] |
5 | 56 | pub enum RetryError<E> { |
6 | 57 | Transient(E), |
@@ -54,7 +105,10 @@ where |
54 | 105 | return Err(RetryError::Transient(e)); |
55 | 106 | } |
56 | 107 |
|
57 | | - tracing::warn!("Retryable function failed: {e}"); |
| 108 | + tracing::warn!( |
| 109 | + "Retryable function failed, retrying in {} seconds", |
| 110 | + delay.as_secs() |
| 111 | + ); |
58 | 112 |
|
59 | 113 | tokio::time::sleep(delay).await; |
60 | 114 |
|
|
0 commit comments