|
23 | 23 | from grpc.aio import AioRpcError, Channel |
24 | 24 | from jumpstarter_protocol import jumpstarter_pb2, jumpstarter_pb2_grpc |
25 | 25 | from rich.console import Console |
| 26 | +from tenacity import retry, retry_if_exception_type, wait_exponential_jitter |
26 | 27 |
|
27 | 28 | from .exceptions import LeaseError |
28 | 29 | from jumpstarter.client import client_from_path |
@@ -77,6 +78,24 @@ async def get(self): |
77 | 78 | svc = ClientService(channel=self.channel, namespace=self.namespace) |
78 | 79 | return await svc.GetLease(name=self.name) |
79 | 80 |
|
| 81 | + @retry( |
| 82 | + wait=wait_exponential_jitter(initial=1, max=120, jitter=1), |
| 83 | + retry=retry_if_exception_type(ConnectionError), |
| 84 | + reraise=True, |
| 85 | + ) |
| 86 | + async def _get_with_retry(self): |
| 87 | + """Get lease with exponential backoff retry on ConnectionError. |
| 88 | +
|
| 89 | + Retries with exponential backoff and jitter indefinitely when ConnectionError occurs. |
| 90 | + The wait time between retries is capped at 2 minutes (120 seconds). |
| 91 | + Jitter helps prevent thundering herd problems when multiple clients retry simultaneously. |
| 92 | + """ |
| 93 | + try: |
| 94 | + return await self.get() |
| 95 | + except ConnectionError as e: |
| 96 | + logger.error("Error while getting lease %s: %s", self.name, e) |
| 97 | + raise |
| 98 | + |
80 | 99 | def request(self): |
81 | 100 | """Request a lease, or verifies a lease which was already created. |
82 | 101 |
|
@@ -136,8 +155,7 @@ async def _acquire(self): |
136 | 155 | with LeaseAcquisitionSpinner(self.name) as spinner: |
137 | 156 | while True: |
138 | 157 | logger.debug("Polling Lease %s", self.name) |
139 | | - result = await self.get() |
140 | | - |
| 158 | + result = await self._get_with_retry() |
141 | 159 | # lease ready |
142 | 160 | if condition_true(result.conditions, "Ready"): |
143 | 161 | logger.debug("Lease %s acquired", self.name) |
|
0 commit comments