Skip to content

Commit 9f9409a

Browse files
Handle resource quota on status forbidden by retrying (#4305)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 3d73636 commit 9f9409a

File tree

1 file changed

+29
-1
lines changed

1 file changed

+29
-1
lines changed

controllers/actions.github.com/ephemeralrunner_controller.go

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"fmt"
2323
"net/http"
2424
"strconv"
25+
"strings"
2526
"time"
2627

2728
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
@@ -282,7 +283,34 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
282283
case kerrors.IsAlreadyExists(err):
283284
log.Info("Runner pod already exists. Waiting for the pod event to be received")
284285
return ctrl.Result{Requeue: true, RequeueAfter: 5 * time.Second}, nil
285-
case kerrors.IsInvalid(err) || kerrors.IsForbidden(err):
286+
case kerrors.IsInvalid(err):
287+
log.Error(err, "Failed to create a pod due to unrecoverable failure")
288+
errMessage := fmt.Sprintf("Failed to create the pod: %v", err)
289+
if err := r.markAsFailed(ctx, ephemeralRunner, errMessage, ReasonInvalidPodFailure, log); err != nil {
290+
log.Error(err, "Failed to set ephemeral runner to phase Failed")
291+
return ctrl.Result{}, err
292+
}
293+
return ctrl.Result{}, nil
294+
case kerrors.IsForbidden(err):
295+
if status, ok := err.(kerrors.APIStatus); ok || errors.As(err, &status) {
296+
isResourceQuotaExceeded := strings.Contains(status.Status().Message, "exceeded quota:")
297+
isAboutToExpire := ephemeralRunner.CreationTimestamp.Time.Add(10 * time.Minute).Before(time.Now())
298+
switch {
299+
case isResourceQuotaExceeded && isAboutToExpire:
300+
log.Error(err, "Failed to create a pod due to resource quota exceeded and the ephemeral runner is about to expire; re-creating the ephemeral runner")
301+
if err := r.Delete(ctx, ephemeralRunner); err != nil {
302+
log.Error(err, "Failed to delete the ephemeral runner")
303+
return ctrl.Result{}, err
304+
}
305+
return ctrl.Result{}, nil
306+
case isResourceQuotaExceeded:
307+
log.Error(err, "Resource quota is exceeded; requeue in 30s to retry pod creation")
308+
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
309+
default:
310+
// other forbidden errors
311+
// no early return here: leave this switch and continue to the unrecoverable-failure handling below
312+
}
313+
}
286314
log.Error(err, "Failed to create a pod due to unrecoverable failure")
287315
errMessage := fmt.Sprintf("Failed to create the pod: %v", err)
288316
if err := r.markAsFailed(ctx, ephemeralRunner, errMessage, ReasonInvalidPodFailure, log); err != nil {

0 commit comments

Comments
 (0)