@@ -22,6 +22,7 @@ import (
2222 "fmt"
2323 "net/http"
2424 "strconv"
25+ "strings"
2526 "time"
2627
2728 "github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
@@ -282,7 +283,34 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
282283 case kerrors .IsAlreadyExists (err ):
283284 log .Info ("Runner pod already exists. Waiting for the pod event to be received" )
284285 return ctrl.Result {Requeue : true , RequeueAfter : 5 * time .Second }, nil
285- case kerrors .IsInvalid (err ) || kerrors .IsForbidden (err ):
286+ case kerrors .IsInvalid (err ):
287+ log .Error (err , "Failed to create a pod due to unrecoverable failure" )
288+ errMessage := fmt .Sprintf ("Failed to create the pod: %v" , err )
289+ if err := r .markAsFailed (ctx , ephemeralRunner , errMessage , ReasonInvalidPodFailure , log ); err != nil {
290+ log .Error (err , "Failed to set ephemeral runner to phase Failed" )
291+ return ctrl.Result {}, err
292+ }
293+ return ctrl.Result {}, nil
294+ case kerrors .IsForbidden (err ):
295+ if status , ok := err .(kerrors.APIStatus ); ok || errors .As (err , & status ) {
296+ isResourceQuotaExceeded := strings .Contains (status .Status ().Message , "exceeded quota:" )
297+ isAboutToExpire := ephemeralRunner .CreationTimestamp .Time .Add (10 * time .Minute ).Before (time .Now ())
298+ switch {
299+ case isResourceQuotaExceeded && isAboutToExpire :
300+ log .Error (err , "Failed to create a pod due to resource quota exceeded and the ephemeral runner is about to expire; re-creating the ephemeral runner" )
301+ if err := r .Delete (ctx , ephemeralRunner ); err != nil {
302+ log .Error (err , "Failed to delete the ephemeral runner" )
303+ return ctrl.Result {}, err
304+ }
305+ return ctrl.Result {}, nil
306+ case isResourceQuotaExceeded :
307+ log .Error (err , "Resource quota is exceeded; requeue in 30s to retry pod creation" )
308+ return ctrl.Result {RequeueAfter : 30 * time .Second }, nil
309+ default :
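310+ // Any other forbidden error is treated as unrecoverable: nothing to do here,
311+ // so control leaves this switch and reaches the mark-as-failed handling after the if block.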
312+ }
313+ }
286314 log .Error (err , "Failed to create a pod due to unrecoverable failure" )
287315 errMessage := fmt .Sprintf ("Failed to create the pod: %v" , err )
288316 if err := r .markAsFailed (ctx , ephemeralRunner , errMessage , ReasonInvalidPodFailure , log ); err != nil {
0 commit comments