@@ -26,6 +26,7 @@ import (
2626 _ "crypto/sha512"
2727 "encoding/base64"
2828 "encoding/json"
29+ "errors"
2930 "fmt"
3031 "net"
3132 "net/url"
@@ -37,7 +38,7 @@ import (
3738
3839 "gopkg.in/yaml.v3"
3940 corev1 "k8s.io/api/core/v1"
40- "k8s.io/apimachinery/pkg/api/errors"
41+ k8sapierrors "k8s.io/apimachinery/pkg/api/errors"
4142 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
4243 "k8s.io/apimachinery/pkg/runtime"
4344 "k8s.io/apimachinery/pkg/types"
@@ -153,15 +154,14 @@ func (r *ImageClusterInstallReconciler) Reconcile(ctx context.Context, req ctrl.
153154 if verifyIsoAndAuthExists (clusterConfigDir ) {
154155 return ctrl.Result {}, nil
155156 }
156- log .Infof ("Running reconcile for ici with bootTime set" )
157+ log .Info ("Running reconcile for ici with bootTime set" )
157158 }
158159
159160 if err := r .initializeConditions (ctx , ici ); err != nil {
160161 log .Errorf ("Failed to initialize conditions: %s" , err )
161162 return ctrl.Result {}, err
162163 }
163164
164- /////// Requirements not met yet: setting defaults //////
165165 cond := hivev1.ClusterInstallCondition {
166166 Type : hivev1 .ClusterInstallRequirementsMet ,
167167 Status : corev1 .ConditionFalse ,
@@ -172,31 +172,103 @@ func (r *ImageClusterInstallReconciler) Reconcile(ctx context.Context, req ctrl.
172172 r .setRequirementsMetCondition (ctx , ici , cond .Status , cond .Reason , cond .Message )
173173 }()
174174
175- /////// Requirements not met yet: starting config (CD, BMH) validation //////
175+ // 1. Config validation phase
176+ // Possible reasons for not meeting requirements and exiting reconcile:
177+ // - ConfigurationPending (default): it's either the user needs to complete the ImageClusterInstall definition, or some of
178+ // referenced resources (CD or BMH) are not available yet. In both cases the reconcile ends, and will be triggered again
179+ // when the problem is resolved.
180+ // - ConfigurationFailed: sets this reason when AutomatedCleaningMode cannot be modified in BMH.
176181 cond .Reason = v1alpha1 .ConfigurationPendingReason
182+ cd , bmh , err := r .validateConfiguration (ctx , ici , & cond , log )
183+ if cd == nil || bmh == nil || err != nil {
184+ return ctrl.Result {}, err
185+ }
186+
187+ // 2. Host validation phase
188+ // Possible reasons for not meeting requirements and exiting reconcile:
189+ // - HostValidationPending: if BMH provisioning or hardware inspection is not ready yet, reconcile is requeued for 30s later.
190+ // - HostValidationFailed (default): in case of any errors or invalid BMH configuration the reconcile ends here.
191+ // Default is HostValidationFailedReason but validateBMH() can change this to HostValidationPendingReason
192+ cond .Reason = v1alpha1 .HostValidationFailedReason
193+ res , err := r .validateHost (ctx , ici , bmh , & cond , log )
194+ if ! res .IsZero () || err != nil {
195+ return res , err
196+ }
197+
198+ if err := r .setClusterInstallMetadata (ctx , log , ici , cd ); err != nil {
199+ cond .Message = "failed to set ClusterMetaData in ImageClusterInstall"
200+ log .Error (err )
201+ return ctrl.Result {}, err
202+ }
203+
204+ // 3. Image creation phase
205+ // Possible reasons for not meeting requirements and exiting reconcile:
206+ // - ImageCreationPending: when lock cannot be acquired, reconcile gets requeued for 5s later to try again.
207+ // - ImageCreationFailed (default): any other unexpected error stops the reconcile loop with this reason.
208+ cond .Reason = v1alpha1 .ImageCreationFailedReason
209+ imageUrl , res , err := r .createImage (ctx , ici , req , bmh , cd , & cond , log )
210+ if ! res .IsZero () || err != nil {
211+ return res , err
212+ }
213+
214+ r .labelReferencedObjectsForBackup (ctx , log , ici , cd )
215+
216+ // 4. Host configuration phase
217+ // Possible reasons for not meeting requirements and exiting reconcile:
218+ // - HostConfigurationPending: sets this reason in following scenarios:
219+ // > earlier DataImage instance is still being deleted for some reason (requeue after 30s)
220+ // > current DataImage was just created less than a second ago so BMO might not be notified yet (requeue after 1s)
221+ // > image-based-install-managed annotation is not set yet in BMH (no requeue)
222+ // - HostConfigurationFailed (default): any unexpected errors during this phase will lead to this reason and finish reconcile.
223+ cond .Reason = v1alpha1 .HostConfigurationFailedReason
224+ continueReconcile , res , err := r .configureHost (ctx , ici , imageUrl , bmh , & cond , log )
225+ if ! continueReconcile || ! res .IsZero () || err != nil {
226+ return res , err
227+ }
228+
229+ // Requirements met, host configured
230+ cond .Status = corev1 .ConditionTrue
231+ cond .Reason = v1alpha1 .HostConfigurationSucceededReason
232+ cond .Message = "configuration image is attached to the referenced host"
233+
234+ return ctrl.Result {}, nil
235+ }
236+
237+ func GetClusterConfigDir (namespacesDir , namespace , uid string ) string {
238+ return filepath .Join (namespacesDir , namespace , uid , FilesDir , ClusterConfigDir )
239+ }
240+
241+ func (r * ImageClusterInstallReconciler ) validateConfiguration (
242+ ctx context.Context ,
243+ ici * v1alpha1.ImageClusterInstall ,
244+ cond * hivev1.ClusterInstallCondition ,
245+ log logrus.FieldLogger ,
246+ ) (* hivev1.ClusterDeployment , * bmh_v1alpha1.BareMetalHost , error ) {
177247
178248 if ici .Spec .ClusterDeploymentRef == nil || ici .Spec .ClusterDeploymentRef .Name == "" {
179249 cond .Message = "ClusterDeploymentRef is unset"
180- return ctrl.Result {}, nil
250+ log .Error (errors .New (cond .Message ))
251+ return nil , nil , nil
181252 }
182253
183- clusterDeployment , err := r .getCD (ctx , ici )
254+ cd , err := r .getCD (ctx , ici )
184255 if err != nil {
185256 cond .Message = fmt .Sprintf ("failed to get ClusterDeployment %s/%s" , ici .Namespace , ici .Spec .ClusterDeploymentRef .Name )
186- log .WithError ( err ). Error (cond . Message )
187- return ctrl. Result {}, err
257+ log .Error (err )
258+ return nil , nil , nil
188259 }
189260
190- if ici .Spec .BareMetalHostRef == nil {
191- cond .Message = "no BareMetalHostRef set, nothing to do without provided bmh"
192- return ctrl.Result {}, nil
261+ if ici .Spec .BareMetalHostRef == nil || ici .Spec .BareMetalHostRef .Name == "" {
262+ cond .Message = "BareMetalHostRef is unset"
263+ log .Error (errors .New (cond .Message ))
264+ return nil , nil , nil
193265 }
194266
195267 bmh , err := r .getBMH (ctx , ici .Spec .BareMetalHostRef )
196268 if err != nil {
197269 cond .Message = fmt .Sprintf ("failed to get BareMetalHost %s/%s" , ici .Spec .BareMetalHostRef .Namespace , ici .Spec .BareMetalHostRef .Name )
198- log .WithError ( err ). Error (cond . Message )
199- return ctrl. Result {}, err
270+ log .Error (err )
271+ return nil , nil , nil
200272 }
201273
202274 // AutomatedCleaningMode is set at the beginning of this flow because we don't want ironic to format the disk
@@ -208,80 +280,113 @@ func (r *ImageClusterInstallReconciler) Reconcile(ctx context.Context, req ctrl.
208280 cond .Reason = v1alpha1 .ConfigurationFailedReason
209281 cond .Message = fmt .Sprintf ("failed to disable automated cleaning mode for BareMetalHost %s/%s" , bmh .Namespace , bmh .Name )
210282 log .WithError (err ).Error (cond .Message )
211- return ctrl. Result {} , err
283+ return nil , nil , err
212284 }
213285 }
214286
215- /////// Requirements not met yet: config validated, starting host validation //////
216- cond .Reason = v1alpha1 .HostValidationFailedReason
287+ return cd , bmh , nil
288+ }
289+
290+ func (r * ImageClusterInstallReconciler ) validateHost (
291+ ctx context.Context ,
292+ ici * v1alpha1.ImageClusterInstall ,
293+ bmh * bmh_v1alpha1.BareMetalHost ,
294+ cond * hivev1.ClusterInstallCondition ,
295+ log logrus.FieldLogger ,
296+ ) (ctrl.Result , error ) {
217297
218- if res , err := r .validateBMH (ctx , ici , bmh , & cond ); ! res .IsZero () || err != nil {
298+ if res , err := r .validateBMH (ici , bmh , cond ); ! res .IsZero () || err != nil {
219299 return res , err
220300 }
221301
222- if err = r .setClusterInstallMetadata (ctx , log , ici , clusterDeployment ); err != nil {
223- cond .Message = "failed to set ImageClusterInstall data"
224- log .WithError (err ).Error (cond .Message )
225- return ctrl.Result {}, err
302+ if ! bmh .Spec .ExternallyProvisioned {
303+ log .Infof ("Setting BareMetalHost (%s/%s) ExternallyProvisioned spec" , bmh .Namespace , bmh .Name )
304+ patch := client .MergeFrom (bmh .DeepCopy ())
305+ bmh .Spec .ExternallyProvisioned = true
306+ if err := r .Patch (ctx , bmh , patch ); err != nil {
307+ return ctrl.Result {}, err
308+ }
309+
226310 }
227311
228- /////// Requirements not met yet: host validated, starting image creation //////
229- cond .Reason = v1alpha1 .ImageCreationFailedReason
312+ return ctrl.Result {}, nil
313+ }
314+
315+ func (r * ImageClusterInstallReconciler ) createImage (
316+ ctx context.Context ,
317+ ici * v1alpha1.ImageClusterInstall ,
318+ req ctrl.Request ,
319+ bmh * bmh_v1alpha1.BareMetalHost ,
320+ cd * hivev1.ClusterDeployment ,
321+ cond * hivev1.ClusterInstallCondition ,
322+ log logrus.FieldLogger ,
323+ ) (string , ctrl.Result , error ) {
230324
231- res , err := r .writeInputData (ctx , log , ici , clusterDeployment , bmh )
325+ res , err := r .writeInputData (ctx , log , ici , cd , bmh )
232326 if ! res .IsZero () || err != nil {
233- cond .Reason = v1alpha1 .ImageCreationPendingReason
234- cond .Message = "could not acquire lock for image data"
235327 if err != nil {
236328 cond .Reason = v1alpha1 .ImageCreationFailedReason
237329 cond .Message = "failed to create image"
238- log .WithError (err ).Error (cond .Message )
330+ log .Error (err )
331+ } else {
332+ cond .Reason = v1alpha1 .ImageCreationPendingReason
333+ cond .Message = "could not acquire lock for image data"
239334 }
240- return res , err
335+ return "" , res , err
241336 }
242337
243- r .labelReferencedObjectsForBackup (ctx , log , ici , clusterDeployment )
244-
245338 imageUrl , err := url .JoinPath (r .BaseURL , "images" , req .Namespace , fmt .Sprintf ("%s.iso" , ici .ObjectMeta .UID ))
246339 if err != nil {
247340 cond .Message = "failed to create image url"
248341 log .WithError (err ).Error (cond .Message )
249- return ctrl.Result {}, err
342+ return "" , ctrl.Result {}, err
250343 }
251344
252- /////// Requirements not met yet: image created, starting host configuration //////
253- cond .Reason = v1alpha1 .HostConfigurationFailedReason
345+ return imageUrl , ctrl.Result {}, nil
346+ }
347+
348+ func (r * ImageClusterInstallReconciler ) configureHost (
349+ ctx context.Context ,
350+ ici * v1alpha1.ImageClusterInstall ,
351+ imageUrl string ,
352+ bmh * bmh_v1alpha1.BareMetalHost ,
353+ cond * hivev1.ClusterInstallCondition ,
354+ log logrus.FieldLogger ,
355+ ) (bool , ctrl.Result , error ) {
356+
357+ continueReconcile := false
254358
255359 dataImage , res , err := r .ensureBMHDataImage (ctx , log , bmh , imageUrl )
360+ if ! res .IsZero () {
361+ cond .Reason = v1alpha1 .HostConfigurationPendingReason
362+ cond .Message = "previous DataImage is being deleted"
363+ return continueReconcile , res , nil
364+ }
256365 if err != nil {
257366 cond .Message = "failed to create BareMetalHost DataImage"
258- if ! res .IsZero () {
259- cond .Reason = v1alpha1 .HostConfigurationPendingReason
260- cond .Message = "previous DataImage is being deleted"
261- }
262367 log .WithError (err ).Error (cond .Message )
263- return res , err
368+ return continueReconcile , ctrl. Result {} , err
264369 }
265370
266371 if dataImage .ObjectMeta .CreationTimestamp .Time .Add (r .Options .DataImageCoolDownPeriod ).After (time .Now ()) {
267372 // in case the dataImage was created less than a second ago requeue to allow BMO some time to get
268373 // notified about the newly created DataImage before adding the reboot annotation in updateBMHProvisioningState
269374 cond .Reason = v1alpha1 .HostConfigurationPendingReason
270- cond .Message = "Waiting for BareMetalHost to get DataImage "
271- return ctrl.Result {RequeueAfter : r .Options .DataImageCoolDownPeriod }, err
375+ cond .Message = "waiting for DataImage to cool down "
376+ return continueReconcile , ctrl.Result {RequeueAfter : r .Options .DataImageCoolDownPeriod }, nil
272377 }
273378
274379 if err := r .updateBMHProvisioningState (ctx , log , bmh , dataImage ); err != nil {
275380 cond .Message = "failed to update BareMetalHost provisioning state"
276381 log .WithError (err ).Error (cond .Message )
277- return ctrl.Result {}, err
382+ return continueReconcile , ctrl.Result {}, err
278383 }
279384 if ! annotationExists (& bmh .ObjectMeta , ibioManagedBMH ) {
280385 // TODO: consider replacing this condition with `dataDisk.Status.AttachedImage`
281386 cond .Reason = v1alpha1 .HostConfigurationPendingReason
282- cond .Message = fmt .Sprintf ("Waiting for BMH to get %s annotation " , ibioManagedBMH )
387+ cond .Message = fmt .Sprintf ("waiting for BMH provisioning state to be StateAvailable or StateExternallyProvisioned, current state is: %s " , bmh . Status . Provisioning . State )
283388 log .Info (cond .Message )
284- return ctrl.Result {}, nil
389+ return continueReconcile , ctrl.Result {}, nil
285390 }
286391
287392 if ici .Status .BareMetalHostRef == nil {
@@ -294,24 +399,15 @@ func (r *ImageClusterInstallReconciler) Reconcile(ctx context.Context, req ctrl.
294399 if err := r .Status ().Patch (ctx , ici , patch ); err != nil {
295400 cond .Message = "failed to set Status.BareMetalHostRef"
296401 log .WithError (err ).Error (cond .Message )
297- return ctrl.Result {}, err
402+ return continueReconcile , ctrl.Result {}, err
298403 }
299404 }
300405
301- /////// Requirements met, host configured //////
302- cond .Status = corev1 .ConditionTrue
303- cond .Reason = v1alpha1 .HostConfigurationSucceededReason
304- cond .Message = "Configuration image is attached to the referenced host"
305-
306- return ctrl.Result {}, nil
307- }
308-
309- func GetClusterConfigDir (namespacesDir , namespace , uid string ) string {
310- return filepath .Join (namespacesDir , namespace , uid , FilesDir , ClusterConfigDir )
406+ continueReconcile = true
407+ return continueReconcile , ctrl.Result {}, nil
311408}
312409
313410func (r * ImageClusterInstallReconciler ) validateBMH (
314- ctx context.Context ,
315411 ici * v1alpha1.ImageClusterInstall ,
316412 bmh * bmh_v1alpha1.BareMetalHost ,
317413 cond * hivev1.ClusterInstallCondition ) (ctrl.Result , error ) {
@@ -499,13 +595,7 @@ func (r *ImageClusterInstallReconciler) updateBMHProvisioningState(ctx context.C
499595 if bmh .Status .Provisioning .State != bmh_v1alpha1 .StateAvailable && bmh .Status .Provisioning .State != bmh_v1alpha1 .StateExternallyProvisioned {
500596 return nil
501597 }
502- log .Infof ("Updating BareMetalHost %s/%s provisioning state, current PoweredOn status is: %s" , bmh .Namespace , bmh .Name , bmh .Status .PoweredOn )
503- if bmh .Status .Provisioning .State == bmh_v1alpha1 .StateAvailable {
504- if ! bmh .Spec .ExternallyProvisioned {
505- log .Infof ("Setting BareMetalHost (%s/%s) ExternallyProvisioned spec" , bmh .Namespace , bmh .Name )
506- bmh .Spec .ExternallyProvisioned = true
507- }
508- }
598+ log .Infof ("BareMetalHost %s/%s PoweredOn status is: %s" , bmh .Namespace , bmh .Name , bmh .Status .PoweredOn )
509599 if ! bmh .Spec .Online {
510600 bmh .Spec .Online = true
511601 log .Infof ("Setting BareMetalHost (%s/%s) spec.Online to true" , bmh .Namespace , bmh .Name )
@@ -532,14 +622,14 @@ func (r *ImageClusterInstallReconciler) ensureBMHDataImage(
532622 url string ) (* bmh_v1alpha1.DataImage , ctrl.Result , error ) {
533623 dataImage , err := r .getDataImage (ctx , bmh .Namespace , bmh .Name )
534624 if err == nil {
535- if err == nil && ! dataImage .ObjectMeta .DeletionTimestamp .IsZero () {
536- err = fmt .Errorf ("dataImage %s/%s already exists but is being deleted, probably leftover from previous installation" , bmh .Namespace , bmh .Name )
537- return dataImage , ctrl.Result {RequeueAfter : 30 * time .Second }, err
625+ if ! dataImage .ObjectMeta .DeletionTimestamp .IsZero () {
626+ log .Errorf ("dataImage %s/%s already exists but is being deleted, probably leftover from previous installation" , bmh .Namespace , bmh .Name )
627+ return dataImage , ctrl.Result {RequeueAfter : 30 * time .Second }, nil
538628 }
539629 return dataImage , ctrl.Result {}, nil
540630 }
541631
542- if err != nil && ! errors .IsNotFound (err ) {
632+ if err != nil && ! k8sapierrors .IsNotFound (err ) {
543633 return dataImage , ctrl.Result {}, err
544634 }
545635 log .Infof ("creating new dataImage for BareMetalHost (%s/%s)" , bmh .Name , bmh .Namespace )
@@ -600,7 +690,7 @@ func (r *ImageClusterInstallReconciler) removeBMHDataImage(ctx context.Context,
600690
601691 bmh := & bmh_v1alpha1.BareMetalHost {}
602692 if err := r .Get (ctx , bmhRef , bmh ); err != nil {
603- if errors .IsNotFound (err ) {
693+ if k8sapierrors .IsNotFound (err ) {
604694 log .Warnf ("Referenced BareMetalHost %s/%s does not exist, not waiting for dataImage deletion" , bmhRef .Namespace , bmhRef .Name )
605695 return nil , nil
606696 } else {
@@ -634,7 +724,7 @@ func (r *ImageClusterInstallReconciler) deleteDataImage(ctx context.Context, log
634724 dataImage := & bmh_v1alpha1.DataImage {}
635725
636726 if err := r .Get (ctx , dataImageRef , dataImage ); err != nil {
637- if errors .IsNotFound (err ) {
727+ if k8sapierrors .IsNotFound (err ) {
638728 log .Infof ("Can't find DataImage from BareMetalHost %s/%s, Nothing to remove" , dataImageRef .Namespace , dataImageRef .Name )
639729 return nil , nil
640730 }
0 commit comments