Skip to content

Commit e8372a1

Browse files
eranco74 and openshift-merge-bot[bot]
authored and committed
MGMT-20128: Image based install operator not reconciling after pod is restarted
The monitor will list existing ImageClusterInstall CRs and call client.Status.Update for every ICI that has not completed the installation. This should enqueue the ICI CRs for reconciliation. Signed-off-by: Eran Cohen <eranco@redhat.com>
1 parent 9479cde commit e8372a1

4 files changed

Lines changed: 41 additions & 8 deletions

File tree

cmd/manager/main.go

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package main
1818

1919
import (
20+
"context"
2021
"flag"
2122
"fmt"
2223
"net/http"
@@ -28,11 +29,8 @@ import (
2829
// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
2930
// to ensure that exec-entrypoint and run can make use of them.
3031
_ "k8s.io/client-go/plugin/pkg/client/auth"
32+
"sigs.k8s.io/controller-runtime/pkg/manager"
3133

32-
"github.com/kelseyhightower/envconfig"
33-
bmh_v1alpha1 "github.com/metal3-io/baremetal-operator/apis/metal3.io/v1alpha1"
34-
hivev1 "github.com/openshift/hive/apis/hive/v1"
35-
"github.com/sirupsen/logrus"
3634
corev1 "k8s.io/api/core/v1"
3735
"k8s.io/apimachinery/pkg/labels"
3836
"k8s.io/apimachinery/pkg/runtime"
@@ -46,6 +44,11 @@ import (
4644
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
4745
"sigs.k8s.io/controller-runtime/pkg/webhook"
4846

47+
"github.com/kelseyhightower/envconfig"
48+
bmh_v1alpha1 "github.com/metal3-io/baremetal-operator/apis/metal3.io/v1alpha1"
49+
hivev1 "github.com/openshift/hive/apis/hive/v1"
50+
"github.com/sirupsen/logrus"
51+
4952
"github.com/openshift/image-based-install-operator/api/v1alpha1"
5053
"github.com/openshift/image-based-install-operator/controllers"
5154
"github.com/openshift/image-based-install-operator/internal/credentials"
@@ -191,13 +194,44 @@ func main() {
191194
os.Exit(1)
192195
}
193196

197+
go EnqueueExistingImageClusterInstall(mgr)
198+
194199
setupLog.Info("starting manager")
195200
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
196201
setupLog.Error(err, "problem running manager")
197202
os.Exit(1)
198203
}
199204
}
200205

206+
func EnqueueExistingImageClusterInstall(mgr manager.Manager) {
207+
ctx := context.Background()
208+
209+
// Wait for cache to sync
210+
setupLog.Info("Waiting for cache to sync...")
211+
if synced := mgr.GetCache().WaitForCacheSync(ctx); !synced {
212+
setupLog.Error(fmt.Errorf("Failed to wait for cache to sync"), "failed to wait for cache to sync while enqueuing existing ImageClusterInstall")
213+
os.Exit(1)
214+
}
215+
setupLog.Info("Cache successfully started and synced")
216+
217+
// List existing resources and enqueue those with ClusterInstallCompleted condition false
218+
var icilList v1alpha1.ImageClusterInstallList
219+
if err := mgr.GetClient().List(ctx, &icilList); err != nil {
220+
setupLog.Error(err, "Failed to list ImageClusterInstall")
221+
os.Exit(1)
222+
}
223+
224+
for _, ici := range icilList.Items {
225+
if !controllers.InstallationCompleted(&ici) {
226+
setupLog.Info("Enqueuing existing ImageClusterInstall:", "namespace", ici.Namespace, "name", ici.Name)
227+
// Triggers reconciliation
228+
if err := mgr.GetClient().Status().Update(ctx, &ici); err != nil {
229+
setupLog.Info("Failed to requeue ImageClusterInstall", "namespace", ici.Namespace, "name", ici.Name)
230+
}
231+
}
232+
}
233+
}
234+
201235
func serviceURL(opts *controllers.ImageClusterInstallReconcilerOptions) (string, error) {
202236
if opts.ServiceName == "" || opts.ServiceNamespace == "" || opts.ServiceScheme == "" {
203237
return "", fmt.Errorf("SERVICE_NAME, SERVICE_NAMESPACE, and SERVICE_SCHEME must be set")

controllers/conditions.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ func installationTimedout(ici *v1alpha1.ImageClusterInstall) bool {
165165
return cond != nil && cond.Status == corev1.ConditionTrue && cond.Reason == v1alpha1.InstallTimedoutReason
166166
}
167167

168-
func installationCompleted(ici *v1alpha1.ImageClusterInstall) bool {
168+
func InstallationCompleted(ici *v1alpha1.ImageClusterInstall) bool {
169169
cond := findCondition(ici.Status.Conditions, hivev1.ClusterInstallCompleted)
170170
return cond != nil && cond.Status == corev1.ConditionTrue
171171
}

controllers/imageclusterinstall_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ func (r *ImageClusterInstallReconciler) Reconcile(ctx context.Context, req ctrl.
143143
}
144144

145145
// Nothing to do if the installation is complete
146-
if installationCompleted(ici) {
146+
if InstallationCompleted(ici) {
147147
return ctrl.Result{}, nil
148148
}
149149
// Nothing to do if the installation process started and the config.iso exists

controllers/imageclusterinstall_monitor.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ package controllers
1818

1919
import (
2020
"context"
21-
2221
// These are required for image parsing to work correctly with digest-based pull specs
2322
// See: https://github.com/opencontainers/go-digest/blob/v1.0.0/README.md#usage
2423
_ "crypto/sha256"
@@ -74,7 +73,7 @@ func (r *ImageClusterInstallMonitor) Reconcile(ctx context.Context, req ctrl.Req
7473
return ctrl.Result{}, nil
7574
}
7675
// Nothing to do if the installation process has already stopped
77-
if installationCompleted(ici) {
76+
if InstallationCompleted(ici) {
7877
log.Infof("Cluster %s/%s finished installation process, nothing to do", ici.Namespace, ici.Name)
7978
return ctrl.Result{}, nil
8079
}

0 commit comments

Comments
 (0)