From b079d786f8d5c941e9edf6a6be68da3e993a07e1 Mon Sep 17 00:00:00 2001 From: Luke Meyer Date: Thu, 9 Apr 2026 18:09:34 -0400 Subject: [PATCH 1/2] terminationmessagepolicy: correctly fail on violations --- .../terminationmessagepolicy/monitortest.go | 37 +++++----- .../monitortest_test.go | 72 +++++++++++++++++++ 2 files changed, 89 insertions(+), 20 deletions(-) create mode 100644 pkg/monitortests/clusterversionoperator/terminationmessagepolicy/monitortest_test.go diff --git a/pkg/monitortests/clusterversionoperator/terminationmessagepolicy/monitortest.go b/pkg/monitortests/clusterversionoperator/terminationmessagepolicy/monitortest.go index ca034dd7de88..f42b8f0efeda 100644 --- a/pkg/monitortests/clusterversionoperator/terminationmessagepolicy/monitortest.go +++ b/pkg/monitortests/clusterversionoperator/terminationmessagepolicy/monitortest.go @@ -6,6 +6,7 @@ import ( "strings" "time" + configv1 "github.com/openshift/api/config/v1" configclient "github.com/openshift/client-go/config/clientset/versioned" "github.com/openshift/origin/pkg/monitor/monitorapi" "github.com/openshift/origin/pkg/monitortestframework" @@ -19,19 +20,13 @@ import ( "k8s.io/client-go/rest" ) -var ( - unfixedVersions = sets.NewString() -) +var unfixedVersions = sets.NewString() func init() { - for i := 0; i < 16; i++ { - unfixedVersions.Insert(fmt.Sprintf("4.%d", i)) + for i := 6; i < 16; i++ { + // we should be comparing against semver versions + unfixedVersions.Insert(fmt.Sprintf("4.%d.", i)) } - - // TODO: [lmeyer 2026-04-08] replace this temporary hack. - unfixedVersions.Insert("5.0") - // the algorithm below has permitted every release since 4.20 to flake because "4.2" is in the version. - // predictably, a number of violations have crept in. once those are fixed, fix hasOldVersion determination below. } type terminationMessagePolicyChecker struct { @@ -65,19 +60,21 @@ func (w *terminationMessagePolicyChecker) StartCollection(ctx context.Context, a return err } - for _, history := range clusterVersion.Status.History { - for _, unfixedVersion := range unfixedVersions.List() { - if strings.Contains(history.Version, unfixedVersion) { - w.hasOldVersion = true - break + w.hasOldVersion = w.hasOldVersion || hasOldVersion(clusterVersion) + return nil +} + +func hasOldVersion(clusterVersion *configv1.ClusterVersion) bool { + if clusterVersion != nil { + for _, history := range clusterVersion.Status.History { + for _, unfixedVersion := range unfixedVersions.List() { + if strings.HasPrefix(history.Version, unfixedVersion) { + return true + } } } - if w.hasOldVersion { - break - } } - - return nil + return false } func (w *terminationMessagePolicyChecker) CollectData(ctx context.Context, storageDir string, beginning, end time.Time) (monitorapi.Intervals, []*junitapi.JUnitTestCase, error) { diff --git a/pkg/monitortests/clusterversionoperator/terminationmessagepolicy/monitortest_test.go b/pkg/monitortests/clusterversionoperator/terminationmessagepolicy/monitortest_test.go new file mode 100644 index 000000000000..304c9201a5b9 --- /dev/null +++ b/pkg/monitortests/clusterversionoperator/terminationmessagepolicy/monitortest_test.go @@ -0,0 +1,72 @@ +package terminationmessagepolicy + +import ( + "testing" + + configv1 "github.com/openshift/api/config/v1" +) + +func TestHasOldVersion(t *testing.T) { + tests := []struct { + name string + versions []string + want bool + }{ + { + name: "4.15.3 is old", + versions: []string{"4.15.3"}, + want: true, + }, + { + name: "4.15.0-rc.1 is old", + versions: []string{"4.15.0-rc.1"}, + want: true, + }, + { + name: "4.15 is not semver, don't recognize", + versions: []string{"4.15"}, + want: false, + }, + { + name: "4.16.0-okd is not old", + versions: []string{"4.16.0-okd"}, + want: false, + }, + { + name: "5.0 is not old", + versions: []string{"5.0"}, + want: false, + }, + { + name: "empty history", + versions: []string{}, + want: false, + }, + { + name: "new version with old version in history", + versions: []string{"4.16.2", "4.15.1"}, + want: true, + }, + { + name: "4.100 is not old", + versions: []string{"4.100"}, + want: false, + }, + { + name: "14.11 is not old (major ending in 4)", + versions: []string{"14.11"}, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cv := &configv1.ClusterVersion{} + for _, v := range tt.versions { + cv.Status.History = append(cv.Status.History, configv1.UpdateHistory{Version: v}) + } + if got := hasOldVersion(cv); got != tt.want { + t.Errorf("hasOldVersion() = %v, want %v", got, tt.want) + } + }) + } +} From 0c6b6a640ef58ff6cdd81b3f0002d06cbf7589e0 Mon Sep 17 00:00:00 2001 From: Luke Meyer Date: Fri, 10 Apr 2026 18:07:43 -0400 Subject: [PATCH 2/2] terminationmessagepolicy: add temporary exemptions These have crept in while the test was erroneously allowed to flake. See https://redhat.atlassian.net/browse/TRT-2084 We will continue to flake them while working to eradicate them. Meanwhile, new ones should be hard failures. --- .../terminationmessagepolicy/monitortest.go | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/pkg/monitortests/clusterversionoperator/terminationmessagepolicy/monitortest.go b/pkg/monitortests/clusterversionoperator/terminationmessagepolicy/monitortest.go index f42b8f0efeda..5674a9097ece 100644 --- a/pkg/monitortests/clusterversionoperator/terminationmessagepolicy/monitortest.go +++ b/pkg/monitortests/clusterversionoperator/terminationmessagepolicy/monitortest.go @@ -136,6 +136,42 @@ func (w *terminationMessagePolicyChecker) CollectData(ctx context.Context, stora ), "openshift-multus": sets.NewString( "containers[multus-networkpolicy]", + "pods/dhcp-daemon", + ), + // per TRT-2084 these were erroneously allowed to flake, so grandfather them in for now. + // they should be fixed and removed from here: + "openshift-backplane": sets.NewString("pods/osd-delete-backplane-serviceaccounts"), + "openshift-cloud-controller-manager": sets.NewString("pods/aws-cloud-controller-manager"), + "openshift-cluster-machine-approver": sets.NewString("pods/machine-approver-capi"), + "openshift-cluster-version": sets.NewString("pods/version--"), + "openshift-cnv": sets.NewString( + "pods/hostpath-provisioner-operator", + "pods/virt-platform-autopilot", + ), + "openshift-deployment-validation-operator": sets.NewString("pods/deployment-validation-operator"), + "openshift-etcd": sets.NewString("pods/master-1ostesttestmetalkubeorg-debug"), + "openshift-frr-k8s": sets.NewString("pods/frr-k8s"), + "openshift-ingress": sets.NewString( + "pods/gateway", + "pods/istiod-openshift-gateway", + ), + "openshift-insights": sets.NewString( + "pods/insights-runtime-extractor", + "pods/periodic-gathering", + ), + "openshift-machine-config-operator": sets.NewString("containers[container-00]"), + "openshift-metallb-system": sets.NewString( + "pods/metallb-operator-controller-manager", + "pods/metallb-operator-webhook-server", + ), + "openshift-marketplace": sets.NewString("pods/podman"), + "openshift-operators": sets.NewString("pods/servicemesh-operator3"), + "openshift-ovn-kubernetes": sets.NewString("pods/ovnkube-upgrades-prepuller"), + "openshift-sriov-network-operator": sets.NewString( + "pods/network-resources-injector", + "pods/operator-webhook", + "pods/sriov-network-config-daemon", + "pods/sriov-network-operator", ), }