Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"strings"
"time"

configv1 "github.com/openshift/api/config/v1"
configclient "github.com/openshift/client-go/config/clientset/versioned"
"github.com/openshift/origin/pkg/monitor/monitorapi"
"github.com/openshift/origin/pkg/monitortestframework"
Expand All @@ -19,19 +20,13 @@ import (
"k8s.io/client-go/rest"
)

var (
unfixedVersions = sets.NewString()
)
var unfixedVersions = sets.NewString()

func init() {
for i := 0; i < 16; i++ {
unfixedVersions.Insert(fmt.Sprintf("4.%d", i))
for i := 6; i < 16; i++ {
// we should be comparing against semver versions
unfixedVersions.Insert(fmt.Sprintf("4.%d.", i))
}

// TODO: [lmeyer 2026-04-08] replace this temporary hack.
unfixedVersions.Insert("5.0")
// the algorithm below has permitted every release since 4.20 to flake because "4.2" is in the version.
// predictably, a number of violations have crept in. once those are fixed, fix hasOldVersion determination below.
}

type terminationMessagePolicyChecker struct {
Expand Down Expand Up @@ -65,19 +60,21 @@ func (w *terminationMessagePolicyChecker) StartCollection(ctx context.Context, a
return err
}

for _, history := range clusterVersion.Status.History {
for _, unfixedVersion := range unfixedVersions.List() {
if strings.Contains(history.Version, unfixedVersion) {
w.hasOldVersion = true
break
w.hasOldVersion = w.hasOldVersion || hasOldVersion(clusterVersion)
return nil
}

func hasOldVersion(clusterVersion *configv1.ClusterVersion) bool {
if clusterVersion != nil {
for _, history := range clusterVersion.Status.History {
for _, unfixedVersion := range unfixedVersions.List() {
if strings.HasPrefix(history.Version, unfixedVersion) {
return true
}
}
}
if w.hasOldVersion {
break
}
}

return nil
return false
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

func (w *terminationMessagePolicyChecker) CollectData(ctx context.Context, storageDir string, beginning, end time.Time) (monitorapi.Intervals, []*junitapi.JUnitTestCase, error) {
Expand Down Expand Up @@ -139,6 +136,42 @@ func (w *terminationMessagePolicyChecker) CollectData(ctx context.Context, stora
),
"openshift-multus": sets.NewString(
"containers[multus-networkpolicy]",
"pods/dhcp-daemon",
),
// per TRT-2084 these were erroneously allowed to flake, so grandfather them in for now.
// they should be fixed and removed from here:
"openshift-backplane": sets.NewString("pods/osd-delete-backplane-serviceaccounts"),
"openshift-cloud-controller-manager": sets.NewString("pods/aws-cloud-controller-manager"),
"openshift-cluster-machine-approver": sets.NewString("pods/machine-approver-capi"),
"openshift-cluster-version": sets.NewString("pods/version--"),
"openshift-cnv": sets.NewString(
"pods/hostpath-provisioner-operator",
"pods/virt-platform-autopilot",
),
"openshift-deployment-validation-operator": sets.NewString("pods/deployment-validation-operator"),
"openshift-etcd": sets.NewString("pods/master-1ostesttestmetalkubeorg-debug"),
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Description: Search for how other debug pod exemptions are defined and check if there are patterns

# Look for similar debug pod patterns in existingViolations or elsewhere in the codebase
rg -n -C2 'debug' --type=go pkg/monitortests/

Repository: openshift/origin

Length of output: 9181


🏁 Script executed:

# Get context around line 152 and see the matching logic
cat -n pkg/monitortests/clusterversionoperator/terminationmessagepolicy/monitortest.go | sed -n '128,210p'

Repository: openshift/origin

Length of output: 4082


🏁 Script executed:

# Search for TRT-2084 references and etcd-related issue tracking
rg -n 'TRT-2084|master-1ostest' --type=go pkg/monitortests/clusterversionoperator/

Repository: openshift/origin

Length of output: 402


🏁 Script executed:

# Check if there are any comments or git history explaining this etcd entry
# Also check if there are similar master-* patterns elsewhere
rg -n 'master-' --type=go pkg/monitortests/ | head -20

Repository: openshift/origin

Length of output: 9224


🏁 Script executed:

# Search for ostest or testmetalkubeorg to see if this is a recurring pattern
rg -n 'ostest|testmetalkubeorg' --type=go pkg/monitortests/

Repository: openshift/origin

Length of output: 1212


This etcd exemption is part of a temporary TRT-2084 grandfather exemption and is overly environment-specific.

The pod name "pods/master-1ostesttestmetalkubeorg-debug" is derived from a specific test cluster hostname (master-1.ostest.test.metalkube.org) and will only match pods with that exact substring due to the strings.Contains matching at line 198. This entry is documented in the comment at lines 141-142 as one of the "erroneously allowed to flake" violations that should be "fixed and removed from here."

Ensure this exemption is tracked as part of TRT-2084 resolution and verify that either:

  1. The underlying issue this masks is resolved and the entry can be removed, or
  2. A more generalizable pattern is needed if this pod issue persists across multiple test environments.
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In
`@pkg/monitortests/clusterversionoperator/terminationmessagepolicy/monitortest.go`
at line 152, Remove or generalize the environment-specific etcd exemption string
"pods/master-1ostesttestmetalkubeorg-debug" found in the exemptions map entry
for "openshift-etcd"; instead track this case under TRT-2084 by either deleting
the entry when the root cause is fixed or replacing the literal with a
non-host-specific pattern (e.g., match on a stable identifier) so the
strings.Contains check used elsewhere still works; update the comment that
references the "erroneously allowed to flake" exemptions to mention TRT-2084 and
ensure the change keeps the test logic in
terminationmessagepolicy/monitortest.go correct.

"openshift-frr-k8s": sets.NewString("pods/frr-k8s"),
"openshift-ingress": sets.NewString(
"pods/gateway",
"pods/istiod-openshift-gateway",
),
"openshift-insights": sets.NewString(
"pods/insights-runtime-extractor",
"pods/periodic-gathering",
),
"openshift-machine-config-operator": sets.NewString("containers[container-00]"),
"openshift-metallb-system": sets.NewString(
"pods/metallb-operator-controller-manager",
"pods/metallb-operator-webhook-server",
),
"openshift-marketplace": sets.NewString("pods/podman"),
"openshift-operators": sets.NewString("pods/servicemesh-operator3"),
"openshift-ovn-kubernetes": sets.NewString("pods/ovnkube-upgrades-prepuller"),
"openshift-sriov-network-operator": sets.NewString(
"pods/network-resources-injector",
"pods/operator-webhook",
"pods/sriov-network-config-daemon",
"pods/sriov-network-operator",
),
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package terminationmessagepolicy

import (
"testing"

configv1 "github.com/openshift/api/config/v1"
)

func TestHasOldVersion(t *testing.T) {
tests := []struct {
name string
versions []string
want bool
}{
{
name: "4.15.3 is old",
versions: []string{"4.15.3"},
want: true,
},
{
name: "4.15.0-rc.1 is old",
versions: []string{"4.15.0-rc.1"},
want: true,
},
{
name: "4.15 is not semver, don't recognize",
versions: []string{"4.15"},
want: false,
},
{
name: "4.16.0-okd is not old",
versions: []string{"4.16.0-okd"},
want: false,
},
{
name: "5.0 is not old",
versions: []string{"5.0"},
want: false,
},
{
name: "empty history",
versions: []string{},
want: false,
},
{
name: "new version with old version in history",
versions: []string{"4.16.2", "4.15.1"},
want: true,
},
{
name: "4.100 is not old",
versions: []string{"4.100"},
want: false,
},
{
name: "14.11 is not old (major ending in 4)",
versions: []string{"14.11"},
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cv := &configv1.ClusterVersion{}
for _, v := range tt.versions {
cv.Status.History = append(cv.Status.History, configv1.UpdateHistory{Version: v})
}
if got := hasOldVersion(cv); got != tt.want {
t.Errorf("hasOldVersion() = %v, want %v", got, tt.want)
}
})
}
}