Skip to content

Commit 3f9afde

Browse files
Merge pull request #509 from DavidRajnoha/incident-detection-tests
OBSINTA-776: Incident detection tests
2 parents 76273e4 + d0bed32 commit 3f9afde

8 files changed

Lines changed: 457 additions & 13 deletions

File tree

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*
2+
The test verifies the whole lifecycle of the Incident feature, without any external dependencies.
3+
The run time can be 15-20 minutes (waiting until the incident detection captures the new alert).
4+
*/
5+
import { commonPages } from '../../views/common';
6+
import { incidentsPage } from '../../views/incidents-page';
7+
8+
// Set constants for the operators that need to be installed for tests.
9+
const MCP = {
10+
namespace: 'openshift-cluster-observability-operator',
11+
packageName: 'cluster-observability-operator',
12+
operatorName: 'Cluster Observability Operator',
13+
config: {
14+
kind: 'UIPlugin',
15+
name: 'monitoring',
16+
},
17+
};
18+
19+
const MP = {
20+
namespace: 'openshift-monitoring',
21+
operatorName: 'Cluster Monitoring Operator',
22+
};
23+
24+
describe('BVT: Incidents - e2e', () => {
25+
let currentAlertName: string;
26+
27+
// One-time setup for the suite: reset the COO state, install it again, remove
// leftover incident fixtures, then create the alert this run will look for.
before(() => {
28+
cy.afterBlockCOO(MCP, MP); // Following Cypress best practices, the cleanup is done before the test block
29+
cy.beforeBlockCOO(MCP, MP);
30+
31+
cy.cleanupIncidentPrometheusRules();
32+
33+
// Create the alert and capture the random name
34+
cy.createKubePodCrashLoopingAlert().then((alertName) => {
35+
currentAlertName = alertName;
36+
cy.log(`Test will look for alert: ${currentAlertName}`);
37+
});
38+
});
39+
40+
after(() => {
41+
cy.afterBlockCOO(MCP, MP); // For compatibility with other tests
42+
});
43+
44+
it('1. Admin perspective - Incidents page - Incident with custom alert lifecycle', () => {
45+
cy.log('1.1 Navigate to Incidents page and clear filters');
46+
incidentsPage.goTo();
47+
commonPages.titleShouldHaveText('Incidents');
48+
incidentsPage.clearAllFilters();
49+
50+
const intervalMs = 60_000;
51+
const maxMinutes = 30;
52+
53+
cy.log('1.2 Wait for incident with custom alert to appear');
54+
cy.waitUntilWithCustomTimeout(
55+
() => incidentsPage.findIncidentWithAlert(currentAlertName),
56+
{
57+
interval: intervalMs,
58+
timeout: maxMinutes * intervalMs,
59+
timeoutMessage: `Custom timeout: Incident with alert "${currentAlertName}" did not appear within ${maxMinutes} minutes.`
60+
}
61+
);
62+
63+
cy.log('1.3 Verify custom alert appears in alerts table');
64+
incidentsPage
65+
.elements
66+
.alertsTable()
67+
.contains(currentAlertName)
68+
.should('exist');
69+
});
70+
});
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
The test verifies the basic functionality of the Incidents page and serves
3+
as a verification that the Incidents View is working as expected.
4+
5+
Currently, it depends on an alert being present in the cluster.
6+
In the future, mocking requests / injecting alerts should be considered.
7+
Natural creation of the alert is done in the 00.coo_incidents_e2e.cy.ts test,
8+
but takes significant time.
9+
*/
10+
11+
import { commonPages } from '../../views/common';
12+
import { incidentsPage } from '../../views/incidents-page';
13+
14+
const MCP = {
15+
namespace: 'openshift-cluster-observability-operator',
16+
packageName: 'cluster-observability-operator',
17+
operatorName: 'Cluster Observability Operator',
18+
config: {
19+
kind: 'UIPlugin',
20+
name: 'monitoring',
21+
},
22+
};
23+
24+
const MP = {
25+
namespace: 'openshift-monitoring',
26+
operatorName: 'Cluster Monitoring Operator',
27+
};
28+
29+
const ALERTNAME = 'Watchdog';
30+
const NAMESPACE = 'openshift-monitoring';
31+
const SEVERITY = 'Critical';
32+
const ALERT_DESC = 'This is an alert meant to ensure that the entire alerting pipeline is functional. This alert is always firing, therefore it should always be firing in Alertmanager and always fire against a receiver. There are integrations with various notification mechanisms that send a notification when this alert is not firing. For example the "DeadMansSnitch" integration in PagerDuty.'
33+
const ALERT_SUMMARY = 'An alert that should always be firing to certify that Alertmanager is working properly.'
34+
describe('BVT: Incidents - UI', () => {
35+
// One-time setup for the suite: reset the COO state, then install it again.
before(() => {
36+
cy.afterBlockCOO(MCP, MP); // Following Cypress best practices, the cleanup is done before the test block
37+
cy.beforeBlockCOO(MCP, MP);
38+
});
39+
40+
after(() => {
41+
cy.afterBlockCOO(MCP, MP); // For compatibility with other tests
42+
});
43+
44+
45+
beforeEach(() => {
46+
47+
cy.log('Navigate to Observe → Incidents');
48+
incidentsPage.goTo();
49+
commonPages.titleShouldHaveText('Incidents');
50+
});
51+
52+
it('1. Admin perspective - Incidents page - Toolbar and charts toggle functionality', () => {
53+
cy.log('1.1 Verify toolbar and toggle charts button');
54+
incidentsPage.elements.toolbar().should('be.visible');
55+
incidentsPage.elements.toggleChartsButton().should('be.visible');
56+
incidentsPage.elements.toggleChartsButton().click();
57+
58+
cy.log('1.2 Verify charts are hidden after toggle');
59+
incidentsPage.elements.incidentsChartTitle().should('not.exist');
60+
incidentsPage.elements.alertsChartTitle().should('not.exist');
61+
incidentsPage.elements.toggleChartsButton().click();
62+
});
63+
64+
it('2. Admin perspective - Incidents page - Days filter functionality', () => {
65+
cy.log('2.1 Set days filter to 3 days');
66+
incidentsPage.setDays('3 days');
67+
68+
cy.log('2.2 Verify filter and URL update');
69+
incidentsPage.elements.daysSelect().should('contain.text', '3 days');
70+
cy.url().should('match', /[?&]days=3\+days/);
71+
});
72+
73+
it('3. Admin perspective - Incidents page - Critical filter functionality', () => {
74+
cy.log('3.1 Clear filters and toggle Critical filter');
75+
incidentsPage.clearAllFilters();
76+
incidentsPage.toggleFilter('Critical');
77+
78+
cy.log('3.2 Verify URL is updated with incident filters');
79+
cy.url().should('match', /incidentFilters=.*Critical/);
80+
});
81+
82+
it('4. Admin perspective - Incidents page - Charts and alerts empty state', () => {
83+
cy.log('4.1 Verify chart titles are visible');
84+
incidentsPage.elements.incidentsChartTitle().should('be.visible');
85+
incidentsPage.elements.alertsChartTitle().should('be.visible');
86+
87+
cy.log('4.2 Verify alerts chart shows empty state');
88+
incidentsPage.elements.alertsChartEmptyState().should('exist');
89+
});
90+
91+
it('5. Admin perspective - Incidents page - Incident selection and alert details', () => {
92+
cy.log('5.1 Select incident and verify alert details');
93+
incidentsPage.clearAllFilters();
94+
incidentsPage.selectIncidentByBarIndex(0);
95+
cy.url().should('match', /[?&]groupId=/);
96+
incidentsPage.elements.alertsChartSvg().find('path').should('exist');
97+
98+
cy.log('5.2 Verify alerts table and expand first row');
99+
incidentsPage.elements.alertsTable().should('exist');
100+
incidentsPage.expandRow(0);
101+
});
102+
});
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: crash-loop
5+
namespace: openshift-monitoring
6+
spec:
7+
replicas: 1
8+
selector:
9+
matchLabels:
10+
app: crash-loop
11+
template:
12+
metadata:
13+
labels:
14+
app: crash-loop
15+
spec:
16+
containers:
17+
- name: crash-loop
18+
image: busybox
19+
command: ["sh", "-c", "exit 1"] # Exit immediately with a failure
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
apiVersion: monitoring.coreos.com/v1
2+
kind: PrometheusRule
3+
metadata:
4+
labels:
5+
app.kubernetes.io/name: kube-prometheus
6+
app.kubernetes.io/part-of: openshift-monitoring
7+
prometheus: k8s
8+
role: alert-rules
9+
name: kubernetes-monitoring-podcrash-rules
10+
namespace: openshift-monitoring
11+
spec:
12+
groups:
13+
- name: kubernetes-apps
14+
rules:
15+
- alert: {{ALERT_NAME}}
16+
annotations:
17+
description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
18+
}}) is in waiting state (reason: "CrashLoopBackOff").'
19+
summary: Pod is crash looping.
20+
expr: |
21+
max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", namespace=~"(openshift-.*|kube-.*|default)",job="kube-state-metrics"}[5m]) >= 1
22+
for: 5m
23+
labels:
24+
severity: warning

web/cypress/support/commands.ts

Lines changed: 87 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import Shadow = Cypress.Shadow;
66
import 'cypress-wait-until';
77
import { guidedTour } from '../views/tour';
88
import { nav } from '../views/nav';
9+
import './nav';
910
import { operatorHubPage } from '../views/operator-hub-page';
1011

1112

@@ -42,11 +43,6 @@ declare global {
4243
bySemanticElement(element: string, text?: string): Chainable<JQuery<HTMLElement>>;
4344
byAriaLabel(label: string, options?: Partial<Loggable & Timeoutable & Withinable & Shadow>): Chainable<JQuery<HTMLElement>>;
4445
byPFRole(role: string, options?: Partial<Loggable & Timeoutable & Withinable & Shadow>): Chainable<JQuery<HTMLElement>>;
45-
}
46-
}
47-
48-
declare global {
49-
interface Chainable {
5046
switchPerspective(perspective: string);
5147
uiLogin(provider: string, username: string, password: string);
5248
uiLogout();
@@ -55,6 +51,10 @@ declare global {
5551
adminCLI(command: string, options?);
5652
login(provider?: string, username?: string, password?: string): Chainable<Element>;
5753
executeAndDelete(command: string);
54+
waitUntilWithCustomTimeout(
55+
fn: () => any,
56+
options: { interval: number; timeout: number; timeoutMessage: string }
57+
): Cypress.Chainable<any>;
5858
}
5959
}
6060

@@ -546,6 +546,7 @@ Cypress.Commands.add('beforeBlockCOO', (MCP: { namespace: string, operatorName:
546546
expect(result.code).to.eq(0);
547547
cy.log(`Monitoring plugin pod is now running in namespace: ${MCP.namespace}`);
548548
});
549+
cy.exec(`oc label namespace openshift-cluster-observability-operator openshift.io/cluster-monitoring="true" --kubeconfig ${Cypress.env('KUBECONFIG_PATH')}`)
549550
//TODO: https://issues.redhat.com/browse/OCPBUGS-58468 - console reload and logout was happening more often
550551
// cy.get('.pf-v5-c-alert, .pf-v6-c-alert', { timeout: readyTimeoutMilliseconds })
551552
// .contains('Web console update is available')
@@ -612,6 +613,8 @@ Cypress.Commands.add('afterBlockCOO', (MCP: { namespace: string, operatorName: s
612613
`oc adm policy remove-cluster-role-from-user cluster-admin ${Cypress.env('LOGIN_USERNAME')} --kubeconfig ${Cypress.env('KUBECONFIG_PATH')}`,
613614
);
614615

616+
cy.executeAndDelete(`oc label namespace openshift-cluster-observability-operator openshift.io/cluster-monitoring- --kubeconfig ${Cypress.env('KUBECONFIG_PATH')}`)
617+
615618
//TODO: https://issues.redhat.com/browse/OCPBUGS-58468 - console reload and logout was happening more often
616619
// cy.get('.pf-v5-c-alert, .pf-v6-c-alert', { timeout: 120000 })
617620
// .contains('Web console update is available')
@@ -626,4 +629,83 @@ Cypress.Commands.add('afterBlockCOO', (MCP: { namespace: string, operatorName: s
626629

627630
}
628631
cy.log('After block COO completed');
632+
});
633+
634+
/**
 * Apply incident fixture manifests to the cluster.
 *
 * Renders the PrometheusRule template with a randomly generated alert name,
 * applies it with `oc`, and applies the crash-looping Deployment fixture.
 * The generated name is also stored in `Cypress.env('CURRENT_ALERT_NAME')`.
 *
 * @returns a chainable yielding the generated alert name.
 */
Cypress.Commands.add('createKubePodCrashLoopingAlert', () => {
  const kubeconfigPath = Cypress.env('KUBECONFIG_PATH');
  const templatePath = './cypress/fixtures/incidents/prometheus_rule_pod_crash_loop.yaml';
  const tempRulePath = './cypress/fixtures/incidents/temp_prometheus_rule.yaml';

  // Generate a random alert name for this test run
  const randomAlertName = `CustomPodCrashLooping_${Math.random().toString(36).substring(2, 15)}`;

  // Store the alert name globally so tests can access it
  Cypress.env('CURRENT_ALERT_NAME', randomAlertName);

  cy.log(`Generated random alert name: ${randomAlertName}`);

  // Read the template and replace the placeholder
  cy.readFile(templatePath).then((template) => {
    const yamlContent = template.replace(/\{\{ALERT_NAME\}\}/g, randomAlertName);

    // Write the modified YAML to a temporary file
    cy.writeFile(tempRulePath, yamlContent).then(() => {
      // Apply without failing immediately, so the temporary file is removed
      // even when `oc apply` exits non-zero.
      cy.exec(`oc apply -f ${tempRulePath} --kubeconfig ${kubeconfigPath}`, {
        failOnNonZeroExit: false,
      }).then((applyResult) => {
        // Clean up temporary file (-f: do not fail if it is already gone)
        cy.exec(`rm -f ${tempRulePath}`).then(() => {
          // Surface the apply failure only after the cleanup has run.
          if (applyResult.code !== 0) {
            throw new Error(
              `oc apply -f ${tempRulePath} failed with exit code ${applyResult.code}: ${applyResult.stderr}`,
            );
          }
        });
      });
    });
  });

  cy.exec(
    `oc apply -f ./cypress/fixtures/incidents/pod_crash_loop.yaml --kubeconfig ${kubeconfigPath}`,
  );

  // Return the alert name for the test to use
  return cy.wrap(randomAlertName);
});
669+
670+
// Clean up incident fixture manifests from the cluster:
// deletes the PrometheusRule and the crash-loop fixture, and resets the
// CURRENT_ALERT_NAME Cypress env value.
671+
Cypress.Commands.add('cleanupIncidentPrometheusRules', () => {
672+
const kubeconfigPath = Cypress.env('KUBECONFIG_PATH');
673+
674+
// Delete the single PrometheusRule named kubernetes-monitoring-podcrash-rules
// in openshift-monitoring (an exact name, not a pattern match).
675+
// --ignore-not-found keeps this from failing when the rule does not exist.
676+
cy.exec(
677+
`oc delete prometheusrule kubernetes-monitoring-podcrash-rules -n openshift-monitoring --kubeconfig ${kubeconfigPath} --ignore-not-found=true`,
678+
);
679+
680+
// Clear the environment variable if it exists
681+
if (Cypress.env('CURRENT_ALERT_NAME')) {
682+
Cypress.env('CURRENT_ALERT_NAME', null);
683+
}
684+
685+
// Delete the resources declared in the pod_crash_loop.yaml fixture.
// NOTE(review): executeAndDelete is defined outside this view; confirm how it
// differs from cy.exec before relying on its failure behavior.
cy.executeAndDelete(
686+
`oc delete -f ./cypress/fixtures/incidents/pod_crash_loop.yaml --ignore-not-found=true --kubeconfig ${kubeconfigPath}`,
687+
);
688+
});
689+
690+
/**
 * Custom waitUntil with timeout message.
 *
 * Polls `fn` through cypress-wait-until and, if the wait times out, fails
 * with `options.timeoutMessage` instead of the plugin's default message.
 *
 * The message is passed through the plugin's own `errorMsg` option rather
 * than a `cy.on('fail')` listener. A listener would stay registered for the
 * rest of the test and relabel unrelated "Timed out retrying" failures
 * (for example a later `.should(...)` assertion) with this wait's message.
 *
 * @param fn - check function handed to `cy.waitUntil`.
 * @param options - polling `interval` and `timeout` in milliseconds, plus the
 *   `timeoutMessage` reported when the wait times out.
 * @returns the chainable returned by `cy.waitUntil`.
 */
Cypress.Commands.add('waitUntilWithCustomTimeout', (
  fn: () => any,
  options: { interval: number; timeout: number; timeoutMessage: string }
) => {
  const { timeoutMessage, ...waitOptions } = options;

  // errorMsg scopes the custom message to this wait only.
  return cy.waitUntil(fn, { ...waitOptions, errorMsg: timeoutMessage });
});

web/cypress/support/index.ts

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,17 @@ export const checkErrors = () =>
77
assert.isTrue(!win.windowError, win.windowError);
88
});
99

10-
Cypress.on('uncaught:exception', (err, runnable) => {
11-
// returning false here prevents Cypress from failing the test
12-
// on a JavaScript exception
13-
if (err.message.includes('ResizeObserver loop completed with undelivered notifications')) {
14-
return false
15-
}
16-
});
10+
11+
// Ignore benign ResizeObserver errors globally so they don't fail tests
// See: https://docs.cypress.io/api/cypress-api/catalog-of-events#Uncaught-Exceptions
Cypress.on('uncaught:exception', (err) => {
  const message = err?.message || String(err || '');
  // A single substring check covers every known variant, including
  // "ResizeObserver loop limit exceeded" and
  // "ResizeObserver loop completed with undelivered notifications".
  if (message.includes('ResizeObserver')) {
    // returning false here prevents Cypress from failing the test
    return false;
  }
  // allow other errors to fail the test
});

web/cypress/tsconfig.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,8 @@
44
"lib": ["es5", "dom"],
55
"types": ["cypress", "node"]
66
},
7-
"include": ["**/*.ts"]
7+
8+
"include": [
9+
"**/*.ts"
10+
]
811
}

0 commit comments

Comments
 (0)