Skip to content

Commit 174884c

Browse files
committed
make e2e data shard the catch-all
1 parent 9679ca9 commit 174884c

4 files changed

Lines changed: 64 additions & 13 deletions

File tree

.github/workflows/ci.yml

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -99,16 +99,17 @@ jobs:
9999
matrix:
100100
include:
101101
- shard-name: core
102-
test-paths: >-
103-
tests/e2e/test_core_sdk.py
104-
tests/e2e/test_decorators.py
105-
tests/e2e/test_media.py
102+
test-root: tests/e2e
103+
parallel-marker: "e2e_core and not serial_e2e"
104+
serial-marker: "e2e_core and serial_e2e"
106105
- shard-name: data
107-
test-paths: >-
108-
tests/e2e/test_batch_evaluation.py
109-
tests/e2e/test_datasets.py
110-
tests/e2e/test_experiments.py
111-
tests/e2e/test_prompt.py
106+
test-root: tests/e2e
107+
parallel-marker: "e2e_data and not serial_e2e"
108+
serial-marker: "e2e_data and serial_e2e"
109+
- shard-name: live-provider
110+
test-root: tests/live_provider
111+
parallel-marker: "live_provider"
112+
serial-marker: ""
112113
env:
113114
LANGFUSE_BASE_URL: "http://localhost:3000"
114115
LANGFUSE_PUBLIC_KEY: "pk-lf-1234567890"
@@ -191,11 +192,12 @@ jobs:
191192
- name: Run the end-to-end tests
192193
run: |
193194
uv run --frozen python --version
194-
uv run --frozen pytest -n 4 --dist worksteal -s -v --log-cli-level=INFO ${{ matrix.test-paths }} -m "not serial_e2e"
195+
uv run --frozen pytest -n 4 --dist worksteal -s -v --log-cli-level=INFO ${{ matrix.test-root }} -m "${{ matrix.parallel-marker }}"
195196
196197
- name: Run serial end-to-end tests
198+
if: ${{ matrix.serial-marker != '' }}
197199
run: |
198-
uv run --frozen pytest -s -v --log-cli-level=INFO ${{ matrix.test-paths }} -m serial_e2e
200+
uv run --frozen pytest -s -v --log-cli-level=INFO ${{ matrix.test-root }} -m "${{ matrix.serial-marker }}"
199201
200202
all-tests-passed:
201203
# This allows us to have a branch protection rule for tests and deploys with matrix

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ log_cli = true
5555
markers = [
5656
"unit: deterministic tests that run without a Langfuse server",
5757
"e2e: tests that require a real Langfuse server or persisted backend behaviour",
58+
"e2e_core: the explicitly curated core e2e shard",
59+
"e2e_data: the catch-all e2e shard for everything not in e2e_core",
5860
"serial_e2e: e2e tests that must not share server concurrency with the rest of the suite",
5961
"live_provider: tests that call live model providers and are kept out of default CI",
6062
]

tests/conftest.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@
1010
from langfuse._client.client import Langfuse
1111
from langfuse._client.resource_manager import LangfuseResourceManager
1212

13+
# File names under tests/e2e whose tests receive the ``e2e_core`` marker;
# every other e2e test file is marked ``e2e_data``.
CORE_E2E_FILENAMES = {"test_core_sdk.py", "test_decorators.py", "test_media.py"}
18+
1319
SERIAL_E2E_NODEIDS = {
1420
"tests/e2e/test_core_sdk.py::test_create_trace",
1521
"tests/e2e/test_core_sdk.py::test_create_boolean_score",
@@ -49,14 +55,21 @@ def clear(self) -> None:
4955

5056
def pytest_collection_modifyitems(items: list[pytest.Item]) -> None:
5157
for item in items:
52-
test_group = Path(str(item.fspath)).parent.name
58+
file_path = Path(str(item.fspath))
59+
test_group = file_path.parent.name
5360

5461
if test_group == "unit":
5562
item.add_marker(pytest.mark.unit)
5663
continue
5764

5865
if test_group == "e2e":
5966
item.add_marker(pytest.mark.e2e)
67+
# Keep the data shard as the default so new tests under tests/e2e
68+
# are picked up automatically unless we explicitly promote them.
69+
if file_path.name in CORE_E2E_FILENAMES:
70+
item.add_marker(pytest.mark.e2e_core)
71+
else:
72+
item.add_marker(pytest.mark.e2e_data)
6073
if item.nodeid in SERIAL_E2E_NODEIDS:
6174
item.add_marker(pytest.mark.serial_e2e)
6275
continue

tests/e2e/test_batch_evaluation.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
EvaluatorStats,
1919
)
2020
from langfuse.experiment import Evaluation
21-
from tests.support.utils import create_uuid
21+
from tests.support.utils import create_uuid, get_api, wait_for_result
2222

2323
# ============================================================================
2424
# FIXTURES & SETUP
@@ -40,6 +40,40 @@ def sample_trace_name():
4040
return f"batch-eval-test-{create_uuid()}"
4141

4242

43+
def _seed_trace_corpus(
    *, trace_count: int = 6, tag: str | None = None
) -> tuple[str, list[str]]:
    """Create a set of tagged traces and wait until the API can list them.

    Args:
        trace_count: Number of traces to create.
        tag: Tag attached to every seeded trace. When omitted (or empty) a
            fresh ``batch-eval-seed-<create_uuid()>`` tag is generated.

    Returns:
        A ``(corpus_tag, trace_names)`` tuple: the tag that was applied and the
        names of the created traces in creation order.
    """
    import json  # local import: only needed to serialise the tag filter

    langfuse_client = get_client()
    corpus_tag = tag or f"batch-eval-seed-{create_uuid()}"
    trace_names = [f"{corpus_tag}-trace-{index}" for index in range(trace_count)]

    for index, trace_name in enumerate(trace_names):
        with langfuse_client.start_as_current_observation(name=trace_name) as span:
            with propagate_attributes(tags=[corpus_tag]):
                span.set_trace_io(
                    input=f"Seed input {index}",
                    output=f"Seed output {index}",
                )

    langfuse_client.flush()

    # Serialise with json.dumps instead of string interpolation so a tag that
    # contains quotes or backslashes still yields a valid filter document.
    # For ordinary tags the output matches the previous hand-written string.
    filter_json = json.dumps(
        [
            {
                "type": "arrayOptions",
                "column": "tags",
                "operator": "any of",
                "value": [corpus_tag],
            }
        ],
        ensure_ascii=False,
    )
    api = get_api(retry=False)
    # Poll the trace listing until at least ``trace_count`` tagged traces are
    # returned (timeout/retry policy is whatever wait_for_result implements).
    wait_for_result(
        lambda: api.trace.list(filter=filter_json, limit=trace_count),
        is_result_ready=lambda response: len(response.data) >= trace_count,
    )

    return corpus_tag, trace_names
70+
71+
72+
@pytest.fixture(scope="module", autouse=True)
def seeded_batch_evaluation_traces():
    """Seed the default trace corpus once per module, for every test in it."""
    _seed_trace_corpus()
    return None
75+
76+
4377
def simple_trace_mapper(*, item):
4478
"""Simple mapper for traces."""
4579
return EvaluatorInputs(

0 commit comments

Comments
 (0)