make e2e data shard the catch-all

hassiebp · hassiebp · commit 174884ce4dcc · 2026-04-04T22:30:28.000+02:00
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -99,16 +99,17 @@ jobs:
       matrix:
         include:
           - shard-name: core
-            test-paths: >-
-              tests/e2e/test_core_sdk.py
-              tests/e2e/test_decorators.py
-              tests/e2e/test_media.py
+            test-root: tests/e2e
+            parallel-marker: "e2e_core and not serial_e2e"
+            serial-marker: "e2e_core and serial_e2e"
           - shard-name: data
-            test-paths: >-
-              tests/e2e/test_batch_evaluation.py
-              tests/e2e/test_datasets.py
-              tests/e2e/test_experiments.py
-              tests/e2e/test_prompt.py
+            test-root: tests/e2e
+            parallel-marker: "e2e_data and not serial_e2e"
+            serial-marker: "e2e_data and serial_e2e"
+          - shard-name: live-provider
+            test-root: tests/live_provider
+            parallel-marker: "live_provider"
+            serial-marker: ""
     env:
       LANGFUSE_BASE_URL: "http://localhost:3000"
       LANGFUSE_PUBLIC_KEY: "pk-lf-1234567890"
@@ -191,11 +192,12 @@ jobs:
       - name: Run the end-to-end tests
         run: |
           uv run --frozen python --version
-          uv run --frozen pytest -n 4 --dist worksteal -s -v --log-cli-level=INFO ${{ matrix.test-paths }} -m "not serial_e2e"
+          uv run --frozen pytest -n 4 --dist worksteal -s -v --log-cli-level=INFO ${{ matrix.test-root }} -m "${{ matrix.parallel-marker }}"
 
       - name: Run serial end-to-end tests
+        if: ${{ matrix.serial-marker != '' }}
         run: |
-          uv run --frozen pytest -s -v --log-cli-level=INFO ${{ matrix.test-paths }} -m serial_e2e
+          uv run --frozen pytest -s -v --log-cli-level=INFO ${{ matrix.test-root }} -m "${{ matrix.serial-marker }}"
 
   all-tests-passed:
     # This allows us to have a branch protection rule for tests and deploys with matrix
diff --git a/pyproject.toml b/pyproject.toml
@@ -55,6 +55,8 @@ log_cli = true
 markers = [
     "unit: deterministic tests that run without a Langfuse server",
     "e2e: tests that require a real Langfuse server or persisted backend behaviour",
+    "e2e_core: the explicitly curated core e2e shard",
+    "e2e_data: the catch-all e2e shard for everything not in e2e_core",
     "serial_e2e: e2e tests that must not share server concurrency with the rest of the suite",
     "live_provider: tests that call live model providers and are kept out of default CI",
 ]
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -10,6 +10,12 @@
 from langfuse._client.client import Langfuse
 from langfuse._client.resource_manager import LangfuseResourceManager
 
+CORE_E2E_FILENAMES = {
+    "test_core_sdk.py",
+    "test_decorators.py",
+    "test_media.py",
+}
+
 SERIAL_E2E_NODEIDS = {
     "tests/e2e/test_core_sdk.py::test_create_trace",
     "tests/e2e/test_core_sdk.py::test_create_boolean_score",
@@ -49,14 +55,21 @@ def clear(self) -> None:
 
 def pytest_collection_modifyitems(items: list[pytest.Item]) -> None:
     for item in items:
-        test_group = Path(str(item.fspath)).parent.name
+        file_path = Path(str(item.fspath))
+        test_group = file_path.parent.name
 
         if test_group == "unit":
             item.add_marker(pytest.mark.unit)
             continue
 
         if test_group == "e2e":
             item.add_marker(pytest.mark.e2e)
+            # Keep the data shard as the default so new tests under tests/e2e
+            # are picked up automatically unless we explicitly promote them.
+            if file_path.name in CORE_E2E_FILENAMES:
+                item.add_marker(pytest.mark.e2e_core)
+            else:
+                item.add_marker(pytest.mark.e2e_data)
             if item.nodeid in SERIAL_E2E_NODEIDS:
                 item.add_marker(pytest.mark.serial_e2e)
             continue
diff --git a/tests/e2e/test_batch_evaluation.py b/tests/e2e/test_batch_evaluation.py
@@ -18,7 +18,7 @@
     EvaluatorStats,
 )
 from langfuse.experiment import Evaluation
-from tests.support.utils import create_uuid
+from tests.support.utils import create_uuid, get_api, wait_for_result
 
 # ============================================================================
 # FIXTURES & SETUP
@@ -40,6 +40,40 @@ def sample_trace_name():
     return f"batch-eval-test-{create_uuid()}"
 
 
+def _seed_trace_corpus(
+    *, trace_count: int = 6, tag: str | None = None
+) -> tuple[str, list[str]]:
+    langfuse_client = get_client()
+    corpus_tag = tag or f"batch-eval-seed-{create_uuid()}"
+    trace_names: list[str] = []
+
+    for index in range(trace_count):
+        trace_name = f"{corpus_tag}-trace-{index}"
+        trace_names.append(trace_name)
+        with langfuse_client.start_as_current_observation(name=trace_name) as span:
+            with propagate_attributes(tags=[corpus_tag]):
+                span.set_trace_io(
+                    input=f"Seed input {index}",
+                    output=f"Seed output {index}",
+                )
+
+    langfuse_client.flush()
+
+    filter_json = f'[{{"type": "arrayOptions", "column": "tags", "operator": "any of", "value": ["{corpus_tag}"]}}]'
+    api = get_api(retry=False)
+    wait_for_result(
+        lambda: api.trace.list(filter=filter_json, limit=trace_count),
+        is_result_ready=lambda response: len(response.data) >= trace_count,
+    )
+
+    return corpus_tag, trace_names
+
+
+@pytest.fixture(scope="module", autouse=True)
+def seeded_batch_evaluation_traces():
+    _seed_trace_corpus()
+
+
 def simple_trace_mapper(*, item):
     """Simple mapper for traces."""
     return EvaluatorInputs(

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -55,6 +55,8 @@ log_cli = true` |
| `55` | `55` | `markers = [` |
| `56` | `56` | `"unit: deterministic tests that run without a Langfuse server",` |
| `57` | `57` | `"e2e: tests that require a real Langfuse server or persisted backend behaviour",` |
|  | `58` | `+ "e2e_core: the explicitly curated core e2e shard",` |
|  | `59` | `+ "e2e_data: the catch-all e2e shard for everything not in e2e_core",` |
| `58` | `60` | `"serial_e2e: e2e tests that must not share server concurrency with the rest of the suite",` |
| `59` | `61` | `"live_provider: tests that call live model providers and are kept out of default CI",` |
| `60` | `62` | `]` |