Skip to content

Commit 26d3966

Browse files
committed
split test suites by execution level
1 parent 31d513d commit 26d3966

43 files changed

Lines changed: 2137 additions & 1613 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/ci.yml

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -52,17 +52,14 @@ jobs:
5252
- name: Run mypy type checking
5353
run: uv run --frozen mypy langfuse --no-error-summary
5454

55-
ci:
55+
unit-tests:
5656
runs-on: ubuntu-latest
5757
timeout-minutes: 30
5858
env:
5959
LANGFUSE_BASE_URL: "http://localhost:3000"
60-
LANGFUSE_PUBLIC_KEY: "pk-lf-1234567890"
61-
LANGFUSE_SECRET_KEY: "sk-lf-1234567890"
62-
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
63-
# SERPAPI_API_KEY: ${{ secrets.SERPAPI_API_KEY }}
64-
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
65-
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
60+
LANGFUSE_PUBLIC_KEY: "pk-lf-test"
61+
LANGFUSE_SECRET_KEY: "sk-lf-test"
62+
OPENAI_API_KEY: "test-openai-key"
6663
strategy:
6764
fail-fast: false
6865
matrix:
@@ -73,7 +70,40 @@ jobs:
7370
- "3.13"
7471
- "3.14"
7572

76-
name: Test on Python version ${{ matrix.python-version }}
73+
name: Unit tests on Python ${{ matrix.python-version }}
74+
steps:
75+
- uses: actions/checkout@v3
76+
- name: Install uv and set Python version
77+
uses: astral-sh/setup-uv@v7
78+
with:
79+
version: "0.11.2"
80+
python-version: ${{ matrix.python-version }}
81+
enable-cache: true
82+
83+
- name: Check Python version
84+
run: python --version
85+
86+
- name: Install the project dependencies
87+
run: uv sync --locked
88+
89+
- name: Run the automated tests
90+
run: |
91+
python --version
92+
uv run --frozen pytest -n auto --dist loadfile -s -v --log-cli-level=INFO tests/unit
93+
94+
e2e-tests:
95+
runs-on: ubuntu-latest
96+
timeout-minutes: 30
97+
env:
98+
LANGFUSE_BASE_URL: "http://localhost:3000"
99+
LANGFUSE_PUBLIC_KEY: "pk-lf-1234567890"
100+
LANGFUSE_SECRET_KEY: "sk-lf-1234567890"
101+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
102+
# SERPAPI_API_KEY: ${{ secrets.SERPAPI_API_KEY }}
103+
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
104+
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
105+
106+
name: E2E tests on Python 3.13
77107
steps:
78108
- uses: actions/checkout@v3
79109
- uses: pnpm/action-setup@v3
@@ -115,7 +145,7 @@ jobs:
115145
116146
echo "::group::Seed db"
117147
cp .env.dev.example .env
118-
pnpm run db:migrate
148+
pnpm run db:migrate
119149
pnpm run db:seed
120150
echo "::endgroup::"
121151
rm -rf .env
@@ -134,7 +164,6 @@ jobs:
134164
135165
echo "::endgroup::"
136166
137-
# Add this step to check the health of the container
138167
- name: Health check for langfuse server
139168
run: |
140169
echo "Checking if the langfuse server is up..."
@@ -158,7 +187,7 @@ jobs:
158187
uses: astral-sh/setup-uv@v7
159188
with:
160189
version: "0.11.2"
161-
python-version: ${{ matrix.python-version }}
190+
python-version: "3.13"
162191
enable-cache: true
163192

164193
- name: Check Python version
@@ -167,15 +196,15 @@ jobs:
167196
- name: Install the project dependencies
168197
run: uv sync --locked
169198

170-
- name: Run the automated tests
199+
- name: Run the end-to-end tests
171200
run: |
172201
python --version
173-
uv run --frozen pytest -n auto --dist loadfile -s -v --log-cli-level=INFO
202+
uv run --frozen pytest -s -v --log-cli-level=INFO tests/e2e
174203
175204
all-tests-passed:
176205
# This allows us to have a branch protection rule for tests and deploys with matrix
177206
runs-on: ubuntu-latest
178-
needs: [ci, linting, type-checking]
207+
needs: [unit-tests, e2e-tests, linting, type-checking]
179208
if: always()
180209
steps:
181210
- name: Successful deploy

pyproject.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ module-root = ""
4848

4949
[tool.pytest.ini_options]
5050
log_cli = true
51+
markers = [
52+
"unit: deterministic tests that run without a Langfuse server",
53+
"e2e: tests that require a real Langfuse server or persisted backend behaviour",
54+
"live_provider: tests that call live model providers and are kept out of default CI",
55+
]
5156

5257
[tool.mypy]
5358
python_version = "3.12"

tests/conftest.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
import json
2+
from pathlib import Path
3+
from typing import Any, Iterable, Sequence
4+
5+
import pytest
6+
from opentelemetry.sdk.resources import Resource
7+
from opentelemetry.sdk.trace import ReadableSpan, TracerProvider
8+
from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
9+
10+
from langfuse._client.client import Langfuse
11+
from langfuse._client.resource_manager import LangfuseResourceManager
12+
13+
14+
class InMemorySpanExporter(SpanExporter):
    """Span exporter that keeps every exported span in a Python list.

    Tests read the collected spans back through ``get_finished_spans``.
    Once ``shutdown`` has been called, ``export`` rejects further spans.
    """

    def __init__(self) -> None:
        # Spans accepted so far, in the order they were exported.
        self._finished_spans: list[ReadableSpan] = []
        # Set by shutdown(); export() refuses new spans afterwards.
        self._stopped = False

    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
        """Store ``spans`` and report SUCCESS, or report FAILURE after shutdown."""
        if not self._stopped:
            self._finished_spans.extend(spans)
            return SpanExportResult.SUCCESS

        return SpanExportResult.FAILURE

    def shutdown(self) -> None:
        """Mark the exporter as stopped; spans collected so far are kept."""
        self._stopped = True

    def get_finished_spans(self) -> list[ReadableSpan]:
        """Return a copy of the collected spans so callers cannot mutate the store."""
        return self._finished_spans.copy()

    def clear(self) -> None:
        """Discard every collected span."""
        self._finished_spans.clear()
37+
38+
def pytest_collection_modifyitems(items: list[pytest.Item]) -> None:
    """Mark each collected test according to the directory that holds its file.

    Only the name of the immediate parent directory is inspected:

    * ``unit`` adds the ``unit`` marker,
    * ``e2e`` adds the ``e2e`` marker,
    * ``live_provider`` adds ``e2e`` and then ``live_provider``.

    Tests located in any other directory are left unmarked.
    """
    # Marker names to apply, in order, keyed by parent directory name.
    markers_by_directory = {
        "unit": ("unit",),
        "e2e": ("e2e",),
        "live_provider": ("e2e", "live_provider"),
    }

    for item in items:
        directory = Path(str(item.fspath)).parent.name
        for marker_name in markers_by_directory.get(directory, ()):
            item.add_marker(getattr(pytest.mark, marker_name))
53+
54+
55+
@pytest.fixture(autouse=True)
def reset_langfuse_state() -> Iterable[None]:
    """Call ``LangfuseResourceManager.reset()`` before and after every test.

    The fixture is autouse, so it wraps each test without being requested.
    """
    # Setup: reset before the test body runs.
    LangfuseResourceManager.reset()

    yield

    # Teardown: reset again once the test body has finished.
    LangfuseResourceManager.reset()
60+
61+
62+
@pytest.fixture
def memory_exporter() -> Iterable[InMemorySpanExporter]:
    """Provide a fresh ``InMemorySpanExporter`` and shut it down on teardown."""
    span_sink = InMemorySpanExporter()
    yield span_sink
    # After shutdown the exporter answers FAILURE to any further export() call.
    span_sink.shutdown()
67+
68+
69+
@pytest.fixture
def langfuse_memory_client(
    monkeypatch: pytest.MonkeyPatch, memory_exporter: InMemorySpanExporter
) -> Iterable[Langfuse]:
    """Yield a ``Langfuse`` client whose span processor writes to ``memory_exporter``.

    The fixture sets test credentials in the environment, replaces
    ``LangfuseSpanProcessor.__init__`` with a stand-in that wires the
    in-memory exporter into ``BatchSpanProcessor.__init__``, and builds the
    client on a dedicated ``TracerProvider``. The client is flushed on
    teardown.
    """
    test_environment = (
        ("LANGFUSE_PUBLIC_KEY", "test-public-key"),
        ("LANGFUSE_SECRET_KEY", "test-secret-key"),
        ("LANGFUSE_BASE_URL", "http://test-host"),
    )
    for env_name, env_value in test_environment:
        monkeypatch.setenv(env_name, env_value)

    tracer_provider = TracerProvider(resource=Resource.create({"service.name": "test"}))

    def mock_init(self: Any, **kwargs: Any) -> None:
        """Stand-in constructor that exports to the in-memory exporter."""
        # Imported lazily, inside the replacement constructor, as in the
        # original fixture.
        from opentelemetry.sdk.trace.export import BatchSpanProcessor

        from langfuse._client.span_filter import is_default_export_span

        self.public_key = kwargs.get("public_key", "test-public-key")

        # A missing or explicit ``None`` value becomes an empty list.
        scopes = kwargs.get("blocked_instrumentation_scopes")
        if scopes is None:
            scopes = []
        self.blocked_instrumentation_scopes = scopes

        # Any falsy ``should_export_span`` falls back to the default filter.
        self._should_export_span = (
            kwargs.get("should_export_span") or is_default_export_span
        )

        # 1 ms schedule delay between export batches.
        BatchSpanProcessor.__init__(
            self,
            span_exporter=memory_exporter,
            max_export_batch_size=512,
            schedule_delay_millis=1,
        )

    monkeypatch.setattr(
        "langfuse._client.span_processor.LangfuseSpanProcessor.__init__",
        mock_init,
    )

    client = Langfuse(
        public_key="test-public-key",
        secret_key="test-secret-key",
        base_url="http://test-host",
        tracing_enabled=True,
        tracer_provider=tracer_provider,
    )

    yield client

    # Teardown: flush the client before the fixture goes away.
    client.flush()
114+
115+
116+
@pytest.fixture
def get_span(memory_exporter: InMemorySpanExporter):
    """Return a helper that fetches the first exported span with a given name.

    The helper raises ``AssertionError`` listing the names of all exported
    spans when no span matches.
    """

    def _get_span(name: str) -> ReadableSpan:
        first_match = next(
            (
                candidate
                for candidate in memory_exporter.get_finished_spans()
                if candidate.name == name
            ),
            None,
        )
        if first_match is not None:
            return first_match

        raise AssertionError(
            f"Span {name!r} not found in {[span.name for span in memory_exporter.get_finished_spans()]}"
        )

    return _get_span
128+
129+
130+
@pytest.fixture
def find_spans(memory_exporter: InMemorySpanExporter):
    """Return a helper that lists every exported span with a given name.

    The helper returns an empty list when nothing matches.
    """

    def _find_spans(name: str) -> list[ReadableSpan]:
        matches: list[ReadableSpan] = []
        for candidate in memory_exporter.get_finished_spans():
            if candidate.name == name:
                matches.append(candidate)
        return matches

    return _find_spans
138+
139+
140+
@pytest.fixture
def json_attr():
    """Return a helper that reads a span attribute and JSON-decodes string values.

    Non-string attribute values are returned unchanged. A missing attribute
    propagates the lookup error raised by ``span.attributes[attribute]``.
    """

    def _json_attr(span: ReadableSpan, attribute: str) -> Any:
        raw_value = span.attributes[attribute]
        return json.loads(raw_value) if isinstance(raw_value, str) else raw_value

    return _json_attr

tests/e2e/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
EvaluatorStats,
1919
)
2020
from langfuse.experiment import Evaluation
21-
from tests.utils import create_uuid
21+
from tests.support.utils import create_uuid
2222

2323
# ============================================================================
2424
# FIXTURES & SETUP
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
from langfuse import Langfuse, propagate_attributes
1010
from langfuse._client.resource_manager import LangfuseResourceManager
1111
from langfuse._utils import _get_timestamp
12-
from tests.api_wrapper import LangfuseAPI
13-
from tests.utils import (
12+
from tests.support.api_wrapper import LangfuseAPI
13+
from tests.support.utils import (
1414
create_uuid,
1515
get_api,
1616
)
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from langfuse import Langfuse
55
from langfuse.api import DatasetStatus
6-
from tests.utils import create_uuid
6+
from tests.support.utils import create_uuid
77

88

99
def test_create_and_get_dataset():
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from langfuse._client.resource_manager import LangfuseResourceManager
1717
from langfuse.langchain import CallbackHandler
1818
from langfuse.media import LangfuseMedia
19-
from tests.utils import get_api
19+
from tests.support.utils import get_api
2020

2121
mock_metadata = {"key": "metadata"}
2222
mock_deep_metadata = {"key": "mock_deep_metadata"}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
ExperimentItem,
1313
ExperimentItemResult,
1414
)
15-
from tests.utils import create_uuid, get_api
15+
from tests.support.utils import create_uuid, get_api
1616

1717

1818
@pytest.fixture

tests/e2e/test_media.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import base64
2+
import re
3+
from uuid import uuid4
4+
5+
from langfuse._client.client import Langfuse
6+
from langfuse.media import LangfuseMedia
7+
from tests.support.utils import get_api
8+
9+
10+
def test_replace_media_reference_string_in_object():
    """Round-trip an audio payload through Langfuse media references.

    Steps:
    1. Send a span whose nested metadata holds a ``LangfuseMedia`` built from
       a base64 WAV data URI, then check the fetched observation stores a
       media reference string instead.
    2. Resolve that reference back to a base64 data URI and compare it with
       the original payload.
    3. Send the resolved data URI in a second span and check that the stored
       value equals the stored value of the first observation.

    The WAV file path is relative to the current working directory.
    """
    with open("static/joke_prompt.wav", "rb") as wav_file:
        audio_bytes = wav_file.read()

    langfuse = Langfuse()

    trace_name = f"test-trace-with-audio-{uuid4()}"
    encoded_audio = base64.b64encode(audio_bytes).decode()
    audio_data_uri = f"data:audio/wav;base64,{encoded_audio}"

    # --- 1. upload the media inside nested metadata -------------------------
    first_span = langfuse.start_observation(
        name=trace_name,
        metadata={
            "context": {"nested": LangfuseMedia(base64_data_uri=audio_data_uri)}
        },
    ).end()

    langfuse.flush()

    first_trace = get_api().trace.get(first_span.trace_id)
    first_observation = first_trace.observations[0]
    stored_reference = first_observation.metadata["context"]["nested"]
    assert re.match(
        r"^@@@langfuseMedia:type=audio/wav\|id=.+\|source=base64_data_uri@@@$",
        stored_reference,
    )

    # --- 2. resolve the reference back to the original payload --------------
    resolved_observation = langfuse.resolve_media_references(
        obj=first_observation, resolve_with="base64_data_uri"
    )
    resolved_value = resolved_observation["metadata"]["context"]["nested"]
    assert resolved_value == audio_data_uri

    # --- 3. re-submit the resolved payload -----------------------------------
    second_span = langfuse.start_observation(
        name=f"2-{trace_name}",
        metadata={"context": {"nested": resolved_value}},
    ).end()

    langfuse.flush()

    second_trace = get_api().trace.get(second_span.trace_id)
    # Re-read the first observation here, as the original test does, rather
    # than reusing the value captured before resolve_media_references ran.
    assert (
        second_trace.observations[0].metadata["context"]["nested"]
        == first_observation.metadata["context"]["nested"]
    )

0 commit comments

Comments
 (0)