Skip to content

Commit 82bfb88

Browse files
committed
Merge remote-tracking branch 'origin/main' into codex/split-test-suites-by-directory
2 parents 48fcd29 + 3e530af commit 82bfb88

15 files changed

Lines changed: 380 additions & 260 deletions

.github/dependabot.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,17 @@ updates:
2121
llama-index:
2222
patterns:
2323
- "llama-index*"
24+
25+
- package-ecosystem: "github-actions"
26+
directory: "/"
27+
schedule:
28+
interval: "daily"
29+
rebase-strategy: "disabled"
30+
commit-message:
31+
prefix: chore
32+
prefix-development: chore
33+
include: scope
34+
groups:
35+
github-actions:
36+
patterns:
37+
- "*"

.github/workflows/ci.yml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ jobs:
1818
linting:
1919
runs-on: ubuntu-latest
2020
steps:
21-
- uses: actions/checkout@v3
21+
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
2222
- name: Install uv and set Python version
23-
uses: astral-sh/setup-uv@v7
23+
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8
2424
with:
2525
version: "0.11.2"
2626
python-version: "3.13"
@@ -33,14 +33,14 @@ jobs:
3333
type-checking:
3434
runs-on: ubuntu-latest
3535
steps:
36-
- uses: actions/checkout@v3
36+
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
3737
- name: Install uv and set Python version
38-
uses: astral-sh/setup-uv@v7
38+
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8
3939
with:
4040
version: "0.11.2"
4141
python-version: "3.13"
4242
enable-cache: true
43-
- uses: actions/cache@v3
43+
- uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5
4444
name: Cache mypy cache
4545
with:
4646
path: ./.mypy_cache
@@ -105,8 +105,8 @@ jobs:
105105

106106
name: E2E tests on Python 3.13
107107
steps:
108-
- uses: actions/checkout@v3
109-
- uses: pnpm/action-setup@v3
108+
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
109+
- uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5
110110
with:
111111
version: 10.33.0
112112

@@ -115,12 +115,12 @@ jobs:
115115
git clone https://github.com/langfuse/langfuse.git ./langfuse-server && echo $(cd ./langfuse-server && git rev-parse HEAD)
116116
117117
- name: Setup node (for langfuse server)
118-
uses: actions/setup-node@v3
118+
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
119119
with:
120120
node-version: 24
121121

122122
- name: Cache langfuse server dependencies
123-
uses: actions/cache@v3
123+
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5
124124
with:
125125
path: ./langfuse-server/node_modules
126126
key: |
@@ -184,7 +184,7 @@ jobs:
184184
echo "Langfuse server is up and running!"
185185
186186
- name: Install uv and set Python version
187-
uses: astral-sh/setup-uv@v7
187+
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8
188188
with:
189189
version: "0.11.2"
190190
python-version: "3.13"

.github/workflows/claude-review-maintainer-prs.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
steps:
1717
- name: Check author permission and existing review request
1818
id: check
19-
uses: actions/github-script@v7
19+
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
2020
with:
2121
script: |
2222
const owner = context.repo.owner;
@@ -57,7 +57,7 @@ jobs:
5757
5858
- name: Add Claude review comment
5959
if: steps.check.outputs.should_comment == 'true'
60-
uses: actions/github-script@v7
60+
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
6161
with:
6262
script: |
6363
await github.rest.issues.createComment({

.github/workflows/codeql.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,11 @@ jobs:
5555
# your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
5656
steps:
5757
- name: Checkout repository
58-
uses: actions/checkout@v4
58+
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
5959

6060
# Initializes the CodeQL tools for scanning.
6161
- name: Initialize CodeQL
62-
uses: github/codeql-action/init@v3
62+
uses: github/codeql-action/init@c10b8064de6f491fea524254123dbe5e09572f13 # v4.35.1
6363
with:
6464
languages: ${{ matrix.language }}
6565
build-mode: ${{ matrix.build-mode }}
@@ -87,6 +87,6 @@ jobs:
8787
exit 1
8888
8989
- name: Perform CodeQL Analysis
90-
uses: github/codeql-action/analyze@v3
90+
uses: github/codeql-action/analyze@c10b8064de6f491fea524254123dbe5e09572f13 # v4.35.1
9191
with:
9292
category: "/language:${{matrix.language}}"

.github/workflows/dependabot-merge.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
steps:
1616
- name: Dependabot metadata
1717
id: metadata
18-
uses: dependabot/fetch-metadata@v1
18+
uses: dependabot/fetch-metadata@ffa630c65fa7e0ecfa0625b5ceda64399aea1b36 # v3
1919
with:
2020
github-token: "${{ secrets.GITHUB_TOKEN }}"
2121
- name: Enable auto-merge for Dependabot PRs

.github/workflows/dependabot-rebase-stale.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
runs-on: ubuntu-latest
1212
steps:
1313
- name: "Rebase open Dependabot PR"
14-
uses: orange-buffalo/dependabot-auto-rebase@v1
14+
uses: orange-buffalo/dependabot-auto-rebase@fa9e05d7a8152381af0a92ffca942a0d46712544 # v1
1515
with:
1616
api-token: ${{ secrets.DEP_REBASE_PAT }}
1717
repository: ${{ github.repository }}

.github/workflows/package-availability-check.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515

1616
steps:
1717
- name: Set up Python ${{ matrix.python-version }}
18-
uses: actions/setup-python@v2
18+
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
1919
with:
2020
python-version: ${{ matrix.python-version }}
2121
- name: Install dependencies using pip

.github/workflows/release.yml

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,13 @@ jobs:
6262
fi
6363
6464
- name: Checkout repository
65-
uses: actions/checkout@v4
65+
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
6666
with:
6767
fetch-depth: 0
6868
token: ${{ secrets.GH_ACCESS_TOKEN }}
6969

7070
- name: Install uv and set Python version
71-
uses: astral-sh/setup-uv@v7
71+
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8
7272
with:
7373
version: "0.11.2"
7474
python-version: "3.12"
@@ -285,7 +285,7 @@ jobs:
285285

286286
- name: Create GitHub Release
287287
id: create-release
288-
uses: softprops/action-gh-release@v2
288+
uses: softprops/action-gh-release@153bb8e04406b158c6c84fc1615b65b24149a1fe # v2
289289
with:
290290
tag_name: v${{ steps.new-version.outputs.version }}
291291
name: v${{ steps.new-version.outputs.version }}
@@ -299,8 +299,10 @@ jobs:
299299

300300
- name: Notify Slack on success
301301
if: success()
302-
uses: slackapi/slack-github-action@v1.26.0
302+
uses: slackapi/slack-github-action@af78098f536edbc4de71162a307590698245be95 # v3
303303
with:
304+
webhook: ${{ secrets.SLACK_WEBHOOK_RELEASES }}
305+
webhook-type: incoming-webhook
304306
payload: |
305307
{
306308
"text": "✅ Langfuse Python SDK v${{ steps.new-version.outputs.version }} published to PyPI",
@@ -378,14 +380,13 @@ jobs:
378380
}
379381
]
380382
}
381-
env:
382-
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_RELEASES }}
383-
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
384383
385384
- name: Notify Slack on failure
386385
if: failure()
387-
uses: slackapi/slack-github-action@v1.26.0
386+
uses: slackapi/slack-github-action@af78098f536edbc4de71162a307590698245be95 # v3
388387
with:
388+
webhook: ${{ secrets.SLACK_WEBHOOK_ENGINEERING }}
389+
webhook-type: incoming-webhook
389390
payload: |
390391
{
391392
"text": "❌ Langfuse Python SDK release workflow failed",
@@ -471,6 +472,3 @@ jobs:
471472
}
472473
]
473474
}
474-
env:
475-
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_ENGINEERING }}
476-
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK

langfuse/_client/client.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2427,6 +2427,7 @@ def run_experiment(
24272427
- run_name: The experiment run name. This is equal to the dataset run name if experiment was on Langfuse dataset.
24282428
- item_results: List of results for each processed item with outputs and evaluations
24292429
- run_evaluations: List of aggregate evaluation results for the entire run
2430+
- experiment_id: Stable identifier for the experiment run across all items
24302431
- dataset_run_id: ID of the dataset run (if using Langfuse datasets)
24312432
- dataset_run_url: Direct URL to view results in Langfuse UI (if applicable)
24322433
@@ -2577,6 +2578,8 @@ async def _run_experiment_async(
25772578
f"Starting experiment '{name}' run '{run_name}' with {len(data)} items"
25782579
)
25792580

2581+
shared_fallback_experiment_id = self._create_observation_id()
2582+
25802583
# Set up concurrency control
25812584
semaphore = asyncio.Semaphore(max_concurrency)
25822585

@@ -2588,6 +2591,7 @@ async def process_item(item: ExperimentItem) -> ExperimentItemResult:
25882591
task,
25892592
evaluators,
25902593
composite_evaluator,
2594+
shared_fallback_experiment_id,
25912595
name,
25922596
run_name,
25932597
description,
@@ -2619,7 +2623,14 @@ async def process_item(item: ExperimentItem) -> ExperimentItemResult:
26192623
langfuse_logger.error(f"Run evaluator failed: {e}")
26202624

26212625
# Generate dataset run URL if applicable
2622-
dataset_run_id = valid_results[0].dataset_run_id if valid_results else None
2626+
dataset_run_id = next(
2627+
(
2628+
result.dataset_run_id
2629+
for result in valid_results
2630+
if result.dataset_run_id
2631+
),
2632+
None,
2633+
)
26232634
dataset_run_url = None
26242635
if dataset_run_id and data:
26252636
try:
@@ -2665,6 +2676,7 @@ async def process_item(item: ExperimentItem) -> ExperimentItemResult:
26652676
description=description,
26662677
item_results=valid_results,
26672678
run_evaluations=run_evaluations,
2679+
experiment_id=dataset_run_id or shared_fallback_experiment_id,
26682680
dataset_run_id=dataset_run_id,
26692681
dataset_run_url=dataset_run_url,
26702682
)
@@ -2675,6 +2687,7 @@ async def _process_experiment_item(
26752687
task: Callable,
26762688
evaluators: List[Callable],
26772689
composite_evaluator: Optional[CompositeEvaluatorFunction],
2690+
fallback_experiment_id: str,
26782691
experiment_name: str,
26792692
experiment_run_name: str,
26802693
experiment_description: Optional[str],
@@ -2753,7 +2766,7 @@ async def _process_experiment_item(
27532766
if isinstance(item_metadata, dict):
27542767
final_observation_metadata.update(item_metadata)
27552768

2756-
experiment_id = dataset_run_id or self._create_observation_id()
2769+
experiment_id = dataset_run_id or fallback_experiment_id
27572770
experiment_item_id = (
27582771
dataset_item_id or get_sha256_hash_hex(_serialize(input_data))[:16]
27592772
)

langfuse/experiment.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,9 @@ class ExperimentResult:
303303
containing the original item, task output, evaluations, and trace information.
304304
run_evaluations: List of aggregate evaluation results computed across all items,
305305
such as average scores, statistical summaries, or cross-item analyses.
306+
experiment_id: ID of the experiment run propagated across all items. For
307+
Langfuse datasets, this matches the dataset run ID. For local experiments,
308+
this is a stable SDK-generated identifier for the run.
306309
dataset_run_id: Optional ID of the dataset run in Langfuse (when using Langfuse datasets).
307310
dataset_run_url: Optional direct URL to view the experiment results in Langfuse UI.
308311
@@ -361,6 +364,7 @@ def __init__(
361364
description: Optional[str],
362365
item_results: List[ExperimentItemResult],
363366
run_evaluations: List[Evaluation],
367+
experiment_id: str,
364368
dataset_run_id: Optional[str] = None,
365369
dataset_run_url: Optional[str] = None,
366370
):
@@ -372,6 +376,7 @@ def __init__(
372376
description: Optional description of the experiment.
373377
item_results: List of results from processing individual dataset items.
374378
run_evaluations: List of aggregate evaluation results for the entire run.
379+
experiment_id: ID of the experiment run.
375380
dataset_run_id: Optional ID of the dataset run (for Langfuse datasets).
376381
dataset_run_url: Optional URL to view results in Langfuse UI.
377382
"""
@@ -380,6 +385,7 @@ def __init__(
380385
self.description = description
381386
self.item_results = item_results
382387
self.run_evaluations = run_evaluations
388+
self.experiment_id = experiment_id
383389
self.dataset_run_id = dataset_run_id
384390
self.dataset_run_url = dataset_run_url
385391

0 commit comments

Comments
 (0)