Skip to content

Commit 6d03f47

Browse files
committed
feat(scores): add session and dataset run scores
1 parent 26fd9e3 commit 6d03f47

80 files changed

Lines changed: 7616 additions & 953 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

langfuse/_client/client.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1221,6 +1221,7 @@ def create_score(
12211221
data_type: Optional[Literal["NUMERIC", "BOOLEAN"]] = None,
12221222
comment: Optional[str] = None,
12231223
config_id: Optional[str] = None,
1224+
metadata: Optional[Any] = None,
12241225
) -> None: ...
12251226

12261227
@overload
@@ -1235,19 +1236,23 @@ def create_score(
12351236
data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL",
12361237
comment: Optional[str] = None,
12371238
config_id: Optional[str] = None,
1239+
metadata: Optional[Any] = None,
12381240
) -> None: ...
12391241

12401242
def create_score(
12411243
self,
12421244
*,
12431245
name: str,
12441246
value: Union[float, str],
1245-
trace_id: str,
1247+
session_id: Optional[str] = None,
1248+
dataset_run_id: Optional[str] = None,
1249+
trace_id: Optional[str] = None,
12461250
observation_id: Optional[str] = None,
12471251
score_id: Optional[str] = None,
12481252
data_type: Optional[ScoreDataType] = None,
12491253
comment: Optional[str] = None,
12501254
config_id: Optional[str] = None,
1255+
metadata: Optional[Any] = None,
12511256
) -> None:
12521257
"""Create a score for a specific trace or observation.
12531258
@@ -1257,12 +1262,15 @@ def create_score(
12571262
Args:
12581263
name: Name of the score (e.g., "relevance", "accuracy")
12591264
value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL)
1265+
session_id: ID of the Langfuse session to associate the score with
1266+
dataset_run_id: ID of the Langfuse dataset run to associate the score with
12601267
trace_id: ID of the Langfuse trace to associate the score with
1261-
observation_id: Optional ID of the specific observation to score
1268+
observation_id: Optional ID of the specific observation to score. Trace ID must be provided too.
12621269
score_id: Optional custom ID for the score (auto-generated if not provided)
12631270
data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
12641271
comment: Optional comment or explanation for the score
12651272
config_id: Optional ID of a score config defined in Langfuse
1273+
metadata: Optional metadata to be attached to the score
12661274
12671275
Example:
12681276
```python
@@ -1293,6 +1301,8 @@ def create_score(
12931301
try:
12941302
score_event = {
12951303
"id": score_id,
1304+
"session_id": session_id,
1305+
"dataset_run_id": dataset_run_id,
12961306
"trace_id": trace_id,
12971307
"observation_id": observation_id,
12981308
"name": name,
@@ -1301,6 +1311,7 @@ def create_score(
13011311
"comment": comment,
13021312
"config_id": config_id,
13031313
"environment": self._environment,
1314+
"metadata": metadata,
13041315
}
13051316

13061317
new_body = ScoreBody(**score_event)

langfuse/_client/observe.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -145,10 +145,9 @@ def sub_process():
145145
- For async functions, the decorator returns an async function wrapper.
146146
- For sync functions, the decorator returns a synchronous wrapper.
147147
"""
148-
function_io_capture_enabled = (
149-
os.environ.get(LANGFUSE_OBSERVE_DECORATOR_IO_CAPTURE_ENABLED, "True")
150-
.lower() not in ("false", "0")
151-
)
148+
function_io_capture_enabled = os.environ.get(
149+
LANGFUSE_OBSERVE_DECORATOR_IO_CAPTURE_ENABLED, "True"
150+
).lower() not in ("false", "0")
152151

153152
def decorator(func: F) -> F:
154153
return (

langfuse/_client/resource_manager.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -256,13 +256,18 @@ def add_score_task(self, event: dict):
256256
# Sample scores with the same sampler that is used for tracing
257257
tracer_provider = cast(TracerProvider, otel_trace_api.get_tracer_provider())
258258
should_sample = (
259-
tracer_provider.sampler.should_sample(
260-
parent_context=None,
261-
trace_id=int(event["body"].trace_id, 16),
262-
name="score",
263-
).decision
264-
== Decision.RECORD_AND_SAMPLE
265-
if hasattr(event["body"], "trace_id")
259+
(
260+
tracer_provider.sampler.should_sample(
261+
parent_context=None,
262+
trace_id=int(event["body"].trace_id, 16),
263+
name="score",
264+
).decision
265+
== Decision.RECORD_AND_SAMPLE
266+
if hasattr(event["body"], "trace_id")
267+
else True
268+
)
269+
if event["body"].trace_id
270+
is not None # do not sample out session / dataset run scores
266271
else True
267272
)
268273

langfuse/api/__init__.py

Lines changed: 84 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,20 @@
66
AnnotationQueueItem,
77
AnnotationQueueObjectType,
88
AnnotationQueueStatus,
9+
ApiKeyDeletionResponse,
10+
ApiKeyList,
11+
ApiKeyResponse,
12+
ApiKeySummary,
13+
AuthenticationScheme,
914
BaseEvent,
1015
BasePrompt,
1116
BaseScore,
17+
BaseScoreV1,
1218
BooleanScore,
19+
BooleanScoreV1,
20+
BulkConfig,
1321
CategoricalScore,
22+
CategoricalScoreV1,
1423
ChatMessage,
1524
ChatPrompt,
1625
Comment,
@@ -39,8 +48,6 @@
3948
CreateSpanBody,
4049
CreateSpanEvent,
4150
CreateTextPromptRequest,
42-
DailyMetrics,
43-
DailyMetricsDetails,
4451
Dataset,
4552
DatasetItem,
4653
DatasetRun,
@@ -51,7 +58,9 @@
5158
DeleteDatasetItemResponse,
5259
DeleteDatasetRunResponse,
5360
DeleteTraceResponse,
61+
EmptyResponse,
5462
Error,
63+
FilterConfig,
5564
GetCommentsResponse,
5665
GetMediaResponse,
5766
GetMediaUploadUrlRequest,
@@ -83,11 +92,18 @@
8392
IngestionUsage,
8493
MapValue,
8594
MediaContentType,
95+
MembershipRequest,
96+
MembershipResponse,
97+
MembershipRole,
98+
MembershipsResponse,
8699
MethodNotAllowedError,
100+
MetricsResponse,
87101
Model,
102+
ModelPrice,
88103
ModelUsageUnit,
89104
NotFoundError,
90105
NumericScore,
106+
NumericScoreV1,
91107
Observation,
92108
ObservationBody,
93109
ObservationLevel,
@@ -99,33 +115,53 @@
99115
OpenAiResponseUsageSchema,
100116
OpenAiUsage,
101117
OptionalObservationBody,
118+
OrganizationProject,
119+
OrganizationProjectsResponse,
102120
PaginatedAnnotationQueueItems,
103121
PaginatedAnnotationQueues,
104122
PaginatedDatasetItems,
123+
PaginatedDatasetRunItems,
105124
PaginatedDatasetRuns,
106125
PaginatedDatasets,
107126
PaginatedModels,
108127
PaginatedSessions,
109128
PatchMediaBody,
110129
Project,
130+
ProjectDeletionResponse,
111131
Projects,
112132
Prompt,
113133
PromptMeta,
114134
PromptMetaListResponse,
115135
Prompt_Chat,
116136
Prompt_Text,
137+
ResourceMeta,
138+
ResourceType,
139+
ResourceTypesResponse,
140+
SchemaExtension,
141+
SchemaResource,
142+
SchemasResponse,
143+
ScimEmail,
144+
ScimFeatureSupport,
145+
ScimName,
146+
ScimUser,
147+
ScimUsersListResponse,
117148
Score,
118149
ScoreBody,
119150
ScoreConfig,
120151
ScoreConfigs,
121152
ScoreDataType,
122153
ScoreEvent,
123154
ScoreSource,
155+
ScoreV1,
156+
ScoreV1_Boolean,
157+
ScoreV1_Categorical,
158+
ScoreV1_Numeric,
124159
Score_Boolean,
125160
Score_Categorical,
126161
Score_Numeric,
127162
SdkLogBody,
128163
SdkLogEvent,
164+
ServiceProviderConfig,
129165
ServiceUnavailableError,
130166
Session,
131167
SessionWithTraces,
@@ -146,8 +182,8 @@
146182
UpdateSpanBody,
147183
UpdateSpanEvent,
148184
Usage,
149-
UsageByModel,
150185
UsageDetails,
186+
UserMeta,
151187
annotation_queues,
152188
comments,
153189
commons,
@@ -160,11 +196,14 @@
160196
metrics,
161197
models,
162198
observations,
199+
organizations,
163200
projects,
164201
prompt_version,
165202
prompts,
203+
scim,
166204
score,
167205
score_configs,
206+
score_v_2,
168207
sessions,
169208
trace,
170209
utils,
@@ -176,11 +215,20 @@
176215
"AnnotationQueueItem",
177216
"AnnotationQueueObjectType",
178217
"AnnotationQueueStatus",
218+
"ApiKeyDeletionResponse",
219+
"ApiKeyList",
220+
"ApiKeyResponse",
221+
"ApiKeySummary",
222+
"AuthenticationScheme",
179223
"BaseEvent",
180224
"BasePrompt",
181225
"BaseScore",
226+
"BaseScoreV1",
182227
"BooleanScore",
228+
"BooleanScoreV1",
229+
"BulkConfig",
183230
"CategoricalScore",
231+
"CategoricalScoreV1",
184232
"ChatMessage",
185233
"ChatPrompt",
186234
"Comment",
@@ -209,8 +257,6 @@
209257
"CreateSpanBody",
210258
"CreateSpanEvent",
211259
"CreateTextPromptRequest",
212-
"DailyMetrics",
213-
"DailyMetricsDetails",
214260
"Dataset",
215261
"DatasetItem",
216262
"DatasetRun",
@@ -221,7 +267,9 @@
221267
"DeleteDatasetItemResponse",
222268
"DeleteDatasetRunResponse",
223269
"DeleteTraceResponse",
270+
"EmptyResponse",
224271
"Error",
272+
"FilterConfig",
225273
"GetCommentsResponse",
226274
"GetMediaResponse",
227275
"GetMediaUploadUrlRequest",
@@ -253,11 +301,18 @@
253301
"IngestionUsage",
254302
"MapValue",
255303
"MediaContentType",
304+
"MembershipRequest",
305+
"MembershipResponse",
306+
"MembershipRole",
307+
"MembershipsResponse",
256308
"MethodNotAllowedError",
309+
"MetricsResponse",
257310
"Model",
311+
"ModelPrice",
258312
"ModelUsageUnit",
259313
"NotFoundError",
260314
"NumericScore",
315+
"NumericScoreV1",
261316
"Observation",
262317
"ObservationBody",
263318
"ObservationLevel",
@@ -269,33 +324,53 @@
269324
"OpenAiResponseUsageSchema",
270325
"OpenAiUsage",
271326
"OptionalObservationBody",
327+
"OrganizationProject",
328+
"OrganizationProjectsResponse",
272329
"PaginatedAnnotationQueueItems",
273330
"PaginatedAnnotationQueues",
274331
"PaginatedDatasetItems",
332+
"PaginatedDatasetRunItems",
275333
"PaginatedDatasetRuns",
276334
"PaginatedDatasets",
277335
"PaginatedModels",
278336
"PaginatedSessions",
279337
"PatchMediaBody",
280338
"Project",
339+
"ProjectDeletionResponse",
281340
"Projects",
282341
"Prompt",
283342
"PromptMeta",
284343
"PromptMetaListResponse",
285344
"Prompt_Chat",
286345
"Prompt_Text",
346+
"ResourceMeta",
347+
"ResourceType",
348+
"ResourceTypesResponse",
349+
"SchemaExtension",
350+
"SchemaResource",
351+
"SchemasResponse",
352+
"ScimEmail",
353+
"ScimFeatureSupport",
354+
"ScimName",
355+
"ScimUser",
356+
"ScimUsersListResponse",
287357
"Score",
288358
"ScoreBody",
289359
"ScoreConfig",
290360
"ScoreConfigs",
291361
"ScoreDataType",
292362
"ScoreEvent",
293363
"ScoreSource",
364+
"ScoreV1",
365+
"ScoreV1_Boolean",
366+
"ScoreV1_Categorical",
367+
"ScoreV1_Numeric",
294368
"Score_Boolean",
295369
"Score_Categorical",
296370
"Score_Numeric",
297371
"SdkLogBody",
298372
"SdkLogEvent",
373+
"ServiceProviderConfig",
299374
"ServiceUnavailableError",
300375
"Session",
301376
"SessionWithTraces",
@@ -316,8 +391,8 @@
316391
"UpdateSpanBody",
317392
"UpdateSpanEvent",
318393
"Usage",
319-
"UsageByModel",
320394
"UsageDetails",
395+
"UserMeta",
321396
"annotation_queues",
322397
"comments",
323398
"commons",
@@ -330,11 +405,14 @@
330405
"metrics",
331406
"models",
332407
"observations",
408+
"organizations",
333409
"projects",
334410
"prompt_version",
335411
"prompts",
412+
"scim",
336413
"score",
337414
"score_configs",
415+
"score_v_2",
338416
"sessions",
339417
"trace",
340418
"utils",

0 commit comments

Comments
 (0)