remove project_id requirement for media handling

hassiebp · hassiebp · commit 1a04702d4214 · 2025-05-18T13:49:08.000+02:00
diff --git a/langfuse/_client/resource_manager.py b/langfuse/_client/resource_manager.py
@@ -229,16 +229,6 @@ def _initialize_instance(
         ingestion_consumer.start()
         self._ingestion_consumers.append(ingestion_consumer)
 
-        # Project ID handling
-        self._project_id = None
-        self._project_id_fetched = threading.Event()
-        self._fetch_project_id_thread = threading.Thread(
-            target=self._fetch_project_id_background,
-            name="langfuse-project-id-fetcher",
-            daemon=True,
-        )
-        self._fetch_project_id_thread.start()
-
         # Register shutdown handler
         atexit.register(self.shutdown)
 
@@ -251,49 +241,6 @@ def _initialize_instance(
             f"media_threads={media_upload_thread_count or 1}"
         )
 
-    def _fetch_project_id_background(self):
-        try:
-            projects = self.api.projects.get(
-                request_options={"max_retries": 3, "timeout_in_seconds": 5}
-            )
-            self._project_id = projects.data[0].id if projects.data else None
-
-            langfuse_logger.debug(
-                f"API: Successfully fetched project ID: {self._project_id} for project with public_key={self.public_key}"
-            )
-        except Exception as e:
-            langfuse_logger.warning(
-                f"API error: Failed to fetch project ID. This may affect media uploads and URL generation. Error: {str(e)}"
-            )
-
-        finally:
-            self._project_id_fetched.set()
-
-    @property
-    def project_id(self):
-        if self._project_id:
-            return self._project_id
-
-        if self._project_id_fetched.is_set():
-            langfuse_logger.warning(
-                "Configuration issue: Project ID unavailable. Media uploads and project-specific features may be affected. "
-                "Check API connectivity and permissions for your public/secret key pair."
-            )
-            return None
-
-        fetch_completed = self._project_id_fetched.wait(0.5)
-
-        if not self._project_id:
-            langfuse_logger.warning(
-                "Configuration issue: Project ID retrieval failed. Media uploads and project-specific features may be affected. "
-                "Check API connectivity and permissions for your public/secret key pair."
-                if fetch_completed
-                else "Timing issue: Project ID still being fetched. Operation proceeding without project ID. "
-                "This is normal during startup, but may temporarily affect media uploads and URL generation."
-            )
-
-        return self._project_id
-
     def add_score_task(self, event: dict):
         try:
             # Sample scores with the same sampler that is used for tracing
diff --git a/langfuse/_client/span.py b/langfuse/_client/span.py
@@ -467,7 +467,6 @@ def _process_media_in_attribute(
                 field=field,
                 trace_id=self.trace_id,
                 observation_id=self.id,
-                project_id=self._langfuse_client._resources.project_id,
             )
         )
 
diff --git a/langfuse/_task_manager/media_manager.py b/langfuse/_task_manager/media_manager.py
@@ -1,5 +1,3 @@
-import base64
-import hashlib
 import logging
 import time
 from queue import Empty, Full, Queue
@@ -54,58 +52,14 @@ def process_next_media_upload(self):
             )
             self._queue.task_done()
 
-    def process_media_in_event(self, event: dict):
-        try:
-            if "body" not in event:
-                return
-
-            body = event["body"]
-            trace_id = body.get("traceId", None) or (
-                body.get("id", None)
-                if "type" in event and "trace" in event["type"]
-                else None
-            )
-
-            if trace_id is None:
-                raise ValueError("trace_id is required for media upload")
-
-            observation_id = (
-                body.get("id", None)
-                if "type" in event
-                and ("generation" in event["type"] or "span" in event["type"])
-                else None
-            )
-
-            multimodal_fields = ["input", "output", "metadata"]
-
-            for field in multimodal_fields:
-                if field in body:
-                    processed_data = self._find_and_process_media(
-                        data=body[field],
-                        trace_id=trace_id,
-                        observation_id=observation_id,
-                        field=field,
-                    )
-
-                    body[field] = processed_data
-
-        except Exception as e:
-            self._log.error(
-                f"Media processing error: Failed to process multimodal event content. Event data may be incomplete. Error: {e}"
-            )
-
     def _find_and_process_media(
         self,
         *,
         data: Any,
         trace_id: str,
         observation_id: Optional[str],
         field: str,
-        project_id: Optional[str],
     ):
-        if not project_id:
-            return data
-
         seen = set()
         max_levels = 10
 
@@ -121,7 +75,6 @@ def _process_data_recursively(data: Any, level: int):
                     trace_id=trace_id,
                     observation_id=observation_id,
                     field=field,
-                    project_id=project_id,
                 )
 
                 return data
@@ -137,7 +90,6 @@ def _process_data_recursively(data: Any, level: int):
                     trace_id=trace_id,
                     observation_id=observation_id,
                     field=field,
-                    project_id=project_id,
                 )
 
                 return media
@@ -159,7 +111,6 @@ def _process_data_recursively(data: Any, level: int):
                     trace_id=trace_id,
                     observation_id=observation_id,
                     field=field,
-                    project_id=project_id,
                 )
 
                 data["data"] = media
@@ -183,7 +134,6 @@ def _process_data_recursively(data: Any, level: int):
                     trace_id=trace_id,
                     observation_id=observation_id,
                     field=field,
-                    project_id=project_id,
                 )
 
                 data["data"] = media
@@ -210,7 +160,6 @@ def _process_media(
         trace_id: str,
         observation_id: Optional[str],
         field: str,
-        project_id: str,
     ):
         if (
             media._content_length is None
@@ -220,10 +169,9 @@ def _process_media(
         ):
             return
 
-        # Important as this is will be used in the media reference string in serializer
-        media._media_id = self._get_media_id(
-            project_id=project_id, content_sha256_hash=media._content_sha256_hash
-        )
+        if media._media_id is None:
+            self._log.error("Media ID is None. Skipping upload.")
+            return
 
         try:
             upload_media_job = UploadMediaJob(
@@ -255,13 +203,6 @@ def _process_media(
                 f"Media processing error: Failed to process media_id={media._media_id} for trace_id={trace_id}. Error: {str(e)}"
             )
 
-    def _get_media_id(self, *, project_id: str, content_sha256_hash) -> str:
-        hash_obj = hashlib.sha256()
-        hash_obj.update((project_id + content_sha256_hash).encode("utf-8"))
-        media_id = base64.urlsafe_b64encode(hash_obj.digest()).decode("utf-8")[:22]
-
-        return media_id
-
     def _process_upload_media_job(
         self,
         *,
diff --git a/langfuse/media.py b/langfuse/media.py
@@ -68,7 +68,6 @@ def __init__(
                 the current working directory is used.
         """
         self.obj = obj
-        self._media_id = None
 
         if base64_data_uri is not None:
             parsed_data = self._parse_base64_data_uri(base64_data_uri)
@@ -96,6 +95,8 @@ def __init__(
             self._content_type = None
             self._source = None
 
+        self._media_id = self._get_media_id()
+
     def _read_file(self, file_path: str) -> Optional[bytes]:
         try:
             with open(file_path, "rb") as file:
@@ -105,6 +106,17 @@ def _read_file(self, file_path: str) -> Optional[bytes]:
 
             return None
 
+    def _get_media_id(self):
+        content_hash = self._content_sha256_hash
+
+        if content_hash is None:
+            return
+
+        # Make the hash URL-safe by mapping "+" to "-" and "/" to "_" (the base64url alphabet)
+        url_safe_content_hash = content_hash.replace("+", "-").replace("/", "_")
+
+        return url_safe_content_hash[:22]
+
     @property
     def _content_length(self) -> Optional[int]:
         return len(self._content_bytes) if self._content_bytes else None

Original file line number	Diff line number	Diff line change
`@@ -467,7 +467,6 @@ def _process_media_in_attribute(`
`467`	`467`	`field=field,`
`468`	`468`	`trace_id=self.trace_id,`
`469`	`469`	`observation_id=self.id,`
`470`		`- project_id=self._langfuse_client._resources.project_id,`
`471`	`470`	`)`
`472`	`471`	`)`
`473`	`472`