Skip to content

Commit 23543fc

Browse files
ntjohnson1 and claude committed
feat: allow replacing the global SessionContext
Promote the previously immutable global context slot in `datafusion-python-util` from `OnceLock<Arc<SessionContext>>` to a `RwLock<Arc<SessionContext>>` and expose `set_global_ctx` (Rust) / `SessionContext.set_as_global` (Python).

Users who register UDFs or otherwise customize a context can now make it the default seen by `SessionContext.global_ctx()` and the module-level `read_*` helpers. Existing snapshots returned by `get_global_ctx()` are unaffected — the swap only changes what subsequent readers see.

Also fixes a pre-existing clippy `uninlined_format_args` nit in `dataframe.rs` that was tripping the pre-commit hook.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent e0284c6 commit 23543fc

4 files changed

Lines changed: 90 additions & 8 deletions

File tree

crates/core/src/context.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ use datafusion_ffi::table_provider_factory::FFI_TableProviderFactory;
5656
use datafusion_proto::logical_plan::DefaultLogicalExtensionCodec;
5757
use datafusion_python_util::{
5858
create_logical_extension_capsule, ffi_logical_codec_from_pycapsule, get_global_ctx,
59-
get_tokio_runtime, spawn_future, wait_for_future,
59+
get_tokio_runtime, set_global_ctx, spawn_future, wait_for_future,
6060
};
6161
use object_store::ObjectStore;
6262
use pyo3::IntoPyObjectExt;
@@ -407,11 +407,22 @@ impl PySessionContext {
407407
#[staticmethod]
408408
#[pyo3(signature = ())]
409409
pub fn global_ctx() -> PyResult<Self> {
410-
let ctx = get_global_ctx().clone();
410+
let ctx = get_global_ctx();
411411
let logical_codec = Self::default_logical_codec(&ctx);
412412
Ok(Self { ctx, logical_codec })
413413
}
414414

415+
/// Replace the process-wide global `SessionContext` with this one.
416+
///
417+
/// All subsequent callers of `SessionContext.global_ctx()` (and Rust
418+
/// helpers that fall back to the global context, such as the
419+
/// `read_parquet` / `read_csv` / etc. module-level helpers) will see this
420+
/// context. Existing references already obtained from `global_ctx()` are
421+
/// not affected.
422+
pub fn set_as_global(&self) {
423+
set_global_ctx(self.ctx.clone());
424+
}
425+
415426
/// Register an object store with the given name
416427
#[pyo3(signature = (scheme, store, host=None))]
417428
pub fn register_object_store(

crates/core/src/dataframe.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -851,7 +851,7 @@ impl PyDataFrame {
851851
Some(f) => f
852852
.parse::<datafusion::common::format::ExplainFormat>()
853853
.map_err(|e| {
854-
PyDataFusionError::Common(format!("Invalid explain format '{}': {}", f, e))
854+
PyDataFusionError::Common(format!("Invalid explain format '{f}': {e}"))
855855
})?,
856856
None => datafusion::common::format::ExplainFormat::Indent,
857857
};

crates/util/src/lib.rs

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use std::future::Future;
1919
use std::ptr::NonNull;
20-
use std::sync::{Arc, OnceLock};
20+
use std::sync::{Arc, OnceLock, RwLock};
2121
use std::time::Duration;
2222

2323
use datafusion::datasource::TableProvider;
@@ -59,11 +59,29 @@ pub fn is_ipython_env(py: Python) -> &'static bool {
5959
})
6060
}
6161

62-
/// Utility to get the Global Datafussion CTX
62+
fn global_ctx_slot() -> &'static RwLock<Arc<SessionContext>> {
63+
static CTX: OnceLock<RwLock<Arc<SessionContext>>> = OnceLock::new();
64+
CTX.get_or_init(|| RwLock::new(Arc::new(SessionContext::new())))
65+
}
66+
67+
/// Utility to get the Global DataFusion CTX.
68+
///
69+
/// Returns an owned `Arc<SessionContext>` snapshot. The underlying slot can be
70+
/// replaced via [`set_global_ctx`]; existing snapshots are unaffected.
6371
#[inline]
64-
pub fn get_global_ctx() -> &'static Arc<SessionContext> {
65-
static CTX: OnceLock<Arc<SessionContext>> = OnceLock::new();
66-
CTX.get_or_init(|| Arc::new(SessionContext::new()))
72+
pub fn get_global_ctx() -> Arc<SessionContext> {
73+
global_ctx_slot()
74+
.read()
75+
.expect("global SessionContext lock poisoned")
76+
.clone()
77+
}
78+
79+
/// Replace the Global DataFusion CTX. Subsequent calls to [`get_global_ctx`]
80+
/// will return the new context. Already-cloned `Arc`s are not affected.
81+
pub fn set_global_ctx(ctx: Arc<SessionContext>) {
82+
*global_ctx_slot()
83+
.write()
84+
.expect("global SessionContext lock poisoned") = ctx;
6785
}
6886

6987
/// Utility to collect rust futures with GIL released and respond to
@@ -224,3 +242,40 @@ pub fn ffi_logical_codec_from_pycapsule(obj: Bound<PyAny>) -> PyResult<FFI_Logic
224242

225243
Ok(codec.clone())
226244
}
245+
246+
#[cfg(test)]
247+
mod tests {
248+
use super::*;
249+
250+
/// The global slot must round-trip a custom `SessionContext`. Since the
251+
/// global is process-wide, this test only asserts identity through a
252+
/// single set/get cycle and restores the prior value at the end so the
253+
/// test is independent of ordering with other tests in the binary.
254+
#[test]
255+
fn set_global_ctx_replaces_default() {
256+
let prior = get_global_ctx();
257+
let custom = Arc::new(SessionContext::new());
258+
let custom_ptr = Arc::as_ptr(&custom);
259+
260+
set_global_ctx(custom.clone());
261+
let observed = get_global_ctx();
262+
assert_eq!(
263+
Arc::as_ptr(&observed),
264+
custom_ptr,
265+
"get_global_ctx should return the context installed by set_global_ctx",
266+
);
267+
268+
// A snapshot taken before the swap should be unaffected after another
269+
// set_global_ctx call, because get_global_ctx clones the Arc.
270+
let snapshot = get_global_ctx();
271+
let replacement = Arc::new(SessionContext::new());
272+
set_global_ctx(replacement);
273+
assert_eq!(
274+
Arc::as_ptr(&snapshot),
275+
custom_ptr,
276+
"previously cloned snapshots must not be invalidated by set_global_ctx",
277+
);
278+
279+
set_global_ctx(prior);
280+
}
281+
}

python/datafusion/context.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,22 @@ def global_ctx(cls) -> SessionContext:
566566
wrapper.ctx = internal_ctx
567567
return wrapper
568568

569+
def set_as_global(self) -> None:
570+
"""Install this context as the process-wide global ``SessionContext``.
571+
572+
After this call, :meth:`SessionContext.global_ctx` (and the module-level
573+
helpers in :mod:`datafusion.io` that fall back to the global context)
574+
will return this context. Existing references already obtained from
575+
``global_ctx()`` are not invalidated.
576+
577+
Example::
578+
579+
ctx = SessionContext()
580+
ctx.register_udf(my_udf)
581+
ctx.set_as_global()
582+
"""
583+
self.ctx.set_as_global()
584+
569585
def enable_url_table(self) -> SessionContext:
570586
"""Control if local files can be queried as tables.
571587

0 commit comments

Comments
 (0)