@@ -415,6 +415,80 @@ def schema(self) -> pa.Schema:
415415 """
416416 return self .df .schema ()
417417
def column(self, name: str) -> Expr:
    """Look up ``name`` on this DataFrame and return it fully qualified.

    The unqualified ``name`` is resolved against this DataFrame's schema
    via :py:meth:`find_qualified_columns`, and the resulting
    :py:class:`Expr` carries the table qualifier in its column reference.
    That makes it handy after joins, when one column name can exist in
    more than one relation.

    Args:
        name: Unqualified column name to look up.

    Returns:
        A fully qualified column expression.

    Raises:
        Exception: If the column is not found or is ambiguous (exists in
            multiple relations).

    Examples:
        Resolve a column from a simple DataFrame:

        >>> ctx = dfn.SessionContext()
        >>> df = ctx.from_pydict({"a": [1, 2], "b": [3, 4]})
        >>> expr = df.column("a")
        >>> df.select(expr).to_pydict()
        {'a': [1, 2]}

        Resolve qualified columns after a join:

        >>> left = ctx.from_pydict({"id": [1, 2], "x": [10, 20]})
        >>> right = ctx.from_pydict({"id": [1, 2], "y": [30, 40]})
        >>> joined = left.join(right, on="id", how="inner")
        >>> expr = joined.column("y")
        >>> joined.select("id", expr).sort("id").to_pydict()
        {'id': [1, 2], 'y': [30, 40]}
    """
    # The batch lookup is called with a single name; only its first
    # element is handed back to the caller.
    resolved = self.find_qualified_columns(name)
    return resolved[0]
455+
def col(self, name: str) -> Expr:
    """Shorthand for :py:meth:`column`.

    Forwards ``name`` unchanged to :py:meth:`column` and returns whatever
    that method returns.

    See Also:
        :py:meth:`column`
    """
    qualified = self.column(name)
    return qualified
463+
def find_qualified_columns(self, *names: str) -> list[Expr]:
    """Resolve several unqualified names to qualified column expressions.

    Batch counterpart of :py:meth:`column`: every name is resolved against
    the DataFrame's schema, and the results come back as a list of
    qualified column expressions.

    Args:
        names: Unqualified column names to look up.

    Returns:
        List of fully qualified column expressions, one per name.

    Raises:
        Exception: If any column is not found or is ambiguous.

    Examples:
        Resolve multiple columns at once:

        >>> ctx = dfn.SessionContext()
        >>> df = ctx.from_pydict({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
        >>> exprs = df.find_qualified_columns("a", "c")
        >>> df.select(*exprs).to_pydict()
        {'a': [1, 2], 'c': [5, 6]}
    """
    # The wrapped DataFrame receives the names as a list; each item it
    # returns is wrapped in Expr before being handed back.
    requested = list(names)
    return list(map(Expr, self.df.find_qualified_columns(requested)))
491+
418492 @deprecated (
419493 "select_columns() is deprecated. Use :py:meth:`~DataFrame.select` instead"
420494 )
@@ -887,7 +961,13 @@ def join(
887961 ) -> DataFrame :
888962 """Join this :py:class:`DataFrame` with another :py:class:`DataFrame`.
889963
890- `on` has to be provided or both `left_on` and `right_on` in conjunction.
964+ ``on`` has to be provided or both ``left_on`` and ``right_on`` in
965+ conjunction.
966+
967+ When non-key columns share the same name in both DataFrames, use
968+ :py:meth:`DataFrame.col` on each DataFrame **before** the join to
969+ obtain fully qualified column references that can disambiguate them.
970+ See :py:meth:`join_on` for an example.
891971
892972 Args:
893973 right: Other DataFrame to join with.
@@ -961,7 +1041,14 @@ def join_on(
9611041 built with :func:`datafusion.col`. On expressions are used to support
9621042 in-equality predicates. Equality predicates are correctly optimized.
9631043
1044+ Use :py:meth:`DataFrame.col` on each DataFrame **before** the join to
1045+ obtain fully qualified column references. These qualified references
1046+ can then be used in the join predicate and to disambiguate columns
1047+ with the same name when selecting from the result.
1048+
9641049 Examples:
1050+ Join with unique column names:
1051+
9651052 >>> ctx = dfn.SessionContext()
9661053 >>> left = ctx.from_pydict({"a": [1, 2], "x": ["a", "b"]})
9671054 >>> right = ctx.from_pydict({"b": [1, 2], "y": ["c", "d"]})
@@ -970,6 +1057,18 @@ def join_on(
9701057 ... ).sort(col("x")).to_pydict()
9711058 {'a': [1, 2], 'x': ['a', 'b'], 'b': [1, 2], 'y': ['c', 'd']}
9721059
1060+ Use :py:meth:`col` to disambiguate shared column names:
1061+
1062+ >>> left = ctx.from_pydict({"id": [1, 2], "val": [10, 20]})
1063+ >>> right = ctx.from_pydict({"id": [1, 2], "val": [30, 40]})
1064+ >>> joined = left.join_on(
1065+ ... right, left.col("id") == right.col("id"), how="inner"
1066+ ... )
1067+ >>> joined.select(
1068+ ... left.col("id"), left.col("val"), right.col("val").alias("rval")
1069+ ... ).sort(left.col("id")).to_pydict()
1070+ {'id': [1, 2], 'val': [10, 20], 'rval': [30, 40]}
1071+
9731072 Args:
9741073 right: Other DataFrame to join with.
9751074 on_exprs: single or multiple (in)-equality predicates.
0 commit comments