4040from datafusion ._internal import DataFrame as DataFrameInternal
4141from datafusion ._internal import ParquetColumnOptions as ParquetColumnOptionsInternal
4242from datafusion ._internal import ParquetWriterOptions as ParquetWriterOptionsInternal
43- from datafusion .expr import Expr , SortExpr , sort_or_default
43+ from datafusion .expr import Expr , SortExpr , sort_or_default , Window
4444from datafusion .plan import ExecutionPlan , LogicalPlan
4545from datafusion .record_batch import RecordBatchStream
46+ from datafusion .functions import col , nvl , last_value
47+ from datafusion .common import NullTreatment
4648
4749if TYPE_CHECKING :
4850 import pathlib
@@ -360,6 +362,9 @@ def describe(self) -> DataFrame:
360362 """
361363 return DataFrame (self .df .describe ())
362364
365+ @deprecated (
366+ "schema() is deprecated. Use :py:meth:`~DataFrame.get_schema` instead"
367+ )
363368 def schema (self ) -> pa .Schema :
364369 """Return the :py:class:`pyarrow.Schema` of this DataFrame.
365370
@@ -370,6 +375,39 @@ def schema(self) -> pa.Schema:
370375 Describing schema of the DataFrame
371376 """
372377 return self .df .schema ()
378+
379+ def to_batches (self ) -> list [pa .RecordBatch ]:
380+ """Convert DataFrame to list of RecordBatches."""
381+ return self .collect () # delegate to existing method
382+
383+ def interpolate (self , method : str = "forward_fill" , ** kwargs ) -> DataFrame :
384+ """Interpolate missing values per column.
385+
386+ Args:
387+ method: Interpolation method ('linear', 'forward_fill', 'backward_fill')
388+
389+ Returns:
390+ DataFrame with interpolated values
391+
392+ Raises:
393+ NotImplementedError: Linear interpolation not yet supported
394+ """
395+ if method == "forward_fill" :
396+ exprs = []
397+ for field in self .schema ():
398+ window = Window (order_by = col (field .name ))
399+ expr = nvl (col (field .name ),last_value (col (field .name )).over (window )).alias (field .name )
400+ exprs .append (expr )
401+ return self .select (* exprs )
402+
403+ elif method == "backward_fill" :
404+ raise NotImplementedError ("backward_fill not yet implemented" )
405+
406+ elif method == "linear" :
407+ raise NotImplementedError ("Linear interpolation requires complex window function logic" )
408+
409+ else :
410+ raise ValueError (f"Unknown interpolation method: { method } " )
373411
374412 @deprecated (
375413 "select_columns() is deprecated. Use :py:meth:`~DataFrame.select` instead"
@@ -592,6 +630,9 @@ def tail(self, n: int = 5) -> DataFrame:
592630 """
593631 return DataFrame (self .df .limit (n , max (0 , self .count () - n )))
594632
633+ @deprecated (
634+ "collect() returning RecordBatch list is deprecated. Use to_batches() for RecordBatch list or collect() will return DataFrame in future versions"
635+ )
595636 def collect (self ) -> list [pa .RecordBatch ]:
596637 """Execute this :py:class:`DataFrame` and collect results into memory.
597638
0 commit comments