Skip to content

Commit b719b47

Browse files
timsaucer authored
and claude committed
Add optional argument examples to register_arrow and read_arrow docstrings
Demonstrate schema= and file_extension= keyword arguments in the docstring examples for register_arrow and read_arrow, following project guidelines for optional parameter documentation.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent d0b5d23 commit b719b47

1 file changed

Lines changed: 66 additions & 0 deletions

File tree

python/datafusion/context.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,6 +1164,42 @@ def register_arrow(
11641164
20,
11651165
30
11661166
]
1167+
1168+
Provide an explicit ``schema`` to override schema inference:
1169+
1170+
>>> with tempfile.TemporaryDirectory() as tmpdir:
1171+
... path = os.path.join(tmpdir, "data.arrow")
1172+
... with pa.ipc.new_file(path, table.schema) as writer:
1173+
... writer.write_table(table)
1174+
... ctx.register_arrow(
1175+
... "arrow_schema",
1176+
... path,
1177+
... schema=pa.schema([("x", pa.int64())]),
1178+
... )
1179+
... ctx.sql("SELECT * FROM arrow_schema").collect()[0].column(0)
1180+
<pyarrow.lib.Int64Array object at ...>
1181+
[
1182+
10,
1183+
20,
1184+
30
1185+
]
1186+
1187+
Use ``file_extension`` to read files with a non-default extension:
1188+
1189+
>>> with tempfile.TemporaryDirectory() as tmpdir:
1190+
... path = os.path.join(tmpdir, "data.ipc")
1191+
... with pa.ipc.new_file(path, table.schema) as writer:
1192+
... writer.write_table(table)
1193+
... ctx.register_arrow(
1194+
... "arrow_ipc", path, file_extension=".ipc"
1195+
... )
1196+
... ctx.sql("SELECT * FROM arrow_ipc").collect()[0].column(0)
1197+
<pyarrow.lib.Int64Array object at ...>
1198+
[
1199+
10,
1200+
20,
1201+
30
1202+
]
11671203
"""
11681204
if table_partition_cols is None:
11691205
table_partition_cols = []
@@ -1465,6 +1501,36 @@ def read_arrow(
14651501
2,
14661502
3
14671503
]
1504+
1505+
Provide an explicit ``schema`` to override schema inference:
1506+
1507+
>>> with tempfile.TemporaryDirectory() as tmpdir:
1508+
... path = os.path.join(tmpdir, "data.arrow")
1509+
... with pa.ipc.new_file(path, table.schema) as writer:
1510+
... writer.write_table(table)
1511+
... df = ctx.read_arrow(path, schema=pa.schema([("a", pa.int64())]))
1512+
... df.collect()[0].column(0)
1513+
<pyarrow.lib.Int64Array object at ...>
1514+
[
1515+
1,
1516+
2,
1517+
3
1518+
]
1519+
1520+
Use ``file_extension`` to read files with a non-default extension:
1521+
1522+
>>> with tempfile.TemporaryDirectory() as tmpdir:
1523+
... path = os.path.join(tmpdir, "data.ipc")
1524+
... with pa.ipc.new_file(path, table.schema) as writer:
1525+
... writer.write_table(table)
1526+
... df = ctx.read_arrow(path, file_extension=".ipc")
1527+
... df.collect()[0].column(0)
1528+
<pyarrow.lib.Int64Array object at ...>
1529+
[
1530+
1,
1531+
2,
1532+
3
1533+
]
14681534
"""
14691535
if file_partition_cols is None:
14701536
file_partition_cols = []

0 commit comments

Comments
 (0)