Skip to content

Commit f0363d5

Browse files
authored
feat: add read, write, and search *_sync (#232)
Closes #96
1 parent 3970222 commit f0363d5

8 files changed

Lines changed: 341 additions & 67 deletions

File tree

python/rustac/rustac.pyi

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,30 @@ async def read(
299299
>>> item = await rustac.read("item.json")
300300
"""
301301

302+
def read_sync(
303+
href: str,
304+
*,
305+
format: str | None = None,
306+
store: AnyObjectStore | None = None,
307+
set_self_link: bool = True,
308+
) -> dict[str, Any]:
309+
"""
310+
Reads STAC from a href synchronously.
311+
312+
Args:
313+
href: The href to write to
314+
format: The input format. If not provided, will be inferred
315+
from the href's extension.
316+
store: An optional [ObjectStore][]
317+
set_self_link: If True, set the `self` link to the value of `href`.
318+
319+
Returns:
320+
The STAC value
321+
322+
Examples:
323+
>>> item = rustac.read_sync("item.json")
324+
"""
325+
302326
def from_arrow(
303327
table: arro3.core.Table,
304328
) -> dict[str, Any]:
@@ -405,6 +429,82 @@ async def search(
405429
... )
406430
"""
407431

432+
def search_sync(
433+
href: str,
434+
*,
435+
intersects: str | dict[str, Any] | None = None,
436+
ids: str | list[str] | None = None,
437+
collections: str | list[str] | None = None,
438+
max_items: int | None = None,
439+
limit: int | None = None,
440+
bbox: list[float] | None = None,
441+
datetime: str | None = None,
442+
include: str | list[str] | None = None,
443+
exclude: str | list[str] | None = None,
444+
sortby: str | list[str | dict[str, str]] | None = None,
445+
filter: str | dict[str, Any] | None = None,
446+
query: dict[str, Any] | None = None,
447+
use_duckdb: bool | None = None,
448+
**kwargs: str,
449+
) -> list[dict[str, Any]]:
450+
"""
451+
Searches a STAC API server or a stac-geoparquet file synchronously.
452+
453+
Args:
454+
href: The STAC API to search.
455+
intersects: Searches items
456+
by performing intersection between their geometry and provided GeoJSON
457+
geometry.
458+
ids: Array of Item ids to return.
459+
collections: Array of one or more Collection IDs that
460+
each matching Item must be in.
461+
max_items: The maximum number of items to iterate through.
462+
limit: The page size returned from the server. Use
463+
`max_items` to actually limit the number of items returned from this
464+
function.
465+
bbox: Requested bounding box.
466+
datetime: Single date+time, or a range (`/` separator),
467+
formatted to RFC 3339, section 5.6. Use double dots .. for open
468+
date ranges.
469+
470+
Partial dates are also supported and will be automatically expanded
471+
to full RFC 3339 datetime ranges:
472+
473+
- Year only (e.g., "2023") expands to 2023-01-01T00:00:00Z/2023-12-31T23:59:59Z
474+
- Year-Month (e.g., "2023-06") expands to 2023-06-01T00:00:00Z/2023-06-30T23:59:59Z
475+
- ISO 8601 date (e.g., "2023-06-15") expands to 2023-06-15T00:00:00Z/2023-06-15T23:59:59Z
476+
- Ranges also support partial dates (e.g., "2017/2018", "2017-06/2017-07")
477+
include: fields to include in the response (see [the
478+
extension
479+
docs](https://github.com/stac-api-extensions/fields?tab=readme-ov-file#includeexclude-semantics))
480+
for more on the semantics).
481+
exclude: fields to exclude from the response (see [the
482+
extension
483+
docs](https://github.com/stac-api-extensions/fields?tab=readme-ov-file#includeexclude-semantics))
484+
for more on the semantics).
485+
sortby: Fields by which to sort results (use `-field` to sort descending).
486+
filter: CQL2 filter expression. Strings
487+
will be interpreted as cql2-text, dictionaries as cql2-json.
488+
query: Additional filtering based on properties.
489+
It is recommended to use filter instead, if possible.
490+
use_duckdb: Query with DuckDB. If None and the href has a
491+
'parquet' or 'geoparquet' extension, will be set to True. Defaults
492+
to None.
493+
kwargs: Additional parameters to pass in to the search.
494+
495+
Returns:
496+
STAC items
497+
498+
Examples:
499+
>>> items = rustac.search_sync(
500+
... "https://landsatlook.usgs.gov/stac-server",
501+
... collections=["landsat-c2l2-sr"],
502+
... intersects={"type": "Point", "coordinates": [-105.119, 40.173]},
503+
... sortby="-properties.datetime",
504+
... max_items=1,
505+
... )
506+
"""
507+
408508
async def iter_search(
409509
href: str,
410510
*,
@@ -613,6 +713,39 @@ async def write(
613713
>>> await rustac.write("items.parquet", items)
614714
"""
615715

716+
def write_sync(
717+
href: str,
718+
value: dict[str, Any] | Sequence[dict[str, Any]],
719+
*,
720+
format: str | None = None,
721+
parquet_compression: str | None = None,
722+
store: AnyObjectStore | None = None,
723+
) -> dict[str, str] | None:
724+
"""
725+
Writes STAC to a href synchronously.
726+
727+
Args:
728+
href: The href to write to
729+
value: The value to write. This
730+
can be a STAC dictionary or a list of items.
731+
format: The output format to write. If not provided, will be
732+
inferred from the href's extension.
733+
parquet_compression: If writing stac-geoparquet, sets the compression
734+
algorithm.
735+
https://docs.rs/parquet/latest/parquet/basic/enum.Compression.html
736+
is a list of what's available.
737+
store: The object store to use for writing.
738+
739+
Returns:
740+
The result of putting data into an object store, e.g. the e_tag and the
741+
version. None is returned if the file was written locally.
742+
743+
Examples:
744+
>>> with open("items.json") as f:
745+
... items = json.load(f)
746+
>>> rustac.write_sync("items.parquet", items)
747+
"""
748+
616749
def version(
617750
name: Literal["stac"]
618751
| Literal["stac-api"]

src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,16 @@ fn rustac(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
3838
)?)?;
3939
m.add_function(wrap_pyfunction!(migrate::migrate, m)?)?;
4040
m.add_function(wrap_pyfunction!(read::read, m)?)?;
41+
m.add_function(wrap_pyfunction!(read::read_sync, m)?)?;
4142
m.add_function(wrap_pyfunction!(search::iter_search, m)?)?;
4243
m.add_function(wrap_pyfunction!(search::search, m)?)?;
44+
m.add_function(wrap_pyfunction!(search::search_sync, m)?)?;
4345
m.add_function(wrap_pyfunction!(search::search_to, m)?)?;
4446
m.add_function(wrap_pyfunction!(version::sha, m)?)?;
4547
m.add_function(wrap_pyfunction!(version::version, m)?)?;
4648
m.add_function(wrap_pyfunction!(walk::walk, m)?)?;
4749
m.add_function(wrap_pyfunction!(write::write, m)?)?;
50+
m.add_function(wrap_pyfunction!(write::write_sync, m)?)?;
4851

4952
pyo3_object_store::register_store_module(py, m, "rustac", "store")?;
5053
pyo3_object_store::register_exceptions_module(py, m, "rustac", "exceptions")?;

src/read.rs

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,35 @@ use pyo3_object_store::AnyObjectStore;
44
use stac::{Link, Links, SelfHref, Value};
55
use stac_io::{Format, StacStore};
66

7+
async fn read_inner(
8+
href: String,
9+
format: Format,
10+
store: Option<AnyObjectStore>,
11+
set_self_link: bool,
12+
) -> PyResult<Json<Value>> {
13+
let mut value: Value = if let Some(store) = store {
14+
StacStore::from(store)
15+
.get_format(&href, format)
16+
.await
17+
.map_err(Error::from)?
18+
} else {
19+
let (store, path) = stac_io::parse_href(&href).map_err(Error::from)?;
20+
store.get_format(path, format).await.map_err(Error::from)?
21+
};
22+
if set_self_link {
23+
value.set_link(Link::self_(href.clone()));
24+
}
25+
*value.self_href_mut() = Some(href.into());
26+
Ok(Json(value))
27+
}
28+
29+
fn parse_format(format: Option<String>, href: &str) -> Format {
30+
format
31+
.and_then(|f| f.parse::<Format>().ok())
32+
.or_else(|| Format::infer_from_href(href))
33+
.unwrap_or_default()
34+
}
35+
736
#[pyfunction]
837
#[pyo3(signature = (href, *, format=None, store=None, set_self_link=true))]
938
pub fn read(
@@ -13,24 +42,24 @@ pub fn read(
1342
store: Option<AnyObjectStore>,
1443
set_self_link: bool,
1544
) -> PyResult<Bound<'_, PyAny>> {
16-
let format = format
17-
.and_then(|f| f.parse::<Format>().ok())
18-
.or_else(|| Format::infer_from_href(&href))
19-
.unwrap_or_default();
45+
let format = parse_format(format, &href);
2046
pyo3_async_runtimes::tokio::future_into_py(py, async move {
21-
let mut value: Value = if let Some(store) = store {
22-
StacStore::from(store)
23-
.get_format(&href, format)
24-
.await
25-
.map_err(Error::from)?
26-
} else {
27-
let (store, path) = stac_io::parse_href(&href).map_err(Error::from)?;
28-
store.get_format(path, format).await.map_err(Error::from)?
29-
};
30-
if set_self_link {
31-
value.set_link(Link::self_(href.clone()));
32-
}
33-
*value.self_href_mut() = Some(href.into());
34-
Ok(Json(value))
47+
read_inner(href, format, store, set_self_link).await
48+
})
49+
}
50+
51+
#[pyfunction]
52+
#[pyo3(signature = (href, *, format=None, store=None, set_self_link=true))]
53+
pub fn read_sync(
54+
py: Python<'_>,
55+
href: String,
56+
format: Option<String>,
57+
store: Option<AnyObjectStore>,
58+
set_self_link: bool,
59+
) -> PyResult<Json<Value>> {
60+
let format = parse_format(format, &href);
61+
py.detach(|| {
62+
pyo3_async_runtimes::tokio::get_runtime()
63+
.block_on(async { read_inner(href, format, store, set_self_link).await })
3564
})
3665
}

src/search.rs

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,9 @@ pub fn search<'py>(
110110
query,
111111
kwargs,
112112
)?;
113-
if use_duckdb
114-
.unwrap_or_else(|| matches!(Format::infer_from_href(&href), Some(Format::Geoparquet(_))))
115-
{
113+
let use_duckdb = use_duckdb
114+
.unwrap_or_else(|| matches!(Format::infer_from_href(&href), Some(Format::Geoparquet(_))));
115+
if use_duckdb {
116116
pyo3_async_runtimes::tokio::future_into_py(py, async move {
117117
let value = search_duckdb(href, search, max_items)?;
118118
Ok(Json(value.items))
@@ -125,6 +125,56 @@ pub fn search<'py>(
125125
}
126126
}
127127

128+
#[pyfunction]
129+
#[pyo3(signature = (href, *, intersects=None, ids=None, collections=None, max_items=None, limit=None, bbox=None, datetime=None, include=None, exclude=None, sortby=None, filter=None, query=None, use_duckdb=None, **kwargs))]
130+
#[allow(clippy::too_many_arguments)]
131+
pub fn search_sync<'py>(
132+
py: Python<'py>,
133+
href: String,
134+
intersects: Option<StringOrDict>,
135+
ids: Option<StringOrList>,
136+
collections: Option<StringOrList>,
137+
max_items: Option<usize>,
138+
limit: Option<u64>,
139+
bbox: Option<Vec<f64>>,
140+
datetime: Option<String>,
141+
include: Option<StringOrList>,
142+
exclude: Option<StringOrList>,
143+
sortby: Option<PySortby<'py>>,
144+
filter: Option<StringOrDict>,
145+
query: Option<Bound<'py, PyDict>>,
146+
use_duckdb: Option<bool>,
147+
kwargs: Option<Bound<'_, PyDict>>,
148+
) -> PyResult<Json<Vec<Map<String, Value>>>> {
149+
let search = build(
150+
intersects,
151+
ids,
152+
collections,
153+
limit,
154+
bbox,
155+
datetime,
156+
include,
157+
exclude,
158+
sortby,
159+
filter,
160+
query,
161+
kwargs,
162+
)?;
163+
let use_duckdb = use_duckdb
164+
.unwrap_or_else(|| matches!(Format::infer_from_href(&href), Some(Format::Geoparquet(_))));
165+
if use_duckdb {
166+
let value = search_duckdb(href, search, max_items)?;
167+
Ok(Json(value.items))
168+
} else {
169+
py.detach(|| {
170+
pyo3_async_runtimes::tokio::get_runtime().block_on(async {
171+
let value = search_api(href, search, max_items).await?;
172+
Ok(Json(value.items))
173+
})
174+
})
175+
}
176+
}
177+
128178
#[pyfunction]
129179
#[pyo3(signature = (outfile, href, *, intersects=None, ids=None, collections=None, max_items=None, limit=None, bbox=None, datetime=None, include=None, exclude=None, sortby=None, filter=None, query=None, format=None, parquet_compression=None, store=None, use_duckdb=None, **kwargs))]
130180
#[allow(clippy::too_many_arguments)]

0 commit comments

Comments
 (0)