Skip to content

Commit

Permalink
Add Array.query in docs and improve docs in general (#1965)
Browse files Browse the repository at this point in the history
- Added `Array.query` in docs, both `SparseArray` and `DenseArray`.
- Fixed some minor errors/warnings while building docs like importing modules from `tiledb.libtiledb` instead of `tiledb`.
- Set a default language (`"en"`) for the docs; leaving it as `None` was causing a build warning.
- Fixed some typos, like double 'a'
- Addressed the issue with new `__repr__` of `OrderedDict` in Python 3.12 using [np.testing.assert_equal](https://numpy.org/doc/stable/reference/generated/numpy.testing.assert_equal.html).
  • Loading branch information
kounelisagis authored May 18, 2024
1 parent 9430e9a commit 5b1f60e
Show file tree
Hide file tree
Showing 8 changed files with 78 additions and 59 deletions.
2 changes: 1 addition & 1 deletion doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
language = "en"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
Expand Down
21 changes: 10 additions & 11 deletions doc/source/python-api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ Filters
.. automethod:: __getitem__(idx)
.. automethod:: __len__

.. autoclass:: tiledb.libtiledb.CompressionFilter
.. autoclass:: tiledb.CompressionFilter
:members:
.. autoclass:: tiledb.GzipFilter
:members:
Expand Down Expand Up @@ -116,22 +116,21 @@ Dense Array
-----------

.. autoclass:: tiledb.DenseArray
:members:

.. automethod:: __getitem__(selection)
.. automethod:: __setitem__(selection, value)
.. automethod:: query
.. automethod:: from_numpy(uri, array, ctx=None, **kwargs)
:members: query
:special-members: __getitem__, __setitem__

Sparse Array
------------

.. autoclass:: tiledb.SparseArray
:members:
:members: query
:special-members: __getitem__, __setitem__

.. automethod:: __getitem__(selection)
.. automethod:: __setitem__(selection, value)
.. automethod:: query
Query
---------------

.. autoclass:: tiledb.libtiledb.Query
:members:

Query Condition
---------------
Expand Down
17 changes: 12 additions & 5 deletions tiledb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
Bzip2Filter,
ChecksumMD5Filter,
ChecksumSHA256Filter,
CompressionFilter,
DeltaFilter,
DictionaryFilter,
DoubleDeltaFilter,
Expand Down Expand Up @@ -89,6 +90,8 @@
)
from .libtiledb import (
Array,
DenseArrayImpl,
SparseArrayImpl,
consolidate,
ls,
move,
Expand All @@ -101,8 +104,6 @@
vacuum,
walk,
)
from .libtiledb import DenseArrayImpl as DenseArray
from .libtiledb import SparseArrayImpl as SparseArray
from .multirange_indexing import EmptyRange
from .object import Object
from .parquet_ import from_parquet
Expand Down Expand Up @@ -132,13 +133,19 @@
try:
from tiledb.cloud.cloudarray import CloudArray
except ImportError:
pass

class DenseArray(DenseArrayImpl):
pass

class SparseArray(SparseArrayImpl):
pass

else:

class DenseArray(DenseArray, CloudArray):
class DenseArray(DenseArrayImpl, CloudArray):
pass

class SparseArray(SparseArray, CloudArray):
class SparseArray(SparseArrayImpl, CloudArray):
pass

del CloudArray
2 changes: 1 addition & 1 deletion tiledb/array_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ def version(self) -> int:
"""The array's schema (storage) version.
:rtype: int
:raises :py:exc:`tiledb.TileDBError`
:raises: :py:exc:`tiledb.TileDBError`
"""
return self._version

Expand Down
2 changes: 1 addition & 1 deletion tiledb/ctx.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def dict(self, prefix: str = ""):
:param str prefix: return only parameters with a given prefix
:rtype: dict
:return: Config parameter / values as a a Python dict
:return: Config parameter / values as a Python dict
"""
return dict(ConfigItems(self, prefix=prefix))
Expand Down
6 changes: 3 additions & 3 deletions tiledb/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,8 +326,7 @@ class DoubleDeltaFilter(CompressionFilter):
:param level: -1 (default) sets the compressor level to the default level as specified in TileDB core. Otherwise, sets the compressor level to the given value.
:type level: int
:param reinterp_dtype: (optional) sets the compressor to compress the data treating
as the new datatype.
:param reinterp_dtype: (optional) sets the compressor to compress the data treating it as the new datatype.
**Example:**
Expand Down Expand Up @@ -501,7 +500,8 @@ class PositiveDeltaFilter(Filter):
:param ctx: A TileDB Context
:type ctx: tiledb.Ctx
:param window: -1 (default) sets the max window size for the filter to the default window size as specified in TileDB core. Otherwise, sets the max window size to the given value.
:type window: int
:type window: int
**Example:**
>>> import tiledb, numpy as np, tempfile
Expand Down
76 changes: 46 additions & 30 deletions tiledb/libtiledb.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1525,8 +1525,8 @@ cdef class Array(object):
**Example:**
>>> import tiledb, numpy as np
>>>
>>> import tiledb, numpy as np, tempfile
>>> from collections import OrderedDict
>>> dim1 = tiledb.Dim("d1", domain=(1, 4))
>>> dim2 = tiledb.Dim("d2", domain=(1, 3))
>>> dom = tiledb.Domain(dim1, dim2)
Expand All @@ -1551,21 +1551,30 @@ cdef class Array(object):
... A[:] = {"a1": a1_data, "l1": l1_data, "l2": l2_data, "l3": l3_data}
...
... with tiledb.open(tmp, "r") as A:
... A.label_index(["l1"])[3:4] # doctest: +ELLIPSIS
... A.label_index(["l1", "l3"])[2, 0.5:1.0] # doctest: +ELLIPSIS
... A.label_index(["l2"])[:, -1:0] # doctest: +ELLIPSIS
... A.label_index(["l3"])[:, 0.5:1.0] # doctest: +ELLIPSIS
OrderedDict(...'l1'... array([4, 3])..., ...'a1'... array([[1, 2, 3],
[4, 5, 6]])...)
OrderedDict(...'l3'... array([0.5, 1. ])..., ...'l1'... array([2])..., ...'a1'... array([[8, 9]])...)
OrderedDict(...'l2'... array([-1, 0])..., ...'a1'... array([[ 1, 2],
[ 4, 5],
[ 7, 8],
[10, 11]])...)
OrderedDict(...'l3'... array([0.5, 1. ])..., ...'a1'... array([[ 2, 3],
[ 5, 6],
[ 8, 9],
[11, 12]])...)
... np.testing.assert_equal(
... A.label_index(["l1"])[3:4],
... OrderedDict({"l1": [4, 3], "a1": [[1, 2, 3], [4, 5, 6]]}),
... )
... np.testing.assert_equal(
... A.label_index(["l1", "l3"])[2, 0.5:1.0],
... OrderedDict(
... {"l3": [0.5, 1.0], "l1": [2], "a1": [[8, 9]]}
... ),
... )
... np.testing.assert_equal(
... A.label_index(["l2"])[:, -1:0],
... OrderedDict(
... {"l2": [-1, 0],
... "a1": [[1, 2], [4, 5], [7, 8], [10, 11]]},
... ),
... )
... np.testing.assert_equal(
... A.label_index(["l3"])[:, 0.5:1.0],
... OrderedDict(
... {"l3": [0.5, 1.],
... "a1": [[2, 3], [5, 6], [8, 9], [11, 12]]},
... ),
... )
:param labels: List of labels to use when querying. Can only use at most one
label per dimension.
Expand All @@ -1574,6 +1583,7 @@ cdef class Array(object):
query the array on the corresponding dimension.
:returns: dict of {'label/attribute': result}.
:raises: :py:exc:`tiledb.TileDBError`
"""
# Delayed to avoid circular import
from .multirange_indexing import LabelIndexer
Expand Down Expand Up @@ -2158,8 +2168,7 @@ cdef class DenseArrayImpl(Array):
def query(self, attrs=None, attr_cond=None, cond=None, dims=None,
coords=False, order='C', use_arrow=None, return_arrow=False,
return_incomplete=False):
"""
Construct a proxy Query object for easy subarray queries of cells
"""Construct a proxy Query object for easy subarray queries of cells
for an item or region of the array across one or more attributes.
Optionally subselect over attributes, return dense result coordinate values,
Expand Down Expand Up @@ -2202,8 +2211,8 @@ cdef class DenseArrayImpl(Array):
... A[0:10] = {"a1": np.zeros((10)), "a2": np.ones((10))}
... with tiledb.DenseArray(tmp + "/array", mode='r') as A:
... # Access specific attributes individually.
... A.query(attrs=("a1",))[0:5] # doctest: +ELLIPSIS
OrderedDict(...'a1'... array([0, 0, 0, 0, 0])...)
... np.testing.assert_equal(A.query(attrs=("a1",))[0:5],
... {"a1": np.zeros(5)})
"""
if not self.isopen or self.mode != 'r':
Expand Down Expand Up @@ -2257,8 +2266,8 @@ cdef class DenseArrayImpl(Array):
... A[0:10] = {"a1": np.zeros((10)), "a2": np.ones((10))}
... with tiledb.DenseArray(tmp + "/array", mode='r') as A:
... # A[0:5], attribute a1, row-major without coordinates
... A.subarray((slice(0, 5),), attrs=("a1",), coords=False, order='C') # doctest: +ELLIPSIS
OrderedDict(...'a1'... array([0, 0, 0, 0, 0])...)
... np.testing.assert_equal(A.subarray((slice(0, 5),), attrs=("a1",), coords=False, order='C'),
... OrderedDict({'a1': np.zeros(5)}))
"""
from .subarray import Subarray
Expand Down Expand Up @@ -3178,6 +3187,7 @@ cdef class SparseArrayImpl(Array):
**Example:**
>>> import tiledb, numpy as np, tempfile
>>> from collections import OrderedDict
>>> # Write to multi-attribute 2D array
>>> with tempfile.TemporaryDirectory() as tmp:
... dom = tiledb.Domain(
Expand All @@ -3195,10 +3205,12 @@ cdef class SparseArrayImpl(Array):
... "a2": np.array([3, 4])}
... with tiledb.SparseArray(tmp + "/array", mode='r') as A:
... # Return an OrderedDict with values and coordinates
... A[0:3, 0:10] # doctest: +ELLIPSIS
... np.testing.assert_equal(A[0:3, 0:10], OrderedDict({'a1': np.array([1, 2]),
... 'a2': np.array([3, 4]), 'y': np.array([0, 2], dtype=np.uint64),
... 'x': np.array([0, 3], dtype=np.uint64)}))
... # Return just the "x" coordinates values
... A[0:3, 0:10]["x"] # doctest: +ELLIPSIS
OrderedDict(...'a1'... array([1, 2])..., ...'a2'... array([3, 4])..., ...'y'... array([0, 2], dtype=uint64)..., ...'x'... array([0, 3], dtype=uint64)...)
... A[0:3, 0:10]["x"]
array([0, 3], dtype=uint64)
With a floating-point array domain, index bounds are inclusive, e.g.:
Expand Down Expand Up @@ -3255,6 +3267,7 @@ cdef class SparseArrayImpl(Array):
**Example:**
>>> import tiledb, numpy as np, tempfile
>>> from collections import OrderedDict
>>> # Write to multi-attribute 2D array
>>> with tempfile.TemporaryDirectory() as tmp:
... dom = tiledb.Domain(
Expand All @@ -3271,8 +3284,8 @@ cdef class SparseArrayImpl(Array):
... A[I, J] = {"a1": np.array([1, 2]),
... "a2": np.array([3, 4])}
... with tiledb.SparseArray(tmp + "/array", mode='r') as A:
... A.query(attrs=("a1",), coords=False, order='G')[0:3, 0:10] # doctest: +ELLIPSIS
OrderedDict(...'a1'... array([1, 2])...)
... np.testing.assert_equal(A.query(attrs=("a1",), coords=False, order='G')[0:3, 0:10],
... OrderedDict({'a1': np.array([1, 2])}))
"""
if not self.isopen or self.mode not in ('r', 'd'):
Expand Down Expand Up @@ -3364,6 +3377,7 @@ cdef class SparseArrayImpl(Array):
**Example:**
>>> import tiledb, numpy as np, tempfile
>>> from collections import OrderedDict
>>> # Write to multi-attribute 2D array
>>> with tempfile.TemporaryDirectory() as tmp:
... dom = tiledb.Domain(
Expand All @@ -3381,8 +3395,10 @@ cdef class SparseArrayImpl(Array):
... "a2": np.array([3, 4])}
... with tiledb.SparseArray(tmp + "/array", mode='r') as A:
... # A[0:3, 0:10], attribute a1, row-major without coordinates
... A.subarray((slice(0, 3), slice(0, 10)), attrs=("a1",), coords=False, order='G') # doctest: +ELLIPSIS
OrderedDict(...'a1'... array([1, 2])...)
... np.testing.assert_equal(
... A.subarray((slice(0, 3), slice(0, 10)), attrs=("a1",), coords=False, order='G'),
... OrderedDict({'a1': np.array([1, 2])})
... )
"""
from .subarray import Subarray
Expand Down
11 changes: 4 additions & 7 deletions tiledb/vfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def write(self, file: lt.FileHandle, buff: Union[str, bytes]):
"""
if isinstance(file, FileIO):
raise lt.TileDBError(
"`tiledb.VFS().open` now returns a a FileIO object. Use "
"`tiledb.VFS().open` now returns a FileIO object. Use "
"`FileIO.write`. This message will be removed in 0.21.0.",
)
if isinstance(buff, str):
Expand All @@ -115,7 +115,7 @@ def read(self, file: lt.FileHandle, offset: int, nbytes: int) -> bytes:
"""
if isinstance(file, FileIO):
raise lt.TileDBError(
"`tiledb.VFS().open` now returns a a FileIO object. Use "
"`tiledb.VFS().open` now returns a FileIO object. Use "
"`FileIO.seek` and `FileIO.read`. This message will be removed "
"in 0.21.0."
)
Expand Down Expand Up @@ -436,9 +436,7 @@ def flush(self):
def seek(self, offset: int, whence: int = 0):
"""
:param int offset: Byte position to set the file pointer
:param int whence: Reference point. A whence value of 0 measures from the
beginning of the file, 1 uses the current file position, and 2 uses the
end of the file as the reference point. whence can be omitted and defaults to 0.
:param int whence: Reference point. A whence value of 0 measures from the beginning of the file, 1 uses the current file position, and 2 uses the end of the file as the reference point. whence can be omitted and defaults to 0.
"""
if not np.issubdtype(type(offset), np.integer):
raise TypeError(
Expand Down Expand Up @@ -475,8 +473,7 @@ def read(self, size: int = -1) -> bytes:
"""
Read the file from the current pointer position.
:param int size: Number of bytes to read. By default, size is set to -1
which will read until the end of the file.
:param int size: Number of bytes to read. By default, size is set to -1 which will read until the end of the file.
:rtype: bytes
:return: The bytes in the file
Expand Down

0 comments on commit 5b1f60e

Please sign in to comment.