Skip to content

Commit

Permalink
Merge pull request #147 from martindurant/intake2
Browse files Browse the repository at this point in the history
  • Loading branch information
martindurant authored Nov 13, 2024
2 parents 8dfcba8 + 20172a7 commit 719a7c8
Show file tree
Hide file tree
Showing 22 changed files with 172 additions and 1,293 deletions.
14 changes: 8 additions & 6 deletions ci/environment-py310.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,27 @@ name: test_env
channels:
- conda-forge
dependencies:
- python=3.10
- python=3.9
- aiohttp
- boto3
- exifread
- flask
- h5netcdf
- intake
- netcdf4
- pip
- pydap
- pytest
- rasterio
- s3fs >= 2021.08.0
- s3fs
- scikit-image
- xarray >= 0.17
- rangehttpserver
- xarray
- zarr
- moto < 3
- moto
- s3fs
- rioxarray
- werkzeug < 2.2.0
- werkzeug
- dask
- numpy <2
- pip:
- git+https://github.com/intake/intake
14 changes: 8 additions & 6 deletions ci/environment-py311.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,27 @@ name: test_env
channels:
- conda-forge
dependencies:
- python=3.11
- python=3.9
- aiohttp
- boto3
- exifread
- flask
- h5netcdf
- intake
- netcdf4
- pip
- pydap
- pytest
- rasterio
- s3fs >= 2021.08.0
- s3fs
- scikit-image
- xarray >= 0.17
- rangehttpserver
- xarray
- zarr
- moto < 3
- moto
- s3fs
- rioxarray
- werkzeug < 2.2.0
- werkzeug
- dask
- numpy <2
- pip:
- git+https://github.com/intake/intake
14 changes: 8 additions & 6 deletions ci/environment-py312.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,27 @@ name: test_env
channels:
- conda-forge
dependencies:
- python=3.12
- python=3.9
- aiohttp
- boto3
- exifread
- flask
- h5netcdf
- intake
- netcdf4
- pip
- pydap
- pytest
- rasterio
- s3fs >= 2021.08.0
- s3fs
- scikit-image
- rangehttpserver
- xarray >= 0.17
- xarray
- zarr
- moto < 3
- moto
- s3fs
- rioxarray
- werkzeug < 2.2.0
- werkzeug
- dask
- numpy <2
- pip:
- git+https://github.com/intake/intake
13 changes: 7 additions & 6 deletions ci/environment-py39.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,21 @@ dependencies:
- exifread
- flask
- h5netcdf
- intake
- netcdf4
- pip
- pydap
- pytest
- rasterio
- s3fs >= 2021.08.0
- s3fs
- scikit-image
- rangehttpserver
- xarray >= 0.17
- xarray
- zarr
- moto < 3
- moto
- s3fs
- rioxarray
- werkzeug < 2.2.0
- werkzeug
- dask

- numpy <2
- pip:
- git+https://github.com/intake/intake
5 changes: 3 additions & 2 deletions ci/environment-upstream.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@ dependencies:
- pandas
- tornado
- zarr
- moto < 3
- moto
- intake
- rioxarray
- gdal
- werkzeug < 2.2.0
- werkzeug
- rioxarray
- dask
- numpy <2
- pip:
- git+https://github.com/fsspec/filesystem_spec.git
- git+https://github.com/intake/intake.git
Expand Down
15 changes: 3 additions & 12 deletions intake_xarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,10 @@
__version__ = get_versions()['version']
del get_versions

import intake # Import this first to avoid circular imports during discovery.
from intake.container import register_container
import intake_xarray.base
import intake
from .netcdf import NetCDFSource
from .opendap import OpenDapSource
from .raster import RasterIOSource
from .xzarr import ZarrSource
from .xarray_container import RemoteXarray
#from .xzarr import ZarrSource
from .image import ImageSource


try:
intake.register_driver('remote-xarray', RemoteXarray)
except ValueError:
pass

register_container('xarray', RemoteXarray)
82 changes: 15 additions & 67 deletions intake_xarray/base.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,22 @@
from . import __version__
from intake.source.base import DataSource, Schema
class IntakeXarraySourceAdapter:
container = "xarray"
name = "xarray"
version = ""

def to_dask(self):
if "chunks" not in self.reader.kwargs:
return self.reader(chunks={}).read()
else:
return self.reader.read()

class DataSourceMixin(DataSource):
"""Common behaviours for plugins in this repo"""
version = __version__
container = 'xarray'
partition_access = True

def _get_schema(self):
"""Make schema object, which embeds xarray object and some details"""
from .xarray_container import serialize_zarr_ds

self.urlpath = self._get_cache(self.urlpath)[0]

if self._ds is None:
self._open_dataset()
def __call__(self, *args, **kwargs):
return self

metadata = {
'dims': dict(self._ds.dims),
'data_vars': {k: list(self._ds[k].coords)
for k in self._ds.data_vars.keys()},
'coords': tuple(self._ds.coords.keys()),
}
if getattr(self, 'on_server', False):
metadata['internal'] = serialize_zarr_ds(self._ds)
metadata.update(self._ds.attrs)
self._schema = Schema(
datashape=None,
dtype=None,
shape=None,
npartitions=None,
extra_metadata=metadata)
return self._schema
get = __call__

def read(self):
"""Return a version of the xarray with all the data in memory"""
self._load_metadata()
return self._ds.load()
return self.reader(chunks=None).read()

def read_chunked(self):
"""Return xarray object (which will have chunks)"""
self._load_metadata()
return self._ds

def read_partition(self, i):
"""Fetch one chunk of data at tuple index i
"""
import numpy as np
self._load_metadata()
if not isinstance(i, (tuple, list)):
raise TypeError('For Xarray sources, must specify partition as '
'tuple')
if isinstance(i, list):
i = tuple(i)
if hasattr(self._ds, 'variables') or i[0] in self._ds.coords:
arr = self._ds[i[0]].data
i = i[1:]
else:
arr = self._ds.data
if isinstance(arr, np.ndarray):
return arr
# dask array
return arr.blocks[i].compute()

def to_dask(self):
"""Return xarray object where variables are dask arrays"""
return self.read_chunked()
discover = read

def close(self):
"""Delete open file from memory"""
self._ds = None
self._schema = None
read_chunked = to_dask
64 changes: 0 additions & 64 deletions intake_xarray/derived.py

This file was deleted.

Loading

0 comments on commit 719a7c8

Please sign in to comment.