diff --git a/terragon/copernicus_data_space_ecosystem.py b/terragon/copernicus_data_space_ecosystem.py index 460a500..4b3da2b 100644 --- a/terragon/copernicus_data_space_ecosystem.py +++ b/terragon/copernicus_data_space_ecosystem.py @@ -203,7 +203,9 @@ def _get_pages(self, data): for i in range(1, 100): _data = data.copy() _data["page"] = i - page = requests.post(urljoin(self.base_url, "search"), json=_data).json() + response = requests.post(urljoin(self.base_url, "search"), json=_data) + response.raise_for_status() + page = response.json() if "features" not in page: raise ValueError(f"There was an error with the request: {page}") @@ -212,7 +214,7 @@ def _get_pages(self, data): else: items.extend(page["features"]) - if i == 100: + if i == 99: raise ValueError( "Max number of pages reached. Consider using a smaller time frame." ) @@ -501,7 +503,7 @@ def _download_file_rasterio(self, f_path, shp, resampling): aws_secret_access_key=self.credentials["aws_secret_access_key"], ) with rasterio.env.Env(session=session, AWS_VIRTUAL_HOSTING=False): - clipped = self._clip_to_region("s3://eodata/" + str(f_path), shp, resampling) + clipped = self._clip_to_region("s3://eodata/" + f_path.as_posix(), shp, resampling) return clipped def _download_file_tile(self, f_path, shp, resampling): @@ -516,7 +518,7 @@ def _download_file_tile(self, f_path, shp, resampling): region_name="default", ).resource("s3", endpoint_url=self.end_point_url) - _s3.Bucket("eodata").download_file(str(f_path), download_path) + _s3.Bucket("eodata").download_file(f_path.as_posix(), download_path) # clip to shp clipped = self._clip_to_region(download_path, shp, resampling) @@ -571,7 +573,7 @@ def _get_asset_path(self, item, collection, band, resolution, filter_asset_path) # apply regex path filters if filter_asset_path and collection in filter_asset_path: pattern = re.compile(filter_asset_path[collection]) - paths = [path for path in paths if re.search(pattern, str(path))] + paths = [path for path in paths if re.search(pattern, path.as_posix())] if len(paths) == 0: raise RuntimeError( "There are no files matching the filter_asset_path: ", @@ -618,16 +620,15 @@ def _clip_to_region(self, file, shp, resampling): shp_crs.bounds.maxy.item() + margin[1], ) gdf = gpd.GeoDataFrame(geometry=[shapely_box], crs=src_crs) - ds = ds.rio.clip_box(*list(gdf.total_bounds)) + clipped = ds.rio.clip_box(*list(gdf.total_bounds)) # then reproject to shp crs - ds = ds.rio.reproject(shp.crs, resampling=resampling) - ds = ds.rio.clip_box(*list(shp.total_bounds)) + clipped = clipped.rio.reproject(shp.crs, resampling=resampling) + clipped = clipped.rio.clip_box(*list(shp.total_bounds)) else: - ds = ds.rio.clip_box(*list(shp.total_bounds)) - ds.load() + clipped = ds.rio.clip_box(*list(shp.total_bounds)) + clipped.load() ds.close() + return clipped except rxr.exceptions.NoDataInBounds: warnings.warn("No data found in bounds.") - ds = None - - return ds + return None diff --git a/tests/copernicus_data_space_ecosystem.py b/tests/copernicus_data_space_ecosystem.py index 5957f47..44061a2 100644 --- a/tests/copernicus_data_space_ecosystem.py +++ b/tests/copernicus_data_space_ecosystem.py @@ -29,6 +29,19 @@ def test_error_on_no_band(self): args["bands"] = [] self.assertRaises(ValueError, self.tg.create, **args) + def test_not_using_rasterio_virt_env(self): + """test when the rasterio environment is not used""" + args = self.arguments.copy() + args["bands"] = ["B02"] + args["use_virtual_rasterio_file"] = False + ds = self.tg.create(**args) + + self.assertTrue( + len(ds.time) == self.nr_time_steps + and self.width - 1 <= len(ds.x) <= self.width + 1 + and self.height - 1 <= len(ds.y) <= self.height + 1 + ) + def test_s2_mosaic(self): """test Sentinel-2 data""" args = self.arguments.copy()