From fde515c4d758318777c9fa677376b16fd07fdb6d Mon Sep 17 00:00:00 2001 From: Gautam Ahuja Date: Mon, 16 Dec 2024 16:44:44 +0530 Subject: [PATCH] run get_feat_suffix() only when feature names and feature ids are same (#135) --- scarf/merge.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/scarf/merge.py b/scarf/merge.py index 0e5f9b5..0c60133 100644 --- a/scarf/merge.py +++ b/scarf/merge.py @@ -15,11 +15,7 @@ from dask.array.core import Array as daskArrayType from scipy.sparse import coo_matrix -from .assay import ( - ADTassay, - ATACassay, - RNAassay, -) +from .assay import Assay from .datastore.datastore import DataStore from .metadata import MetaData from .utils import ( @@ -96,7 +92,7 @@ class AssayMerge: def __init__( self, zarr_path: ZARRLOC, - assays: List[Union[RNAassay, ATACassay, ADTassay]], + assays: List[Assay], names: List[str], merge_assay_name: str, in_workspaces: Union[list[str], None] = None, @@ -124,10 +120,10 @@ def __init__( ) self.nCells: int = self.mergedCells.shape[0] self.featCollection: List[Dict[str, str]] = self._get_feat_ids(assays) - self.feat_suffix: Dict[int, int] = self.get_feat_suffix() self.feat_name_ids_same: bool = self.check_feat_ids(self.featCollection) if self.feat_name_ids_same is True: + self.feat_suffix: Dict[int, int] = self.get_feat_suffix() self.featCollection = self.update_feat_ids() self.featCollection_map: List[Dict[str, str]] = ( self.update_feat_ids_for_map() @@ -197,7 +193,7 @@ def perform_randomization_rows( for i in range(len(permutations)): in__dict: dict[int, np.ndarray] = {} last_key = i - 1 if i > 0 else 0 - offset = nCells[last_key] + offset if i > 0 else 0 + offset = nCells[last_key] + offset if i > 0 else 0 # noqa: F821 for j, arr in enumerate(permutations[i]): in__dict[j] = arr + offset permutations_rows_offset[i] = in__dict @@ -580,7 +576,9 @@ def _ini_cell_data(self, overwrite) -> None: f"cellData already exists so skipping _ini_cell_data" # noqa: F541 ) - def _dask_to_coo(self, d_arr, order: np.ndarray, order_map: np.ndarray, n_threads: int) -> coo_matrix: + def _dask_to_coo( + self, d_arr, order: np.ndarray, order_map: np.ndarray, n_threads: int + ) -> coo_matrix: """ Convert a Dask array to a sparse COO matrix. Args: @@ -780,7 +778,9 @@ def generate_dummy_assay(self, ds: DataStore, assay_name: str) -> DummyAssay: # Create a dummy assay with zero counts and matching features dummy_shape = (ds.cells.N, reference_assay.feats.N) - dummy_counts = zarr.zeros(dummy_shape, chunks=chunkShape, dtype=reference_assay.rawData.dtype) + dummy_counts = zarr.zeros( + dummy_shape, chunks=chunkShape, dtype=reference_assay.rawData.dtype + ) dummy_counts = from_array(dummy_counts, chunks=chunkShape) dummy_assay = DummyAssay( ds, dummy_counts, reference_assay.feats, reference_assay.name