run get_feat_suffix() only when feature names and feature ids are same (
Gautam8387 authored Dec 16, 2024
1 parent 31fe772 commit fde515c
Showing 1 changed file with 10 additions and 10 deletions.
scarf/merge.py (10 additions & 10 deletions)
@@ -15,11 +15,7 @@
 from dask.array.core import Array as daskArrayType
 from scipy.sparse import coo_matrix
 
-from .assay import (
-    ADTassay,
-    ATACassay,
-    RNAassay,
-)
+from .assay import Assay
 from .datastore.datastore import DataStore
 from .metadata import MetaData
 from .utils import (
@@ -96,7 +92,7 @@ class AssayMerge:
     def __init__(
         self,
         zarr_path: ZARRLOC,
-        assays: List[Union[RNAassay, ATACassay, ADTassay]],
+        assays: List[Assay],
         names: List[str],
         merge_assay_name: str,
         in_workspaces: Union[list[str], None] = None,
@@ -124,10 +120,10 @@ def __init__(
         )
         self.nCells: int = self.mergedCells.shape[0]
         self.featCollection: List[Dict[str, str]] = self._get_feat_ids(assays)
-        self.feat_suffix: Dict[int, int] = self.get_feat_suffix()
         self.feat_name_ids_same: bool = self.check_feat_ids(self.featCollection)
 
         if self.feat_name_ids_same is True:
+            self.feat_suffix: Dict[int, int] = self.get_feat_suffix()
             self.featCollection = self.update_feat_ids()
             self.featCollection_map: List[Dict[str, str]] = (
                 self.update_feat_ids_for_map()
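This hunk is the substance of the commit: feat_suffix is now computed only inside the branch where feature names and feature IDs are identical, instead of unconditionally. A minimal standalone sketch of that intent, using hypothetical helper logic rather than Scarf's actual implementation:

from typing import Dict, List

def names_equal_ids(collections: List[Dict[str, str]]) -> bool:
    # Stand-in for check_feat_ids(): True when every feature ID is
    # identical to its feature name across all input assays.
    return all(fid == name for coll in collections for fid, name in coll.items())

collections = [{"GeneA": "GeneA", "GeneB": "GeneB"}, {"GeneA": "GeneA"}]

if names_equal_ids(collections):
    # Only now pay for the suffix bookkeeping: tag each assay's feature
    # IDs with a per-assay suffix so merged features stay distinguishable.
    suffixed = [
        {f"{fid}_{i + 1}": name for fid, name in coll.items()}
        for i, coll in enumerate(collections)
    ]
    print(suffixed)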
@@ -197,7 +193,7 @@ def perform_randomization_rows(
         for i in range(len(permutations)):
             in__dict: dict[int, np.ndarray] = {}
             last_key = i - 1 if i > 0 else 0
-            offset = nCells[last_key] + offset if i > 0 else 0
+            offset = nCells[last_key] + offset if i > 0 else 0  # noqa: F821
             for j, arr in enumerate(permutations[i]):
                 in__dict[j] = arr + offset
             permutations_rows_offset[i] = in__dict
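The new # noqa: F821 silences flake8's undefined-name warning: offset appears on the right-hand side before any unconditional assignment, although the i > 0 guard makes it safe at runtime. The surrounding loop shifts each dataset's permuted row indices by the cumulative cell count of the datasets that precede it in the merge; a self-contained sketch of that pattern with toy values:

import numpy as np

# Toy per-dataset cell counts and per-dataset row permutations.
n_cells = {0: 3, 1: 2, 2: 4}
permutations = {
    0: [np.array([2, 0, 1])],
    1: [np.array([1, 0])],
    2: [np.array([0, 3, 1, 2])],
}

rows_with_offset: dict[int, dict[int, np.ndarray]] = {}
offset = 0
for i in range(len(permutations)):
    if i > 0:
        # Shift this dataset past all cells of the previously merged datasets.
        offset += n_cells[i - 1]
    rows_with_offset[i] = {j: arr + offset for j, arr in enumerate(permutations[i])}

print(rows_with_offset)
# Dataset 1 indices start at 3 and dataset 2 at 5 on the merged cell axis.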
@@ -580,7 +576,9 @@ def _ini_cell_data(self, overwrite) -> None:
                 f"cellData already exists so skipping _ini_cell_data"  # noqa: F541
             )
 
-    def _dask_to_coo(self, d_arr, order: np.ndarray, order_map: np.ndarray, n_threads: int) -> coo_matrix:
+    def _dask_to_coo(
+        self, d_arr, order: np.ndarray, order_map: np.ndarray, n_threads: int
+    ) -> coo_matrix:
         """
         Convert a Dask array to a sparse COO matrix.
         Args:
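The _dask_to_coo change above is a pure reflow of the signature. For readers unfamiliar with the conversion itself, a bare-bones dask-to-COO example, which ignores the order / order_map remapping and the threading that the real method handles:

import dask.array as da
import numpy as np
from scipy.sparse import coo_matrix

# Toy dask array standing in for a block of merged count data.
d_arr = da.from_array(np.array([[0, 2, 0], [1, 0, 3]]), chunks=(1, 3))

dense = d_arr.compute()                 # materialize this block in memory
rows, cols = np.nonzero(dense)          # coordinates of the non-zero entries
sparse = coo_matrix((dense[rows, cols], (rows, cols)), shape=dense.shape)

print(sparse.toarray())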
@@ -780,7 +778,9 @@ def generate_dummy_assay(self, ds: DataStore, assay_name: str) -> DummyAssay:
 
         # Create a dummy assay with zero counts and matching features
         dummy_shape = (ds.cells.N, reference_assay.feats.N)
-        dummy_counts = zarr.zeros(dummy_shape, chunks=chunkShape, dtype=reference_assay.rawData.dtype)
+        dummy_counts = zarr.zeros(
+            dummy_shape, chunks=chunkShape, dtype=reference_assay.rawData.dtype
+        )
         dummy_counts = from_array(dummy_counts, chunks=chunkShape)
         dummy_assay = DummyAssay(
             ds, dummy_counts, reference_assay.feats, reference_assay.name
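The final hunk only reflows the zarr.zeros(...) call, but the pattern is worth seeing in isolation: allocate a chunked, zero-filled zarr array for the dummy counts and wrap it with dask so downstream code can treat it like any other lazy count matrix. A minimal sketch with made-up dimensions standing in for ds.cells.N, reference_assay.feats.N, and chunkShape:

import zarr
from dask.array import from_array

# Hypothetical dimensions and chunking; the real values come from the DataStore.
n_cells, n_feats = 1000, 200
chunk_shape = (500, 200)

# Zero-filled zarr array; zarr serves zeros from the fill value, so no dense
# matrix is materialized in memory.
dummy_counts = zarr.zeros((n_cells, n_feats), chunks=chunk_shape, dtype="uint32")

# Wrap with dask using the same chunking for lazy, blockwise processing.
dummy_dask = from_array(dummy_counts, chunks=chunk_shape)
print(dummy_dask.shape, dummy_dask.chunksize)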