From fde515c4d758318777c9fa677376b16fd07fdb6d Mon Sep 17 00:00:00 2001
From: Gautam Ahuja <goutamahuja8387@gmail.com>
Date: Mon, 16 Dec 2024 16:44:44 +0530
Subject: [PATCH] run get_feat_suffix() only when feature names and feature ids
 are the same (#135)

---
 scarf/merge.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/scarf/merge.py b/scarf/merge.py
index 0e5f9b5..0c60133 100644
--- a/scarf/merge.py
+++ b/scarf/merge.py
@@ -15,11 +15,7 @@
 from dask.array.core import Array as daskArrayType
 from scipy.sparse import coo_matrix
 
-from .assay import (
-    ADTassay,
-    ATACassay,
-    RNAassay,
-)
+from .assay import Assay
 from .datastore.datastore import DataStore
 from .metadata import MetaData
 from .utils import (
@@ -96,7 +92,7 @@ class AssayMerge:
     def __init__(
         self,
         zarr_path: ZARRLOC,
-        assays: List[Union[RNAassay, ATACassay, ADTassay]],
+        assays: List[Assay],
         names: List[str],
         merge_assay_name: str,
         in_workspaces: Union[list[str], None] = None,
@@ -124,10 +120,10 @@ def __init__(
         )
         self.nCells: int = self.mergedCells.shape[0]
         self.featCollection: List[Dict[str, str]] = self._get_feat_ids(assays)
-        self.feat_suffix: Dict[int, int] = self.get_feat_suffix()
         self.feat_name_ids_same: bool = self.check_feat_ids(self.featCollection)
 
         if self.feat_name_ids_same is True:
+            self.feat_suffix: Dict[int, int] = self.get_feat_suffix()
             self.featCollection = self.update_feat_ids()
             self.featCollection_map: List[Dict[str, str]] = (
                 self.update_feat_ids_for_map()
@@ -197,7 +193,7 @@ def perform_randomization_rows(
         for i in range(len(permutations)):
             in__dict: dict[int, np.ndarray] = {}
             last_key = i - 1 if i > 0 else 0
-            offset = nCells[last_key] + offset if i > 0 else 0
+            offset = nCells[last_key] + offset if i > 0 else 0  # noqa: F821
             for j, arr in enumerate(permutations[i]):
                 in__dict[j] = arr + offset
             permutations_rows_offset[i] = in__dict
@@ -580,7 +576,9 @@ def _ini_cell_data(self, overwrite) -> None:
                 f"cellData already exists so skipping _ini_cell_data"  # noqa: F541
             )
 
-    def _dask_to_coo(self, d_arr, order: np.ndarray, order_map: np.ndarray, n_threads: int) -> coo_matrix:
+    def _dask_to_coo(
+        self, d_arr, order: np.ndarray, order_map: np.ndarray, n_threads: int
+    ) -> coo_matrix:
         """
         Convert a Dask array to a sparse COO matrix.
         Args:
@@ -780,7 +778,9 @@ def generate_dummy_assay(self, ds: DataStore, assay_name: str) -> DummyAssay:
 
         # Create a dummy assay with zero counts and matching features
         dummy_shape = (ds.cells.N, reference_assay.feats.N)
-        dummy_counts = zarr.zeros(dummy_shape, chunks=chunkShape, dtype=reference_assay.rawData.dtype)
+        dummy_counts = zarr.zeros(
+            dummy_shape, chunks=chunkShape, dtype=reference_assay.rawData.dtype
+        )
         dummy_counts = from_array(dummy_counts, chunks=chunkShape)
         dummy_assay = DummyAssay(
             ds, dummy_counts, reference_assay.feats, reference_assay.name