Merge branch 'branch-25.04' into bug-mse_objective_nan

rapidsai · Mar 5, 2025 · a75c084 · a75c084
2 parents 8a2e116 + 6ff0ef7
commit a75c084
Show file tree

Hide file tree

Showing 16 changed files with 267 additions and 130 deletions.
diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml
@@ -0,0 +1,81 @@
+# This file is generated by `rapids-dependency-file-generator`.
+# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
+channels:
+- rapidsai
+- rapidsai-nightly
+- dask/label/dev
+- conda-forge
+- nvidia
+dependencies:
+- c-compiler
+- certifi
+- cmake>=3.30.4
+- cuda-python>=11.8.5,<12.0a0
+- cuda-version=11.8
+- cudatoolkit
+- cudf==25.4.*,>=0.0.0a0
+- cupy>=12.0.0
+- cuvs==25.4.*,>=0.0.0a0
+- cxx-compiler
+- cython>=3.0.0
+- dask-cuda==25.4.*,>=0.0.0a0
+- dask-cudf==25.4.*,>=0.0.0a0
+- dask-ml
+- doxygen=1.9.1
+- gcc_linux-aarch64=11.*
+- graphviz
+- hdbscan>=0.8.39,<0.8.40
+- hypothesis>=6.0,<7
+- ipykernel
+- ipython
+- joblib>=0.11
+- libcublas-dev=11.11.3.6
+- libcublas=11.11.3.6
+- libcufft-dev=10.9.0.58
+- libcufft=10.9.0.58
+- libcumlprims==25.4.*,>=0.0.0a0
+- libcurand-dev=10.3.0.86
+- libcurand=10.3.0.86
+- libcusolver-dev=11.4.1.48
+- libcusolver=11.4.1.48
+- libcusparse-dev=11.7.5.86
+- libcusparse=11.7.5.86
+- libcuvs==25.4.*,>=0.0.0a0
+- libraft==25.4.*,>=0.0.0a0
+- librmm==25.4.*,>=0.0.0a0
+- nbsphinx
+- ninja
+- nltk
+- numba>=0.59.1,<0.61.0a0
+- numpy>=1.23,<3.0a0
+- numpydoc
+- nvcc_linux-aarch64=11.8
+- packaging
+- pydata-sphinx-theme!=0.14.2
+- pylibraft==25.4.*,>=0.0.0a0
+- pynndescent
+- pytest-benchmark
+- pytest-cases
+- pytest-cov
+- pytest-xdist
+- pytest==7.*
+- python>=3.10,<3.13
+- raft-dask==25.4.*,>=0.0.0a0
+- rapids-build-backend>=0.3.0,<0.4.0.dev0
+- rapids-dask-dependency==25.4.*,>=0.0.0a0
+- rapids-logger==0.1.*,>=0.0.0a0
+- recommonmark
+- rmm==25.4.*,>=0.0.0a0
+- scikit-build-core>=0.10.0
+- scikit-learn==1.5.*
+- scipy>=1.8.0
+- seaborn
+- sphinx-copybutton
+- sphinx-markdown-tables
+- sphinx<8.2.0
+- statsmodels
+- sysroot_linux-aarch64==2.28
+- treelite==4.4.1
+- umap-learn==0.5.6
+- xgboost>=2.1.0
+name: all_cuda-118_arch-aarch64
diff --git a/conda/environments/all_cuda-128_arch-aarch64.yaml b/conda/environments/all_cuda-128_arch-aarch64.yaml
@@ -0,0 +1,77 @@
+# This file is generated by `rapids-dependency-file-generator`.
+# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
+channels:
+- rapidsai
+- rapidsai-nightly
+- dask/label/dev
+- conda-forge
+- nvidia
+dependencies:
+- c-compiler
+- certifi
+- cmake>=3.30.4
+- cuda-cudart-dev
+- cuda-nvcc
+- cuda-profiler-api
+- cuda-python>=12.6.2,<13.0a0
+- cuda-version=12.8
+- cudf==25.4.*,>=0.0.0a0
+- cupy>=12.0.0
+- cuvs==25.4.*,>=0.0.0a0
+- cxx-compiler
+- cython>=3.0.0
+- dask-cuda==25.4.*,>=0.0.0a0
+- dask-cudf==25.4.*,>=0.0.0a0
+- dask-ml
+- doxygen=1.9.1
+- gcc_linux-aarch64=13.*
+- graphviz
+- hdbscan>=0.8.39,<0.8.40
+- hypothesis>=6.0,<7
+- ipykernel
+- ipython
+- joblib>=0.11
+- libcublas-dev
+- libcufft-dev
+- libcumlprims==25.4.*,>=0.0.0a0
+- libcurand-dev
+- libcusolver-dev
+- libcusparse-dev
+- libcuvs==25.4.*,>=0.0.0a0
+- libraft==25.4.*,>=0.0.0a0
+- librmm==25.4.*,>=0.0.0a0
+- nbsphinx
+- ninja
+- nltk
+- numba>=0.59.1,<0.61.0a0
+- numpy>=1.23,<3.0a0
+- numpydoc
+- packaging
+- pydata-sphinx-theme!=0.14.2
+- pylibraft==25.4.*,>=0.0.0a0
+- pynndescent
+- pytest-benchmark
+- pytest-cases
+- pytest-cov
+- pytest-xdist
+- pytest==7.*
+- python>=3.10,<3.13
+- raft-dask==25.4.*,>=0.0.0a0
+- rapids-build-backend>=0.3.0,<0.4.0.dev0
+- rapids-dask-dependency==25.4.*,>=0.0.0a0
+- rapids-logger==0.1.*,>=0.0.0a0
+- recommonmark
+- rmm==25.4.*,>=0.0.0a0
+- scikit-build-core>=0.10.0
+- scikit-learn==1.5.*
+- scipy>=1.8.0
+- seaborn
+- sphinx-copybutton
+- sphinx-markdown-tables
+- sphinx<8.2.0
+- statsmodels
+- sysroot_linux-aarch64==2.28
+- treelite==4.4.1
+- umap-learn==0.5.6
+- xgboost>=2.1.0
+name: all_cuda-128_arch-aarch64
diff --git a/cpp/src/umap/umap.cuh b/cpp/src/umap/umap.cuh
@@ -34,7 +34,10 @@ static const int TPB_X = 256;
 
 inline bool dispatch_to_uint64_t(int n_rows, int n_neighbors, int n_components)
 {
-  uint64_t nnz1 = static_cast<uint64_t>(n_rows) * n_neighbors;
+  // The fuzzy simplicial set graph can have at most 2 * n * n_neighbors elements after
+  // symmetrization and removal of zeroes
+  uint64_t nnz1 = 2 * static_cast<uint64_t>(n_rows) * n_neighbors;
+  // The embeddings have n * n_components elements
   uint64_t nnz2 = static_cast<uint64_t>(n_rows) * n_components;
   return nnz1 > std::numeric_limits<int32_t>::max() || nnz2 > std::numeric_limits<int32_t>::max();
 }

diff --git a/dependencies.yaml b/dependencies.yaml
@@ -4,7 +4,7 @@ files:
     output: conda
     matrix:
       cuda: ["11.8", "12.8"]
-      arch: [x86_64]
+      arch: [x86_64, aarch64]
     includes:
       - common_build
       - cuda

diff --git a/python/cuml/cuml/cluster/dbscan.pyx b/python/cuml/cuml/cluster/dbscan.pyx
@@ -320,8 +320,8 @@ class DBSCAN(UniversalBase,
 
             # metric
             metric_parsing = {
-                "L2": DistanceType.L2SqrtUnexpanded,
-                "euclidean": DistanceType.L2SqrtUnexpanded,
+                "L2": DistanceType.L2SqrtExpanded,
+                "euclidean": DistanceType.L2SqrtExpanded,
                 "cosine": DistanceType.CosineExpanded,
                 "precomputed": DistanceType.Precomputed
             }

diff --git a/python/cuml/cuml/common/array_descriptor.py b/python/cuml/cuml/common/array_descriptor.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -66,7 +66,6 @@ def __init__(self, order="K"):
 
     def __set_name__(self, owner, name):
         self.name = name
-        setattr(owner, name + "_order", self.order)
 
     def _get_meta(
         self, instance, throw_on_missing=False

diff --git a/python/cuml/cuml/common/numba_utils.py b/python/cuml/cuml/common/numba_utils.py
diff --git a/python/cuml/cuml/internals/base.pyx b/python/cuml/cuml/internals/base.pyx
@@ -39,6 +39,7 @@ except ImportError:
 import cuml
 import cuml.common
 from cuml.common.sparse_utils import is_sparse
+from cuml.common.array_descriptor import CumlArrayDescriptor
 import cuml.internals.logger as logger
 import cuml.internals
 from cuml.internals import api_context_managers
@@ -664,55 +665,40 @@ class UniversalBase(Base):
         self._cpu_model = self._cpu_model_class(**filtered_kwargs)
 
     def gpu_to_cpu(self):
-        # transfer attributes from GPU to CPU estimator
-        for attr in self.get_attr_names():
-            if hasattr(self, attr):
-                cu_attr = getattr(self, attr)
-                if isinstance(cu_attr, CumlArray):
-                    # transform cumlArray to numpy and set it
-                    # as an attribute in the CPU estimator
-                    setattr(self._cpu_model, attr, cu_attr.to_output('numpy'))
-                elif isinstance(cu_attr, cp_ndarray):
-                    # transform cupy to numpy and set it
-                    # as an attribute in the CPU estimator
-                    setattr(self._cpu_model, attr, cp.asnumpy(cu_attr))
-                else:
-                    # transfer all other types of attributes directly
-                    setattr(self._cpu_model, attr, cu_attr)
+        """Transfer attributes from GPU estimator to CPU estimator."""
+        for name in self.get_attr_names():
+            try:
+                value = getattr(self, name)
+            except AttributeError:
+                # Skip missing attributes
+                continue
+
+            # Coerce all arrays to numpy
+            if isinstance(value, CumlArray):
+                value = value.to_output("numpy")
+            elif isinstance(value, cp_ndarray):
+                value = cp.asnumpy(value)
+
+            setattr(self._cpu_model, name, value)
 
     def cpu_to_gpu(self):
-        # transfer attributes from CPU to GPU estimator
-        with using_memory_type(
-            (MemoryType.host, MemoryType.device)[
-                is_cuda_available()
-            ]
-        ):
-            for attr in self.get_attr_names():
-                if hasattr(self._cpu_model, attr):
-                    cpu_attr = getattr(self._cpu_model, attr)
-                    # if the cpu attribute is an array
-                    if isinstance(cpu_attr, np.ndarray):
-                        # get data order wished for by
-                        # CumlArrayDescriptor
-                        if hasattr(self, attr + '_order'):
-                            order = getattr(self, attr + '_order')
-                        else:
-                            order = 'K'
-                        # transfer array to gpu and set it as a cuml
-                        # attribute
-                        cuml_array = input_to_cuml_array(
-                            cpu_attr,
-                            order=order,
-                            convert_to_mem_type=(
-                                MemoryType.host,
-                                MemoryType.device
-                            )[is_cuda_available()]
-                        )[0]
-                        setattr(self, attr, cuml_array)
-                    else:
-                        # transfer all other types of attributes
-                        # directly
-                        setattr(self, attr, cpu_attr)
+        """Transfer attributes from CPU estimator to GPU estimator."""
+        mem_type = MemoryType.device if is_cuda_available() else MemoryType.host
+        with using_memory_type(mem_type):
+            for name in self.get_attr_names():
+                try:
+                    value = getattr(self._cpu_model, name)
+                except AttributeError:
+                    # Skip missing attributes
+                    continue
+
+                if isinstance(value, np.ndarray):
+                    # Coerce arrays to CumlArrays with the proper order
+                    descriptor = getattr(type(self), name, None)
+                    order = descriptor.order if isinstance(descriptor, CumlArrayDescriptor) else "K"
+                    value = input_to_cuml_array(value, order=order, convert_to_mem_type=mem_type)[0]
+
+                setattr(self, name, value)
 
     def args_to_cpu(self, *args, **kwargs):
         # put all the args on host

diff --git a/python/cuml/cuml/internals/logger.pyx b/python/cuml/cuml/internals/logger.pyx
@@ -94,6 +94,14 @@ def set_level(level):
         return context_object
 
 
+def get_level() -> level_enum:
+    """
+    Get the current logging level.
+    """
+    IF GPUBUILD == 1:
+        return default_logger().level()
+
+
 cdef class PatternSetter:
     """Internal "context manager" object for restoring previous log pattern"""