
Commit

Merge pull request #1198 from mindsdb/staging
Release candidate
paxcema authored Oct 26, 2023
2 parents 9717e9e + be24227 commit a73a547
Showing 17 changed files with 282 additions and 82 deletions.
19 changes: 0 additions & 19 deletions .github/workflows/add_to_bugs_project.yml

This file was deleted.

16 changes: 16 additions & 0 deletions .github/workflows/add_to_pr_review.yml
@@ -0,0 +1,16 @@
name: Add Pull Requests to PR review project

on:
pull_request:
types:
- opened

jobs:
add-to-project:
name: Add pull request to project
runs-on: ubuntu-latest
steps:
- uses: actions/add-to-project@v0.5.0
with:
project-url: https://github.com/orgs/mindsdb/projects/65
github-token: ${{ secrets.ADD_TO_PROJECT_PAT }}
9 changes: 3 additions & 6 deletions .github/workflows/add_to_roadmap_project.yml
@@ -1,19 +1,16 @@
name: Add issue to roadmap project

on:
issues:
types:
- opened

jobs:
add-to-project:
name: Add issue to roadmap project
runs-on: ubuntu-latest
steps:
- uses: actions/add-to-project@v0.5.0
with:
# You can target a repository in a different organization
# to the issue
- project-url: https://github.com/orgs/mindsdb/projects/54
+ project-url: https://github.com/orgs/mindsdb/projects/53
github-token: ${{ secrets.ADD_TO_PROJECT_PAT }}
- labeled: enhancement
+ labeled: bug, enhancement
+ label-operator: OR
2 changes: 1 addition & 1 deletion .github/workflows/doc_build.yml
@@ -24,7 +24,7 @@ jobs:
run: |
sudo apt install pandoc
python -m pip install --upgrade pip
- pip install install 'Sphinx==4.1.2' 'sphinx-autoapi==1.8.4' 'sphinx-autodoc-typehints==1.12.0' 'sphinx-code-include==1.1.1' 'sphinx-rtd-theme==0.5.2' 'sphinxcontrib-applehelp==1.0.2' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.0' 'sphinxcontrib-jsmath==1.0.1' 'sphinxcontrib-napoleon==0.7' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' autoapi nbsphinx myst_parser pandoc jupyter matplotlib imblearn fsspec
+ pip install install 'Sphinx==6.2.1' 'sphinx-autoapi==3.0.0' 'sphinx-autodoc-typehints' 'sphinx-code-include' 'sphinx-rtd-theme' 'sphinxcontrib-applehelp' 'sphinxcontrib-devhelp' 'sphinxcontrib-htmlhelp' 'sphinxcontrib-jsmath' 'sphinxcontrib-napoleon' 'sphinxcontrib-qthelp' 'sphinxcontrib-serializinghtml' autoapi nbsphinx myst_parser pandoc jupyter matplotlib imblearn fsspec
pip install --no-cache-dir -e .
- name: Re-run notebooks
run: |
2 changes: 1 addition & 1 deletion docssrc/Makefile
@@ -26,4 +26,4 @@ github:
@cp -a build/html/. ../docs
@rm -r build
@touch ../docs/.nojekyll
- @echo lightwood.io > ../docs/CNAME
+ @echo https://mindsdb.github.io/lightwood > ../docs/CNAME
2 changes: 1 addition & 1 deletion lightwood/__about__.py
@@ -1,6 +1,6 @@
__title__ = 'lightwood'
__package_name__ = 'lightwood'
- __version__ = '23.8.1.0'
+ __version__ = '23.11.1.0'
__description__ = "Lightwood is a toolkit for automatic machine learning model building"
__email__ = "community@mindsdb.com"
__author__ = 'MindsDB Inc'
42 changes: 13 additions & 29 deletions lightwood/api/json_ai.py
@@ -203,6 +203,8 @@ def generate_json_ai(
]
)
else:

+ # add neural model
if not tss.is_timeseries:
submodels.extend(
[
@@ -226,10 +228,11 @@
"stop_after": "$problem_definition.seconds_per_mixer",
"search_hyperparameters": True,
},
- }
+ },
]
)

+ # add other models
if (not tss.is_timeseries or tss.horizon == 1) and dtype_dict[target] not in (dtype.num_array, dtype.cat_array):
submodels.extend(
[
@@ -255,34 +258,15 @@
},
]
)
- elif tss.is_timeseries and tss.horizon > 1 and tss.use_previous_target and \
- dtype_dict[target] in (dtype.integer, dtype.float, dtype.quantity):
-
- submodels.extend(
- [
- {
- "module": "SkTime",
- "args": {
- "stop_after": "$problem_definition.seconds_per_mixer",
- "horizon": "$problem_definition.timeseries_settings.horizon",
- },
- },
- {
- "module": "ETSMixer",
- "args": {
- "stop_after": "$problem_definition.seconds_per_mixer",
- "horizon": "$problem_definition.timeseries_settings.horizon",
- },
- },
- {
- "module": "ARIMAMixer",
- "args": {
- "stop_after": "$problem_definition.seconds_per_mixer",
- "horizon": "$problem_definition.timeseries_settings.horizon",
- },
- }
- ]
- )
+ # special forecasting dispatch
+ elif tss.is_timeseries:
+ submodels.extend([
+ {
+ "module": "XGBoostArrayMixer",
+ "args": {},
+ },
+ ])

model = {
"module": "BestOf",
@@ -571,7 +555,7 @@ def add_implicit_values(json_ai: JsonAI) -> JsonAI:
"target_encoder", "$encoders[self.target]"
)

elif mixers[i]["module"] == "LightGBMArray":
elif mixers[i]["module"] in ("LightGBMArray", "XGBoostArrayMixer"):
mixers[i]["args"]["input_cols"] = mixers[i]["args"].get(
"input_cols", "$input_cols"
)
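For orientation, a minimal sketch of how the new dispatch surfaces through lightwood's standard JSON-AI workflow (`json_ai_from_problem` → `code_from_json_ai` → `predictor_from_code`). The dataset, file name, and column names are hypothetical:

```python
import pandas as pd
from lightwood.api.high_level import (
    json_ai_from_problem, code_from_json_ai, predictor_from_code
)
from lightwood.api.types import ProblemDefinition

df = pd.read_csv("sales.csv")  # hypothetical multi-step forecasting dataset
pdef = ProblemDefinition.from_dict({
    "target": "units_sold",
    "timeseries_settings": {"order_by": "date", "window": 10, "horizon": 4},
})

json_ai = json_ai_from_problem(df, pdef)
# With horizon > 1 on a numeric target, the generated submodels should now
# include XGBoostArrayMixer instead of the removed SkTime/ETS/ARIMA trio.
print([m["module"] for m in json_ai.model["args"]["submodels"]])

predictor = predictor_from_code(code_from_json_ai(json_ai))
```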
2 changes: 1 addition & 1 deletion lightwood/api/types.py
@@ -313,7 +313,7 @@ class JsonAI:
analysis_blocks: Optional[List[Module]] = None
timeseries_transformer: Optional[Module] = None
timeseries_analyzer: Optional[Module] = None
- accuracy_functions: Optional[List[str]] = None
+ accuracy_functions: Optional[List[Union[str, Module]]] = None

@staticmethod
def from_dict(obj: Dict):
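Since `accuracy_functions` now accepts `Module` entries as well as plain names, a JSON-AI spec can mix built-in metrics with custom ones. A hedged sketch, assuming the dict shape `Module` entries use elsewhere in JSON-AI and a hypothetical metric path:

```python
# json_ai is a JsonAI instance, e.g. from the sketch above.
json_ai.accuracy_functions = [
    "r2_score",                                 # built-in metric, referenced by name
    {"module": "my_metrics.WAPE", "args": {}},  # custom metric (hypothetical module path)
]
```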
59 changes: 55 additions & 4 deletions lightwood/helpers/device.py
@@ -5,6 +5,24 @@


def is_cuda_compatible():
"""
Check if the system has CUDA-compatible devices with the required architecture and
compiled CUDA version.
This function checks the compatibility of CUDA devices available on the system by
comparing their architectures and the compiled CUDA version. It iterates through
the available devices and verifies if their architectures meet the minimum
requirement specified by the function, and also checks if the compiled CUDA
version is greater than a specific version.
Returns:
bool: True if there are compatible CUDA devices, otherwise False.
Example:
>>> is_compatible = is_cuda_compatible()
>>> print(is_compatible)
True
"""
compatible_device_count = 0
if torch.version.cuda is not None:
for d in range(device_count()):
@@ -23,6 +41,27 @@ def is_cuda_compatible():


def get_devices():
"""
Get the appropriate Torch device(s) based on CUDA availability and compatibility.
This function determines the appropriate Torch device(s) to be used for
computations based on the availability of CUDA and compatible devices. It checks
if CUDA is available and if the available CUDA devices are compatible according to
the 'is_cuda_compatible()' function. If compatible devices are found, the function
selects either the first available CUDA device or a randomly selected one based on
the 'RANDOM_GPU' environment variable. If CUDA is not available or no compatible
devices are found, the function returns the CPU device.
Returns:
Tuple: A tuple containing the selected Torch device and the number of available
devices.
Example:
>>> device, num_devices = get_devices()
>>> print(device)
cuda:0
>>> print(num_devices)
1
"""
if torch.cuda.is_available() and is_cuda_compatible():
device_str = "cuda"
available_devices = torch.cuda.device_count()
@@ -40,10 +79,22 @@

def get_device_from_name(device_name=''):
"""
- Returns the device specified as an argument.
- If the argument is left empty it will returns the output of get_devices().
- :param device_name: name of the device to use (default is an empty string), if is an empty string will use the output of get_devices() instead")
+ Get a Torch device based on the specified device name or default behavior.
+
+ This function returns a Torch device based on the specified device name or the
+ default behavior, which is to return the output of the 'get_devices()' function.
+
+ Args:
+ device_name (str, optional): Name of the device to use. Default is an empty
+ string.
+
+ Returns:
+ torch.device: The selected Torch device.
+
+ Example:
+ >>> device = get_device_from_name('cuda:1')
+ >>> print(device)
+ cuda:1
""" # noqa E501
if(device_name != ''):
device = torch.device(device_name)
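Taken together, a short usage sketch of these helpers. The `RANDOM_GPU` value shown is an assumption; the docstring only says the variable toggles random device selection:

```python
import os
from lightwood.helpers.device import get_devices, get_device_from_name

device, n_devices = get_devices()        # first compatible CUDA device, else CPU
device = get_device_from_name("cuda:1")  # pin a device; '' falls back to get_devices()

os.environ["RANDOM_GPU"] = "1"           # assumption: any set value enables random selection
device, n_devices = get_devices()
```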
54 changes: 54 additions & 0 deletions lightwood/helpers/torch.py
@@ -5,6 +5,32 @@


def concat_vectors_and_pad(vec_list, max_):
"""
Concatenates a list of input vectors and pads them to match a specified maximum
length.
This function takes a list of input vectors, concatenates them along a specified
dimension (dim=0), and then pads the concatenated vector to achieve a specified
maximum length. The padding is done with zeros.
Args:
vec_list (list of torch.Tensor): List of input vectors to concatenate and pad.
max_ (int): The maximum length of the concatenated and padded vector.
Returns:
torch.Tensor: The concatenated and padded vector.
Raises:
AssertionError: If the length of 'vec_list' is not greater than 0, or if it
exceeds 'max_len', or if 'max_len' is not greater than 0.
Example:
>>> input_tensors = [torch.tensor([1, 2]), torch.tensor([3, 4, 5])]
>>> max_length = 5
>>> concatenated_padded = concat_vectors_and_pad(input_tensors, max_length)
>>> print(concatenated_padded)
tensor([1, 2, 3, 4, 5])
"""
assert len(vec_list) > 0
assert len(vec_list) <= max_
assert max_ > 0
@@ -27,10 +53,29 @@ class LightwoodAutocast:
"""
Equivalent to torch.cuda.amp.autocast, but checks device compute capability
to activate the feature only when the GPU has tensor cores to leverage AMP.
**Attributes:**
* `active` (bool): Whether AMP is currently active. This attribute is at the class
level
**Usage:**
```python
>>> import lightwood.helpers.torch as lt
>>> with lt.LightwoodAutocast():
... # This code will be executed in AMP mode.
... pass
"""
active = False

def __init__(self, enabled=True):
"""
Initializes the context manager for Automatic Mixed Precision (AMP) functionality.
Args:
enabled (bool, optional): Whether to enable AMP. Defaults to True.
"""
self.major = 0 # GPU major version
torch_version = [int(i) for i in torch.__version__.split('.')[:-1]]

@@ -50,12 +95,18 @@ def __init__(self, enabled=True):
LightwoodAutocast.active = self._enabled

def __enter__(self):
"""
* `__enter__()`: Enters the context manager and enables AMP if it is not already enabled.
"""
if self._enabled:
self.prev = torch.is_autocast_enabled()
torch.set_autocast_enabled(self._enabled)
torch.autocast_increment_nesting()

def __exit__(self, *args):
"""
* `__exit__()`: Exits the context manager and disables AMP.
"""
if self._enabled:
# Drop the cache when we exit to a nesting level that's outside any instance of autocast
if torch.autocast_decrement_nesting() == 0:
@@ -64,6 +115,9 @@ def __exit__(self, *args):
return False

def __call__(self, func):
"""
* `__call__(self, func)`: Returns a decorated function that enables AMP when it is called.
"""
@functools.wraps(func)
def decorate_autocast(*args, **kwargs):
with self:
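Because `__call__` wraps the target function in the context manager, `LightwoodAutocast` also works as a decorator. A minimal sketch with placeholder `model` and `batch` arguments:

```python
import torch
from lightwood.helpers.torch import LightwoodAutocast

@LightwoodAutocast()  # the wrapped body runs under AMP when the GPU supports it
def forward_pass(model: torch.nn.Module, batch: torch.Tensor) -> torch.Tensor:
    return model(batch)
```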
3 changes: 2 additions & 1 deletion lightwood/mixer/__init__.py
@@ -3,6 +3,7 @@
from lightwood.mixer.neural import Neural
from lightwood.mixer.neural_ts import NeuralTs
from lightwood.mixer.xgboost import XGBoostMixer
+ from lightwood.mixer.xgboost_array import XGBoostArrayMixer
from lightwood.mixer.random_forest import RandomForest
from lightwood.mixer.sktime import SkTime
from lightwood.mixer.arima import ARIMAMixer
@@ -43,4 +44,4 @@

__all__ = ['BaseMixer', 'Neural', 'NeuralTs', 'LightGBM', 'RandomForest', 'LightGBMArray', 'Unit', 'Regression',
'SkTime', 'QClassic', 'ProphetMixer', 'ETSMixer', 'ARIMAMixer', 'NHitsMixer', 'GluonTSMixer', 'XGBoostMixer',
- 'TabTransformerMixer']
+ 'TabTransformerMixer', 'XGBoostArrayMixer']
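With both the import and the `__all__` entry in place, the new mixer resolves from the package root:

```python
from lightwood.mixer import XGBoostArrayMixer
```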