Implement KedroDataCatalog CLI tests (#4456)
* Draft for TestCatalogFactoryCommands

Signed-off-by: Elena Khaustova <[email protected]>

* Added template for TestCatalogListCommand

Signed-off-by: Elena Khaustova <[email protected]>

* Updated list command to print _LazyDataset actual type

Signed-off-by: Elena Khaustova <[email protected]>

* Test fixture parametrization

Signed-off-by: Elena Khaustova <[email protected]>

* Implemented tests for TestCatalogCreateCommand

Signed-off-by: Elena Khaustova <[email protected]>

* Added rank factories tests

Signed-off-by: Elena Khaustova <[email protected]>

* Updated test_catalog.py to use fixtures

Signed-off-by: Elena Khaustova <[email protected]>

* Removed test_kedro_data_catalog.py

Signed-off-by: Elena Khaustova <[email protected]>

* Duplicated fake project cli

Signed-off-by: Elena Khaustova <[email protected]>

* Removed duplicated fixture

Signed-off-by: Elena Khaustova <[email protected]>

* Added a clarification about setting fixture params

Signed-off-by: Elena Khaustova <[email protected]>

* Returned fake_project_cli_parametrized

Signed-off-by: Elena Khaustova <[email protected]>

* Returned trial fail loop

Signed-off-by: Elena Khaustova <[email protected]>

---------

Signed-off-by: Elena Khaustova <[email protected]>
ElenaKhaustova authored Feb 7, 2025
1 parent a064c46 commit a47677d
Showing 4 changed files with 210 additions and 38 deletions.
7 changes: 5 additions & 2 deletions kedro/framework/cli/catalog.py
```diff
@@ -14,6 +14,7 @@
 from kedro.framework.project import pipelines, settings
 from kedro.framework.session import KedroSession
 from kedro.io.data_catalog import DataCatalog
+from kedro.io.kedro_data_catalog import _LazyDataset

 if TYPE_CHECKING:
     from pathlib import Path
@@ -64,7 +65,6 @@ def list_datasets(metadata: ProjectMetadata, pipeline: str, env: str) -> None:

     session = _create_session(metadata.package_name, env=env)
     context = session.load_context()
-
     try:
         data_catalog = context.catalog
         datasets_meta = data_catalog._datasets
@@ -126,7 +126,10 @@ def _map_type_to_datasets(
     """
     mapping = defaultdict(list)  # type: ignore[var-annotated]
     for dataset_name in filterfalse(is_parameter, datasets):
-        ds_type = datasets_meta[dataset_name].__class__.__name__
+        if isinstance(datasets_meta[dataset_name], _LazyDataset):
+            ds_type = str(datasets_meta[dataset_name]).split(".")[-1]
+        else:
+            ds_type = datasets_meta[dataset_name].__class__.__name__
         if dataset_name not in mapping[ds_type]:
             mapping[ds_type].append(dataset_name)
     return mapping
```
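
The `_LazyDataset` branch above is needed because, under `KedroDataCatalog`, entries in `datasets_meta` may still be lazy placeholders whose `__class__.__name__` would read `_LazyDataset` instead of the real dataset type. Below is a minimal, self-contained sketch of the grouping logic; the `_FakeLazyDataset` stand-in and its dotted-path string representation are assumptions for illustration, not Kedro's actual class.

```python
from collections import defaultdict


class _FakeLazyDataset:
    """Stand-in for a lazy catalog entry; its str() is assumed to be the
    fully qualified dataset class path (an illustrative assumption)."""

    def __init__(self, class_path: str) -> None:
        self._class_path = class_path

    def __str__(self) -> str:
        return self._class_path


def group_by_type(datasets_meta: dict) -> dict:
    """Group dataset names by type, mirroring the branch added in this commit."""
    mapping = defaultdict(list)
    for name, meta in datasets_meta.items():
        if isinstance(meta, _FakeLazyDataset):
            # The lazy entry is not materialised, so take the class name from
            # its dotted path rather than from __class__.__name__.
            ds_type = str(meta).split(".")[-1]
        else:
            ds_type = meta.__class__.__name__
        mapping[ds_type].append(name)
    return mapping


if __name__ == "__main__":
    meta = {
        "reviews": _FakeLazyDataset("kedro_datasets.pandas.csv_dataset.CSVDataset"),
        "model_input": {"in": "memory"},  # already-materialised object: plain dict
    }
    print(dict(group_by_type(meta)))
    # {'CSVDataset': ['reviews'], 'dict': ['model_input']}
```
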
66 changes: 64 additions & 2 deletions tests/framework/cli/conftest.py
```diff
@@ -27,6 +27,7 @@
 from kedro.framework.cli.starters import create_cli
 from kedro.framework.project import configure_project, pipelines, settings
 from kedro.framework.startup import ProjectMetadata
+from kedro.io import KedroDataCatalog

 REPO_NAME = "dummy_project"
 PACKAGE_NAME = "dummy_package"
@@ -116,7 +117,9 @@ def fake_kedro_cli():

 @fixture(scope="module")
 def fake_project_cli(
-    fake_repo_path: Path, dummy_config: Path, fake_kedro_cli: click.CommandCollection
+    fake_repo_path: Path,
+    dummy_config: Path,
+    fake_kedro_cli: click.CommandCollection,
 ):
     old_settings = settings.as_dict()
     starter_path = Path(__file__).resolve().parents[3]
@@ -126,7 +129,64 @@ def fake_project_cli(
     )
     # Delete the project logging.yml, which leaves behind info.log and error.log files.
     # This leaves logging config as the framework default.
-    (fake_repo_path / "conf" / "logging.yml").unlink()
+    try:
+        (fake_repo_path / "conf" / "logging.yml").unlink()
+    except FileNotFoundError:
+        pass
+
+    # NOTE: Here we load a couple of modules, as they would be imported in
+    # the code and tests.
+    # It's safe to remove the new entries from path due to the python
+    # module caching mechanism. Any `reload` on it will not work though.
+    old_path = sys.path.copy()
+    sys.path = [str(fake_repo_path / "src"), *sys.path]
+
+    import_module(PACKAGE_NAME)
+    configure_project(PACKAGE_NAME)
+    yield fake_kedro_cli
+
+    # reset side-effects of configure_project
+    pipelines.configure()
+
+    for key, value in old_settings.items():
+        settings.set(key, value)
+    sys.path = old_path
+
+    # configure_project does imports that add PACKAGE_NAME.pipelines,
+    # PACKAGE_NAME.settings to sys.modules. These need to be removed.
+    # Ideally we would reset sys.modules to exactly what it was before
+    # running anything, but removal of distutils.build.commands from
+    # sys.modules mysteriously makes some tests for `kedro micropkg package`
+    # fail on Windows, Python 3.7 and 3.8.
+    for module in list(sys.modules.keys()):
+        if module.startswith(PACKAGE_NAME):
+            del sys.modules[module]
+
+
+# We use `None` as the first parameter since the default `DATA_CATALOG_CLASS`
+# set in `settings.py` is `DataCatalog`. We do not set `DataCatalog` explicitly
+# in fixture params to test loading the default class.
+@fixture(scope="module", params=[None, KedroDataCatalog])
+def fake_project_cli_parametrized(
+    fake_repo_path: Path,
+    dummy_config: Path,
+    fake_kedro_cli: click.CommandCollection,
+    request,
+):
+    # TODO: remove parametrization after removing old catalog as KedroDataCatalog will be default
+    default_catalog = request.param
+    old_settings = settings.as_dict()
+    starter_path = Path(__file__).resolve().parents[3]
+    starter_path = starter_path / "features" / "steps" / "test_starter"
+    CliRunner().invoke(
+        fake_kedro_cli, ["new", "-c", str(dummy_config), "--starter", str(starter_path)]
+    )
+    # Delete the project logging.yml, which leaves behind info.log and error.log files.
+    # This leaves logging config as the framework default.
+    try:
+        (fake_repo_path / "conf" / "logging.yml").unlink()
+    except FileNotFoundError:
+        pass

     # NOTE: Here we load a couple of modules, as they would be imported in
     # the code and tests.
@@ -137,6 +197,8 @@ def fake_project_cli(

     import_module(PACKAGE_NAME)
     configure_project(PACKAGE_NAME)
+    if default_catalog is not None:
+        settings.set("DATA_CATALOG_CLASS", default_catalog)
     yield fake_kedro_cli

     # reset side-effects of configure_project
```
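
The parametrized fixture makes each CLI test run twice: once against the default `DataCatalog` (param `None`) and once with `DATA_CATALOG_CLASS` overridden to `KedroDataCatalog`. A hedged sketch of how a test might consume it is shown below; the `fake_metadata` fixture name and the exact `catalog list` invocation are assumptions modelled on the existing CLI test style, not lines taken from this commit's test_catalog.py.

```python
from click.testing import CliRunner


def test_catalog_list_runs_for_both_catalogs(fake_project_cli_parametrized, fake_metadata):
    # Executed once per fixture param: default DataCatalog, then KedroDataCatalog
    # set via settings.set("DATA_CATALOG_CLASS", ...).
    result = CliRunner().invoke(
        fake_project_cli_parametrized, ["catalog", "list"], obj=fake_metadata
    )
    assert result.exit_code == 0, result.output
```
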
2 changes: 1 addition & 1 deletion tests/framework/cli/pipeline/test_pipeline.py
```diff
@@ -204,7 +204,7 @@ def test_catalog_and_params(

         with KedroSession.create() as session:
             ctx = session.load_context()
-            assert isinstance(ctx.catalog._datasets["ds_from_pipeline"], CSVDataset)
+            assert isinstance(ctx.catalog._get_dataset("ds_from_pipeline"), CSVDataset)
             assert isinstance(ctx.catalog.load("ds_from_pipeline"), DataFrame)
             assert ctx.params["params_from_pipeline"] == params_dict["params_from_pipeline"]
```
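
Switching from `_datasets[...]` indexing to `_get_dataset(...)` keeps the assertion valid for both catalog implementations: with `KedroDataCatalog` the raw registry may still hold a lazy placeholder, while the accessor materialises the entry first. A toy illustration of that difference follows, using made-up names and no Kedro imports.

```python
class _LazyStub:
    """Toy placeholder standing in for a dataset entry that has not been materialised yet."""


class ToyCatalog:
    """Illustration only: materialises lazy entries on access, loosely mimicking
    why an accessor method is safer to assert on than indexing the raw registry."""

    def __init__(self) -> None:
        self._datasets = {"ds_from_pipeline": _LazyStub()}

    def _get_dataset(self, name: str) -> dict:
        # Replace the placeholder with a "real" dataset on first access.
        if isinstance(self._datasets[name], _LazyStub):
            self._datasets[name] = {"type": "pandas.CSVDataset"}
        return self._datasets[name]


catalog = ToyCatalog()
assert isinstance(catalog._datasets["ds_from_pipeline"], _LazyStub)  # raw entry is still lazy
assert isinstance(catalog._get_dataset("ds_from_pipeline"), dict)    # accessor materialises it
```
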