From ab9d20b2c912b7ff79f6ce60dca217a71edae133 Mon Sep 17 00:00:00 2001 From: thomas Date: Thu, 15 Feb 2024 18:58:48 +0000 Subject: [PATCH] rename import path --- src/lightning/data/__init__.py | 8 ++++---- .../data/processing/data_processor.py | 16 ++++++++-------- src/lightning/data/processing/functions.py | 10 +++++----- src/lightning/data/processing/utilities.py | 4 ++-- src/lightning/data/streaming/__init__.py | 10 +++++----- src/lightning/data/streaming/cache.py | 18 +++++++++--------- src/lightning/data/streaming/client.py | 2 +- src/lightning/data/streaming/combined.py | 4 ++-- src/lightning/data/streaming/config.py | 10 +++++----- src/lightning/data/streaming/dataloader.py | 18 +++++++++--------- src/lightning/data/streaming/dataset.py | 16 ++++++++-------- src/lightning/data/streaming/downloader.py | 4 ++-- src/lightning/data/streaming/item_loader.py | 4 ++-- src/lightning/data/streaming/reader.py | 12 ++++++------ src/lightning/data/streaming/serializers.py | 2 +- src/lightning/data/streaming/shuffle.py | 6 +++--- src/lightning/data/streaming/writer.py | 12 ++++++------ src/lightning/data/utilities/shuffle.py | 2 +- .../processing/test_data_processor.py | 16 ++++++++-------- tests/tests_data/processing/test_functions.py | 4 ++-- tests/tests_data/processing/test_readers.py | 4 ++-- tests/tests_data/processing/test_utilities.py | 6 +++--- tests/tests_data/streaming/test_cache.py | 12 ++++++------ tests/tests_data/streaming/test_client.py | 2 +- tests/tests_data/streaming/test_combined.py | 8 ++++---- tests/tests_data/streaming/test_dataloader.py | 4 ++-- tests/tests_data/streaming/test_dataset.py | 18 +++++++++--------- tests/tests_data/streaming/test_downloader.py | 2 +- tests/tests_data/streaming/test_reader.py | 14 +++++++------- tests/tests_data/streaming/test_resolver.py | 2 +- tests/tests_data/streaming/test_sampler.py | 2 +- tests/tests_data/streaming/test_serializer.py | 2 +- tests/tests_data/streaming/test_writer.py | 10 +++++----- tests/tests_data/utilities/test_broadcast.py | 2 +- tests/tests_data/utilities/test_format.py | 2 +- tests/tests_data/utilities/test_packing.py | 2 +- tests/tests_data/utilities/test_shuffle.py | 4 ++-- 37 files changed, 137 insertions(+), 137 deletions(-) diff --git a/src/lightning/data/__init__.py b/src/lightning/data/__init__.py index 0b4816a1..b48fe616 100644 --- a/src/lightning/data/__init__.py +++ b/src/lightning/data/__init__.py @@ -1,9 +1,9 @@ from lightning_utilities.core.imports import RequirementCache -from lightning.data.processing.functions import map, optimize, walk -from lightning.data.streaming.combined import CombinedStreamingDataset -from lightning.data.streaming.dataloader import StreamingDataLoader -from lightning.data.streaming.dataset import StreamingDataset +from lit_data.processing.functions import map, optimize, walk +from lit_data.streaming.combined import CombinedStreamingDataset +from lit_data.streaming.dataloader import StreamingDataLoader +from lit_data.streaming.dataset import StreamingDataset __all__ = [ "LightningDataset", diff --git a/src/lightning/data/processing/data_processor.py b/src/lightning/data/processing/data_processor.py index 8b28b20e..33602799 100644 --- a/src/lightning/data/processing/data_processor.py +++ b/src/lightning/data/processing/data_processor.py @@ -20,7 +20,7 @@ from tqdm.auto import tqdm as _tqdm from lightning import seed_everything -from lightning.data.constants import ( +from lit_data.constants import ( _BOTO3_AVAILABLE, _DEFAULT_FAST_DEV_RUN_ITEMS, _INDEX_FILENAME, @@ -28,13 +28,13 @@ _LIGHTNING_CLOUD_LATEST, _TORCH_GREATER_EQUAL_2_1_0, ) -from lightning.data.processing.readers import BaseReader -from lightning.data.streaming import Cache -from lightning.data.streaming.cache import Dir -from lightning.data.streaming.client import S3Client -from lightning.data.streaming.resolver import _resolve_dir -from lightning.data.utilities.broadcast import broadcast_object -from lightning.data.utilities.packing import _pack_greedily +from lit_data.processing.readers import BaseReader +from lit_data.streaming import Cache +from lit_data.streaming.cache import Dir +from lit_data.streaming.client import S3Client +from lit_data.streaming.resolver import _resolve_dir +from lit_data.utilities.broadcast import broadcast_object +from lit_data.utilities.packing import _pack_greedily if _TORCH_GREATER_EQUAL_2_1_0: from torch.utils._pytree import tree_flatten, tree_unflatten, treespec_loads diff --git a/src/lightning/data/processing/functions.py b/src/lightning/data/processing/functions.py index a5c3a095..83b632cf 100644 --- a/src/lightning/data/processing/functions.py +++ b/src/lightning/data/processing/functions.py @@ -22,11 +22,11 @@ import torch -from lightning.data.constants import _IS_IN_STUDIO, _TORCH_GREATER_EQUAL_2_1_0 -from lightning.data.processing.data_processor import DataChunkRecipe, DataProcessor, DataTransformRecipe -from lightning.data.processing.readers import BaseReader -from lightning.data.processing.utilities import optimize_dns_context -from lightning.data.streaming.resolver import ( +from lit_data.constants import _IS_IN_STUDIO, _TORCH_GREATER_EQUAL_2_1_0 +from lit_data.processing.data_processor import DataChunkRecipe, DataProcessor, DataTransformRecipe +from lit_data.processing.readers import BaseReader +from lit_data.processing.utilities import optimize_dns_context +from lit_data.streaming.resolver import ( Dir, _assert_dir_has_index_file, _assert_dir_is_empty, diff --git a/src/lightning/data/processing/utilities.py b/src/lightning/data/processing/utilities.py index 1793b727..051e25dd 100644 --- a/src/lightning/data/processing/utilities.py +++ b/src/lightning/data/processing/utilities.py @@ -5,7 +5,7 @@ from subprocess import Popen from typing import Any, Callable, Optional, Tuple -from lightning.data.constants import _IS_IN_STUDIO +from lit_data.constants import _IS_IN_STUDIO def get_worker_rank() -> Optional[str]: @@ -66,7 +66,7 @@ def optimize_dns(enable: bool) -> None: ): cmd = ( f"sudo /home/zeus/miniconda3/envs/cloudspace/bin/python" - f" -c 'from lightning.data.processing.utilities import _optimize_dns; _optimize_dns({enable})'" + f" -c 'from lit_data.processing.utilities import _optimize_dns; _optimize_dns({enable})'" ) Popen(cmd, shell=True).wait() # E501 diff --git a/src/lightning/data/streaming/__init__.py b/src/lightning/data/streaming/__init__.py index 2e6c49cf..527bd55d 100644 --- a/src/lightning/data/streaming/__init__.py +++ b/src/lightning/data/streaming/__init__.py @@ -11,11 +11,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from lightning.data.streaming.cache import Cache -from lightning.data.streaming.combined import CombinedStreamingDataset -from lightning.data.streaming.dataloader import StreamingDataLoader -from lightning.data.streaming.dataset import StreamingDataset -from lightning.data.streaming.item_loader import TokensLoader +from lit_data.streaming.cache import Cache +from lit_data.streaming.combined import CombinedStreamingDataset +from lit_data.streaming.dataloader import StreamingDataLoader +from lit_data.streaming.dataset import StreamingDataset +from lit_data.streaming.item_loader import TokensLoader __all__ = [ "Cache", diff --git a/src/lightning/data/streaming/cache.py b/src/lightning/data/streaming/cache.py index 1fa8874a..dc31bf2b 100644 --- a/src/lightning/data/streaming/cache.py +++ b/src/lightning/data/streaming/cache.py @@ -15,19 +15,19 @@ import os from typing import Any, Dict, List, Optional, Tuple, Union -from lightning.data.constants import ( +from lit_data.constants import ( _INDEX_FILENAME, _LIGHTNING_CLOUD_LATEST, _TORCH_GREATER_EQUAL_2_1_0, ) -from lightning.data.streaming.item_loader import BaseItemLoader -from lightning.data.streaming.reader import BinaryReader -from lightning.data.streaming.resolver import Dir, _resolve_dir -from lightning.data.streaming.sampler import ChunkedIndex -from lightning.data.streaming.serializers import Serializer -from lightning.data.streaming.writer import BinaryWriter -from lightning.data.utilities.env import _DistributedEnv, _WorkerEnv -from lightning.data.utilities.format import _convert_bytes_to_int +from lit_data.streaming.item_loader import BaseItemLoader +from lit_data.streaming.reader import BinaryReader +from lit_data.streaming.resolver import Dir, _resolve_dir +from lit_data.streaming.sampler import ChunkedIndex +from lit_data.streaming.serializers import Serializer +from lit_data.streaming.writer import BinaryWriter +from lit_data.utilities.env import _DistributedEnv, _WorkerEnv +from lit_data.utilities.format import _convert_bytes_to_int logger = logging.Logger(__name__) diff --git a/src/lightning/data/streaming/client.py b/src/lightning/data/streaming/client.py index b93eca4c..2c8fa116 100644 --- a/src/lightning/data/streaming/client.py +++ b/src/lightning/data/streaming/client.py @@ -2,7 +2,7 @@ from time import time from typing import Any, Optional -from lightning.data.constants import _BOTO3_AVAILABLE +from lit_data.constants import _BOTO3_AVAILABLE if _BOTO3_AVAILABLE: import boto3 diff --git a/src/lightning/data/streaming/combined.py b/src/lightning/data/streaming/combined.py index 6e6a0486..ce0cc503 100644 --- a/src/lightning/data/streaming/combined.py +++ b/src/lightning/data/streaming/combined.py @@ -16,8 +16,8 @@ from torch.utils.data import IterableDataset -from lightning.data.streaming.dataset import StreamingDataset -from lightning.data.utilities.env import _WorkerEnv +from lit_data.streaming.dataset import StreamingDataset +from lit_data.utilities.env import _WorkerEnv __NUM_SAMPLES_YIELDED_KEY__ = "__NUM_SAMPLES_YIELDED__" __SAMPLES_KEY__ = "__SAMPLES__" diff --git a/src/lightning/data/streaming/config.py b/src/lightning/data/streaming/config.py index 4a5a4ba8..c94edfc2 100644 --- a/src/lightning/data/streaming/config.py +++ b/src/lightning/data/streaming/config.py @@ -15,11 +15,11 @@ import os from typing import Any, Dict, List, Optional, Tuple -from lightning.data.constants import _INDEX_FILENAME, _TORCH_GREATER_EQUAL_2_1_0 -from lightning.data.streaming.downloader import get_downloader_cls -from lightning.data.streaming.item_loader import BaseItemLoader, PyTreeLoader, TokensLoader -from lightning.data.streaming.sampler import ChunkedIndex -from lightning.data.streaming.serializers import Serializer +from lit_data.constants import _INDEX_FILENAME, _TORCH_GREATER_EQUAL_2_1_0 +from lit_data.streaming.downloader import get_downloader_cls +from lit_data.streaming.item_loader import BaseItemLoader, PyTreeLoader, TokensLoader +from lit_data.streaming.sampler import ChunkedIndex +from lit_data.streaming.serializers import Serializer if _TORCH_GREATER_EQUAL_2_1_0: from torch.utils._pytree import tree_unflatten, treespec_loads diff --git a/src/lightning/data/streaming/dataloader.py b/src/lightning/data/streaming/dataloader.py index 72ed7556..09d7ce47 100644 --- a/src/lightning/data/streaming/dataloader.py +++ b/src/lightning/data/streaming/dataloader.py @@ -33,16 +33,16 @@ ) from torch.utils.data.sampler import BatchSampler, Sampler -from lightning.data.constants import _DEFAULT_CHUNK_BYTES, _TORCH_GREATER_EQUAL_2_1_0, _VIZ_TRACKER_AVAILABLE -from lightning.data.streaming import Cache -from lightning.data.streaming.combined import ( +from lit_data.constants import _DEFAULT_CHUNK_BYTES, _TORCH_GREATER_EQUAL_2_1_0, _VIZ_TRACKER_AVAILABLE +from lit_data.streaming import Cache +from lit_data.streaming.combined import ( __NUM_SAMPLES_YIELDED_KEY__, __SAMPLES_KEY__, CombinedStreamingDataset, ) -from lightning.data.streaming.dataset import StreamingDataset -from lightning.data.streaming.sampler import CacheBatchSampler -from lightning.data.utilities.env import _DistributedEnv +from lit_data.streaming.dataset import StreamingDataset +from lit_data.streaming.sampler import CacheBatchSampler +from lit_data.utilities.env import _DistributedEnv if _TORCH_GREATER_EQUAL_2_1_0: from torch.utils._pytree import tree_flatten @@ -105,7 +105,7 @@ def __getitem__(self, index: int) -> Any: if not _equal_items(data_1, data2): raise ValueError( f"Your dataset items aren't deterministic. Found {data_1} and {data2} for index {index}." - " HINT: Use the `lightning.data.cache.Cache` directly within your dataset." + " HINT: Use the `lit_data.cache.Cache` directly within your dataset." ) self._is_deterministic = True self._cache[index] = data_1 @@ -180,7 +180,7 @@ def __call__( ) -> None: from torch.utils.data._utils import worker - from lightning.data.streaming.cache import Cache + from lit_data.streaming.cache import Cache enable_profiling = self._global_rank == 0 and worker_id == 0 and _VIZ_TRACKER_AVAILABLE and self._profile @@ -481,7 +481,7 @@ def _try_put_index(self) -> None: class StreamingDataLoader(DataLoader): r"""The StreamingDataLoader combines a dataset and a sampler, and provides an iterable over the given dataset. - The :class:`~lightning.data.streaming.dataloader.StreamingDataLoader` supports either a + The :class:`~lit_data.streaming.dataloader.StreamingDataLoader` supports either a StreamingDataset and CombinedStreamingDataset datasets with single- or multi-process loading, customizing loading order and optional automatic batching (collation) and memory pinning. diff --git a/src/lightning/data/streaming/dataset.py b/src/lightning/data/streaming/dataset.py index 61f619da..bfa200eb 100644 --- a/src/lightning/data/streaming/dataset.py +++ b/src/lightning/data/streaming/dataset.py @@ -20,17 +20,17 @@ import numpy as np from torch.utils.data import IterableDataset -from lightning.data.constants import ( +from lit_data.constants import ( _DEFAULT_CACHE_DIR, _INDEX_FILENAME, ) -from lightning.data.streaming import Cache -from lightning.data.streaming.item_loader import BaseItemLoader -from lightning.data.streaming.resolver import Dir, _resolve_dir -from lightning.data.streaming.sampler import ChunkedIndex -from lightning.data.streaming.serializers import Serializer -from lightning.data.streaming.shuffle import FullShuffle, NoShuffle, Shuffle -from lightning.data.utilities.env import _DistributedEnv, _is_in_dataloader_worker, _WorkerEnv +from lit_data.streaming import Cache +from lit_data.streaming.item_loader import BaseItemLoader +from lit_data.streaming.resolver import Dir, _resolve_dir +from lit_data.streaming.sampler import ChunkedIndex +from lit_data.streaming.serializers import Serializer +from lit_data.streaming.shuffle import FullShuffle, NoShuffle, Shuffle +from lit_data.utilities.env import _DistributedEnv, _is_in_dataloader_worker, _WorkerEnv logger = Logger(__name__) diff --git a/src/lightning/data/streaming/downloader.py b/src/lightning/data/streaming/downloader.py index c3e0d170..40429bac 100644 --- a/src/lightning/data/streaming/downloader.py +++ b/src/lightning/data/streaming/downloader.py @@ -19,8 +19,8 @@ from filelock import FileLock, Timeout -from lightning.data.constants import _INDEX_FILENAME -from lightning.data.streaming.client import S3Client +from lit_data.constants import _INDEX_FILENAME +from lit_data.streaming.client import S3Client class Downloader(ABC): diff --git a/src/lightning/data/streaming/item_loader.py b/src/lightning/data/streaming/item_loader.py index 2a1a02da..fd4ca090 100644 --- a/src/lightning/data/streaming/item_loader.py +++ b/src/lightning/data/streaming/item_loader.py @@ -19,11 +19,11 @@ import numpy as np import torch -from lightning.data.constants import ( +from lit_data.constants import ( _TORCH_DTYPES_MAPPING, _TORCH_GREATER_EQUAL_2_1_0, ) -from lightning.data.streaming.serializers import Serializer +from lit_data.streaming.serializers import Serializer if _TORCH_GREATER_EQUAL_2_1_0: from torch.utils._pytree import PyTree, tree_unflatten diff --git a/src/lightning/data/streaming/reader.py b/src/lightning/data/streaming/reader.py index 298452ea..848db7f7 100644 --- a/src/lightning/data/streaming/reader.py +++ b/src/lightning/data/streaming/reader.py @@ -20,12 +20,12 @@ from threading import Thread from typing import Any, Dict, List, Optional, Tuple, Union -from lightning.data.constants import _TORCH_GREATER_EQUAL_2_1_0 -from lightning.data.streaming.config import ChunksConfig -from lightning.data.streaming.item_loader import BaseItemLoader, PyTreeLoader -from lightning.data.streaming.sampler import ChunkedIndex -from lightning.data.streaming.serializers import Serializer, _get_serializers -from lightning.data.utilities.env import _DistributedEnv, _WorkerEnv +from lit_data.constants import _TORCH_GREATER_EQUAL_2_1_0 +from lit_data.streaming.config import ChunksConfig +from lit_data.streaming.item_loader import BaseItemLoader, PyTreeLoader +from lit_data.streaming.sampler import ChunkedIndex +from lit_data.streaming.serializers import Serializer, _get_serializers +from lit_data.utilities.env import _DistributedEnv, _WorkerEnv warnings.filterwarnings("ignore", message=".*The given buffer is not writable.*") diff --git a/src/lightning/data/streaming/serializers.py b/src/lightning/data/streaming/serializers.py index 605c3bdb..d491f9a9 100644 --- a/src/lightning/data/streaming/serializers.py +++ b/src/lightning/data/streaming/serializers.py @@ -23,7 +23,7 @@ import torch from lightning_utilities.core.imports import RequirementCache -from lightning.data.constants import _NUMPY_DTYPES_MAPPING, _TORCH_DTYPES_MAPPING +from lit_data.constants import _NUMPY_DTYPES_MAPPING, _TORCH_DTYPES_MAPPING _PIL_AVAILABLE = RequirementCache("PIL") _TORCH_VISION_AVAILABLE = RequirementCache("torchvision") diff --git a/src/lightning/data/streaming/shuffle.py b/src/lightning/data/streaming/shuffle.py index 92cc05f6..04a051d8 100644 --- a/src/lightning/data/streaming/shuffle.py +++ b/src/lightning/data/streaming/shuffle.py @@ -17,9 +17,9 @@ import numpy as np -from lightning.data.streaming import Cache -from lightning.data.utilities.env import _DistributedEnv -from lightning.data.utilities.shuffle import _associate_chunks_and_internals_to_ranks, _intra_node_chunk_shuffle +from lit_data.streaming import Cache +from lit_data.utilities.env import _DistributedEnv +from lit_data.utilities.shuffle import _associate_chunks_and_internals_to_ranks, _intra_node_chunk_shuffle class Shuffle(ABC): diff --git a/src/lightning/data/streaming/writer.py b/src/lightning/data/streaming/writer.py index b7b50fd7..f0cb2075 100644 --- a/src/lightning/data/streaming/writer.py +++ b/src/lightning/data/streaming/writer.py @@ -21,12 +21,12 @@ import numpy as np import torch -from lightning.data.constants import _INDEX_FILENAME, _TORCH_GREATER_EQUAL_2_1_0 -from lightning.data.processing.utilities import get_worker_rank -from lightning.data.streaming.compression import _COMPRESSORS, Compressor -from lightning.data.streaming.serializers import Serializer, _get_serializers -from lightning.data.utilities.env import _DistributedEnv, _WorkerEnv -from lightning.data.utilities.format import _convert_bytes_to_int, _human_readable_bytes +from lit_data.constants import _INDEX_FILENAME, _TORCH_GREATER_EQUAL_2_1_0 +from lit_data.processing.utilities import get_worker_rank +from lit_data.streaming.compression import _COMPRESSORS, Compressor +from lit_data.streaming.serializers import Serializer, _get_serializers +from lit_data.utilities.env import _DistributedEnv, _WorkerEnv +from lit_data.utilities.format import _convert_bytes_to_int, _human_readable_bytes if _TORCH_GREATER_EQUAL_2_1_0: from torch.utils._pytree import PyTree, tree_flatten, treespec_dumps diff --git a/src/lightning/data/utilities/shuffle.py b/src/lightning/data/utilities/shuffle.py index 4dc34a7d..072715ab 100644 --- a/src/lightning/data/utilities/shuffle.py +++ b/src/lightning/data/utilities/shuffle.py @@ -2,7 +2,7 @@ import numpy as np -from lightning.data.utilities.env import _DistributedEnv +from lit_data.utilities.env import _DistributedEnv def _intra_node_chunk_shuffle( diff --git a/tests/tests_data/processing/test_data_processor.py b/tests/tests_data/processing/test_data_processor.py index d3c40a89..c410893a 100644 --- a/tests/tests_data/processing/test_data_processor.py +++ b/tests/tests_data/processing/test_data_processor.py @@ -10,9 +10,9 @@ import pytest import torch from lightning import seed_everything -from lightning.data.processing import data_processor as data_processor_module -from lightning.data.processing import functions -from lightning.data.processing.data_processor import ( +from lit_data.processing import data_processor as data_processor_module +from lit_data.processing import functions +from lit_data.processing.data_processor import ( DataChunkRecipe, DataProcessor, DataTransformRecipe, @@ -25,9 +25,9 @@ _wait_for_disk_usage_higher_than_threshold, _wait_for_file_to_exist, ) -from lightning.data.processing.functions import LambdaDataTransformRecipe, map, optimize -from lightning.data.streaming import resolver -from lightning.data.streaming.cache import Cache, Dir +from lit_data.processing.functions import LambdaDataTransformRecipe, map, optimize +from lit_data.streaming import resolver +from lit_data.streaming.cache import Cache, Dir from lightning_utilities.core.imports import RequirementCache _PIL_AVAILABLE = RequirementCache("PIL") @@ -163,7 +163,7 @@ def fn(*_, **__): @pytest.mark.skipif(condition=sys.platform == "win32", reason="Not supported on windows") -@mock.patch("lightning.data.processing.data_processor._wait_for_disk_usage_higher_than_threshold") +@mock.patch("lit_data.processing.data_processor._wait_for_disk_usage_higher_than_threshold") def test_download_data_target(wait_for_disk_usage_higher_than_threshold_mock, tmpdir): input_dir = os.path.join(tmpdir, "input_dir") os.makedirs(input_dir, exist_ok=True) @@ -202,7 +202,7 @@ def fn(*_, **__): def test_wait_for_disk_usage_higher_than_threshold(): disk_usage_mock = mock.Mock(side_effect=[mock.Mock(free=10e9), mock.Mock(free=10e9), mock.Mock(free=10e11)]) - with mock.patch("lightning.data.processing.data_processor.shutil.disk_usage", disk_usage_mock): + with mock.patch("lit_data.processing.data_processor.shutil.disk_usage", disk_usage_mock): _wait_for_disk_usage_higher_than_threshold("/", 10, sleep_time=0) assert disk_usage_mock.call_count == 3 diff --git a/tests/tests_data/processing/test_functions.py b/tests/tests_data/processing/test_functions.py index d0b58113..edc041dc 100644 --- a/tests/tests_data/processing/test_functions.py +++ b/tests/tests_data/processing/test_functions.py @@ -3,8 +3,8 @@ from unittest import mock import pytest -from lightning.data import walk -from lightning.data.processing.functions import _get_input_dir +from lit_data import walk +from lit_data.processing.functions import _get_input_dir @pytest.mark.skipif(sys.platform == "win32", reason="currently not supported for windows.") diff --git a/tests/tests_data/processing/test_readers.py b/tests/tests_data/processing/test_readers.py index 98d97c96..cf1d2ae4 100644 --- a/tests/tests_data/processing/test_readers.py +++ b/tests/tests_data/processing/test_readers.py @@ -2,8 +2,8 @@ import sys import pytest -from lightning.data import map -from lightning.data.processing.readers import _PYARROW_AVAILABLE, BaseReader, ParquetReader +from lit_data import map +from lit_data.processing.readers import _PYARROW_AVAILABLE, BaseReader, ParquetReader class DummyReader(BaseReader): diff --git a/tests/tests_data/processing/test_utilities.py b/tests/tests_data/processing/test_utilities.py index 5bc97b18..31f1ea7b 100644 --- a/tests/tests_data/processing/test_utilities.py +++ b/tests/tests_data/processing/test_utilities.py @@ -1,7 +1,7 @@ from unittest.mock import MagicMock -from lightning.data.processing import utilities as utilities_module -from lightning.data.processing.utilities import optimize_dns_context +from lit_data.processing import utilities as utilities_module +from lit_data.processing.utilities import optimize_dns_context def test_optimize_dns_context(monkeypatch): @@ -31,6 +31,6 @@ def readlines(self): cmd = popen_mock._mock_call_args_list[0].args[0] expected_cmd = ( "sudo /home/zeus/miniconda3/envs/cloudspace/bin/python" - " -c 'from lightning.data.processing.utilities import _optimize_dns; _optimize_dns(True)'" + " -c 'from lit_data.processing.utilities import _optimize_dns; _optimize_dns(True)'" ) assert cmd == expected_cmd diff --git a/tests/tests_data/streaming/test_cache.py b/tests/tests_data/streaming/test_cache.py index f677c36f..b8daece1 100644 --- a/tests/tests_data/streaming/test_cache.py +++ b/tests/tests_data/streaming/test_cache.py @@ -19,12 +19,12 @@ import pytest import torch from lightning import seed_everything -from lightning.data.streaming import Cache -from lightning.data.streaming.dataloader import CacheDataLoader -from lightning.data.streaming.dataset import StreamingDataset -from lightning.data.streaming.item_loader import TokensLoader -from lightning.data.streaming.serializers import Serializer -from lightning.data.utilities.env import _DistributedEnv +from lit_data.streaming import Cache +from lit_data.streaming.dataloader import CacheDataLoader +from lit_data.streaming.dataset import StreamingDataset +from lit_data.streaming.item_loader import TokensLoader +from lit_data.streaming.serializers import Serializer +from lit_data.utilities.env import _DistributedEnv from lightning.fabric import Fabric from lightning.pytorch.demos.boring_classes import RandomDataset from lightning_utilities.core.imports import RequirementCache diff --git a/tests/tests_data/streaming/test_client.py b/tests/tests_data/streaming/test_client.py index d1425fd9..b9ca68ef 100644 --- a/tests/tests_data/streaming/test_client.py +++ b/tests/tests_data/streaming/test_client.py @@ -3,7 +3,7 @@ from unittest import mock import pytest -from lightning.data.streaming import client +from lit_data.streaming import client def test_s3_client_without_cloud_space_id(monkeypatch): diff --git a/tests/tests_data/streaming/test_combined.py b/tests/tests_data/streaming/test_combined.py index 5d8d4baa..76b5af84 100644 --- a/tests/tests_data/streaming/test_combined.py +++ b/tests/tests_data/streaming/test_combined.py @@ -4,10 +4,10 @@ import pytest import torch -from lightning.data.streaming.cache import Cache -from lightning.data.streaming.combined import CombinedStreamingDataset -from lightning.data.streaming.dataloader import StreamingDataLoader -from lightning.data.streaming.dataset import Dir, StreamingDataset +from lit_data.streaming.cache import Cache +from lit_data.streaming.combined import CombinedStreamingDataset +from lit_data.streaming.dataloader import StreamingDataLoader +from lit_data.streaming.dataset import Dir, StreamingDataset from torch.utils.data import IterableDataset from torch.utils.data.dataloader import DataLoader diff --git a/tests/tests_data/streaming/test_dataloader.py b/tests/tests_data/streaming/test_dataloader.py index 7b02292a..c74dec82 100644 --- a/tests/tests_data/streaming/test_dataloader.py +++ b/tests/tests_data/streaming/test_dataloader.py @@ -2,8 +2,8 @@ import pytest import torch -from lightning.data.streaming import CombinedStreamingDataset, StreamingDataLoader -from lightning.data.streaming import dataloader as streaming_dataloader_module +from lit_data.streaming import CombinedStreamingDataset, StreamingDataLoader +from lit_data.streaming import dataloader as streaming_dataloader_module from torch import tensor diff --git a/tests/tests_data/streaming/test_dataset.py b/tests/tests_data/streaming/test_dataset.py index 298c4f36..752cf7e9 100644 --- a/tests/tests_data/streaming/test_dataset.py +++ b/tests/tests_data/streaming/test_dataset.py @@ -20,11 +20,11 @@ import pytest import torch from lightning import seed_everything -from lightning.data.processing import functions -from lightning.data.streaming import Cache -from lightning.data.streaming import dataset as dataset_module -from lightning.data.streaming.dataloader import StreamingDataLoader -from lightning.data.streaming.dataset import ( +from lit_data.processing import functions +from lit_data.streaming import Cache +from lit_data.streaming import dataset as dataset_module +from lit_data.streaming.dataloader import StreamingDataLoader +from lit_data.streaming.dataset import ( _INDEX_FILENAME, Dir, StreamingDataset, @@ -34,9 +34,9 @@ _should_replace_path, _try_create_cache_dir, ) -from lightning.data.streaming.item_loader import TokensLoader -from lightning.data.streaming.shuffle import FullShuffle, NoShuffle -from lightning.data.utilities.env import _DistributedEnv, _WorkerEnv +from lit_data.streaming.item_loader import TokensLoader +from lit_data.streaming.shuffle import FullShuffle, NoShuffle +from lit_data.utilities.env import _DistributedEnv, _WorkerEnv from torch.utils.data import DataLoader @@ -391,7 +391,7 @@ def test_try_create_cache_dir(): # the cache dir creating at /cache requires root privileges, so we need to mock `os.makedirs()` with ( mock.patch.dict("os.environ", {"LIGHTNING_CLUSTER_ID": "abc", "LIGHTNING_CLOUD_PROJECT_ID": "123"}), - mock.patch("lightning.data.streaming.dataset.os.makedirs") as makedirs_mock, + mock.patch("lit_data.streaming.dataset.os.makedirs") as makedirs_mock, ): cache_dir_1 = _try_create_cache_dir("") cache_dir_2 = _try_create_cache_dir("ssdf") diff --git a/tests/tests_data/streaming/test_downloader.py b/tests/tests_data/streaming/test_downloader.py index 3d4e5421..e2275533 100644 --- a/tests/tests_data/streaming/test_downloader.py +++ b/tests/tests_data/streaming/test_downloader.py @@ -1,7 +1,7 @@ import os from unittest.mock import MagicMock -from lightning.data.streaming.downloader import S3Downloader, subprocess +from lit_data.streaming.downloader import S3Downloader, subprocess def test_s3_downloader_fast(tmpdir, monkeypatch): diff --git a/tests/tests_data/streaming/test_reader.py b/tests/tests_data/streaming/test_reader.py index eacf04a0..5571a22e 100644 --- a/tests/tests_data/streaming/test_reader.py +++ b/tests/tests_data/streaming/test_reader.py @@ -3,13 +3,13 @@ from time import sleep import numpy as np -from lightning.data.streaming import reader -from lightning.data.streaming.cache import Cache -from lightning.data.streaming.config import ChunkedIndex -from lightning.data.streaming.item_loader import PyTreeLoader -from lightning.data.streaming.reader import _END_TOKEN, PrepareChunksThread, _get_folder_size -from lightning.data.streaming.resolver import Dir -from lightning.data.utilities.env import _DistributedEnv +from lit_data.streaming import reader +from lit_data.streaming.cache import Cache +from lit_data.streaming.config import ChunkedIndex +from lit_data.streaming.item_loader import PyTreeLoader +from lit_data.streaming.reader import _END_TOKEN, PrepareChunksThread, _get_folder_size +from lit_data.streaming.resolver import Dir +from lit_data.utilities.env import _DistributedEnv def test_reader_chunk_removal(tmpdir): diff --git a/tests/tests_data/streaming/test_resolver.py b/tests/tests_data/streaming/test_resolver.py index 0cc6a12e..6b1923e7 100644 --- a/tests/tests_data/streaming/test_resolver.py +++ b/tests/tests_data/streaming/test_resolver.py @@ -3,7 +3,7 @@ from unittest import mock import pytest -from lightning.data.streaming import resolver +from lit_data.streaming import resolver from lightning_cloud import login from lightning_cloud.openapi import ( Externalv1Cluster, diff --git a/tests/tests_data/streaming/test_sampler.py b/tests/tests_data/streaming/test_sampler.py index d379b359..911ea278 100644 --- a/tests/tests_data/streaming/test_sampler.py +++ b/tests/tests_data/streaming/test_sampler.py @@ -2,7 +2,7 @@ import pytest from lightning import seed_everything -from lightning.data.streaming.sampler import CacheBatchSampler +from lit_data.streaming.sampler import CacheBatchSampler @pytest.mark.parametrize( diff --git a/tests/tests_data/streaming/test_serializer.py b/tests/tests_data/streaming/test_serializer.py index 5a3916d4..902f8aa8 100644 --- a/tests/tests_data/streaming/test_serializer.py +++ b/tests/tests_data/streaming/test_serializer.py @@ -20,7 +20,7 @@ import pytest import torch from lightning import seed_everything -from lightning.data.streaming.serializers import ( +from lit_data.streaming.serializers import ( _AV_AVAILABLE, _NUMPY_DTYPES_MAPPING, _SERIALIZERS, diff --git a/tests/tests_data/streaming/test_writer.py b/tests/tests_data/streaming/test_writer.py index c2ac731f..dc00afa3 100644 --- a/tests/tests_data/streaming/test_writer.py +++ b/tests/tests_data/streaming/test_writer.py @@ -18,11 +18,11 @@ import numpy as np import pytest from lightning import seed_everything -from lightning.data.streaming.compression import _ZSTD_AVAILABLE -from lightning.data.streaming.reader import BinaryReader -from lightning.data.streaming.sampler import ChunkedIndex -from lightning.data.streaming.writer import BinaryWriter -from lightning.data.utilities.format import _FORMAT_TO_RATIO +from lit_data.streaming.compression import _ZSTD_AVAILABLE +from lit_data.streaming.reader import BinaryReader +from lit_data.streaming.sampler import ChunkedIndex +from lit_data.streaming.writer import BinaryWriter +from lit_data.utilities.format import _FORMAT_TO_RATIO from lightning_utilities.core.imports import RequirementCache _PIL_AVAILABLE = RequirementCache("PIL") diff --git a/tests/tests_data/utilities/test_broadcast.py b/tests/tests_data/utilities/test_broadcast.py index 97f40418..d4427ac8 100644 --- a/tests/tests_data/utilities/test_broadcast.py +++ b/tests/tests_data/utilities/test_broadcast.py @@ -1,7 +1,7 @@ import os from unittest import mock -from lightning.data.utilities.broadcast import broadcast_object, requests +from lit_data.utilities.broadcast import broadcast_object, requests @mock.patch.dict( diff --git a/tests/tests_data/utilities/test_format.py b/tests/tests_data/utilities/test_format.py index 38bb7d44..362b2295 100644 --- a/tests/tests_data/utilities/test_format.py +++ b/tests/tests_data/utilities/test_format.py @@ -1,4 +1,4 @@ -from lightning.data.utilities.format import _human_readable_bytes +from lit_data.utilities.format import _human_readable_bytes def test_human_readable_bytes(): diff --git a/tests/tests_data/utilities/test_packing.py b/tests/tests_data/utilities/test_packing.py index 878083cc..cb7e7928 100644 --- a/tests/tests_data/utilities/test_packing.py +++ b/tests/tests_data/utilities/test_packing.py @@ -1,5 +1,5 @@ import pytest -from lightning.data.utilities.packing import _pack_greedily +from lit_data.utilities.packing import _pack_greedily def test_pack_greedily(): diff --git a/tests/tests_data/utilities/test_shuffle.py b/tests/tests_data/utilities/test_shuffle.py index f31aa76e..1cf818ef 100644 --- a/tests/tests_data/utilities/test_shuffle.py +++ b/tests/tests_data/utilities/test_shuffle.py @@ -1,5 +1,5 @@ -from lightning.data.utilities.env import _DistributedEnv -from lightning.data.utilities.shuffle import _associate_chunks_and_internals_to_ranks, _intra_node_chunk_shuffle +from lit_data.utilities.env import _DistributedEnv +from lit_data.utilities.shuffle import _associate_chunks_and_internals_to_ranks, _intra_node_chunk_shuffle def test_intra_node_chunk_shuffle():