Commit: rename import path
thomas authored and thomas committed Feb 15, 2024
1 parent fe1a041 commit ab9d20b
Showing 37 changed files with 137 additions and 137 deletions.
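
This commit mechanically renames the import root `lightning.data` to `lit_data`: every import, `mock.patch` target, and docstring reference is rewritten, while the on-disk file paths are untouched. For downstream code the change is one-for-one, e.g. `from lightning.data import StreamingDataset` becomes `from lit_data import StreamingDataset`. As a rough illustration of how such a rename can be automated (a hypothetical sketch, not the tool used for this commit; the directory list and regex are assumptions):

# Hypothetical codemod sketch -- NOT the script used in this commit.
# Assumes the rename is purely textual: every "lightning.data" module
# reference becomes "lit_data", including quoted mock.patch targets.
import re
from pathlib import Path

OLD, NEW = "lightning.data", "lit_data"
ROOTS = ["src", "tests"]  # assumed repository layout

# Match the old prefix only as a full module path component, so longer
# identifiers such as "lightning.datamodule" would be left alone.
pattern = re.compile(rf"\b{re.escape(OLD)}(?=[.\s'\"`])")

for root in ROOTS:
    for path in Path(root).rglob("*.py"):
        text = path.read_text()
        new_text = pattern.sub(NEW, text)
        if new_text != text:
            path.write_text(new_text)
            print(f"rewrote {path}")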
8 changes: 4 additions & 4 deletions src/lightning/data/__init__.py
@@ -1,9 +1,9 @@
 from lightning_utilities.core.imports import RequirementCache

-from lightning.data.processing.functions import map, optimize, walk
-from lightning.data.streaming.combined import CombinedStreamingDataset
-from lightning.data.streaming.dataloader import StreamingDataLoader
-from lightning.data.streaming.dataset import StreamingDataset
+from lit_data.processing.functions import map, optimize, walk
+from lit_data.streaming.combined import CombinedStreamingDataset
+from lit_data.streaming.dataloader import StreamingDataLoader
+from lit_data.streaming.dataset import StreamingDataset

 __all__ = [
     "LightningDataset",
16 changes: 8 additions & 8 deletions src/lightning/data/processing/data_processor.py
@@ -20,21 +20,21 @@
 from tqdm.auto import tqdm as _tqdm

 from lightning import seed_everything
-from lightning.data.constants import (
+from lit_data.constants import (
     _BOTO3_AVAILABLE,
     _DEFAULT_FAST_DEV_RUN_ITEMS,
     _INDEX_FILENAME,
     _IS_IN_STUDIO,
     _LIGHTNING_CLOUD_LATEST,
     _TORCH_GREATER_EQUAL_2_1_0,
 )
-from lightning.data.processing.readers import BaseReader
-from lightning.data.streaming import Cache
-from lightning.data.streaming.cache import Dir
-from lightning.data.streaming.client import S3Client
-from lightning.data.streaming.resolver import _resolve_dir
-from lightning.data.utilities.broadcast import broadcast_object
-from lightning.data.utilities.packing import _pack_greedily
+from lit_data.processing.readers import BaseReader
+from lit_data.streaming import Cache
+from lit_data.streaming.cache import Dir
+from lit_data.streaming.client import S3Client
+from lit_data.streaming.resolver import _resolve_dir
+from lit_data.utilities.broadcast import broadcast_object
+from lit_data.utilities.packing import _pack_greedily

 if _TORCH_GREATER_EQUAL_2_1_0:
     from torch.utils._pytree import tree_flatten, tree_unflatten, treespec_loads
10 changes: 5 additions & 5 deletions src/lightning/data/processing/functions.py
@@ -22,11 +22,11 @@

 import torch

-from lightning.data.constants import _IS_IN_STUDIO, _TORCH_GREATER_EQUAL_2_1_0
-from lightning.data.processing.data_processor import DataChunkRecipe, DataProcessor, DataTransformRecipe
-from lightning.data.processing.readers import BaseReader
-from lightning.data.processing.utilities import optimize_dns_context
-from lightning.data.streaming.resolver import (
+from lit_data.constants import _IS_IN_STUDIO, _TORCH_GREATER_EQUAL_2_1_0
+from lit_data.processing.data_processor import DataChunkRecipe, DataProcessor, DataTransformRecipe
+from lit_data.processing.readers import BaseReader
+from lit_data.processing.utilities import optimize_dns_context
+from lit_data.streaming.resolver import (
     Dir,
     _assert_dir_has_index_file,
     _assert_dir_is_empty,
4 changes: 2 additions & 2 deletions src/lightning/data/processing/utilities.py
@@ -5,7 +5,7 @@
 from subprocess import Popen
 from typing import Any, Callable, Optional, Tuple

-from lightning.data.constants import _IS_IN_STUDIO
+from lit_data.constants import _IS_IN_STUDIO


 def get_worker_rank() -> Optional[str]:
@@ -66,7 +66,7 @@ def optimize_dns(enable: bool) -> None:
     ):
         cmd = (
             f"sudo /home/zeus/miniconda3/envs/cloudspace/bin/python"
-            f" -c 'from lightning.data.processing.utilities import _optimize_dns; _optimize_dns({enable})'"
+            f" -c 'from lit_data.processing.utilities import _optimize_dns; _optimize_dns({enable})'"
         )
         Popen(cmd, shell=True).wait()  # E501

10 changes: 5 additions & 5 deletions src/lightning/data/streaming/__init__.py
@@ -11,11 +11,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from lightning.data.streaming.cache import Cache
-from lightning.data.streaming.combined import CombinedStreamingDataset
-from lightning.data.streaming.dataloader import StreamingDataLoader
-from lightning.data.streaming.dataset import StreamingDataset
-from lightning.data.streaming.item_loader import TokensLoader
+from lit_data.streaming.cache import Cache
+from lit_data.streaming.combined import CombinedStreamingDataset
+from lit_data.streaming.dataloader import StreamingDataLoader
+from lit_data.streaming.dataset import StreamingDataset
+from lit_data.streaming.item_loader import TokensLoader

 __all__ = [
     "Cache",
18 changes: 9 additions & 9 deletions src/lightning/data/streaming/cache.py
@@ -15,19 +15,19 @@
 import os
 from typing import Any, Dict, List, Optional, Tuple, Union

-from lightning.data.constants import (
+from lit_data.constants import (
     _INDEX_FILENAME,
     _LIGHTNING_CLOUD_LATEST,
     _TORCH_GREATER_EQUAL_2_1_0,
 )
-from lightning.data.streaming.item_loader import BaseItemLoader
-from lightning.data.streaming.reader import BinaryReader
-from lightning.data.streaming.resolver import Dir, _resolve_dir
-from lightning.data.streaming.sampler import ChunkedIndex
-from lightning.data.streaming.serializers import Serializer
-from lightning.data.streaming.writer import BinaryWriter
-from lightning.data.utilities.env import _DistributedEnv, _WorkerEnv
-from lightning.data.utilities.format import _convert_bytes_to_int
+from lit_data.streaming.item_loader import BaseItemLoader
+from lit_data.streaming.reader import BinaryReader
+from lit_data.streaming.resolver import Dir, _resolve_dir
+from lit_data.streaming.sampler import ChunkedIndex
+from lit_data.streaming.serializers import Serializer
+from lit_data.streaming.writer import BinaryWriter
+from lit_data.utilities.env import _DistributedEnv, _WorkerEnv
+from lit_data.utilities.format import _convert_bytes_to_int

 logger = logging.Logger(__name__)

2 changes: 1 addition & 1 deletion src/lightning/data/streaming/client.py
@@ -2,7 +2,7 @@
 from time import time
 from typing import Any, Optional

-from lightning.data.constants import _BOTO3_AVAILABLE
+from lit_data.constants import _BOTO3_AVAILABLE

 if _BOTO3_AVAILABLE:
     import boto3
4 changes: 2 additions & 2 deletions src/lightning/data/streaming/combined.py
@@ -16,8 +16,8 @@

 from torch.utils.data import IterableDataset

-from lightning.data.streaming.dataset import StreamingDataset
-from lightning.data.utilities.env import _WorkerEnv
+from lit_data.streaming.dataset import StreamingDataset
+from lit_data.utilities.env import _WorkerEnv

 __NUM_SAMPLES_YIELDED_KEY__ = "__NUM_SAMPLES_YIELDED__"
 __SAMPLES_KEY__ = "__SAMPLES__"
10 changes: 5 additions & 5 deletions src/lightning/data/streaming/config.py
@@ -15,11 +15,11 @@
 import os
 from typing import Any, Dict, List, Optional, Tuple

-from lightning.data.constants import _INDEX_FILENAME, _TORCH_GREATER_EQUAL_2_1_0
-from lightning.data.streaming.downloader import get_downloader_cls
-from lightning.data.streaming.item_loader import BaseItemLoader, PyTreeLoader, TokensLoader
-from lightning.data.streaming.sampler import ChunkedIndex
-from lightning.data.streaming.serializers import Serializer
+from lit_data.constants import _INDEX_FILENAME, _TORCH_GREATER_EQUAL_2_1_0
+from lit_data.streaming.downloader import get_downloader_cls
+from lit_data.streaming.item_loader import BaseItemLoader, PyTreeLoader, TokensLoader
+from lit_data.streaming.sampler import ChunkedIndex
+from lit_data.streaming.serializers import Serializer

 if _TORCH_GREATER_EQUAL_2_1_0:
     from torch.utils._pytree import tree_unflatten, treespec_loads
18 changes: 9 additions & 9 deletions src/lightning/data/streaming/dataloader.py
@@ -33,16 +33,16 @@
 )
 from torch.utils.data.sampler import BatchSampler, Sampler

-from lightning.data.constants import _DEFAULT_CHUNK_BYTES, _TORCH_GREATER_EQUAL_2_1_0, _VIZ_TRACKER_AVAILABLE
-from lightning.data.streaming import Cache
-from lightning.data.streaming.combined import (
+from lit_data.constants import _DEFAULT_CHUNK_BYTES, _TORCH_GREATER_EQUAL_2_1_0, _VIZ_TRACKER_AVAILABLE
+from lit_data.streaming import Cache
+from lit_data.streaming.combined import (
     __NUM_SAMPLES_YIELDED_KEY__,
     __SAMPLES_KEY__,
     CombinedStreamingDataset,
 )
-from lightning.data.streaming.dataset import StreamingDataset
-from lightning.data.streaming.sampler import CacheBatchSampler
-from lightning.data.utilities.env import _DistributedEnv
+from lit_data.streaming.dataset import StreamingDataset
+from lit_data.streaming.sampler import CacheBatchSampler
+from lit_data.utilities.env import _DistributedEnv

 if _TORCH_GREATER_EQUAL_2_1_0:
     from torch.utils._pytree import tree_flatten
@@ -105,7 +105,7 @@ def __getitem__(self, index: int) -> Any:
         if not _equal_items(data_1, data2):
             raise ValueError(
                 f"Your dataset items aren't deterministic. Found {data_1} and {data2} for index {index}."
-                " HINT: Use the `lightning.data.cache.Cache` directly within your dataset."
+                " HINT: Use the `lit_data.cache.Cache` directly within your dataset."
             )
         self._is_deterministic = True
         self._cache[index] = data_1
@@ -180,7 +180,7 @@ def __call__(
     ) -> None:
         from torch.utils.data._utils import worker

-        from lightning.data.streaming.cache import Cache
+        from lit_data.streaming.cache import Cache

         enable_profiling = self._global_rank == 0 and worker_id == 0 and _VIZ_TRACKER_AVAILABLE and self._profile

@@ -481,7 +481,7 @@ def _try_put_index(self) -> None:
 class StreamingDataLoader(DataLoader):
     r"""The StreamingDataLoader combines a dataset and a sampler, and provides an iterable over the given dataset.

-    The :class:`~lightning.data.streaming.dataloader.StreamingDataLoader` supports either a
+    The :class:`~lit_data.streaming.dataloader.StreamingDataLoader` supports either a
     StreamingDataset and CombinedStreamingDataset datasets with single- or multi-process loading,
     customizing
     loading order and optional automatic batching (collation) and memory pinning.
16 changes: 8 additions & 8 deletions src/lightning/data/streaming/dataset.py
@@ -20,17 +20,17 @@
 import numpy as np
 from torch.utils.data import IterableDataset

-from lightning.data.constants import (
+from lit_data.constants import (
     _DEFAULT_CACHE_DIR,
     _INDEX_FILENAME,
 )
-from lightning.data.streaming import Cache
-from lightning.data.streaming.item_loader import BaseItemLoader
-from lightning.data.streaming.resolver import Dir, _resolve_dir
-from lightning.data.streaming.sampler import ChunkedIndex
-from lightning.data.streaming.serializers import Serializer
-from lightning.data.streaming.shuffle import FullShuffle, NoShuffle, Shuffle
-from lightning.data.utilities.env import _DistributedEnv, _is_in_dataloader_worker, _WorkerEnv
+from lit_data.streaming import Cache
+from lit_data.streaming.item_loader import BaseItemLoader
+from lit_data.streaming.resolver import Dir, _resolve_dir
+from lit_data.streaming.sampler import ChunkedIndex
+from lit_data.streaming.serializers import Serializer
+from lit_data.streaming.shuffle import FullShuffle, NoShuffle, Shuffle
+from lit_data.utilities.env import _DistributedEnv, _is_in_dataloader_worker, _WorkerEnv

 logger = Logger(__name__)

4 changes: 2 additions & 2 deletions src/lightning/data/streaming/downloader.py
@@ -19,8 +19,8 @@

 from filelock import FileLock, Timeout

-from lightning.data.constants import _INDEX_FILENAME
-from lightning.data.streaming.client import S3Client
+from lit_data.constants import _INDEX_FILENAME
+from lit_data.streaming.client import S3Client


 class Downloader(ABC):
4 changes: 2 additions & 2 deletions src/lightning/data/streaming/item_loader.py
@@ -19,11 +19,11 @@
 import numpy as np
 import torch

-from lightning.data.constants import (
+from lit_data.constants import (
     _TORCH_DTYPES_MAPPING,
     _TORCH_GREATER_EQUAL_2_1_0,
 )
-from lightning.data.streaming.serializers import Serializer
+from lit_data.streaming.serializers import Serializer

 if _TORCH_GREATER_EQUAL_2_1_0:
     from torch.utils._pytree import PyTree, tree_unflatten
12 changes: 6 additions & 6 deletions src/lightning/data/streaming/reader.py
@@ -20,12 +20,12 @@
 from threading import Thread
 from typing import Any, Dict, List, Optional, Tuple, Union

-from lightning.data.constants import _TORCH_GREATER_EQUAL_2_1_0
-from lightning.data.streaming.config import ChunksConfig
-from lightning.data.streaming.item_loader import BaseItemLoader, PyTreeLoader
-from lightning.data.streaming.sampler import ChunkedIndex
-from lightning.data.streaming.serializers import Serializer, _get_serializers
-from lightning.data.utilities.env import _DistributedEnv, _WorkerEnv
+from lit_data.constants import _TORCH_GREATER_EQUAL_2_1_0
+from lit_data.streaming.config import ChunksConfig
+from lit_data.streaming.item_loader import BaseItemLoader, PyTreeLoader
+from lit_data.streaming.sampler import ChunkedIndex
+from lit_data.streaming.serializers import Serializer, _get_serializers
+from lit_data.utilities.env import _DistributedEnv, _WorkerEnv

 warnings.filterwarnings("ignore", message=".*The given buffer is not writable.*")

2 changes: 1 addition & 1 deletion src/lightning/data/streaming/serializers.py
@@ -23,7 +23,7 @@
 import torch
 from lightning_utilities.core.imports import RequirementCache

-from lightning.data.constants import _NUMPY_DTYPES_MAPPING, _TORCH_DTYPES_MAPPING
+from lit_data.constants import _NUMPY_DTYPES_MAPPING, _TORCH_DTYPES_MAPPING

 _PIL_AVAILABLE = RequirementCache("PIL")
 _TORCH_VISION_AVAILABLE = RequirementCache("torchvision")
6 changes: 3 additions & 3 deletions src/lightning/data/streaming/shuffle.py
@@ -17,9 +17,9 @@

 import numpy as np

-from lightning.data.streaming import Cache
-from lightning.data.utilities.env import _DistributedEnv
-from lightning.data.utilities.shuffle import _associate_chunks_and_internals_to_ranks, _intra_node_chunk_shuffle
+from lit_data.streaming import Cache
+from lit_data.utilities.env import _DistributedEnv
+from lit_data.utilities.shuffle import _associate_chunks_and_internals_to_ranks, _intra_node_chunk_shuffle


 class Shuffle(ABC):
12 changes: 6 additions & 6 deletions src/lightning/data/streaming/writer.py
@@ -21,12 +21,12 @@
 import numpy as np
 import torch

-from lightning.data.constants import _INDEX_FILENAME, _TORCH_GREATER_EQUAL_2_1_0
-from lightning.data.processing.utilities import get_worker_rank
-from lightning.data.streaming.compression import _COMPRESSORS, Compressor
-from lightning.data.streaming.serializers import Serializer, _get_serializers
-from lightning.data.utilities.env import _DistributedEnv, _WorkerEnv
-from lightning.data.utilities.format import _convert_bytes_to_int, _human_readable_bytes
+from lit_data.constants import _INDEX_FILENAME, _TORCH_GREATER_EQUAL_2_1_0
+from lit_data.processing.utilities import get_worker_rank
+from lit_data.streaming.compression import _COMPRESSORS, Compressor
+from lit_data.streaming.serializers import Serializer, _get_serializers
+from lit_data.utilities.env import _DistributedEnv, _WorkerEnv
+from lit_data.utilities.format import _convert_bytes_to_int, _human_readable_bytes

 if _TORCH_GREATER_EQUAL_2_1_0:
     from torch.utils._pytree import PyTree, tree_flatten, treespec_dumps
2 changes: 1 addition & 1 deletion src/lightning/data/utilities/shuffle.py
@@ -2,7 +2,7 @@

 import numpy as np

-from lightning.data.utilities.env import _DistributedEnv
+from lit_data.utilities.env import _DistributedEnv


 def _intra_node_chunk_shuffle(
16 changes: 8 additions & 8 deletions tests/tests_data/processing/test_data_processor.py
@@ -10,9 +10,9 @@
 import pytest
 import torch
 from lightning import seed_everything
-from lightning.data.processing import data_processor as data_processor_module
-from lightning.data.processing import functions
-from lightning.data.processing.data_processor import (
+from lit_data.processing import data_processor as data_processor_module
+from lit_data.processing import functions
+from lit_data.processing.data_processor import (
     DataChunkRecipe,
     DataProcessor,
     DataTransformRecipe,
@@ -25,9 +25,9 @@
     _wait_for_disk_usage_higher_than_threshold,
     _wait_for_file_to_exist,
 )
-from lightning.data.processing.functions import LambdaDataTransformRecipe, map, optimize
-from lightning.data.streaming import resolver
-from lightning.data.streaming.cache import Cache, Dir
+from lit_data.processing.functions import LambdaDataTransformRecipe, map, optimize
+from lit_data.streaming import resolver
+from lit_data.streaming.cache import Cache, Dir
 from lightning_utilities.core.imports import RequirementCache

 _PIL_AVAILABLE = RequirementCache("PIL")
@@ -163,7 +163,7 @@ def fn(*_, **__):


 @pytest.mark.skipif(condition=sys.platform == "win32", reason="Not supported on windows")
-@mock.patch("lightning.data.processing.data_processor._wait_for_disk_usage_higher_than_threshold")
+@mock.patch("lit_data.processing.data_processor._wait_for_disk_usage_higher_than_threshold")
 def test_download_data_target(wait_for_disk_usage_higher_than_threshold_mock, tmpdir):
     input_dir = os.path.join(tmpdir, "input_dir")
     os.makedirs(input_dir, exist_ok=True)
@@ -202,7 +202,7 @@ def fn(*_, **__):

 def test_wait_for_disk_usage_higher_than_threshold():
     disk_usage_mock = mock.Mock(side_effect=[mock.Mock(free=10e9), mock.Mock(free=10e9), mock.Mock(free=10e11)])
-    with mock.patch("lightning.data.processing.data_processor.shutil.disk_usage", disk_usage_mock):
+    with mock.patch("lit_data.processing.data_processor.shutil.disk_usage", disk_usage_mock):
         _wait_for_disk_usage_higher_than_threshold("/", 10, sleep_time=0)
     assert disk_usage_mock.call_count == 3

4 changes: 2 additions & 2 deletions tests/tests_data/processing/test_functions.py
@@ -3,8 +3,8 @@
 from unittest import mock

 import pytest
-from lightning.data import walk
-from lightning.data.processing.functions import _get_input_dir
+from lit_data import walk
+from lit_data.processing.functions import _get_input_dir


 @pytest.mark.skipif(sys.platform == "win32", reason="currently not supported for windows.")
4 changes: 2 additions & 2 deletions tests/tests_data/processing/test_readers.py
@@ -2,8 +2,8 @@
 import sys

 import pytest
-from lightning.data import map
-from lightning.data.processing.readers import _PYARROW_AVAILABLE, BaseReader, ParquetReader
+from lit_data import map
+from lit_data.processing.readers import _PYARROW_AVAILABLE, BaseReader, ParquetReader


 class DummyReader(BaseReader):
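
With a rename of this breadth, it is worth checking that no stale references survive. A minimal sketch of such a check, assuming the `src/` and `tests/` layout used above (illustrative only, not part of the commit):

# Hypothetical post-rename check: list any remaining "lightning.data"
# references; exits non-zero if any are found.
import re
import sys
from pathlib import Path

pattern = re.compile(r"\blightning\.data\b")
leftovers = [
    (path, lineno, line.rstrip())
    for root in ("src", "tests")
    for path in Path(root).rglob("*.py")
    for lineno, line in enumerate(path.read_text().splitlines(), start=1)
    if pattern.search(line)
]
for path, lineno, line in leftovers:
    print(f"{path}:{lineno}: {line}")
sys.exit(1 if leftovers else 0)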