From accd450a761ac50db1834704872aa472dc304abe Mon Sep 17 00:00:00 2001
From: Мартынов Максим Сергеевич
Date: Thu, 17 Aug 2023 18:56:38 +0000
Subject: [PATCH 01/30] [DOP-8157] Bump version

---
 onetl/VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onetl/VERSION b/onetl/VERSION
index f374f6662..2003b639c 100644
--- a/onetl/VERSION
+++ b/onetl/VERSION
@@ -1 +1 @@
-0.9.1
+0.9.2

From 320dc8312d5e3b1d52d38a2b46b05eb0187c2c68 Mon Sep 17 00:00:00 2001
From: Мартынов Максим Сергеевич
Date: Thu, 17 Aug 2023 19:05:04 +0000
Subject: [PATCH 02/30] [DOP-8157] Don't use ThreadPool for list of 1 file

---
 onetl/file/file_downloader/file_downloader.py | 4 ++--
 onetl/file/file_mover/file_mover.py | 4 ++--
 onetl/file/file_uploader/file_uploader.py | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/onetl/file/file_downloader/file_downloader.py b/onetl/file/file_downloader/file_downloader.py
index ba7c9b18a..b4e7fa94e 100644
--- a/onetl/file/file_downloader/file_downloader.py
+++ b/onetl/file/file_downloader/file_downloader.py
@@ -714,12 +714,12 @@ def _bulk_download(
         self,
         to_download: DOWNLOAD_ITEMS_TYPE,
     ) -> list[tuple[FileDownloadStatus, PurePathProtocol | PathWithStatsProtocol]]:
-        workers = self.options.workers
+        workers = min(self.options.workers, len(to_download))
         result = []
 
         if workers > 1:
             with ThreadPoolExecutor(
-                max_workers=min(workers, len(to_download)),
+                max_workers=workers,
                 thread_name_prefix=self.__class__.__name__,
             ) as executor:
                 futures = [
diff --git a/onetl/file/file_mover/file_mover.py b/onetl/file/file_mover/file_mover.py
index 9c28632a3..f2a1076f9 100644
--- a/onetl/file/file_mover/file_mover.py
+++ b/onetl/file/file_mover/file_mover.py
@@ -511,12 +511,12 @@ def _bulk_move(
         self,
         to_move: MOVE_ITEMS_TYPE,
     ) -> list[tuple[FileMoveStatus, PurePathProtocol | PathWithStatsProtocol]]:
-        workers = self.options.workers
+        workers = min(self.options.workers, len(to_move))
         result = []
 
         if workers > 1:
             with ThreadPoolExecutor(
-                max_workers=min(workers, len(to_move)),
+                max_workers=workers,
                 thread_name_prefix=self.__class__.__name__,
             ) as executor:
                 futures = [
diff --git a/onetl/file/file_uploader/file_uploader.py b/onetl/file/file_uploader/file_uploader.py
index 811c27c5f..df152e501 100644
--- a/onetl/file/file_uploader/file_uploader.py
+++ b/onetl/file/file_uploader/file_uploader.py
@@ -539,12 +539,12 @@ def _bulk_upload(
         self,
         to_upload: UPLOAD_ITEMS_TYPE,
     ) -> list[tuple[FileUploadStatus, PurePathProtocol | PathWithStatsProtocol]]:
-        workers = self.options.workers
+        workers = min(self.options.workers, len(to_upload))
         result = []
 
         if workers > 1:
             with ThreadPoolExecutor(
-                max_workers=min(workers, len(to_upload)),
+                max_workers=workers,
                 thread_name_prefix=self.__class__.__name__,
             ) as executor:
                 futures = [

From 5dd8011c6d2053eaf5f7699576de75599be132dd Mon Sep 17 00:00:00 2001
From: Мартынов Максим Сергеевич
Date: Fri, 18 Aug 2023 08:02:51 +0000
Subject: [PATCH 03/30] [DOP-8157] Cover worker numbers detection with tests

---
 onetl/file/file_downloader/file_downloader.py | 19
++++++++++++++++--- onetl/file/file_mover/file_mover.py | 17 +++++++++++++++-- onetl/file/file_uploader/file_uploader.py | 17 +++++++++++++++-- .../test_file_downloader_integration.py | 16 ++++++++++++++-- .../test_file_mover_integration.py | 16 ++++++++++++++-- .../test_file_uploader_integration.py | 16 ++++++++++++++-- 6 files changed, 88 insertions(+), 13 deletions(-) diff --git a/onetl/file/file_downloader/file_downloader.py b/onetl/file/file_downloader/file_downloader.py index b4e7fa94e..d78729358 100644 --- a/onetl/file/file_downloader/file_downloader.py +++ b/onetl/file/file_downloader/file_downloader.py @@ -714,12 +714,24 @@ def _bulk_download( self, to_download: DOWNLOAD_ITEMS_TYPE, ) -> list[tuple[FileDownloadStatus, PurePathProtocol | PathWithStatsProtocol]]: - workers = min(self.options.workers, len(to_download)) + workers = self.options.workers + files_count = len(to_download) result = [] - if workers > 1: + real_workers = workers + if files_count < workers: + log.debug( + "|%s| Asked for %d workers, but there are only %d files", + self.__class__.__name__, + workers, + files_count, + ) + real_workers = files_count + + if real_workers > 1: + log.debug("|%s| Using ThreadPoolExecutor with %d workers", self.__class__.__name__, real_workers) with ThreadPoolExecutor( - max_workers=workers, + max_workers=real_workers, thread_name_prefix=self.__class__.__name__, ) as executor: futures = [ @@ -729,6 +741,7 @@ def _bulk_download( for future in as_completed(futures): result.append(future.result()) else: + log.debug("|%s| Using plain old for-loop", self.__class__.__name__) for source_file, target_file, tmp_file in to_download: result.append( self._download_file( diff --git a/onetl/file/file_mover/file_mover.py b/onetl/file/file_mover/file_mover.py index f2a1076f9..ce45cdb18 100644 --- a/onetl/file/file_mover/file_mover.py +++ b/onetl/file/file_mover/file_mover.py @@ -511,10 +511,22 @@ def _bulk_move( self, to_move: MOVE_ITEMS_TYPE, ) -> list[tuple[FileMoveStatus, PurePathProtocol | PathWithStatsProtocol]]: - workers = min(self.options.workers, len(to_move)) + workers = self.options.workers + files_count = len(to_move) result = [] - if workers > 1: + real_workers = workers + if files_count < workers: + log.debug( + "|%s| Asked for %d workers, but there are only %d files", + self.__class__.__name__, + workers, + files_count, + ) + real_workers = files_count + + if real_workers > 1: + log.debug("|%s| Using ThreadPoolExecutor with %d workers", self.__class__.__name__, real_workers) with ThreadPoolExecutor( max_workers=workers, thread_name_prefix=self.__class__.__name__, @@ -525,6 +537,7 @@ def _bulk_move( for future in as_completed(futures): result.append(future.result()) else: + log.debug("|%s| Using plain old for-loop", self.__class__.__name__) for source_file, target_file in to_move: result.append( self._move_file( diff --git a/onetl/file/file_uploader/file_uploader.py b/onetl/file/file_uploader/file_uploader.py index df152e501..34450cb3c 100644 --- a/onetl/file/file_uploader/file_uploader.py +++ b/onetl/file/file_uploader/file_uploader.py @@ -539,10 +539,22 @@ def _bulk_upload( self, to_upload: UPLOAD_ITEMS_TYPE, ) -> list[tuple[FileUploadStatus, PurePathProtocol | PathWithStatsProtocol]]: - workers = min(self.options.workers, len(to_upload)) + workers = self.options.workers + files_count = len(to_upload) result = [] - if workers > 1: + real_workers = workers + if files_count < workers: + log.debug( + "|%s| Asked for %d workers, but there are only %d files", + self.__class__.__name__, + 
workers, + files_count, + ) + real_workers = files_count + + if real_workers > 1: + log.debug("|%s| Using ThreadPoolExecutor with %d workers", self.__class__.__name__, real_workers) with ThreadPoolExecutor( max_workers=workers, thread_name_prefix=self.__class__.__name__, @@ -554,6 +566,7 @@ def _bulk_upload( for future in as_completed(futures): result.append(future.result()) else: + log.debug("|%s| Using plain old for-loop", self.__class__.__name__) for local_file, target_file, tmp_file in to_upload: result.append( self._upload_file( diff --git a/tests/tests_integration/tests_core_integration/test_file_downloader_integration.py b/tests/tests_integration/tests_core_integration/test_file_downloader_integration.py index 86f7dc95d..352483ec2 100644 --- a/tests/tests_integration/tests_core_integration/test_file_downloader_integration.py +++ b/tests/tests_integration/tests_core_integration/test_file_downloader_integration.py @@ -53,13 +53,14 @@ def test_file_downloader_view_file(file_connection_with_path_and_files): [str, Path], ids=["run_path_type str", "run_path_type Path"], ) -@pytest.mark.parametrize("workers", [1, 3]) +@pytest.mark.parametrize("workers", [1, 3, 20]) def test_file_downloader_run( file_connection_with_path_and_files, path_type, run_path_type, tmp_path_factory, workers, + caplog, ): file_connection, remote_path, uploaded_files = file_connection_with_path_and_files local_path = tmp_path_factory.mktemp("local_path") @@ -73,7 +74,18 @@ def test_file_downloader_run( ), ) - download_result = downloader.run() + with caplog.at_level(logging.DEBUG): + download_result = downloader.run() + + files_count = len(uploaded_files) + real_workers = min(workers, files_count) + if 1 <= files_count < workers: + assert f"|FileDownloader| Asked for {workers} workers, but there are only {real_workers} files" in caplog.text + + if real_workers > 1: + assert f"|FileDownloader| Using ThreadPoolExecutor with {real_workers} workers" in caplog.text + else: + assert "|FileDownloader| Using plain old for-loop" in caplog.text assert not download_result.failed assert not download_result.skipped diff --git a/tests/tests_integration/tests_core_integration/test_file_mover_integration.py b/tests/tests_integration/tests_core_integration/test_file_mover_integration.py index a3aeb32fb..fc71fa01d 100644 --- a/tests/tests_integration/tests_core_integration/test_file_mover_integration.py +++ b/tests/tests_integration/tests_core_integration/test_file_mover_integration.py @@ -35,12 +35,13 @@ def test_file_mover_view_file(file_connection_with_path_and_files): @pytest.mark.parametrize("path_type", [str, PurePosixPath], ids=["path_type str", "path_type PurePosixPath"]) -@pytest.mark.parametrize("workers", [1, 3]) +@pytest.mark.parametrize("workers", [1, 3, 20]) def test_file_mover_run( request, file_connection_with_path_and_files, path_type, workers, + caplog, ): file_connection, source_path, uploaded_files = file_connection_with_path_and_files target_path = f"/tmp/test_move_{secrets.token_hex(5)}" @@ -68,7 +69,18 @@ def finalizer(): files_content[file_path] = file_connection.read_bytes(file_path) files_size[file_path] = file_connection.get_stat(file_path).st_size - move_result = mover.run() + with caplog.at_level(logging.DEBUG): + move_result = mover.run() + + files_count = len(uploaded_files) + real_workers = min(workers, files_count) + if 1 <= files_count < workers: + assert f"|FileMover| Asked for {workers} workers, but there are only {real_workers} files" in caplog.text + + if real_workers > 1: + assert f"|FileMover| 
Using ThreadPoolExecutor with {real_workers} workers" in caplog.text + else: + assert "|FileMover| Using plain old for-loop" in caplog.text assert not move_result.failed assert not move_result.skipped diff --git a/tests/tests_integration/tests_core_integration/test_file_uploader_integration.py b/tests/tests_integration/tests_core_integration/test_file_uploader_integration.py index cd52202fd..3e283062c 100644 --- a/tests/tests_integration/tests_core_integration/test_file_uploader_integration.py +++ b/tests/tests_integration/tests_core_integration/test_file_uploader_integration.py @@ -41,7 +41,7 @@ def test_file_uploader_view_files(file_connection, file_connection_resource_path [str, Path], ids=["run_path_type str", "run_path_type Path"], ) -@pytest.mark.parametrize("workers", [1, 3]) +@pytest.mark.parametrize("workers", [1, 3, 20]) def test_file_uploader_run_with_files( request, file_connection, @@ -49,6 +49,7 @@ def test_file_uploader_run_with_files( run_path_type, path_type, workers, + caplog, ): target_path = path_type(f"/tmp/test_upload_{secrets.token_hex(5)}") test_files = file_connection_test_files @@ -67,7 +68,18 @@ def finalizer(): ), ) - upload_result = uploader.run(run_path_type(file) for file in test_files) + with caplog.at_level(logging.DEBUG): + upload_result = uploader.run(run_path_type(file) for file in test_files) + + files_count = len(test_files) + real_workers = min(workers, files_count) + if 1 <= files_count < workers: + assert f"|FileUploader| Asked for {workers} workers, but there are only {real_workers} files" in caplog.text + + if real_workers > 1: + assert f"|FileUploader| Using ThreadPoolExecutor with {real_workers} workers" in caplog.text + else: + assert "|FileUploader| Using plain old for-loop" in caplog.text assert not upload_result.failed assert not upload_result.missing From b6db78d76834e0571ea3876c7fc1341150aed7c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Fri, 18 Aug 2023 08:10:00 +0000 Subject: [PATCH 04/30] [DOP-8157] Test ThreadPoolExecutors are not used with no files --- .../test_file_downloader_integration.py | 25 ++++++---- .../test_file_mover_integration.py | 25 ++++++---- .../test_file_uploader_integration.py | 46 +++++++++++-------- 3 files changed, 61 insertions(+), 35 deletions(-) diff --git a/tests/tests_integration/tests_core_integration/test_file_downloader_integration.py b/tests/tests_integration/tests_core_integration/test_file_downloader_integration.py index 352483ec2..ed290ab43 100644 --- a/tests/tests_integration/tests_core_integration/test_file_downloader_integration.py +++ b/tests/tests_integration/tests_core_integration/test_file_downloader_integration.py @@ -78,14 +78,14 @@ def test_file_downloader_run( download_result = downloader.run() files_count = len(uploaded_files) - real_workers = min(workers, files_count) if 1 <= files_count < workers: - assert f"|FileDownloader| Asked for {workers} workers, but there are only {real_workers} files" in caplog.text + assert f"Asked for {workers} workers, but there are only {files_count} files" in caplog.text - if real_workers > 1: - assert f"|FileDownloader| Using ThreadPoolExecutor with {real_workers} workers" in caplog.text + if workers > 1 and files_count > 1: + real_workers = min(workers, files_count) + assert f"Using ThreadPoolExecutor with {real_workers} workers" in caplog.text else: - assert "|FileDownloader| Using 
plain old for-loop" in caplog.text + assert "Using plain old for-loop" in caplog.text assert not download_result.failed assert not download_result.skipped @@ -384,6 +384,7 @@ def test_file_downloader_run_with_empty_files_input( file_connection_with_path_and_files, pass_source_path, tmp_path_factory, + caplog, ): file_connection, remote_path, _ = file_connection_with_path_and_files local_path = tmp_path_factory.mktemp("local_path") @@ -394,7 +395,11 @@ def test_file_downloader_run_with_empty_files_input( source_path=remote_path if pass_source_path else None, ) - download_result = downloader.run([]) # this argument takes precedence + with caplog.at_level(logging.INFO): + download_result = downloader.run([]) # argument takes precedence over source_path content + + assert "No files to download!" in caplog.text + assert "Starting the download process" not in caplog.text assert not download_result.failed assert not download_result.skipped @@ -402,7 +407,7 @@ def test_file_downloader_run_with_empty_files_input( assert not download_result.successful -def test_file_downloader_run_with_empty_source_path(request, file_connection_with_path, tmp_path_factory): +def test_file_downloader_run_with_empty_source_path(request, file_connection_with_path, tmp_path_factory, caplog): file_connection, remote_path = file_connection_with_path remote_path = PurePosixPath(f"/tmp/test_download_{secrets.token_hex(5)}") @@ -423,7 +428,11 @@ def finalizer(): source_path=remote_path, ) - download_result = downloader.run() + with caplog.at_level(logging.INFO): + download_result = downloader.run() + + assert "No files to download!" in caplog.text + assert "Starting the download process" not in caplog.text assert not download_result.failed assert not download_result.skipped diff --git a/tests/tests_integration/tests_core_integration/test_file_mover_integration.py b/tests/tests_integration/tests_core_integration/test_file_mover_integration.py index fc71fa01d..762e9b9aa 100644 --- a/tests/tests_integration/tests_core_integration/test_file_mover_integration.py +++ b/tests/tests_integration/tests_core_integration/test_file_mover_integration.py @@ -73,14 +73,14 @@ def finalizer(): move_result = mover.run() files_count = len(uploaded_files) - real_workers = min(workers, files_count) if 1 <= files_count < workers: - assert f"|FileMover| Asked for {workers} workers, but there are only {real_workers} files" in caplog.text + assert f"Asked for {workers} workers, but there are only {files_count} files" in caplog.text - if real_workers > 1: - assert f"|FileMover| Using ThreadPoolExecutor with {real_workers} workers" in caplog.text + if workers > 1 and files_count > 1: + real_workers = min(workers, files_count) + assert f"Using ThreadPoolExecutor with {real_workers} workers" in caplog.text else: - assert "|FileMover| Using plain old for-loop" in caplog.text + assert "Using plain old for-loop" in caplog.text assert not move_result.failed assert not move_result.skipped @@ -356,6 +356,7 @@ def test_file_mover_run_with_empty_files_input( request, file_connection_with_path_and_files, pass_source_path, + caplog, ): file_connection, source_path, _ = file_connection_with_path_and_files target_path = f"/tmp/test_move_{secrets.token_hex(5)}" @@ -371,7 +372,11 @@ def finalizer(): source_path=source_path if pass_source_path else None, ) - move_result = mover.run([]) # this argument takes precedence + with caplog.at_level(logging.INFO): + move_result = mover.run([]) # argument takes precedence over source_path content + + assert "No files to move!" 
in caplog.text + assert "Starting the moving process" not in caplog.text assert not move_result.failed assert not move_result.skipped @@ -379,7 +384,7 @@ def finalizer(): assert not move_result.successful -def test_file_mover_run_with_empty_source_path(request, file_connection): +def test_file_mover_run_with_empty_source_path(request, file_connection, caplog): source_path = PurePosixPath(f"/tmp/test_move_{secrets.token_hex(5)}") file_connection.create_dir(source_path) @@ -405,7 +410,11 @@ def finalizer2(): source_path=source_path, ) - move_result = mover.run() + with caplog.at_level(logging.INFO): + move_result = mover.run() + + assert "No files to move!" in caplog.text + assert "Starting the moving process" not in caplog.text assert not move_result.failed assert not move_result.skipped diff --git a/tests/tests_integration/tests_core_integration/test_file_uploader_integration.py b/tests/tests_integration/tests_core_integration/test_file_uploader_integration.py index 3e283062c..522cf2dd4 100644 --- a/tests/tests_integration/tests_core_integration/test_file_uploader_integration.py +++ b/tests/tests_integration/tests_core_integration/test_file_uploader_integration.py @@ -72,14 +72,14 @@ def finalizer(): upload_result = uploader.run(run_path_type(file) for file in test_files) files_count = len(test_files) - real_workers = min(workers, files_count) if 1 <= files_count < workers: - assert f"|FileUploader| Asked for {workers} workers, but there are only {real_workers} files" in caplog.text + assert f"Asked for {workers} workers, but there are only {files_count} files" in caplog.text - if real_workers > 1: - assert f"|FileUploader| Using ThreadPoolExecutor with {real_workers} workers" in caplog.text + if workers > 1 and files_count > 1: + real_workers = min(workers, files_count) + assert f"Using ThreadPoolExecutor with {real_workers} workers" in caplog.text else: - assert "|FileUploader| Using plain old for-loop" in caplog.text + assert "Using plain old for-loop" in caplog.text assert not upload_result.failed assert not upload_result.missing @@ -529,25 +529,29 @@ def test_file_uploader_run_input_is_not_file(file_connection): [False, True], ids=["Without local_path", "With local_path"], ) -def test_file_uploader_run_with_empty_files(file_connection, pass_local_path, tmp_path_factory): +def test_file_uploader_run_with_empty_files(file_connection, pass_local_path, tmp_path_factory, caplog): target_path = PurePosixPath(f"/tmp/test_upload_{secrets.token_hex(5)}") local_path = tmp_path_factory.mktemp("local_path") - downloader = FileUploader( + uploader = FileUploader( connection=file_connection, target_path=target_path, local_path=local_path if pass_local_path else None, ) - download_result = downloader.run([]) + with caplog.at_level(logging.INFO): + upload_result = uploader.run([]) # argument takes precedence over source_path content + + assert "No files to upload!" 
in caplog.text + assert "Starting the upload process" not in caplog.text - assert not download_result.failed - assert not download_result.skipped - assert not download_result.missing - assert not download_result.successful + assert not upload_result.failed + assert not upload_result.skipped + assert not upload_result.missing + assert not upload_result.successful -def test_file_uploader_run_with_empty_local_path(request, file_connection, tmp_path_factory): +def test_file_uploader_run_with_empty_local_path(request, file_connection, tmp_path_factory, caplog): target_path = PurePosixPath(f"/tmp/test_upload_{secrets.token_hex(5)}") local_path = tmp_path_factory.mktemp("local_path") @@ -556,18 +560,22 @@ def finalizer(): request.addfinalizer(finalizer) - downloader = FileUploader( + uploader = FileUploader( connection=file_connection, target_path=target_path, local_path=local_path, ) - download_result = downloader.run() + with caplog.at_level(logging.INFO): + upload_result = uploader.run() + + assert "No files to upload!" in caplog.text + assert "Starting the upload process" not in caplog.text - assert not download_result.failed - assert not download_result.skipped - assert not download_result.missing - assert not download_result.successful + assert not upload_result.failed + assert not upload_result.skipped + assert not upload_result.missing + assert not upload_result.successful def test_file_uploader_without_files_and_without_local_path(file_connection): From d0d6985e66092721da776a9bb3de8945eefd4576 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Fri, 18 Aug 2023 08:23:18 +0000 Subject: [PATCH 05/30] [DOP-8140] Split base DBConnection classes to submodules --- onetl/base/__init__.py | 2 +- onetl/base/base_db_connection.py | 243 +++++++++--------- .../connection/db_connection/db_connection.py | 159 ------------ .../db_connection/db_connection/__init__.py | 16 ++ .../db_connection/db_connection/connection.py | 56 ++++ .../db_connection/db_connection/dialect.py | 126 +++++++++ onetl/connection/db_connection/greenplum.py | 4 +- onetl/connection/db_connection/hive.py | 3 +- .../db_connection/jdbc_connection.py | 4 +- .../connection/db_connection/kafka/dialect.py | 5 +- onetl/connection/db_connection/mongodb.py | 3 +- onetl/connection/db_connection/postgres.py | 4 +- 12 files changed, 335 insertions(+), 290 deletions(-) delete mode 100644 onetl/connection/db_connection/db_connection.py create mode 100644 onetl/connection/db_connection/db_connection/__init__.py create mode 100644 onetl/connection/db_connection/db_connection/connection.py create mode 100644 onetl/connection/db_connection/db_connection/dialect.py diff --git a/onetl/base/__init__.py b/onetl/base/__init__.py index d418c33cc..c46eb5e27 100644 --- a/onetl/base/__init__.py +++ b/onetl/base/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. 
from onetl.base.base_connection import BaseConnection -from onetl.base.base_db_connection import BaseDBConnection +from onetl.base.base_db_connection import BaseDBConnection, BaseDBDialect from onetl.base.base_file_connection import BaseFileConnection from onetl.base.base_file_df_connection import ( BaseFileDFConnection, diff --git a/onetl/base/base_db_connection.py b/onetl/base/base_db_connection.py index cb6b92a5f..56a7c5ae5 100644 --- a/onetl/base/base_db_connection.py +++ b/onetl/base/base_db_connection.py @@ -27,131 +27,134 @@ from pyspark.sql.types import StructType +class BaseDBDialect(ABC): + """ + Collection of methods used for validating input values before passing them to read_source_as_df/write_df_to_target + """ + + @classmethod + @abstractmethod + def validate_name(cls, connection: BaseDBConnection, value: Table) -> Table: + """Check if ``source`` or ``target`` value is valid. + + Raises + ------ + TypeError + If value type is invalid + ValueError + If value is invalid + """ + + @classmethod + @abstractmethod + def validate_columns(cls, connection: BaseDBConnection, columns: list[str] | None) -> list[str] | None: + """Check if ``columns`` value is valid. + + Raises + ------ + TypeError + If value type is invalid + ValueError + If value is invalid + """ + + @classmethod + @abstractmethod + def validate_hwm_column( + cls, + connection: BaseDBConnection, + hwm_column: str | None, + ) -> str | None: + """Check if ``hwm_column`` value is valid. + + Raises + ------ + TypeError + If value type is invalid + ValueError + If value is invalid + """ + + @classmethod + @abstractmethod + def validate_df_schema(cls, connection: BaseDBConnection, df_schema: StructType | None) -> StructType | None: + """Check if ``df_schema`` value is valid. + + Raises + ------ + TypeError + If value type is invalid + ValueError + If value is invalid + """ + + @classmethod + @abstractmethod + def validate_where(cls, connection: BaseDBConnection, where: Any) -> Any | None: + """Check if ``where`` value is valid. + + Raises + ------ + TypeError + If value type is invalid + ValueError + If value is invalid + """ + + @classmethod + @abstractmethod + def validate_hint(cls, connection: BaseDBConnection, hint: Any) -> Any | None: + """Check if ``hint`` value is valid. + + Raises + ------ + TypeError + If value type is invalid + ValueError + If value is invalid + """ + + @classmethod + @abstractmethod + def validate_hwm_expression(cls, connection: BaseDBConnection, value: Any) -> str | None: + """Check if ``hwm_expression`` value is valid. 
+ + Raises + ------ + TypeError + If value type is invalid + ValueError + If value is invalid + """ + + @classmethod + @abstractmethod + def _merge_conditions(cls, conditions: list[Any]) -> Any: + """ + Convert multiple WHERE conditions to one + """ + + @classmethod + @abstractmethod + def _expression_with_alias(cls, expression: Any, alias: str) -> Any: + """ + Return "expression AS alias" statement + """ + + @classmethod + @abstractmethod + def _get_compare_statement(cls, comparator: Callable, arg1: Any, arg2: Any) -> Any: + """ + Return "arg1 COMPARATOR arg2" statement + """ + + class BaseDBConnection(BaseConnection): """ Implements generic methods for reading and writing dataframe from/to database-like source """ - class Dialect(ABC): - """ - Collection of methods used for validating input values before passing them to read_source_as_df/write_df_to_target - """ - - @classmethod - @abstractmethod - def validate_name(cls, connection: BaseDBConnection, value: Table) -> Table: - """Check if ``source`` or ``target`` value is valid. - - Raises - ------ - TypeError - If value type is invalid - ValueError - If value is invalid - """ - - @classmethod - @abstractmethod - def validate_columns(cls, connection: BaseDBConnection, columns: list[str] | None) -> list[str] | None: - """Check if ``columns`` value is valid. - - Raises - ------ - TypeError - If value type is invalid - ValueError - If value is invalid - """ - - @classmethod - @abstractmethod - def validate_hwm_column( - cls, - connection: BaseDBConnection, - hwm_column: str | None, - ) -> str | None: - """Check if ``hwm_column`` value is valid. - - Raises - ------ - TypeError - If value type is invalid - ValueError - If value is invalid - """ - - @classmethod - @abstractmethod - def validate_df_schema(cls, connection: BaseDBConnection, df_schema: StructType | None) -> StructType | None: - """Check if ``df_schema`` value is valid. - - Raises - ------ - TypeError - If value type is invalid - ValueError - If value is invalid - """ - - @classmethod - @abstractmethod - def validate_where(cls, connection: BaseDBConnection, where: Any) -> Any | None: - """Check if ``where`` value is valid. - - Raises - ------ - TypeError - If value type is invalid - ValueError - If value is invalid - """ - - @classmethod - @abstractmethod - def validate_hint(cls, connection: BaseDBConnection, hint: Any) -> Any | None: - """Check if ``hint`` value is valid. - - Raises - ------ - TypeError - If value type is invalid - ValueError - If value is invalid - """ - - @classmethod - @abstractmethod - def validate_hwm_expression(cls, connection: BaseDBConnection, value: Any) -> str | None: - """Check if ``hwm_expression`` value is valid. 
- - Raises - ------ - TypeError - If value type is invalid - ValueError - If value is invalid - """ - - @classmethod - @abstractmethod - def _merge_conditions(cls, conditions: list[Any]) -> Any: - """ - Convert multiple WHERE conditions to one - """ - - @classmethod - @abstractmethod - def _expression_with_alias(cls, expression: Any, alias: str) -> Any: - """ - Return "expression AS alias" statement - """ - - @classmethod - @abstractmethod - def _get_compare_statement(cls, comparator: Callable, arg1: Any, arg2: Any) -> Any: - """ - Return "arg1 COMPARATOR arg2" statement - """ + Dialect = BaseDBDialect @property @abstractmethod diff --git a/onetl/connection/db_connection/db_connection.py b/onetl/connection/db_connection/db_connection.py deleted file mode 100644 index e86c989ff..000000000 --- a/onetl/connection/db_connection/db_connection.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright 2023 MTS (Mobile Telesystems) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import annotations - -import operator -from datetime import date, datetime -from logging import getLogger -from typing import TYPE_CHECKING, Any, Callable, ClassVar, Dict - -from pydantic import Field - -from onetl._util.spark import try_import_pyspark -from onetl.base import BaseDBConnection -from onetl.hwm import Statement -from onetl.impl import FrozenModel -from onetl.log import log_with_indent - -if TYPE_CHECKING: - from pyspark.sql import SparkSession - -log = getLogger(__name__) - - -class DBConnection(BaseDBConnection, FrozenModel): - spark: SparkSession = Field(repr=False) - - class Dialect(BaseDBConnection.Dialect): - @classmethod - def _expression_with_alias(cls, expression: str, alias: str) -> str: - return f"{expression} AS {alias}" - - @classmethod - def _get_compare_statement(cls, comparator: Callable, arg1: Any, arg2: Any) -> Any: - template = cls._compare_statements[comparator] - return template.format(arg1, cls._serialize_datetime_value(arg2)) - - @classmethod - def _merge_conditions(cls, conditions: list[Any]) -> Any: - if len(conditions) == 1: - return conditions[0] - - return " AND ".join(f"({item})" for item in conditions) - - @classmethod - def _condition_assembler( - cls, - condition: Any, - start_from: Statement | None, - end_at: Statement | None, - ) -> Any: - conditions = [condition] - - if start_from: - condition1 = cls._get_compare_statement( - comparator=start_from.operator, - arg1=start_from.expression, - arg2=start_from.value, - ) - conditions.append(condition1) - - if end_at: - condition2 = cls._get_compare_statement( - comparator=end_at.operator, - arg1=end_at.expression, - arg2=end_at.value, - ) - conditions.append(condition2) - - result: list[Any] = list(filter(None, conditions)) - if not result: - return None - - return cls._merge_conditions(result) - - _compare_statements: ClassVar[Dict[Callable, str]] = { - operator.ge: "{} >= {}", - operator.gt: "{} > {}", - operator.le: "{} <= {}", - operator.lt: "{} < {}", - operator.eq: "{} == {}", - operator.ne: "{} != {}", - } - - 
@classmethod - def _serialize_datetime_value(cls, value: Any) -> str | int | dict: - """ - Transform the value into an SQL Dialect-supported form. - """ - - if isinstance(value, datetime): - return cls._get_datetime_value_sql(value) - - if isinstance(value, date): - return cls._get_date_value_sql(value) - - return str(value) - - @classmethod - def _get_datetime_value_sql(cls, value: datetime) -> str: - """ - Transform the datetime value into supported by SQL Dialect - """ - result = value.isoformat() - return repr(result) - - @classmethod - def _get_date_value_sql(cls, value: date) -> str: - """ - Transform the date value into supported by SQL Dialect - """ - result = value.isoformat() - return repr(result) - - @classmethod - def _get_max_value_sql(cls, value: Any) -> str: - """ - Generate `MAX(value)` clause for given value - """ - result = cls._serialize_datetime_value(value) - return f"MAX({result})" - - @classmethod - def _get_min_value_sql(cls, value: Any) -> str: - """ - Generate `MIN(value)` clause for given value - """ - result = cls._serialize_datetime_value(value) - return f"MIN({result})" - - @classmethod - def _forward_refs(cls) -> dict[str, type]: - try_import_pyspark() - - from pyspark.sql import SparkSession # noqa: WPS442 - - # avoid importing pyspark unless user called the constructor, - # as we allow user to use `Connection.get_packages()` for creating Spark session - refs = super()._forward_refs() - refs["SparkSession"] = SparkSession - return refs - - def _log_parameters(self): - log.info("|Spark| Using connection parameters:") - log_with_indent(log, "type = %s", self.__class__.__name__) - parameters = self.dict(exclude_none=True, exclude={"spark"}) - for attr, value in sorted(parameters.items()): - log_with_indent(log, "%s = %r", attr, value) diff --git a/onetl/connection/db_connection/db_connection/__init__.py b/onetl/connection/db_connection/db_connection/__init__.py new file mode 100644 index 000000000..9eb19e84f --- /dev/null +++ b/onetl/connection/db_connection/db_connection/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from onetl.connection.db_connection.db_connection.connection import DBConnection +from onetl.connection.db_connection.db_connection.dialect import DBDialect diff --git a/onetl/connection/db_connection/db_connection/connection.py b/onetl/connection/db_connection/db_connection/connection.py new file mode 100644 index 000000000..14c5d4f9d --- /dev/null +++ b/onetl/connection/db_connection/db_connection/connection.py @@ -0,0 +1,56 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from logging import getLogger +from typing import TYPE_CHECKING + +from pydantic import Field + +from onetl._util.spark import try_import_pyspark +from onetl.base import BaseDBConnection +from onetl.connection.db_connection.db_connection.dialect import DBDialect +from onetl.impl import FrozenModel +from onetl.log import log_with_indent + +if TYPE_CHECKING: + from pyspark.sql import SparkSession + +log = getLogger(__name__) + + +class DBConnection(BaseDBConnection, FrozenModel): + spark: SparkSession = Field(repr=False) + + Dialect = DBDialect + + @classmethod + def _forward_refs(cls) -> dict[str, type]: + try_import_pyspark() + + from pyspark.sql import SparkSession # noqa: WPS442 + + # avoid importing pyspark unless user called the constructor, + # as we allow user to use `Connection.get_packages()` for creating Spark session + refs = super()._forward_refs() + refs["SparkSession"] = SparkSession + return refs + + def _log_parameters(self): + log.info("|Spark| Using connection parameters:") + log_with_indent(log, "type = %s", self.__class__.__name__) + parameters = self.dict(exclude_none=True, exclude={"spark"}) + for attr, value in sorted(parameters.items()): + log_with_indent(log, "%s = %r", attr, value) diff --git a/onetl/connection/db_connection/db_connection/dialect.py b/onetl/connection/db_connection/db_connection/dialect.py new file mode 100644 index 000000000..0d118fbee --- /dev/null +++ b/onetl/connection/db_connection/db_connection/dialect.py @@ -0,0 +1,126 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import operator +from datetime import date, datetime +from typing import Any, Callable, ClassVar, Dict + +from onetl.base import BaseDBDialect +from onetl.hwm import Statement + + +class DBDialect(BaseDBDialect): + _compare_statements: ClassVar[Dict[Callable, str]] = { + operator.ge: "{} >= {}", + operator.gt: "{} > {}", + operator.le: "{} <= {}", + operator.lt: "{} < {}", + operator.eq: "{} == {}", + operator.ne: "{} != {}", + } + + @classmethod + def _expression_with_alias(cls, expression: str, alias: str) -> str: + return f"{expression} AS {alias}" + + @classmethod + def _get_compare_statement(cls, comparator: Callable, arg1: Any, arg2: Any) -> Any: + template = cls._compare_statements[comparator] + return template.format(arg1, cls._serialize_datetime_value(arg2)) + + @classmethod + def _merge_conditions(cls, conditions: list[Any]) -> Any: + if len(conditions) == 1: + return conditions[0] + + return " AND ".join(f"({item})" for item in conditions) + + @classmethod + def _condition_assembler( + cls, + condition: Any, + start_from: Statement | None, + end_at: Statement | None, + ) -> Any: + conditions = [condition] + + if start_from: + condition1 = cls._get_compare_statement( + comparator=start_from.operator, + arg1=start_from.expression, + arg2=start_from.value, + ) + conditions.append(condition1) + + if end_at: + condition2 = cls._get_compare_statement( + comparator=end_at.operator, + arg1=end_at.expression, + arg2=end_at.value, + ) + conditions.append(condition2) + + result: list[Any] = list(filter(None, conditions)) + if not result: + return None + + return cls._merge_conditions(result) + + @classmethod + def _serialize_datetime_value(cls, value: Any) -> str | int | dict: + """ + Transform the value into an SQL Dialect-supported form. 
+ """ + + if isinstance(value, datetime): + return cls._get_datetime_value_sql(value) + + if isinstance(value, date): + return cls._get_date_value_sql(value) + + return str(value) + + @classmethod + def _get_datetime_value_sql(cls, value: datetime) -> str: + """ + Transform the datetime value into supported by SQL Dialect + """ + result = value.isoformat() + return repr(result) + + @classmethod + def _get_date_value_sql(cls, value: date) -> str: + """ + Transform the date value into supported by SQL Dialect + """ + result = value.isoformat() + return repr(result) + + @classmethod + def _get_max_value_sql(cls, value: Any) -> str: + """ + Generate `MAX(value)` clause for given value + """ + result = cls._serialize_datetime_value(value) + return f"MAX({result})" + + @classmethod + def _get_min_value_sql(cls, value: Any) -> str: + """ + Generate `MIN(value)` clause for given value + """ + result = cls._serialize_datetime_value(value) + return f"MIN({result})" diff --git a/onetl/connection/db_connection/greenplum.py b/onetl/connection/db_connection/greenplum.py index 0986ac333..41e709323 100644 --- a/onetl/connection/db_connection/greenplum.py +++ b/onetl/connection/db_connection/greenplum.py @@ -32,7 +32,7 @@ from onetl._util.scala import get_default_scala_version from onetl._util.spark import get_executor_total_cores, get_spark_version from onetl._util.version import Version -from onetl.connection.db_connection.db_connection import DBConnection +from onetl.connection.db_connection.db_connection import DBConnection, DBDialect from onetl.connection.db_connection.dialect_mixins import ( SupportColumnsList, SupportDfSchemaNone, @@ -476,7 +476,7 @@ class Dialect( # noqa: WPS215 SupportHintNone, SupportHWMExpressionStr, SupportHWMColumnStr, - DBConnection.Dialect, + DBDialect, ): @classmethod def _get_datetime_value_sql(cls, value: datetime) -> str: diff --git a/onetl/connection/db_connection/hive.py b/onetl/connection/db_connection/hive.py index d82d05142..fe80d0c35 100644 --- a/onetl/connection/db_connection/hive.py +++ b/onetl/connection/db_connection/hive.py @@ -27,6 +27,7 @@ from onetl._internal import clear_statement, get_sql_query from onetl._util.spark import inject_spark_param from onetl.connection.db_connection.db_connection import DBConnection +from onetl.connection.db_connection.db_connection.dialect import DBDialect from onetl.connection.db_connection.dialect_mixins import ( SupportColumnsList, SupportDfSchemaNone, @@ -569,7 +570,7 @@ class Dialect( # noqa: WPS215 SupportHintStr, SupportHWMExpressionStr, SupportHWMColumnStr, - DBConnection.Dialect, + DBDialect, ): pass diff --git a/onetl/connection/db_connection/jdbc_connection.py b/onetl/connection/db_connection/jdbc_connection.py index 4554e6660..296f1766c 100644 --- a/onetl/connection/db_connection/jdbc_connection.py +++ b/onetl/connection/db_connection/jdbc_connection.py @@ -25,7 +25,7 @@ from pydantic import Field, PositiveInt, root_validator from onetl._internal import clear_statement, get_sql_query, to_camel -from onetl.connection.db_connection.db_connection import DBConnection +from onetl.connection.db_connection.db_connection import DBConnection, DBDialect from onetl.connection.db_connection.dialect_mixins import ( SupportColumnsList, SupportDfSchemaNone, @@ -162,7 +162,7 @@ class Dialect( # noqa: WPS215 SupportHintStr, SupportHWMExpressionStr, SupportHWMColumnStr, - DBConnection.Dialect, + DBDialect, ): pass diff --git a/onetl/connection/db_connection/kafka/dialect.py b/onetl/connection/db_connection/kafka/dialect.py index 
a8f4baff9..d6cc9bf56 100644 --- a/onetl/connection/db_connection/kafka/dialect.py +++ b/onetl/connection/db_connection/kafka/dialect.py @@ -18,7 +18,8 @@ import logging from onetl._util.spark import get_spark_version -from onetl.connection.db_connection.db_connection import BaseDBConnection, DBConnection +from onetl.base import BaseDBConnection +from onetl.connection.db_connection.db_connection.dialect import DBDialect from onetl.connection.db_connection.dialect_mixins import ( SupportColumnsNone, SupportDfSchemaNone, @@ -38,7 +39,7 @@ class KafkaDialect( # noqa: WPS215 SupportWhereNone, SupportTableWithoutDBSchema, SupportHWMExpressionNone, - DBConnection.Dialect, + DBDialect, ): valid_hwm_columns = {"offset", "timestamp"} diff --git a/onetl/connection/db_connection/mongodb.py b/onetl/connection/db_connection/mongodb.py index 1bb95bbd1..629cd0ce4 100644 --- a/onetl/connection/db_connection/mongodb.py +++ b/onetl/connection/db_connection/mongodb.py @@ -33,6 +33,7 @@ from onetl._util.version import Version from onetl.base.base_db_connection import BaseDBConnection from onetl.connection.db_connection.db_connection import DBConnection +from onetl.connection.db_connection.db_connection.dialect import DBDialect from onetl.connection.db_connection.dialect_mixins import ( SupportColumnsNone, SupportDfSchemaStruct, @@ -515,7 +516,7 @@ class Dialect( # noqa: WPS215 SupportColumnsNone, SupportDfSchemaStruct, SupportHWMColumnStr, - DBConnection.Dialect, + DBDialect, ): _compare_statements: ClassVar[Dict[Callable, str]] = { operator.ge: "$gte", diff --git a/onetl/connection/db_connection/postgres.py b/onetl/connection/db_connection/postgres.py index aee317974..b425c61ea 100644 --- a/onetl/connection/db_connection/postgres.py +++ b/onetl/connection/db_connection/postgres.py @@ -19,7 +19,7 @@ from typing import ClassVar from onetl._util.classproperty import classproperty -from onetl.connection.db_connection.db_connection import DBConnection +from onetl.connection.db_connection.db_connection.dialect import DBDialect from onetl.connection.db_connection.dialect_mixins import ( SupportColumnsList, SupportDfSchemaNone, @@ -166,7 +166,7 @@ class Dialect( # noqa: WPS215 SupportHWMExpressionStr, SupportHWMColumnStr, SupportHintNone, - DBConnection.Dialect, + DBDialect, ): @classmethod def _get_datetime_value_sql(cls, value: datetime) -> str: From 1eeb0be96c6e035ce49486824e4416c4a5c32ec5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Fri, 18 Aug 2023 09:47:54 +0000 Subject: [PATCH 06/30] [DOP-8140] Split FileDownloader.Options to separated submodule --- docs/file/file_downloader/file_downloader.rst | 8 +-- docs/file/file_downloader/index.rst | 3 +- docs/file/file_downloader/options.rst | 11 ++++ .../{download_result.rst => result.rst} | 8 +-- docs/file/file_mover/file_mover.rst | 9 +-- docs/file/file_mover/index.rst | 3 +- docs/file/file_mover/options.rst | 11 ++++ .../{move_result.rst => result.rst} | 8 +-- docs/file/file_uploader/file_uploader.rst | 9 +-- docs/file/file_uploader/index.rst | 3 +- docs/file/file_uploader/options.rst | 11 ++++ .../{upload_result.rst => result.rst} | 8 +-- onetl/file/file_downloader/__init__.py | 3 +- onetl/file/file_downloader/file_downloader.py | 52 ++------------- onetl/file/file_downloader/options.py | 64 +++++++++++++++++++ .../{download_result.py => result.py} | 0 onetl/file/file_mover/__init__.py | 3 +- 
onetl/file/file_mover/file_mover.py | 46 ++----------- onetl/file/file_mover/options.py | 57 +++++++++++++++++ .../file_mover/{move_result.py => result.py} | 0 onetl/file/file_uploader/__init__.py | 3 +- onetl/file/file_uploader/file_uploader.py | 53 ++------------- onetl/file/file_uploader/options.py | 64 +++++++++++++++++++ .../{upload_result.py => result.py} | 0 24 files changed, 264 insertions(+), 173 deletions(-) create mode 100644 docs/file/file_downloader/options.rst rename docs/file/file_downloader/{download_result.rst => result.rst} (74%) create mode 100644 docs/file/file_mover/options.rst rename docs/file/file_mover/{move_result.rst => result.rst} (77%) create mode 100644 docs/file/file_uploader/options.rst rename docs/file/file_uploader/{upload_result.rst => result.rst} (75%) create mode 100644 onetl/file/file_downloader/options.py rename onetl/file/file_downloader/{download_result.py => result.py} (100%) create mode 100644 onetl/file/file_mover/options.py rename onetl/file/file_mover/{move_result.py => result.py} (100%) create mode 100644 onetl/file/file_uploader/options.py rename onetl/file/file_uploader/{upload_result.py => result.py} (100%) diff --git a/docs/file/file_downloader/file_downloader.rst b/docs/file/file_downloader/file_downloader.rst index e1d807443..6c8728f1b 100644 --- a/docs/file/file_downloader/file_downloader.rst +++ b/docs/file/file_downloader/file_downloader.rst @@ -8,13 +8,9 @@ File Downloader .. autosummary:: FileDownloader - FileDownloader.Options + FileDownloader.run + FileDownloader.view_files .. autoclass:: FileDownloader :members: run, view_files - -.. currentmodule:: onetl.file.file_downloader.file_downloader.FileDownloader - -.. autopydantic_model:: Options - :members: mode, delete_source, workers :member-order: bysource diff --git a/docs/file/file_downloader/index.rst b/docs/file/file_downloader/index.rst index cf7276529..b20d859ea 100644 --- a/docs/file/file_downloader/index.rst +++ b/docs/file/file_downloader/index.rst @@ -8,4 +8,5 @@ File Downloader :caption: File Downloader file_downloader - download_result + options + result diff --git a/docs/file/file_downloader/options.rst b/docs/file/file_downloader/options.rst new file mode 100644 index 000000000..8fa37e613 --- /dev/null +++ b/docs/file/file_downloader/options.rst @@ -0,0 +1,11 @@ +.. _file-downloader-options: + +File Downloader Options +======================= + +.. currentmodule:: onetl.file.file_downloader.options + +.. autopydantic_model:: FileDownloaderOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/file/file_downloader/download_result.rst b/docs/file/file_downloader/result.rst similarity index 74% rename from docs/file/file_downloader/download_result.rst rename to docs/file/file_downloader/result.rst index 3d1a9d4f5..8fd20e9df 100644 --- a/docs/file/file_downloader/download_result.rst +++ b/docs/file/file_downloader/result.rst @@ -1,9 +1,9 @@ -.. _download-result: +.. _file-downloader-result: -Download result -============== +File Downloader Result +====================== -.. currentmodule:: onetl.file.file_downloader.download_result +.. currentmodule:: onetl.file.file_downloader.result .. 
autoclass:: DownloadResult :members: successful, failed, skipped, missing, successful_count, failed_count, skipped_count, missing_count, total_count, successful_size, failed_size, skipped_size, total_size, raise_if_failed, reraise_failed, raise_if_missing, raise_if_skipped, raise_if_empty, is_empty, raise_if_contains_zero_size, details, summary, dict, json diff --git a/docs/file/file_mover/file_mover.rst b/docs/file/file_mover/file_mover.rst index 6e12bb9b4..4c191ba94 100644 --- a/docs/file/file_mover/file_mover.rst +++ b/docs/file/file_mover/file_mover.rst @@ -1,7 +1,7 @@ .. _file-mover: File Mover -============== +========== .. currentmodule:: onetl.file.file_mover.file_mover @@ -10,14 +10,7 @@ File Mover FileMover FileMover.run FileMover.view_files - FileMover.Options .. autoclass:: FileMover :members: run, view_files :member-order: bysource - -.. currentmodule:: onetl.file.file_mover.file_mover.FileMover - -.. autopydantic_model:: Options - :members: mode, workers - :member-order: bysource diff --git a/docs/file/file_mover/index.rst b/docs/file/file_mover/index.rst index c0ca8a19d..e28f6316f 100644 --- a/docs/file/file_mover/index.rst +++ b/docs/file/file_mover/index.rst @@ -8,4 +8,5 @@ File Mover :caption: File Mover file_mover - move_result + options + result diff --git a/docs/file/file_mover/options.rst b/docs/file/file_mover/options.rst new file mode 100644 index 000000000..743ae5dd0 --- /dev/null +++ b/docs/file/file_mover/options.rst @@ -0,0 +1,11 @@ +.. _file-mover-options: + +File Mover Options +================== + +.. currentmodule:: onetl.file.file_mover.options + +.. autopydantic_model:: FileMoverOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/file/file_mover/move_result.rst b/docs/file/file_mover/result.rst similarity index 77% rename from docs/file/file_mover/move_result.rst rename to docs/file/file_mover/result.rst index c2c4581fe..d4ea950f3 100644 --- a/docs/file/file_mover/move_result.rst +++ b/docs/file/file_mover/result.rst @@ -1,9 +1,9 @@ -.. _move-result: +.. _file-mover-result: -Move result -============== +File Mover Result +================= -.. currentmodule:: onetl.file.file_mover.move_result +.. currentmodule:: onetl.file.file_mover.result .. autoclass:: MoveResult :members: successful, failed, skipped, missing, successful_count, failed_count, skipped_count, missing_count, total_count, successful_size, failed_size, skipped_size, total_size, raise_if_failed, reraise_failed, raise_if_missing, raise_if_skipped, raise_if_empty, is_empty, raise_if_contains_zero_size, details, summary, dict, json diff --git a/docs/file/file_uploader/file_uploader.rst b/docs/file/file_uploader/file_uploader.rst index 3f13c44b2..6f00c7dc6 100644 --- a/docs/file/file_uploader/file_uploader.rst +++ b/docs/file/file_uploader/file_uploader.rst @@ -1,7 +1,7 @@ .. _file-uploader: File Uploader -============== +============= .. currentmodule:: onetl.file.file_uploader.file_uploader @@ -10,14 +10,7 @@ File Uploader FileUploader FileUploader.run FileUploader.view_files - FileUploader.Options .. autoclass:: FileUploader :members: run, view_files :member-order: bysource - -.. currentmodule:: onetl.file.file_uploader.file_uploader.FileUploader - -.. 
autopydantic_model:: Options - :members: mode, delete_local, workers - :member-order: bysource diff --git a/docs/file/file_uploader/index.rst b/docs/file/file_uploader/index.rst index e12c65b20..d65c83e42 100644 --- a/docs/file/file_uploader/index.rst +++ b/docs/file/file_uploader/index.rst @@ -8,4 +8,5 @@ File Uploader :caption: File Uploader file_uploader - upload_result + options + result diff --git a/docs/file/file_uploader/options.rst b/docs/file/file_uploader/options.rst new file mode 100644 index 000000000..b0e614b53 --- /dev/null +++ b/docs/file/file_uploader/options.rst @@ -0,0 +1,11 @@ +.. _file-uploader-options: + +File Uploader Options +===================== + +.. currentmodule:: onetl.file.file_uploader.options + +.. autopydantic_model:: FileUploaderOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/file/file_uploader/upload_result.rst b/docs/file/file_uploader/result.rst similarity index 75% rename from docs/file/file_uploader/upload_result.rst rename to docs/file/file_uploader/result.rst index 9c7b189d6..af20ace14 100644 --- a/docs/file/file_uploader/upload_result.rst +++ b/docs/file/file_uploader/result.rst @@ -1,9 +1,9 @@ -.. _upload-result: +.. _file-uploader-result: -Upload result -============== +File Uploader Result +==================== -.. currentmodule:: onetl.file.file_uploader.upload_result +.. currentmodule:: onetl.file.file_uploader.result .. autoclass:: UploadResult :members: successful, failed, skipped, missing, successful_count, failed_count, skipped_count, missing_count, total_count, successful_size, failed_size, skipped_size, total_size, raise_if_failed, reraise_failed, raise_if_missing, raise_if_skipped, raise_if_empty, is_empty, raise_if_contains_zero_size, details, summary, dict, json diff --git a/onetl/file/file_downloader/__init__.py b/onetl/file/file_downloader/__init__.py index f0e741e55..232bbccb7 100644 --- a/onetl/file/file_downloader/__init__.py +++ b/onetl/file/file_downloader/__init__.py @@ -12,5 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from onetl.file.file_downloader.download_result import DownloadResult from onetl.file.file_downloader.file_downloader import FileDownloader +from onetl.file.file_downloader.options import FileDownloaderOptions +from onetl.file.file_downloader.result import DownloadResult diff --git a/onetl/file/file_downloader/file_downloader.py b/onetl/file/file_downloader/file_downloader.py index d78729358..eb067f9a1 100644 --- a/onetl/file/file_downloader/file_downloader.py +++ b/onetl/file/file_downloader/file_downloader.py @@ -24,13 +24,14 @@ from etl_entities import HWM, FileHWM, RemoteFolder from ordered_set import OrderedSet -from pydantic import Field, root_validator, validator +from pydantic import Field, validator from onetl._internal import generate_temp_path from onetl.base import BaseFileConnection, BaseFileFilter, BaseFileLimit from onetl.base.path_protocol import PathProtocol, PathWithStatsProtocol from onetl.base.pure_path_protocol import PurePathProtocol -from onetl.file.file_downloader.download_result import DownloadResult +from onetl.file.file_downloader.options import FileDownloaderOptions +from onetl.file.file_downloader.result import DownloadResult from onetl.file.file_set import FileSet from onetl.file.filter.file_hwm import FileHWMFilter from onetl.hooks import slot, support_hooks @@ -39,7 +40,6 @@ FailedRemoteFile, FileExistBehavior, FrozenModel, - GenericOptions, LocalPath, RemoteFile, RemotePath, @@ -209,48 +209,6 @@ class FileDownloader(FrozenModel): """ - class Options(GenericOptions): - """File downloading options""" - - if_exists: FileExistBehavior = Field(default=FileExistBehavior.ERROR, alias="mode") - """ - How to handle existing files in the local directory. - - Possible values: - * ``error`` (default) - do nothing, mark file as failed - * ``ignore`` - do nothing, mark file as ignored - * ``overwrite`` - replace existing file with a new one - * ``delete_all`` - delete local directory content before downloading files - """ - - delete_source: bool = False - """ - If ``True``, remove source file after successful download. - - If download failed, file will left intact. - """ - - workers: int = Field(default=1, ge=1) - """ - Number of workers to create for parallel file download. - - 1 (default) means files will me downloaded sequentially. - 2 or more means files will be downloaded in parallel workers. - - Recommended value is ``min(32, os.cpu_count() + 4)``, e.g. ``5``. - """ - - @root_validator(pre=True) - def mode_is_deprecated(cls, values): - if "mode" in values: - warnings.warn( - "Option `FileDownloader.Options(mode=...)` is deprecated since v0.9.0 and will be removed in v1.0.0. " - "Use `FileDownloader.Options(if_exists=...)` instead", - category=UserWarning, - stacklevel=3, - ) - return values - connection: BaseFileConnection local_path: LocalPath @@ -262,7 +220,9 @@ def mode_is_deprecated(cls, values): hwm_type: Optional[Type[FileHWM]] = None - options: Options = Options() + options: FileDownloaderOptions = FileDownloaderOptions() + + Options = FileDownloaderOptions @slot def run(self, files: Iterable[str | os.PathLike] | None = None) -> DownloadResult: # noqa: WPS231 diff --git a/onetl/file/file_downloader/options.py b/onetl/file/file_downloader/options.py new file mode 100644 index 000000000..9ec44ce52 --- /dev/null +++ b/onetl/file/file_downloader/options.py @@ -0,0 +1,64 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import warnings
+
+from pydantic import Field, root_validator
+
+from onetl.impl import FileExistBehavior, GenericOptions
+
+
+class FileDownloaderOptions(GenericOptions):
+    """File downloading options"""
+
+    if_exists: FileExistBehavior = Field(default=FileExistBehavior.ERROR, alias="mode")
+    """
+    How to handle existing files in the local directory.
+
+    Possible values:
+        * ``error`` (default) - do nothing, mark file as failed
+        * ``ignore`` - do nothing, mark file as ignored
+        * ``overwrite`` - replace existing file with a new one
+        * ``delete_all`` - delete local directory content before downloading files
+    """
+
+    delete_source: bool = False
+    """
+    If ``True``, remove source file after successful download.
+
+    If download failed, the file will be left intact.
+    """
+
+    workers: int = Field(default=1, ge=1)
+    """
+    Number of workers to create for parallel file download.
+
+    1 (default) means files will be downloaded sequentially.
+    2 or more means files will be downloaded in parallel workers.
+
+    Recommended value is ``min(32, os.cpu_count() + 4)``, e.g. ``5``.
+    """
+
+    @root_validator(pre=True)
+    def _mode_is_deprecated(cls, values):
+        if "mode" in values:
+            warnings.warn(
+                "Option `FileDownloader.Options(mode=...)` is deprecated since v0.9.0 and will be removed in v1.0.0. "
+                "Use `FileDownloader.Options(if_exists=...)` instead",
+                category=UserWarning,
+                stacklevel=3,
+            )
+        return values
diff --git a/onetl/file/file_downloader/download_result.py b/onetl/file/file_downloader/result.py
similarity index 100%
rename from onetl/file/file_downloader/download_result.py
rename to onetl/file/file_downloader/result.py
diff --git a/onetl/file/file_mover/__init__.py b/onetl/file/file_mover/__init__.py
index acfa16899..fa416ec6f 100644
--- a/onetl/file/file_mover/__init__.py
+++ b/onetl/file/file_mover/__init__.py
@@ -13,4 +13,5 @@
 # limitations under the License.
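The extracted ``FileDownloaderOptions`` keeps the same fields as the old nested ``FileDownloader.Options``, which now survives only as an alias (see the ``Options = FileDownloaderOptions`` assignment in ``file_downloader.py`` above). A minimal usage sketch under assumed inputs; the SFTP credentials, paths and worker count below are illustrative and not part of this patch:

.. code:: python

    from onetl.connection import SFTP
    from onetl.file import FileDownloader
    from onetl.file.file_downloader import FileDownloaderOptions

    # assumed, purely illustrative connection
    sftp = SFTP(host="example.org", user="user", password="***")

    downloader = FileDownloader(
        connection=sftp,
        source_path="/remote/data",
        local_path="/tmp/data",
        options=FileDownloaderOptions(
            if_exists="overwrite",  # replace already-downloaded files
            delete_source=True,     # drop remote files after a successful download
            workers=4,              # parallel downloads; min(32, os.cpu_count() + 4) is the recommended ceiling
        ),
    )
    result = downloader.run()

    # the legacy spelling still works, but the root_validator above emits a UserWarning
    legacy = FileDownloader.Options(mode="overwrite")

Keeping the nested ``Options`` name as an alias avoids breaking imports in existing pipelines, while the standalone class gets its own documentation page via the new ``options.rst`` stubs shown earlier.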
from onetl.file.file_mover.file_mover import FileMover -from onetl.file.file_mover.move_result import MoveResult +from onetl.file.file_mover.options import FileMoverOptions +from onetl.file.file_mover.result import MoveResult diff --git a/onetl/file/file_mover/file_mover.py b/onetl/file/file_mover/file_mover.py index ce45cdb18..633ce0814 100644 --- a/onetl/file/file_mover/file_mover.py +++ b/onetl/file/file_mover/file_mover.py @@ -16,25 +16,24 @@ import logging import os -import warnings from concurrent.futures import ThreadPoolExecutor, as_completed from enum import Enum from typing import Iterable, List, Optional, Tuple from ordered_set import OrderedSet -from pydantic import Field, root_validator, validator +from pydantic import Field, validator from onetl.base import BaseFileConnection, BaseFileFilter, BaseFileLimit from onetl.base.path_protocol import PathProtocol, PathWithStatsProtocol from onetl.base.pure_path_protocol import PurePathProtocol -from onetl.file.file_mover.move_result import MoveResult +from onetl.file.file_mover.options import FileMoverOptions +from onetl.file.file_mover.result import MoveResult from onetl.file.file_set import FileSet from onetl.hooks import slot, support_hooks from onetl.impl import ( FailedRemoteFile, FileExistBehavior, FrozenModel, - GenericOptions, RemoteFile, RemotePath, path_repr, @@ -153,41 +152,6 @@ class FileMover(FrozenModel): """ - class Options(GenericOptions): - """File moving options""" - - if_exists: FileExistBehavior = Field(default=FileExistBehavior.ERROR, alias="mode") - """ - How to handle existing files in the local directory. - - Possible values: - * ``error`` (default) - do nothing, mark file as failed - * ``ignore`` - do nothing, mark file as ignored - * ``overwrite`` - replace existing file with a new one - * ``delete_all`` - delete directory content before moving files - """ - - workers: int = Field(default=1, ge=1) - """ - Number of workers to create for parallel file moving. - - 1 (default) means files will me moved sequentially. - 2 or more means files will be moved in parallel workers. - - Recommended value is ``min(32, os.cpu_count() + 4)``, e.g. ``5``. - """ - - @root_validator(pre=True) - def mode_is_deprecated(cls, values): - if "mode" in values: - warnings.warn( - "Option `FileMover.Options(mode=...)` is deprecated since v0.9.0 and will be removed in v1.0.0. " - "Use `FileMover.Options(if_exists=...)` instead", - category=UserWarning, - stacklevel=3, - ) - return values - connection: BaseFileConnection target_path: RemotePath @@ -196,7 +160,9 @@ def mode_is_deprecated(cls, values): filters: List[BaseFileFilter] = Field(default_factory=list) limits: List[BaseFileLimit] = Field(default_factory=list) - options: Options = Options() + options: FileMoverOptions = FileMoverOptions() + + Options = FileMoverOptions @slot def run(self, files: Iterable[str | os.PathLike] | None = None) -> MoveResult: # noqa: WPS231 diff --git a/onetl/file/file_mover/options.py b/onetl/file/file_mover/options.py new file mode 100644 index 000000000..912c0ae1b --- /dev/null +++ b/onetl/file/file_mover/options.py @@ -0,0 +1,57 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import warnings
+
+from pydantic import Field, root_validator
+
+from onetl.impl import FileExistBehavior, GenericOptions
+
+
+class FileMoverOptions(GenericOptions):
+    """File moving options"""
+
+    if_exists: FileExistBehavior = Field(default=FileExistBehavior.ERROR, alias="mode")
+    """
+    How to handle existing files in the target directory.
+
+    Possible values:
+        * ``error`` (default) - do nothing, mark file as failed
+        * ``ignore`` - do nothing, mark file as ignored
+        * ``overwrite`` - replace existing file with a new one
+        * ``delete_all`` - delete directory content before moving files
+    """
+
+    workers: int = Field(default=1, ge=1)
+    """
+    Number of workers to create for parallel file moving.
+
+    1 (default) means files will be moved sequentially.
+    2 or more means files will be moved in parallel workers.
+
+    Recommended value is ``min(32, os.cpu_count() + 4)``, e.g. ``5``.
+    """
+
+    @root_validator(pre=True)
+    def _mode_is_deprecated(cls, values):
+        if "mode" in values:
+            warnings.warn(
+                "Option `FileMover.Options(mode=...)` is deprecated since v0.9.0 and will be removed in v1.0.0. "
+                "Use `FileMover.Options(if_exists=...)` instead",
+                category=UserWarning,
+                stacklevel=3,
+            )
+        return values
diff --git a/onetl/file/file_mover/move_result.py b/onetl/file/file_mover/result.py
similarity index 100%
rename from onetl/file/file_mover/move_result.py
rename to onetl/file/file_mover/result.py
diff --git a/onetl/file/file_uploader/__init__.py b/onetl/file/file_uploader/__init__.py
index cfcf797c3..7f24451f7 100644
--- a/onetl/file/file_uploader/__init__.py
+++ b/onetl/file/file_uploader/__init__.py
@@ -13,4 +13,5 @@
 # limitations under the License.
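``FileMoverOptions`` follows the same split, and the deprecation path for the old ``mode`` argument is handled by the ``root_validator`` above rather than by the mover itself. A short sketch of both compatibility guarantees, purely illustrative:

.. code:: python

    import warnings

    from onetl.file import FileMover
    from onetl.file.file_mover import FileMoverOptions

    # the nested class is now just an alias, so old type references keep working
    assert FileMover.Options is FileMoverOptions

    # `mode` is still accepted through the field alias, but warns about `if_exists`
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        options = FileMoverOptions(mode="overwrite", workers=2)

    assert options.if_exists.value == "overwrite"
    assert any("deprecated" in str(warning.message) for warning in caught)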
from onetl.file.file_uploader.file_uploader import FileUploader -from onetl.file.file_uploader.upload_result import UploadResult +from onetl.file.file_uploader.options import FileUploaderOptions +from onetl.file.file_uploader.result import UploadResult diff --git a/onetl/file/file_uploader/file_uploader.py b/onetl/file/file_uploader/file_uploader.py index 34450cb3c..4fa32c670 100644 --- a/onetl/file/file_uploader/file_uploader.py +++ b/onetl/file/file_uploader/file_uploader.py @@ -16,13 +16,12 @@ import logging import os -import warnings from concurrent.futures import ThreadPoolExecutor, as_completed from enum import Enum from typing import Iterable, Optional, Tuple from ordered_set import OrderedSet -from pydantic import Field, root_validator, validator +from pydantic import validator from onetl._internal import generate_temp_path from onetl.base import BaseFileConnection @@ -30,13 +29,13 @@ from onetl.base.pure_path_protocol import PurePathProtocol from onetl.exception import DirectoryNotFoundError, NotAFileError from onetl.file.file_set import FileSet -from onetl.file.file_uploader.upload_result import UploadResult +from onetl.file.file_uploader.options import FileUploaderOptions +from onetl.file.file_uploader.result import UploadResult from onetl.hooks import slot, support_hooks from onetl.impl import ( FailedLocalFile, FileExistBehavior, FrozenModel, - GenericOptions, LocalPath, RemotePath, path_repr, @@ -142,48 +141,6 @@ class FileUploader(FrozenModel): """ - class Options(GenericOptions): - """File uploading options""" - - if_exists: FileExistBehavior = Field(default=FileExistBehavior.ERROR, alias="mode") - """ - How to handle existing files in the target directory. - - Possible values: - * ``error`` (default) - do nothing, mark file as failed - * ``ignore`` - do nothing, mark file as ignored - * ``overwrite`` - replace existing file with a new one - * ``delete_all`` - delete local directory content before downloading files - """ - - delete_local: bool = False - """ - If ``True``, remove local file after successful download. - - If download failed, file will left intact. - """ - - workers: int = Field(default=1, ge=1) - """ - Number of workers to create for parallel file upload. - - 1 (default) means files will me uploaded sequentially. - 2 or more means files will be uploaded in parallel workers. - - Recommended value is ``min(32, os.cpu_count() + 4)``, e.g. ``5``. - """ - - @root_validator(pre=True) - def mode_is_deprecated(cls, values): - if "mode" in values: - warnings.warn( - "Option `FileUploader.Options(mode=...)` is deprecated since v0.9.0 and will be removed in v1.0.0. " - "Use `FileUploader.Options(if_exists=...)` instead", - category=UserWarning, - stacklevel=3, - ) - return values - connection: BaseFileConnection target_path: RemotePath @@ -191,7 +148,9 @@ def mode_is_deprecated(cls, values): local_path: Optional[LocalPath] = None temp_path: Optional[RemotePath] = None - options: Options = Options() + options: FileUploaderOptions = FileUploaderOptions() + + Options = FileUploaderOptions @slot def run(self, files: Iterable[str | os.PathLike] | None = None) -> UploadResult: diff --git a/onetl/file/file_uploader/options.py b/onetl/file/file_uploader/options.py new file mode 100644 index 000000000..e3dd78bb3 --- /dev/null +++ b/onetl/file/file_uploader/options.py @@ -0,0 +1,64 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import warnings
+
+from pydantic import Field, root_validator
+
+from onetl.impl import FileExistBehavior, GenericOptions
+
+
+class FileUploaderOptions(GenericOptions):
+    """File uploading options"""
+
+    if_exists: FileExistBehavior = Field(default=FileExistBehavior.ERROR, alias="mode")
+    """
+    How to handle existing files in the target directory.
+
+    Possible values:
+        * ``error`` (default) - do nothing, mark file as failed
+        * ``ignore`` - do nothing, mark file as ignored
+        * ``overwrite`` - replace existing file with a new one
+        * ``delete_all`` - delete target directory content before uploading files
+    """
+
+    delete_local: bool = False
+    """
+    If ``True``, remove local file after successful upload.
+
+    If upload failed, the file will be left intact.
+    """
+
+    workers: int = Field(default=1, ge=1)
+    """
+    Number of workers to create for parallel file upload.
+
+    1 (default) means files will be uploaded sequentially.
+    2 or more means files will be uploaded in parallel workers.
+
+    Recommended value is ``min(32, os.cpu_count() + 4)``, e.g. ``5``.
+    """
+
+    @root_validator(pre=True)
+    def _mode_is_deprecated(cls, values):
+        if "mode" in values:
+            warnings.warn(
+                "Option `FileUploader.Options(mode=...)` is deprecated since v0.9.0 and will be removed in v1.0.0. "
+                "Use `FileUploader.Options(if_exists=...)` instead",
+                category=UserWarning,
+                stacklevel=3,
+            )
+        return values
diff --git a/onetl/file/file_uploader/upload_result.py b/onetl/file/file_uploader/result.py
similarity index 100%
rename from onetl/file/file_uploader/upload_result.py
rename to onetl/file/file_uploader/result.py

From 0b978e5c494827e8ef8a8e7568315a0893bbbef3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?=
 =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?=
 =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?=
Date: Fri, 18 Aug 2023 09:57:02 +0000
Subject: [PATCH 07/30] [DOP-8140] Split JDBCConnection.Dialect and Options to
 separated submodule

---
 onetl/connection/db_connection/clickhouse.py | 35 +-
 onetl/connection/db_connection/greenplum.py | 23 +-
 .../db_connection/jdbc_connection.py | 906 ------------------
 .../db_connection/jdbc_connection/__init__.py | 22 +
 .../jdbc_connection/connection.py | 379 ++++++++
 .../db_connection/jdbc_connection/dialect.py | 39 +
 .../db_connection/jdbc_connection/options.py | 523 ++++++++++
 .../db_connection/jdbc_mixin/__init__.py | 19 +
 .../connection.py} | 79 +-
 .../db_connection/jdbc_mixin/options.py | 65 ++
 onetl/connection/db_connection/mssql.py | 26 +-
 onetl/connection/db_connection/mysql.py | 28 +-
 onetl/connection/db_connection/oracle.py | 36 +-
 onetl/connection/db_connection/postgres.py | 30 +-
 onetl/connection/db_connection/teradata.py | 26 +-
 setup.cfg | 2 +-
 .../test_clickhouse_reader_integration.py | 10 +-
 .../test_kafka_reader_integration.py | 2 +-
 .../test_mssql_reader_integration.py | 10 +-
 .../test_mysql_reader_integration.py | 10 +-
 .../test_oracle_reader_integration.py | 10 +-
 .../test_postgres_reader_integration.py | 10 +-
 .../test_db_options_unit.py | 2 +-
.../test_jdbc_options_unit.py | 4 +- 24 files changed, 1267 insertions(+), 1029 deletions(-) delete mode 100644 onetl/connection/db_connection/jdbc_connection.py create mode 100644 onetl/connection/db_connection/jdbc_connection/__init__.py create mode 100644 onetl/connection/db_connection/jdbc_connection/connection.py create mode 100644 onetl/connection/db_connection/jdbc_connection/dialect.py create mode 100644 onetl/connection/db_connection/jdbc_connection/options.py create mode 100644 onetl/connection/db_connection/jdbc_mixin/__init__.py rename onetl/connection/db_connection/{jdbc_mixin.py => jdbc_mixin/connection.py} (91%) create mode 100644 onetl/connection/db_connection/jdbc_mixin/options.py diff --git a/onetl/connection/db_connection/clickhouse.py b/onetl/connection/db_connection/clickhouse.py index 12a1e5126..defa63eb1 100644 --- a/onetl/connection/db_connection/clickhouse.py +++ b/onetl/connection/db_connection/clickhouse.py @@ -19,10 +19,19 @@ from datetime import date, datetime from typing import ClassVar, Optional +from deprecated import deprecated + from onetl._util.classproperty import classproperty from onetl.connection.db_connection.jdbc_connection import JDBCConnection -from onetl.connection.db_connection.jdbc_mixin import StatementType +from onetl.connection.db_connection.jdbc_connection.dialect import JDBCDialect +from onetl.connection.db_connection.jdbc_connection.options import ( + JDBCReadOptions, + JDBCWriteOptions, +) +from onetl.connection.db_connection.jdbc_mixin import JDBCStatementType +from onetl.connection.db_connection.jdbc_mixin.options import JDBCOptions from onetl.hooks import slot, support_hooks +from onetl.impl import GenericOptions # do not import PySpark here, as we allow user to use `Clickhouse.get_packages()` for creating Spark session @@ -30,6 +39,11 @@ log = logging.getLogger(__name__) +class ClickhouseExtra(GenericOptions): + class Config: + extra = "allow" + + @support_hooks class Clickhouse(JDBCConnection): """Clickhouse JDBC connection. |support_hooks| @@ -125,6 +139,9 @@ class Clickhouse(JDBCConnection): port: int = 8123 database: Optional[str] = None + extra: ClickhouseExtra = ClickhouseExtra() + + Extra = ClickhouseExtra DRIVER: ClassVar[str] = "ru.yandex.clickhouse.ClickHouseDriver" @@ -163,7 +180,7 @@ def jdbc_url(self) -> str: return f"jdbc:clickhouse://{self.host}:{self.port}?{parameters}".rstrip("?") - class Dialect(JDBCConnection.Dialect): + class Dialect(JDBCDialect): @classmethod def _get_datetime_value_sql(cls, value: datetime) -> str: result = value.strftime("%Y-%m-%d %H:%M:%S") @@ -174,7 +191,7 @@ def _get_date_value_sql(cls, value: date) -> str: result = value.strftime("%Y-%m-%d") return f"CAST('{result}' AS Date)" - class ReadOptions(JDBCConnection.ReadOptions): + class ReadOptions(JDBCReadOptions): @classmethod def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: return f"modulo(halfMD5({partition_column}), {num_partitions})" @@ -183,12 +200,20 @@ def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: return f"{partition_column} % {num_partitions}" - ReadOptions.__doc__ = JDBCConnection.ReadOptions.__doc__ + @deprecated( + version="0.5.0", + reason="Please use 'ReadOptions' or 'WriteOptions' class instead. 
Will be removed in v1.0.0", + action="always", + category=UserWarning, + ) + class Options(ReadOptions, JDBCWriteOptions): + class Config: + prohibited_options = JDBCOptions.Config.prohibited_options @staticmethod def _build_statement( statement: str, - statement_type: StatementType, + statement_type: JDBCStatementType, jdbc_connection, statement_args, ): diff --git a/onetl/connection/db_connection/greenplum.py b/onetl/connection/db_connection/greenplum.py index 41e709323..369d75503 100644 --- a/onetl/connection/db_connection/greenplum.py +++ b/onetl/connection/db_connection/greenplum.py @@ -43,6 +43,7 @@ SupportWhereStr, ) from onetl.connection.db_connection.jdbc_mixin import JDBCMixin +from onetl.connection.db_connection.jdbc_mixin.options import JDBCOptions from onetl.exception import MISSING_JVM_CLASS_MSG, TooManyParallelJobsError from onetl.hooks import slot, support_hooks from onetl.hwm import Statement @@ -233,9 +234,9 @@ class Extra(GenericOptions): class Config: extra = "allow" - prohibited_options = JDBCMixin.JDBCOptions.Config.prohibited_options | GENERIC_PROHIBITED_OPTIONS + prohibited_options = JDBCOptions.Config.prohibited_options | GENERIC_PROHIBITED_OPTIONS - class ReadOptions(JDBCMixin.JDBCOptions): + class ReadOptions(JDBCOptions): """Pivotal's Greenplum Spark connector reading options. .. note :: @@ -267,10 +268,7 @@ class ReadOptions(JDBCMixin.JDBCOptions): class Config: known_options = READ_OPTIONS prohibited_options = ( - JDBCMixin.JDBCOptions.Config.prohibited_options - | EXTRA_OPTIONS - | GENERIC_PROHIBITED_OPTIONS - | WRITE_OPTIONS + JDBCOptions.Config.prohibited_options | EXTRA_OPTIONS | GENERIC_PROHIBITED_OPTIONS | WRITE_OPTIONS ) partition_column: Optional[str] = Field(alias="partitionColumn") @@ -370,7 +368,7 @@ class Config: or both should be ``None`` """ - class WriteOptions(JDBCMixin.JDBCOptions): + class WriteOptions(JDBCOptions): """Pivotal's Greenplum Spark connector writing options. .. 
note :: @@ -403,10 +401,7 @@ class WriteOptions(JDBCMixin.JDBCOptions): class Config: known_options = WRITE_OPTIONS prohibited_options = ( - JDBCMixin.JDBCOptions.Config.prohibited_options - | EXTRA_OPTIONS - | GENERIC_PROHIBITED_OPTIONS - | READ_OPTIONS + JDBCOptions.Config.prohibited_options | EXTRA_OPTIONS | GENERIC_PROHIBITED_OPTIONS | READ_OPTIONS ) if_exists: GreenplumTableExistBehavior = Field(default=GreenplumTableExistBehavior.APPEND, alias="mode") @@ -644,7 +639,7 @@ def get_df_schema( self, source: str, columns: list[str] | None = None, - options: JDBCMixin.JDBCOptions | dict | None = None, + options: JDBCOptions | dict | None = None, ) -> StructType: log.info("|%s| Fetching schema of table %r", self.__class__.__name__, source) @@ -667,7 +662,7 @@ def get_min_max_bounds( expression: str | None = None, hint: str | None = None, where: str | None = None, - options: JDBCMixin.JDBCOptions | dict | None = None, + options: JDBCOptions | dict | None = None, ) -> tuple[Any, Any]: log.info("|Spark| Getting min and max values for column %r", column) @@ -737,7 +732,7 @@ def _connector_params( **extra, } - def _options_to_connection_properties(self, options: JDBCMixin.JDBCOptions): + def _options_to_connection_properties(self, options: JDBCOptions): # See https://github.com/pgjdbc/pgjdbc/pull/1252 # Since 42.2.9 Postgres JDBC Driver added new option readOnlyMode=transaction # Which is not a desired behavior, because `.fetch()` method should always be read-only diff --git a/onetl/connection/db_connection/jdbc_connection.py b/onetl/connection/db_connection/jdbc_connection.py deleted file mode 100644 index 296f1766c..000000000 --- a/onetl/connection/db_connection/jdbc_connection.py +++ /dev/null @@ -1,906 +0,0 @@ -# Copyright 2023 MTS (Mobile Telesystems) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
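The monolithic ``jdbc_connection.py`` deleted below is reassembled as a package: ``connection.py``, ``dialect.py`` and ``options.py``, re-exported through the package ``__init__`` added further down. A sketch of the resulting import surface; the only assumption is that concrete connections keep their nested classes built on the new shared bases, as the ``clickhouse.py`` hunk above shows:

.. code:: python

    from onetl.connection import Clickhouse
    from onetl.connection.db_connection.jdbc_connection import (
        JDBCConnection,
        JDBCDialect,
        JDBCReadOptions,
        JDBCWriteOptions,
    )
    from onetl.connection.db_connection.jdbc_mixin.options import JDBCOptions

    # concrete connections still expose nested classes, now derived from the shared bases
    assert issubclass(Clickhouse.ReadOptions, JDBCReadOptions)
    assert issubclass(Clickhouse.Dialect, JDBCDialect)
    assert issubclass(JDBCReadOptions, JDBCOptions)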
- -from __future__ import annotations - -import logging -import secrets -import warnings -from enum import Enum -from typing import TYPE_CHECKING, Any, Optional - -from deprecated import deprecated -from etl_entities.instance import Host -from pydantic import Field, PositiveInt, root_validator - -from onetl._internal import clear_statement, get_sql_query, to_camel -from onetl.connection.db_connection.db_connection import DBConnection, DBDialect -from onetl.connection.db_connection.dialect_mixins import ( - SupportColumnsList, - SupportDfSchemaNone, - SupportHintStr, - SupportHWMColumnStr, - SupportHWMExpressionStr, - SupportWhereStr, -) -from onetl.connection.db_connection.dialect_mixins.support_table_with_dbschema import ( - SupportTableWithDBSchema, -) -from onetl.connection.db_connection.jdbc_mixin import JDBCMixin -from onetl.hooks import slot, support_hooks -from onetl.hwm import Statement -from onetl.impl.generic_options import GenericOptions -from onetl.log import log_lines, log_with_indent - -if TYPE_CHECKING: - from pyspark.sql import DataFrame - from pyspark.sql.types import StructType - -log = logging.getLogger(__name__) - -# options from spark.read.jdbc which are populated by JDBCConnection methods -GENERIC_PROHIBITED_OPTIONS = frozenset( - ( - "table", - "dbtable", - "query", - "properties", - ), -) - -READ_WRITE_OPTIONS = frozenset( - ( - "keytab", - "principal", - "refreshKrb5Config", - "connectionProvider", - ), -) - -WRITE_OPTIONS = frozenset( - ( - "mode", - "column", # in some part of Spark source code option 'partitionColumn' is called just 'column' - "batchsize", - "isolationLevel", - "isolation_level", - "truncate", - "cascadeTruncate", - "createTableOptions", - "createTableColumnTypes", - "createTableColumnTypes", - ), -) - -READ_OPTIONS = frozenset( - ( - "column", # in some part of Spark source code option 'partitionColumn' is called just 'column' - "partitionColumn", - "partition_column", - "lowerBound", - "lower_bound", - "upperBound", - "upper_bound", - "numPartitions", - "num_partitions", - "fetchsize", - "sessionInitStatement", - "session_init_statement", - "customSchema", - "pushDownPredicate", - "pushDownAggregate", - "pushDownLimit", - "pushDownTableSample", - "predicates", - ), -) - - -# parameters accepted by spark.read.jdbc method: -# spark.read.jdbc( -# url, table, column, lowerBound, upperBound, numPartitions, predicates -# properties: { "user" : "SYSTEM", "password" : "mypassword", ... }) -READ_TOP_LEVEL_OPTIONS = frozenset(("url", "column", "lower_bound", "upper_bound", "num_partitions", "predicates")) - -# parameters accepted by spark.write.jdbc method: -# spark.write.jdbc( -# url, table, mode, -# properties: { "user" : "SYSTEM", "password" : "mypassword", ... }) -WRITE_TOP_LEVEL_OPTIONS = frozenset("url") - - -class JDBCTableExistBehavior(str, Enum): - APPEND = "append" - REPLACE_ENTIRE_TABLE = "replace_entire_table" - - def __str__(self) -> str: - return str(self.value) - - @classmethod # noqa: WPS120 - def _missing_(cls, value: object): # noqa: WPS120 - if str(value) == "overwrite": - warnings.warn( - "Mode `overwrite` is deprecated since v0.9.0 and will be removed in v1.0.0. 
" - "Use `replace_entire_table` instead", - category=UserWarning, - stacklevel=4, - ) - return cls.REPLACE_ENTIRE_TABLE - - -class PartitioningMode(str, Enum): - range = "range" - hash = "hash" - mod = "mod" - - def __str__(self): - return str(self.value) - - -@support_hooks -class JDBCConnection(SupportDfSchemaNone, JDBCMixin, DBConnection): - class Extra(GenericOptions): - class Config: - extra = "allow" - - class Dialect( # noqa: WPS215 - SupportTableWithDBSchema, - SupportColumnsList, - SupportDfSchemaNone, - SupportWhereStr, - SupportHintStr, - SupportHWMExpressionStr, - SupportHWMColumnStr, - DBDialect, - ): - pass - - class ReadOptions(JDBCMixin.JDBCOptions): - """Spark JDBC options. - - .. note :: - - You can pass any value - `supported by Spark `_, - even if it is not mentioned in this documentation. **Option names should be in** ``camelCase``! - - The set of supported options depends on Spark version. See link above. - - Examples - -------- - - Read options initialization - - .. code:: python - - options = JDBC.ReadOptions( - partitionColumn="reg_id", - numPartitions=10, - lowerBound=0, - upperBound=1000, - someNewOption="value", - ) - """ - - class Config: - known_options = READ_OPTIONS | READ_WRITE_OPTIONS - prohibited_options = ( - JDBCMixin.JDBCOptions.Config.prohibited_options | GENERIC_PROHIBITED_OPTIONS | WRITE_OPTIONS - ) - alias_generator = to_camel - - # Options in DataFrameWriter.jdbc() method - partition_column: Optional[str] = None - """Column used to parallelize reading from a table. - - .. warning:: - It is highly recommended to use primary key, or at least a column with an index - to avoid performance issues. - - .. note:: - Column type depends on :obj:`~partitioning_mode`. - - * ``partitioning_mode="range"`` requires column to be an integer or date (can be NULL, but not recommended). - * ``partitioning_mode="hash"`` requires column to be an string (NOT NULL). - * ``partitioning_mode="mod"`` requires column to be an integer (NOT NULL). - - - See documentation for :obj:`~partitioning_mode` for more details""" - - num_partitions: PositiveInt = 1 - """Number of jobs created by Spark to read the table content in parallel. - See documentation for :obj:`~partitioning_mode` for more details""" - - lower_bound: Optional[int] = None - """See documentation for :obj:`~partitioning_mode` for more details""" # noqa: WPS322 - - upper_bound: Optional[int] = None - """See documentation for :obj:`~partitioning_mode` for more details""" # noqa: WPS322 - - session_init_statement: Optional[str] = None - '''After each database session is opened to the remote DB and before starting to read data, - this option executes a custom SQL statement (or a PL/SQL block). - - Use this to implement session initialization code. - - Example: - - .. code:: python - - sessionInitStatement = """ - BEGIN - execute immediate - 'alter session set "_serial_direct_read"=true'; - END; - """ - ''' - - fetchsize: int = 100_000 - """Fetch N rows from an opened cursor per one read round. - - Tuning this option can influence performance of reading. - - .. warning:: - - Default value is different from Spark. - - Spark uses driver's own value, and it may be different in different drivers, - and even versions of the same driver. For example, Oracle has - default ``fetchsize=10``, which is absolutely not usable. - - Thus we've overridden default value with ``100_000``, which should increase reading performance. 
- """ - - partitioning_mode: PartitioningMode = PartitioningMode.range - """Defines how Spark will parallelize reading from table. - - Possible values: - - * ``range`` (default) - Allocate each executor a range of values from column passed into :obj:`~partition_column`. - - Spark generates for each executor an SQL query like: - - Executor 1: - - .. code:: sql - - SELECT ... FROM table - WHERE (partition_column >= lowerBound - OR partition_column IS NULL) - AND partition_column < (lower_bound + stride) - - Executor 2: - - .. code:: sql - - SELECT ... FROM table - WHERE partition_column >= (lower_bound + stride) - AND partition_column < (lower_bound + 2 * stride) - - ... - - Executor N: - - .. code:: sql - - SELECT ... FROM table - WHERE partition_column >= (lower_bound + (N-1) * stride) - AND partition_column <= upper_bound - - Where ``stride=(upper_bound - lower_bound) / num_partitions``. - - .. note:: - - :obj:`~lower_bound`, :obj:`~upper_bound` and :obj:`~num_partitions` are used just to - calculate the partition stride, **NOT** for filtering the rows in table. - So all rows in the table will be returned (unlike *Incremental* :ref:`strategy`). - - .. note:: - - All queries are executed in parallel. To execute them sequentially, use *Batch* :ref:`strategy`. - - * ``hash`` - Allocate each executor a set of values based on hash of the :obj:`~partition_column` column. - - Spark generates for each executor an SQL query like: - - Executor 1: - - .. code:: sql - - SELECT ... FROM table - WHERE (some_hash(partition_column) mod num_partitions) = 0 -- lower_bound - - Executor 2: - - .. code:: sql - - SELECT ... FROM table - WHERE (some_hash(partition_column) mod num_partitions) = 1 -- lower_bound + 1 - - ... - - Executor N: - - .. code:: sql - - SELECT ... FROM table - WHERE (some_hash(partition_column) mod num_partitions) = num_partitions-1 -- upper_bound - - .. note:: - - The hash function implementation depends on RDBMS. It can be ``MD5`` or any other fast hash function, - or expression based on this function call. - - * ``mod`` - Allocate each executor a set of values based on modulus of the :obj:`~partition_column` column. - - Spark generates for each executor an SQL query like: - - Executor 1: - - .. code:: sql - - SELECT ... FROM table - WHERE (partition_column mod num_partitions) = 0 -- lower_bound - - Executor 2: - - .. code:: sql - - SELECT ... FROM table - WHERE (partition_column mod num_partitions) = 1 -- lower_bound + 1 - - Executor N: - - .. code:: sql - - SELECT ... FROM table - WHERE (partition_column mod num_partitions) = num_partitions-1 -- upper_bound - - Examples - -------- - - Read data in 10 parallel jobs by range of values in ``id_column`` column: - - .. code:: python - - Postgres.ReadOptions( - partitioning_mode="range", # default mode, can be omitted - partition_column="id_column", - num_partitions=10, - # if you're using DBReader, options below can be omitted - # because they are calculated by automatically as - # MIN and MAX values of `partition_column` - lower_bound=0, - upper_bound=100_000, - ) - - Read data in 10 parallel jobs by hash of values in ``some_column`` column: - - .. code:: python - - Postgres.ReadOptions( - partitioning_mode="hash", - partition_column="some_column", - num_partitions=10, - # lower_bound and upper_bound are automatically set to `0` and `9` - ) - - Read data in 10 parallel jobs by modulus of values in ``id_column`` column: - - .. 
code:: python - - Postgres.ReadOptions( - partitioning_mode="mod", - partition_column="id_column", - num_partitions=10, - # lower_bound and upper_bound are automatically set to `0` and `9` - ) - """ - - @root_validator - def partitioning_mode_actions(cls, values): - mode = values["partitioning_mode"] - num_partitions = values.get("num_partitions") - partition_column = values.get("partition_column") - lower_bound = values.get("lower_bound") - upper_bound = values.get("upper_bound") - - if not partition_column: - if num_partitions == 1: - return values - - raise ValueError("You should set partition_column to enable partitioning") - - elif num_partitions == 1: - raise ValueError("You should set num_partitions > 1 to enable partitioning") - - if mode == PartitioningMode.range: - return values - - if mode == PartitioningMode.hash: - values["partition_column"] = cls._get_partition_column_hash( - partition_column=partition_column, - num_partitions=num_partitions, - ) - - if mode == PartitioningMode.mod: - values["partition_column"] = cls._get_partition_column_mod( - partition_column=partition_column, - num_partitions=num_partitions, - ) - - values["lower_bound"] = lower_bound if lower_bound is not None else 0 - values["upper_bound"] = upper_bound if upper_bound is not None else num_partitions - - return values - - class WriteOptions(JDBCMixin.JDBCOptions): - """Spark JDBC writing options. - - .. note :: - - You can pass any value - `supported by Spark `_, - even if it is not mentioned in this documentation. **Option names should be in** ``camelCase``! - - The set of supported options depends on Spark version. See link above. - - Examples - -------- - - Write options initialization - - .. code:: python - - options = JDBC.WriteOptions(if_exists="append", batchsize=20_000, someNewOption="value") - """ - - class Config: - known_options = WRITE_OPTIONS | READ_WRITE_OPTIONS - prohibited_options = ( - JDBCMixin.JDBCOptions.Config.prohibited_options | GENERIC_PROHIBITED_OPTIONS | READ_OPTIONS - ) - alias_generator = to_camel - - if_exists: JDBCTableExistBehavior = Field(default=JDBCTableExistBehavior.APPEND, alias="mode") - """Behavior of writing data into existing table. - - Possible values: - * ``append`` (default) - Adds new rows into existing table. - - .. dropdown:: Behavior in details - - * Table does not exist - Table is created using options provided by user - (``createTableOptions``, ``createTableColumnTypes``, etc). - - * Table exists - Data is appended to a table. Table has the same DDL as before writing data - - .. warning:: - - This mode does not check whether table already contains - rows from dataframe, so duplicated rows can be created. - - Also Spark does not support passing custom options to - insert statement, like ``ON CONFLICT``, so don't try to - implement deduplication using unique indexes or constraints. - - Instead, write to staging table and perform deduplication - using :obj:`~execute` method. - - * ``replace_entire_table`` - **Table is dropped and then created, or truncated**. - - .. dropdown:: Behavior in details - - * Table does not exist - Table is created using options provided by user - (``createTableOptions``, ``createTableColumnTypes``, etc). - - * Table exists - Table content is replaced with dataframe content. - - After writing completed, target table could either have the same DDL as - before writing data (``truncate=True``), or can be recreated (``truncate=False`` - or source does not support truncation). - - .. 
note:: - - ``error`` and ``ignore`` modes are not supported. - """ - - batchsize: int = 20_000 - """How many rows can be inserted per round trip. - - Tuning this option can influence performance of writing. - - .. warning:: - - Default value is different from Spark. - - Spark uses quite small value ``1000``, which is absolutely not usable - in BigData world. - - Thus we've overridden default value with ``20_000``, - which should increase writing performance. - - You can increase it even more, up to ``50_000``, - but it depends on your database load and number of columns in the row. - Higher values does not increase performance. - """ - - isolation_level: str = "READ_UNCOMMITTED" - """The transaction isolation level, which applies to current connection. - - Possible values: - * ``NONE`` (as string, not Python's ``None``) - * ``READ_COMMITTED`` - * ``READ_UNCOMMITTED`` - * ``REPEATABLE_READ`` - * ``SERIALIZABLE`` - - Values correspond to transaction isolation levels defined by JDBC standard. - Please refer the documentation for - `java.sql.Connection `_. - """ - - @root_validator(pre=True) - def mode_is_deprecated(cls, values): - if "mode" in values: - warnings.warn( - "Option `WriteOptions(mode=...)` is deprecated since v0.9.0 and will be removed in v1.0.0. " - "Use `WriteOptions(if_exists=...)` instead", - category=UserWarning, - stacklevel=3, - ) - return values - - @deprecated( - version="0.5.0", - reason="Please use 'ReadOptions' or 'WriteOptions' class instead. Will be removed in v1.0.0", - action="always", - category=UserWarning, - ) - class Options(ReadOptions, WriteOptions): - class Config: - prohibited_options = JDBCMixin.JDBCOptions.Config.prohibited_options - - host: Host - port: int - extra: Extra = Extra() - - @property - def instance_url(self) -> str: - return f"{self.__class__.__name__.lower()}://{self.host}:{self.port}" - - @slot - def sql( - self, - query: str, - options: ReadOptions | dict | None = None, - ) -> DataFrame: - """ - **Lazily** execute SELECT statement **on Spark executor** and return DataFrame. |support_hooks| - - Same as ``spark.read.jdbc(query)``. - - .. note:: - - This method does not support :ref:`strategy`, - use :obj:`DBReader ` instead - - .. note:: - - Statement is executed in read-write connection, - so if you're calling some functions/procedures with DDL/DML statements inside, - they can change data in your database. - - Unfortunately, Spark does no provide any option to change this behavior. - - Parameters - ---------- - query : str - - SQL query to be executed. - - Only ``SELECT ... FROM ...`` form is supported. - - Some databases also supports ``WITH ... AS (...) SELECT ... FROM ...`` form. - - Queries like ``SHOW ...`` are not supported. - - .. warning:: - - The exact syntax **depends on RDBMS** is being used. - - options : dict, :obj:`~ReadOptions`, default: ``None`` - - Spark options to be used while fetching data, like ``fetchsize`` or ``partitionColumn`` - - Returns - ------- - df : pyspark.sql.dataframe.DataFrame - - Spark dataframe - - Examples - -------- - - Read data from a table: - - .. code:: python - - df = connection.sql("SELECT * FROM mytable") - - Read data from a table with options: - - .. 
code:: python - - # reads data from table in batches, 10000 rows per batch - df = connection.sql("SELECT * FROM mytable", {"fetchsize": 10000}) - assert df.count() - - """ - - query = clear_statement(query) - - log.info("|%s| Executing SQL query (on executor):", self.__class__.__name__) - log_lines(log, query) - - df = self._query_on_executor(query, self.ReadOptions.parse(options)) - - log.info("|Spark| DataFrame successfully created from SQL statement ") - return df - - @slot - def read_source_as_df( - self, - source: str, - columns: list[str] | None = None, - hint: str | None = None, - where: str | None = None, - df_schema: StructType | None = None, - start_from: Statement | None = None, - end_at: Statement | None = None, - options: ReadOptions | dict | None = None, - ) -> DataFrame: - read_options = self._set_lower_upper_bound( - table=source, - where=where, - hint=hint, - options=self.ReadOptions.parse(options).copy(exclude={"if_exists", "partitioning_mode"}), - ) - - # hack to avoid column name verification - # in the spark, the expression in the partitioning of the column must - # have the same name as the field in the table ( 2.4 version ) - # https://github.com/apache/spark/pull/21379 - - new_columns = columns or ["*"] - alias = "x" + secrets.token_hex(5) - - if read_options.partition_column: - aliased = self.Dialect._expression_with_alias(read_options.partition_column, alias) - read_options = read_options.copy(update={"partition_column": alias}) - new_columns.append(aliased) - - where = self.Dialect._condition_assembler(condition=where, start_from=start_from, end_at=end_at) - - query = get_sql_query( - table=source, - columns=new_columns, - where=where, - hint=hint, - ) - - result = self.sql(query, read_options) - - if read_options.partition_column: - result = result.drop(alias) - - return result - - @slot - def write_df_to_target( - self, - df: DataFrame, - target: str, - options: WriteOptions | dict | None = None, - ) -> None: - write_options = self.WriteOptions.parse(options) - jdbc_params = self.options_to_jdbc_params(write_options) - - mode = "append" if write_options.if_exists == JDBCTableExistBehavior.APPEND else "overwrite" - log.info("|%s| Saving data to a table %r", self.__class__.__name__, target) - df.write.jdbc(table=target, mode=mode, **jdbc_params) - log.info("|%s| Table %r successfully written", self.__class__.__name__, target) - - @slot - def get_df_schema( - self, - source: str, - columns: list[str] | None = None, - options: JDBCMixin.JDBCOptions | dict | None = None, - ) -> StructType: - log.info("|%s| Fetching schema of table %r", self.__class__.__name__, source) - - query = get_sql_query(source, columns=columns, where="1=0", compact=True) - read_options = self._exclude_partition_options(options, fetchsize=0) - - log.debug("|%s| Executing SQL query (on driver):", self.__class__.__name__) - log_lines(log, query, level=logging.DEBUG) - - df = self._query_on_driver(query, read_options) - log.info("|%s| Schema fetched", self.__class__.__name__) - - return df.schema - - def options_to_jdbc_params( - self, - options: ReadOptions | WriteOptions, - ) -> dict: - # Have to replace the parameter with - # since the method takes the named parameter - # link to source below - # https://github.com/apache/spark/blob/2ef8ced27a6b0170a691722a855d3886e079f037/python/pyspark/sql/readwriter.py#L465 - - partition_column = getattr(options, "partition_column", None) - if partition_column: - options = options.copy( - update={"column": partition_column}, - exclude={"partition_column"}, 
- ) - - result = self._get_jdbc_properties( - options, - include=READ_TOP_LEVEL_OPTIONS | WRITE_TOP_LEVEL_OPTIONS, - exclude={"if_exists"}, - exclude_none=True, - ) - - result["properties"] = self._get_jdbc_properties( - options, - exclude=READ_TOP_LEVEL_OPTIONS | WRITE_TOP_LEVEL_OPTIONS | {"if_exists"}, - exclude_none=True, - ) - - result["properties"].pop("partitioningMode", None) - - return result - - @slot - def get_min_max_bounds( - self, - source: str, - column: str, - expression: str | None = None, - hint: str | None = None, - where: str | None = None, - options: JDBCMixin.JDBCOptions | dict | None = None, - ) -> tuple[Any, Any]: - log.info("|Spark| Getting min and max values for column %r", column) - - read_options = self._exclude_partition_options(options, fetchsize=1) - - query = get_sql_query( - table=source, - columns=[ - self.Dialect._expression_with_alias( - self.Dialect._get_min_value_sql(expression or column), - "min", - ), - self.Dialect._expression_with_alias( - self.Dialect._get_max_value_sql(expression or column), - "max", - ), - ], - where=where, - hint=hint, - ) - - log.info("|%s| Executing SQL query (on driver):", self.__class__.__name__) - log_lines(log, query) - - df = self._query_on_driver(query, read_options) - row = df.collect()[0] - min_value = row["min"] - max_value = row["max"] - - log.info("|Spark| Received values:") - log_with_indent(log, "MIN(%s) = %r", column, min_value) - log_with_indent(log, "MAX(%s) = %r", column, max_value) - - return min_value, max_value - - def _query_on_executor( - self, - query: str, - options: ReadOptions, - ) -> DataFrame: - jdbc_params = self.options_to_jdbc_params(options) - return self.spark.read.jdbc(table=f"({query}) T", **jdbc_params) - - def _exclude_partition_options( - self, - options: JDBCMixin.JDBCOptions | dict | None, - fetchsize: int, - ) -> JDBCMixin.JDBCOptions: - return self.JDBCOptions.parse(options).copy( - update={"fetchsize": fetchsize}, - exclude={"partition_column", "lower_bound", "upper_bound", "num_partitions", "partitioning_mode"}, - ) - - def _set_lower_upper_bound( - self, - table: str, - hint: str | None = None, - where: str | None = None, - options: ReadOptions | dict | None = None, - ) -> ReadOptions: - """ - Determine values of upperBound and lowerBound options - """ - - result_options = self.ReadOptions.parse(options) - - if not result_options.partition_column: - return result_options - - missing_values: list[str] = [] - - is_missed_lower_bound = result_options.lower_bound is None - is_missed_upper_bound = result_options.upper_bound is None - - if is_missed_lower_bound: - missing_values.append("lowerBound") - - if is_missed_upper_bound: - missing_values.append("upperBound") - - if not missing_values: - return result_options - - log.warning( - "|Spark| Passed numPartitions = %d, but values %r are not set. " - "They will be detected automatically based on values in partitionColumn %r", - result_options.num_partitions, - missing_values, - result_options.partition_column, - ) - - min_partition_value, max_partition_value = self.get_min_max_bounds( - source=table, - column=result_options.partition_column, - where=where, - hint=hint, - options=result_options, - ) - - # The sessionInitStatement parameter is removed because it only needs to be applied once. 
- return result_options.copy( - exclude={"session_init_statement"}, - update={ - "lower_bound": result_options.lower_bound if not is_missed_lower_bound else min_partition_value, - "upper_bound": result_options.upper_bound if not is_missed_upper_bound else max_partition_value, - }, - ) - - def _log_parameters(self): - super()._log_parameters() - log_with_indent(log, "jdbc_url = %r", self.jdbc_url) diff --git a/onetl/connection/db_connection/jdbc_connection/__init__.py b/onetl/connection/db_connection/jdbc_connection/__init__.py new file mode 100644 index 000000000..9c11532c1 --- /dev/null +++ b/onetl/connection/db_connection/jdbc_connection/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from onetl.connection.db_connection.jdbc_connection.connection import JDBCConnection +from onetl.connection.db_connection.jdbc_connection.dialect import JDBCDialect +from onetl.connection.db_connection.jdbc_connection.options import ( + JDBCPartitioningMode, + JDBCReadOptions, + JDBCTableExistBehavior, + JDBCWriteOptions, +) diff --git a/onetl/connection/db_connection/jdbc_connection/connection.py b/onetl/connection/db_connection/jdbc_connection/connection.py new file mode 100644 index 000000000..3fc754c72 --- /dev/null +++ b/onetl/connection/db_connection/jdbc_connection/connection.py @@ -0,0 +1,379 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import logging +import secrets +from typing import TYPE_CHECKING, Any + +from etl_entities.instance import Host + +from onetl._internal import clear_statement, get_sql_query +from onetl.connection.db_connection.db_connection import DBConnection +from onetl.connection.db_connection.jdbc_connection.options import ( + JDBCReadOptions, + JDBCTableExistBehavior, + JDBCWriteOptions, +) +from onetl.connection.db_connection.jdbc_mixin import JDBCMixin +from onetl.connection.db_connection.jdbc_mixin.options import JDBCOptions +from onetl.hooks import slot, support_hooks +from onetl.hwm import Statement +from onetl.log import log_lines, log_with_indent + +if TYPE_CHECKING: + from pyspark.sql import DataFrame + from pyspark.sql.types import StructType + +log = logging.getLogger(__name__) + +# parameters accepted by spark.read.jdbc method: +# spark.read.jdbc( +# url, table, column, lowerBound, upperBound, numPartitions, predicates +# properties: { "user" : "SYSTEM", "password" : "mypassword", ... 
}) +READ_TOP_LEVEL_OPTIONS = frozenset(("url", "column", "lower_bound", "upper_bound", "num_partitions", "predicates")) + +# parameters accepted by spark.write.jdbc method: +# spark.write.jdbc( +# url, table, mode, +# properties: { "user" : "SYSTEM", "password" : "mypassword", ... }) +WRITE_TOP_LEVEL_OPTIONS = frozenset("url") + + +@support_hooks +class JDBCConnection(JDBCMixin, DBConnection): + host: Host + port: int + + ReadOptions = JDBCReadOptions + WriteOptions = JDBCWriteOptions + + @property + def instance_url(self) -> str: + return f"{self.__class__.__name__.lower()}://{self.host}:{self.port}" + + @slot + def sql( + self, + query: str, + options: JDBCReadOptions | dict | None = None, + ) -> DataFrame: + """ + **Lazily** execute SELECT statement **on Spark executor** and return DataFrame. |support_hooks| + + Same as ``spark.read.jdbc(query)``. + + .. note:: + + This method does not support :ref:`strategy`, + use :obj:`DBReader ` instead + + .. note:: + + Statement is executed in read-write connection, + so if you're calling some functions/procedures with DDL/DML statements inside, + they can change data in your database. + + Unfortunately, Spark does no provide any option to change this behavior. + + Parameters + ---------- + query : str + + SQL query to be executed. + + Only ``SELECT ... FROM ...`` form is supported. + + Some databases also supports ``WITH ... AS (...) SELECT ... FROM ...`` form. + + Queries like ``SHOW ...`` are not supported. + + .. warning:: + + The exact syntax **depends on RDBMS** is being used. + + options : dict, :obj:`~ReadOptions`, default: ``None`` + + Spark options to be used while fetching data, like ``fetchsize`` or ``partitionColumn`` + + Returns + ------- + df : pyspark.sql.dataframe.DataFrame + + Spark dataframe + + Examples + -------- + + Read data from a table: + + .. code:: python + + df = connection.sql("SELECT * FROM mytable") + + Read data from a table with options: + + .. 
code:: python + + # reads data from table in batches, 10000 rows per batch + df = connection.sql("SELECT * FROM mytable", {"fetchsize": 10000}) + assert df.count() + + """ + + query = clear_statement(query) + + log.info("|%s| Executing SQL query (on executor):", self.__class__.__name__) + log_lines(log, query) + + df = self._query_on_executor(query, self.ReadOptions.parse(options)) + + log.info("|Spark| DataFrame successfully created from SQL statement ") + return df + + @slot + def read_source_as_df( + self, + source: str, + columns: list[str] | None = None, + hint: str | None = None, + where: str | None = None, + df_schema: StructType | None = None, + start_from: Statement | None = None, + end_at: Statement | None = None, + options: JDBCReadOptions | None = None, + ) -> DataFrame: + read_options = self._set_lower_upper_bound( + table=source, + where=where, + hint=hint, + options=self.ReadOptions.parse(options).copy(exclude={"if_exists", "partitioning_mode"}), + ) + + # hack to avoid column name verification + # in the spark, the expression in the partitioning of the column must + # have the same name as the field in the table ( 2.4 version ) + # https://github.com/apache/spark/pull/21379 + + new_columns = columns or ["*"] + alias = "x" + secrets.token_hex(5) + + if read_options.partition_column: + aliased = self.Dialect._expression_with_alias(read_options.partition_column, alias) + read_options = read_options.copy(update={"partition_column": alias}) + new_columns.append(aliased) + + where = self.Dialect._condition_assembler(condition=where, start_from=start_from, end_at=end_at) + + query = get_sql_query( + table=source, + columns=new_columns, + where=where, + hint=hint, + ) + + result = self.sql(query, read_options) + + if read_options.partition_column: + result = result.drop(alias) + + return result + + @slot + def write_df_to_target( + self, + df: DataFrame, + target: str, + options: JDBCWriteOptions | None = None, + ) -> None: + write_options = self.WriteOptions.parse(options) + jdbc_params = self.options_to_jdbc_params(write_options) + + mode = "append" if write_options.if_exists == JDBCTableExistBehavior.APPEND else "overwrite" + log.info("|%s| Saving data to a table %r", self.__class__.__name__, target) + df.write.jdbc(table=target, mode=mode, **jdbc_params) + log.info("|%s| Table %r successfully written", self.__class__.__name__, target) + + @slot + def get_df_schema( + self, + source: str, + columns: list[str] | None = None, + options: JDBCReadOptions | None = None, + ) -> StructType: + log.info("|%s| Fetching schema of table %r", self.__class__.__name__, source) + + query = get_sql_query(source, columns=columns, where="1=0", compact=True) + read_options = self._exclude_partition_options(self.ReadOptions.parse(options), fetchsize=0) + + log.debug("|%s| Executing SQL query (on driver):", self.__class__.__name__) + log_lines(log, query, level=logging.DEBUG) + + df = self._query_on_driver(query, read_options) + log.info("|%s| Schema fetched", self.__class__.__name__) + + return df.schema + + def options_to_jdbc_params( + self, + options: JDBCReadOptions | JDBCWriteOptions, + ) -> dict: + # Have to replace the parameter with + # since the method takes the named parameter + # link to source below + # https://github.com/apache/spark/blob/2ef8ced27a6b0170a691722a855d3886e079f037/python/pyspark/sql/readwriter.py#L465 + + partition_column = getattr(options, "partition_column", None) + if partition_column: + options = options.copy( + update={"column": partition_column}, + 
exclude={"partition_column"}, + ) + + result = self._get_jdbc_properties( + options, + include=READ_TOP_LEVEL_OPTIONS | WRITE_TOP_LEVEL_OPTIONS, + exclude={"if_exists"}, + exclude_none=True, + ) + + result["properties"] = self._get_jdbc_properties( + options, + exclude=READ_TOP_LEVEL_OPTIONS | WRITE_TOP_LEVEL_OPTIONS | {"if_exists"}, + exclude_none=True, + ) + + result["properties"].pop("partitioningMode", None) + + return result + + @slot + def get_min_max_bounds( + self, + source: str, + column: str, + expression: str | None = None, + hint: str | None = None, + where: str | None = None, + options: JDBCReadOptions | None = None, + ) -> tuple[Any, Any]: + log.info("|Spark| Getting min and max values for column %r", column) + + read_options = self._exclude_partition_options(self.ReadOptions.parse(options), fetchsize=1) + + query = get_sql_query( + table=source, + columns=[ + self.Dialect._expression_with_alias( + self.Dialect._get_min_value_sql(expression or column), + "min", + ), + self.Dialect._expression_with_alias( + self.Dialect._get_max_value_sql(expression or column), + "max", + ), + ], + where=where, + hint=hint, + ) + + log.info("|%s| Executing SQL query (on driver):", self.__class__.__name__) + log_lines(log, query) + + df = self._query_on_driver(query, read_options) + row = df.collect()[0] + min_value = row["min"] + max_value = row["max"] + + log.info("|Spark| Received values:") + log_with_indent(log, "MIN(%s) = %r", column, min_value) + log_with_indent(log, "MAX(%s) = %r", column, max_value) + + return min_value, max_value + + def _query_on_executor( + self, + query: str, + options: JDBCReadOptions, + ) -> DataFrame: + jdbc_params = self.options_to_jdbc_params(options) + return self.spark.read.jdbc(table=f"({query}) T", **jdbc_params) + + def _exclude_partition_options( + self, + options: JDBCReadOptions, + fetchsize: int, + ) -> JDBCOptions: + return options.copy( + update={"fetchsize": fetchsize}, + exclude={"partition_column", "lower_bound", "upper_bound", "num_partitions", "partitioning_mode"}, + ) + + def _set_lower_upper_bound( + self, + table: str, + hint: str | None = None, + where: str | None = None, + options: JDBCReadOptions | None = None, + ) -> JDBCReadOptions: + """ + Determine values of upperBound and lowerBound options + """ + read_options = self.ReadOptions.parse(options) + if not read_options.partition_column: + return read_options + + missing_values: list[str] = [] + + is_missed_lower_bound = read_options.lower_bound is None + is_missed_upper_bound = read_options.upper_bound is None + + if is_missed_lower_bound: + missing_values.append("lowerBound") + + if is_missed_upper_bound: + missing_values.append("upperBound") + + if not missing_values: + return read_options + + log.warning( + "|Spark| Passed numPartitions = %d, but values %r are not set. " + "They will be detected automatically based on values in partitionColumn %r", + read_options.num_partitions, + missing_values, + read_options.partition_column, + ) + + min_partition_value, max_partition_value = self.get_min_max_bounds( + source=table, + column=read_options.partition_column, + where=where, + hint=hint, + options=options, + ) + + # The sessionInitStatement parameter is removed because it only needs to be applied once. 
+ return read_options.copy( + exclude={"session_init_statement"}, + update={ + "lower_bound": read_options.lower_bound if not is_missed_lower_bound else min_partition_value, + "upper_bound": read_options.upper_bound if not is_missed_upper_bound else max_partition_value, + }, + ) + + def _log_parameters(self): + super()._log_parameters() + log_with_indent(log, "jdbc_url = %r", self.jdbc_url) diff --git a/onetl/connection/db_connection/jdbc_connection/dialect.py b/onetl/connection/db_connection/jdbc_connection/dialect.py new file mode 100644 index 000000000..c794dcad4 --- /dev/null +++ b/onetl/connection/db_connection/jdbc_connection/dialect.py @@ -0,0 +1,39 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from onetl.connection.db_connection.db_connection import DBDialect +from onetl.connection.db_connection.dialect_mixins import ( + SupportColumnsList, + SupportDfSchemaNone, + SupportHintStr, + SupportHWMColumnStr, + SupportHWMExpressionStr, + SupportTableWithDBSchema, + SupportWhereStr, +) + + +class JDBCDialect( # noqa: WPS215 + SupportTableWithDBSchema, + SupportColumnsList, + SupportDfSchemaNone, + SupportWhereStr, + SupportHintStr, + SupportHWMExpressionStr, + SupportHWMColumnStr, + DBDialect, +): + pass diff --git a/onetl/connection/db_connection/jdbc_connection/options.py b/onetl/connection/db_connection/jdbc_connection/options.py new file mode 100644 index 000000000..4a5608f22 --- /dev/null +++ b/onetl/connection/db_connection/jdbc_connection/options.py @@ -0,0 +1,523 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
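+
+# Spark JDBC read/write options, shared by all JDBC-based connection classes
+# (Clickhouse, MSSQL, MySQL, Oracle, Postgres, Teradata).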
+ +from __future__ import annotations + +import warnings +from abc import abstractmethod +from enum import Enum +from typing import Optional + +from pydantic import Field, PositiveInt, root_validator + +from onetl._internal import to_camel +from onetl.connection.db_connection.jdbc_mixin.options import JDBCOptions + +# options from spark.read.jdbc which are populated by JDBCConnection methods +GENERIC_PROHIBITED_OPTIONS = frozenset( + ( + "table", + "dbtable", + "query", + "properties", + ), +) + +READ_WRITE_OPTIONS = frozenset( + ( + "keytab", + "principal", + "refreshKrb5Config", + "connectionProvider", + ), +) + +WRITE_OPTIONS = frozenset( + ( + "mode", + "column", # in some part of Spark source code option 'partitionColumn' is called just 'column' + "batchsize", + "isolationLevel", + "isolation_level", + "truncate", + "cascadeTruncate", + "createTableOptions", + "createTableColumnTypes", + "createTableColumnTypes", + ), +) + +READ_OPTIONS = frozenset( + ( + "column", # in some part of Spark source code option 'partitionColumn' is called just 'column' + "partitionColumn", + "partition_column", + "lowerBound", + "lower_bound", + "upperBound", + "upper_bound", + "numPartitions", + "num_partitions", + "fetchsize", + "sessionInitStatement", + "session_init_statement", + "customSchema", + "pushDownPredicate", + "pushDownAggregate", + "pushDownLimit", + "pushDownTableSample", + "predicates", + ), +) + + +class JDBCTableExistBehavior(str, Enum): + APPEND = "append" + REPLACE_ENTIRE_TABLE = "replace_entire_table" + + def __str__(self) -> str: + return str(self.value) + + @classmethod # noqa: WPS120 + def _missing_(cls, value: object): # noqa: WPS120 + if str(value) == "overwrite": + warnings.warn( + "Mode `overwrite` is deprecated since v0.9.0 and will be removed in v1.0.0. " + "Use `replace_entire_table` instead", + category=UserWarning, + stacklevel=4, + ) + return cls.REPLACE_ENTIRE_TABLE + + +class JDBCPartitioningMode(str, Enum): + RANGE = "range" + HASH = "hash" + MOD = "mod" + + def __str__(self): + return str(self.value) + + +class JDBCReadOptions(JDBCOptions): + """Spark JDBC reading options. + + .. note :: + + You can pass any value + `supported by Spark `_, + even if it is not mentioned in this documentation. **Option names should be in** ``camelCase``! + + The set of supported options depends on Spark version. See link above. + + Examples + -------- + + Read options initialization + + .. code:: python + + options = JDBC.ReadOptions( + partitionColumn="reg_id", + numPartitions=10, + lowerBound=0, + upperBound=1000, + someNewOption="value", + ) + """ + + class Config: + known_options = READ_OPTIONS | READ_WRITE_OPTIONS + prohibited_options = JDBCOptions.Config.prohibited_options | GENERIC_PROHIBITED_OPTIONS | WRITE_OPTIONS + alias_generator = to_camel + + # Options in DataFrameWriter.jdbc() method + partition_column: Optional[str] = None + """Column used to parallelize reading from a table. + + .. warning:: + It is highly recommended to use primary key, or at least a column with an index + to avoid performance issues. + + .. note:: + Column type depends on :obj:`~partitioning_mode`. + + * ``partitioning_mode="range"`` requires column to be an integer or date (can be NULL, but not recommended). + * ``partitioning_mode="hash"`` requires column to be an string (NOT NULL). + * ``partitioning_mode="mod"`` requires column to be an integer (NOT NULL). 
+ + + See documentation for :obj:`~partitioning_mode` for more details""" + + num_partitions: PositiveInt = 1 + """Number of jobs created by Spark to read the table content in parallel. + See documentation for :obj:`~partitioning_mode` for more details""" + + lower_bound: Optional[int] = None + """See documentation for :obj:`~partitioning_mode` for more details""" # noqa: WPS322 + + upper_bound: Optional[int] = None + """See documentation for :obj:`~partitioning_mode` for more details""" # noqa: WPS322 + + session_init_statement: Optional[str] = None + '''After each database session is opened to the remote DB and before starting to read data, + this option executes a custom SQL statement (or a PL/SQL block). + + Use this to implement session initialization code. + + Example: + + .. code:: python + + sessionInitStatement = """ + BEGIN + execute immediate + 'alter session set "_serial_direct_read"=true'; + END; + """ + ''' + + fetchsize: int = 100_000 + """Fetch N rows from an opened cursor per one read round. + + Tuning this option can influence performance of reading. + + .. warning:: + + Default value is different from Spark. + + Spark uses driver's own value, and it may be different in different drivers, + and even versions of the same driver. For example, Oracle has + default ``fetchsize=10``, which is absolutely not usable. + + Thus we've overridden default value with ``100_000``, which should increase reading performance. + """ + + partitioning_mode: JDBCPartitioningMode = JDBCPartitioningMode.RANGE + """Defines how Spark will parallelize reading from table. + + Possible values: + + * ``range`` (default) + Allocate each executor a range of values from column passed into :obj:`~partition_column`. + + Spark generates for each executor an SQL query like: + + Executor 1: + + .. code:: sql + + SELECT ... FROM table + WHERE (partition_column >= lowerBound + OR partition_column IS NULL) + AND partition_column < (lower_bound + stride) + + Executor 2: + + .. code:: sql + + SELECT ... FROM table + WHERE partition_column >= (lower_bound + stride) + AND partition_column < (lower_bound + 2 * stride) + + ... + + Executor N: + + .. code:: sql + + SELECT ... FROM table + WHERE partition_column >= (lower_bound + (N-1) * stride) + AND partition_column <= upper_bound + + Where ``stride=(upper_bound - lower_bound) / num_partitions``. + + .. note:: + + :obj:`~lower_bound`, :obj:`~upper_bound` and :obj:`~num_partitions` are used just to + calculate the partition stride, **NOT** for filtering the rows in table. + So all rows in the table will be returned (unlike *Incremental* :ref:`strategy`). + + .. note:: + + All queries are executed in parallel. To execute them sequentially, use *Batch* :ref:`strategy`. + + * ``hash`` + Allocate each executor a set of values based on hash of the :obj:`~partition_column` column. + + Spark generates for each executor an SQL query like: + + Executor 1: + + .. code:: sql + + SELECT ... FROM table + WHERE (some_hash(partition_column) mod num_partitions) = 0 -- lower_bound + + Executor 2: + + .. code:: sql + + SELECT ... FROM table + WHERE (some_hash(partition_column) mod num_partitions) = 1 -- lower_bound + 1 + + ... + + Executor N: + + .. code:: sql + + SELECT ... FROM table + WHERE (some_hash(partition_column) mod num_partitions) = num_partitions-1 -- upper_bound + + .. note:: + + The hash function implementation depends on RDBMS. It can be ``MD5`` or any other fast hash function, + or expression based on this function call. 
+ + * ``mod`` + Allocate each executor a set of values based on modulus of the :obj:`~partition_column` column. + + Spark generates for each executor an SQL query like: + + Executor 1: + + .. code:: sql + + SELECT ... FROM table + WHERE (partition_column mod num_partitions) = 0 -- lower_bound + + Executor 2: + + .. code:: sql + + SELECT ... FROM table + WHERE (partition_column mod num_partitions) = 1 -- lower_bound + 1 + + Executor N: + + .. code:: sql + + SELECT ... FROM table + WHERE (partition_column mod num_partitions) = num_partitions-1 -- upper_bound + + Examples + -------- + + Read data in 10 parallel jobs by range of values in ``id_column`` column: + + .. code:: python + + JDBC.ReadOptions( + partitioning_mode="range", # default mode, can be omitted + partition_column="id_column", + num_partitions=10, + # if you're using DBReader, options below can be omitted + # because they are calculated by automatically as + # MIN and MAX values of `partition_column` + lower_bound=0, + upper_bound=100_000, + ) + + Read data in 10 parallel jobs by hash of values in ``some_column`` column: + + .. code:: python + + JDBC.ReadOptions( + partitioning_mode="hash", + partition_column="some_column", + num_partitions=10, + # lower_bound and upper_bound are automatically set to `0` and `9` + ) + + Read data in 10 parallel jobs by modulus of values in ``id_column`` column: + + .. code:: python + + JDBC.ReadOptions( + partitioning_mode="mod", + partition_column="id_column", + num_partitions=10, + # lower_bound and upper_bound are automatically set to `0` and `9` + ) + """ + + @root_validator + def partitioning_mode_actions(cls, values): + mode = values["partitioning_mode"] + num_partitions = values.get("num_partitions") + partition_column = values.get("partition_column") + lower_bound = values.get("lower_bound") + upper_bound = values.get("upper_bound") + + if not partition_column: + if num_partitions == 1: + return values + + raise ValueError("You should set partition_column to enable partitioning") + + elif num_partitions == 1: + raise ValueError("You should set num_partitions > 1 to enable partitioning") + + if mode == JDBCPartitioningMode.RANGE: + return values + + if mode == JDBCPartitioningMode.HASH: + values["partition_column"] = cls._get_partition_column_hash( + partition_column=partition_column, + num_partitions=num_partitions, + ) + + if mode == JDBCPartitioningMode.MOD: + values["partition_column"] = cls._get_partition_column_mod( + partition_column=partition_column, + num_partitions=num_partitions, + ) + + values["lower_bound"] = lower_bound if lower_bound is not None else 0 + values["upper_bound"] = upper_bound if upper_bound is not None else num_partitions + + return values + + @classmethod + @abstractmethod + def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: + ... + + @classmethod + @abstractmethod + def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: + ... + + +class JDBCWriteOptions(JDBCOptions): + """Spark JDBC writing options. + + .. note :: + + You can pass any value + `supported by Spark `_, + even if it is not mentioned in this documentation. **Option names should be in** ``camelCase``! + + The set of supported options depends on Spark version. See link above. + + Examples + -------- + + Write options initialization + + .. 
code:: python + + options = JDBC.WriteOptions(if_exists="append", batchsize=20_000, someNewOption="value") + """ + + class Config: + known_options = WRITE_OPTIONS | READ_WRITE_OPTIONS + prohibited_options = JDBCOptions.Config.prohibited_options | GENERIC_PROHIBITED_OPTIONS | READ_OPTIONS + alias_generator = to_camel + + if_exists: JDBCTableExistBehavior = Field(default=JDBCTableExistBehavior.APPEND, alias="mode") + """Behavior of writing data into existing table. + + Possible values: + * ``append`` (default) + Adds new rows into existing table. + + .. dropdown:: Behavior in details + + * Table does not exist + Table is created using options provided by user + (``createTableOptions``, ``createTableColumnTypes``, etc). + + * Table exists + Data is appended to a table. Table has the same DDL as before writing data + + .. warning:: + + This mode does not check whether table already contains + rows from dataframe, so duplicated rows can be created. + + Also Spark does not support passing custom options to + insert statement, like ``ON CONFLICT``, so don't try to + implement deduplication using unique indexes or constraints. + + Instead, write to staging table and perform deduplication + using :obj:`~execute` method. + + * ``replace_entire_table`` + **Table is dropped and then created, or truncated**. + + .. dropdown:: Behavior in details + + * Table does not exist + Table is created using options provided by user + (``createTableOptions``, ``createTableColumnTypes``, etc). + + * Table exists + Table content is replaced with dataframe content. + + After writing completed, target table could either have the same DDL as + before writing data (``truncate=True``), or can be recreated (``truncate=False`` + or source does not support truncation). + + .. note:: + + ``error`` and ``ignore`` modes are not supported. + """ + + batchsize: int = 20_000 + """How many rows can be inserted per round trip. + + Tuning this option can influence performance of writing. + + .. warning:: + + Default value is different from Spark. + + Spark uses quite small value ``1000``, which is absolutely not usable + in BigData world. + + Thus we've overridden default value with ``20_000``, + which should increase writing performance. + + You can increase it even more, up to ``50_000``, + but it depends on your database load and number of columns in the row. + Higher values does not increase performance. + """ + + isolation_level: str = "READ_UNCOMMITTED" + """The transaction isolation level, which applies to current connection. + + Possible values: + * ``NONE`` (as string, not Python's ``None``) + * ``READ_COMMITTED`` + * ``READ_UNCOMMITTED`` + * ``REPEATABLE_READ`` + * ``SERIALIZABLE`` + + Values correspond to transaction isolation levels defined by JDBC standard. + Please refer the documentation for + `java.sql.Connection `_. + """ + + @root_validator(pre=True) + def _mode_is_deprecated(cls, values): + if "mode" in values: + warnings.warn( + "Option `WriteOptions(mode=...)` is deprecated since v0.9.0 and will be removed in v1.0.0. 
" + "Use `WriteOptions(if_exists=...)` instead", + category=UserWarning, + stacklevel=3, + ) + return values diff --git a/onetl/connection/db_connection/jdbc_mixin/__init__.py b/onetl/connection/db_connection/jdbc_mixin/__init__.py new file mode 100644 index 000000000..062fdb74d --- /dev/null +++ b/onetl/connection/db_connection/jdbc_mixin/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from onetl.connection.db_connection.jdbc_mixin.connection import ( + JDBCMixin, + JDBCStatementType, +) +from onetl.connection.db_connection.jdbc_mixin.options import JDBCOptions diff --git a/onetl/connection/db_connection/jdbc_mixin.py b/onetl/connection/db_connection/jdbc_mixin/connection.py similarity index 91% rename from onetl/connection/db_connection/jdbc_mixin.py rename to onetl/connection/db_connection/jdbc_mixin/connection.py index 7f4bd1a73..c02fb82f1 100644 --- a/onetl/connection/db_connection/jdbc_mixin.py +++ b/onetl/connection/db_connection/jdbc_mixin/connection.py @@ -25,9 +25,12 @@ from onetl._internal import clear_statement, stringify from onetl._util.java import get_java_gateway, try_import_java_class from onetl._util.spark import get_spark_version +from onetl.connection.db_connection.jdbc_mixin.options import ( + JDBCOptions as JDBCMixinOptions, +) from onetl.exception import MISSING_JVM_CLASS_MSG from onetl.hooks import slot, support_hooks -from onetl.impl import FrozenModel, GenericOptions +from onetl.impl import FrozenModel from onetl.log import log_lines if TYPE_CHECKING: @@ -48,7 +51,7 @@ ) -class StatementType(Enum): +class JDBCStatementType(Enum): GENERIC = auto() PREPARED = auto() CALL = auto() @@ -66,44 +69,14 @@ class JDBCMixin(FrozenModel): spark: SparkSession = Field(repr=False) user: str password: SecretStr - DRIVER: ClassVar[str] - _CHECK_QUERY: ClassVar[str] = "SELECT 1" - class JDBCOptions(GenericOptions): - """Generic options, related to specific JDBC driver. + JDBCOptions = JDBCMixinOptions - .. note :: - - You can pass any value - supported by underlying JDBC driver class, - even if it is not mentioned in this documentation. - """ - - class Config: - prohibited_options = PROHIBITED_OPTIONS - extra = "allow" - - query_timeout: Optional[int] = Field(default=None, alias="queryTimeout") - """The number of seconds the driver will wait for a statement to execute. - Zero means there is no limit. - - This option depends on driver implementation, - some drivers can check the timeout of each query instead of an entire JDBC batch. - """ - - fetchsize: Optional[int] = None - """How many rows to fetch per round trip. - - Tuning this option can influence performance of reading. - - .. warning:: - - Default value depends on driver. For example, Oracle has - default ``fetchsize=10``. 
- """ + DRIVER: ClassVar[str] + _CHECK_QUERY: ClassVar[str] = "SELECT 1" # cached JDBC connection (Java object), plus corresponding GenericOptions (Python object) - _last_connection_and_options: Optional[Tuple[Any, JDBCOptions]] = PrivateAttr(default=None) + _last_connection_and_options: Optional[Tuple[Any, JDBCMixinOptions]] = PrivateAttr(default=None) @property @abstractmethod @@ -176,7 +149,7 @@ def check(self): def fetch( self, query: str, - options: JDBCMixin.JDBCOptions | dict | None = None, + options: JDBCMixinOptions | dict | None = None, ) -> DataFrame: """ **Immediately** execute SELECT statement **on Spark driver** and return in-memory DataFrame. |support_hooks| @@ -274,7 +247,7 @@ def fetch( def execute( self, statement: str, - options: JDBCMixin.JDBCOptions | dict | None = None, + options: JDBCMixinOptions | dict | None = None, ) -> DataFrame | None: """ **Immediately** execute DDL, DML or procedure/function **on Spark driver**. |support_hooks| @@ -407,11 +380,11 @@ def _check_java_class_imported(cls, spark): def _query_on_driver( self, query: str, - options: JDBCMixin.JDBCOptions, + options: JDBCMixinOptions, ) -> DataFrame: return self._execute_on_driver( statement=query, - statement_type=StatementType.PREPARED, + statement_type=JDBCStatementType.PREPARED, callback=self._statement_to_dataframe, options=options, read_only=True, @@ -420,11 +393,11 @@ def _query_on_driver( def _query_optional_on_driver( self, query: str, - options: JDBCMixin.JDBCOptions, + options: JDBCMixinOptions, ) -> DataFrame | None: return self._execute_on_driver( statement=query, - statement_type=StatementType.PREPARED, + statement_type=JDBCStatementType.PREPARED, callback=self._statement_to_optional_dataframe, options=options, read_only=True, @@ -433,11 +406,11 @@ def _query_optional_on_driver( def _call_on_driver( self, query: str, - options: JDBCMixin.JDBCOptions, + options: JDBCMixinOptions, ) -> DataFrame | None: return self._execute_on_driver( statement=query, - statement_type=StatementType.CALL, + statement_type=JDBCStatementType.CALL, callback=self._statement_to_optional_dataframe, options=options, read_only=False, @@ -445,7 +418,7 @@ def _call_on_driver( def _get_jdbc_properties( self, - options: JDBCOptions, + options: JDBCMixinOptions, **kwargs, ) -> dict: """ @@ -463,7 +436,7 @@ def _get_jdbc_properties( return stringify(result) - def _options_to_connection_properties(self, options: JDBCOptions): + def _options_to_connection_properties(self, options: JDBCMixinOptions): """ Converts human-readable Options class to ``java.util.Properties``. 
@@ -485,7 +458,7 @@ def _options_to_connection_properties(self, options: JDBCOptions): ) return jdbc_options.asConnectionProperties() - def _get_jdbc_connection(self, options: JDBCOptions): + def _get_jdbc_connection(self, options: JDBCMixinOptions): with suppress(Exception): # nothing cached, or JVM failed last_connection, last_options = self._last_connection_and_options if options == last_options and not last_connection.isClosed(): @@ -516,9 +489,9 @@ def _get_statement_args(self) -> tuple[int, ...]: def _execute_on_driver( self, statement: str, - statement_type: StatementType, + statement_type: JDBCStatementType, callback: Callable[..., T], - options: JDBCOptions, + options: JDBCMixinOptions, read_only: bool, ) -> T: """ @@ -540,7 +513,7 @@ def _execute_statement( self, jdbc_statement, statement: str, - options: JDBCOptions, + options: JDBCMixinOptions, callback: Callable[..., T], read_only: bool, ) -> T: @@ -580,7 +553,7 @@ def _execute_statement( @staticmethod def _build_statement( statement: str, - statement_type: StatementType, + statement_type: JDBCStatementType, jdbc_connection, statement_args, ): @@ -596,10 +569,10 @@ def _build_statement( * https://github.com/apache/spark/blob/v2.3.0/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala#L633 """ - if statement_type == StatementType.PREPARED: + if statement_type == JDBCStatementType.PREPARED: return jdbc_connection.prepareStatement(statement, *statement_args) - if statement_type == StatementType.CALL: + if statement_type == JDBCStatementType.CALL: return jdbc_connection.prepareCall(statement, *statement_args) return jdbc_connection.createStatement(*statement_args) diff --git a/onetl/connection/db_connection/jdbc_mixin/options.py b/onetl/connection/db_connection/jdbc_mixin/options.py new file mode 100644 index 000000000..dd889b8fc --- /dev/null +++ b/onetl/connection/db_connection/jdbc_mixin/options.py @@ -0,0 +1,65 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from typing import Optional + +from pydantic import Field + +from onetl.impl import GenericOptions + +# options generated by JDBCMixin methods +PROHIBITED_OPTIONS = frozenset( + ( + "user", + "password", + "driver", + "url", + ), +) + + +class JDBCOptions(GenericOptions): + """Generic options, related to specific JDBC driver. + + .. note :: + + You can pass any value + supported by underlying JDBC driver class, + even if it is not mentioned in this documentation. + """ + + class Config: + prohibited_options = PROHIBITED_OPTIONS + extra = "allow" + + query_timeout: Optional[int] = Field(default=None, alias="queryTimeout") + """The number of seconds the driver will wait for a statement to execute. + Zero means there is no limit. + + This option depends on driver implementation, + some drivers can check the timeout of each query instead of an entire JDBC batch. + """ + + fetchsize: Optional[int] = None + """How many rows to fetch per round trip. 
+ + Tuning this option can influence performance of reading. + + .. warning:: + + Default value depends on driver. For example, Oracle has + default ``fetchsize=10``. + """ diff --git a/onetl/connection/db_connection/mssql.py b/onetl/connection/db_connection/mssql.py index a187ea7dd..a54391e78 100644 --- a/onetl/connection/db_connection/mssql.py +++ b/onetl/connection/db_connection/mssql.py @@ -18,10 +18,19 @@ from datetime import date, datetime from typing import ClassVar +from deprecated import deprecated + from onetl._util.classproperty import classproperty from onetl._util.version import Version from onetl.connection.db_connection.jdbc_connection import JDBCConnection +from onetl.connection.db_connection.jdbc_connection.dialect import JDBCDialect +from onetl.connection.db_connection.jdbc_connection.options import ( + JDBCReadOptions, + JDBCWriteOptions, +) +from onetl.connection.db_connection.jdbc_mixin.options import JDBCOptions from onetl.hooks import slot, support_hooks +from onetl.impl import GenericOptions # do not import PySpark here, as we allow user to use `MSSQL.get_packages()` for creating Spark session @@ -161,8 +170,9 @@ class MSSQL(JDBCConnection): """ - class Extra(JDBCConnection.Extra): + class Extra(GenericOptions): class Config: + extra = "allow" prohibited_options = frozenset(("databaseName",)) database: str @@ -211,7 +221,7 @@ def package(cls) -> str: warnings.warn(msg, UserWarning, stacklevel=3) return "com.microsoft.sqlserver:mssql-jdbc:12.2.0.jre8" - class Dialect(JDBCConnection.Dialect): + class Dialect(JDBCDialect): @classmethod def _get_datetime_value_sql(cls, value: datetime) -> str: result = value.isoformat() @@ -222,7 +232,7 @@ def _get_date_value_sql(cls, value: date) -> str: result = value.isoformat() return f"CAST('{result}' AS date)" - class ReadOptions(JDBCConnection.ReadOptions): + class ReadOptions(JDBCReadOptions): # https://docs.microsoft.com/ru-ru/sql/t-sql/functions/hashbytes-transact-sql?view=sql-server-ver16 @classmethod def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: @@ -232,7 +242,15 @@ def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: return f"{partition_column} % {num_partitions}" - ReadOptions.__doc__ = JDBCConnection.ReadOptions.__doc__ + @deprecated( + version="0.5.0", + reason="Please use 'ReadOptions' or 'WriteOptions' class instead. 
Will be removed in v1.0.0", + action="always", + category=UserWarning, + ) + class Options(ReadOptions, JDBCWriteOptions): + class Config: + prohibited_options = JDBCOptions.Config.prohibited_options @property def jdbc_url(self) -> str: diff --git a/onetl/connection/db_connection/mysql.py b/onetl/connection/db_connection/mysql.py index fcde3d82c..3988c04aa 100644 --- a/onetl/connection/db_connection/mysql.py +++ b/onetl/connection/db_connection/mysql.py @@ -18,9 +18,18 @@ from datetime import date, datetime from typing import ClassVar, Optional +from deprecated import deprecated + from onetl._util.classproperty import classproperty from onetl.connection.db_connection.jdbc_connection import JDBCConnection +from onetl.connection.db_connection.jdbc_connection.dialect import JDBCDialect +from onetl.connection.db_connection.jdbc_connection.options import ( + JDBCReadOptions, + JDBCWriteOptions, +) +from onetl.connection.db_connection.jdbc_mixin.options import JDBCOptions from onetl.hooks import slot, support_hooks +from onetl.impl.generic_options import GenericOptions # do not import PySpark here, as we allow user to use `MySQL.get_packages()` for creating Spark session @@ -116,10 +125,13 @@ class MySQL(JDBCConnection): """ - class Extra(JDBCConnection.Extra): + class Extra(GenericOptions): useUnicode: str = "yes" # noqa: N815 characterEncoding: str = "UTF-8" # noqa: N815 + class Config: + extra = "allow" + port: int = 3306 database: Optional[str] = None extra: Extra = Extra() @@ -161,7 +173,7 @@ def jdbc_url(self): return f"jdbc:mysql://{self.host}:{self.port}?{parameters}" - class Dialect(JDBCConnection.Dialect): + class Dialect(JDBCDialect): @classmethod def _get_datetime_value_sql(cls, value: datetime) -> str: result = value.strftime("%Y-%m-%d %H:%M:%S.%f") @@ -172,7 +184,7 @@ def _get_date_value_sql(cls, value: date) -> str: result = value.strftime("%Y-%m-%d") return f"STR_TO_DATE('{result}', '%Y-%m-%d')" - class ReadOptions(JDBCConnection.ReadOptions): + class ReadOptions(JDBCReadOptions): @classmethod def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: return f"MOD(CONV(CONV(RIGHT(MD5({partition_column}), 16),16, 2), 2, 10), {num_partitions})" @@ -181,4 +193,12 @@ def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: return f"MOD({partition_column}, {num_partitions})" - ReadOptions.__doc__ = JDBCConnection.ReadOptions.__doc__ + @deprecated( + version="0.5.0", + reason="Please use 'ReadOptions' or 'WriteOptions' class instead. 
Will be removed in v1.0.0", + action="always", + category=UserWarning, + ) + class Options(ReadOptions, JDBCWriteOptions): + class Config: + prohibited_options = JDBCOptions.Config.prohibited_options diff --git a/onetl/connection/db_connection/oracle.py b/onetl/connection/db_connection/oracle.py index 538a5b65a..eed81a52a 100644 --- a/onetl/connection/db_connection/oracle.py +++ b/onetl/connection/db_connection/oracle.py @@ -24,13 +24,21 @@ from textwrap import indent from typing import TYPE_CHECKING, ClassVar, Optional +from deprecated import deprecated from pydantic import root_validator from onetl._internal import clear_statement from onetl._util.classproperty import classproperty from onetl._util.version import Version from onetl.connection.db_connection.jdbc_connection import JDBCConnection +from onetl.connection.db_connection.jdbc_connection.dialect import JDBCDialect +from onetl.connection.db_connection.jdbc_connection.options import ( + JDBCReadOptions, + JDBCWriteOptions, +) +from onetl.connection.db_connection.jdbc_mixin.options import JDBCOptions from onetl.hooks import slot, support_hooks +from onetl.impl import GenericOptions from onetl.log import BASE_LOG_INDENT, log_lines # do not import PySpark here, as we allow user to use `Oracle.get_packages()` for creating Spark session @@ -65,6 +73,11 @@ def sort_key(self) -> tuple[int, int, int]: return 100 - self.level, self.line, self.position +class OracleExtra(GenericOptions): + class Config: + extra = "allow" + + @support_hooks class Oracle(JDBCConnection): """Oracle JDBC connection. |support_hooks| @@ -173,6 +186,9 @@ class Oracle(JDBCConnection): port: int = 1521 sid: Optional[str] = None service_name: Optional[str] = None + extra: OracleExtra = OracleExtra() + + Extra = OracleExtra DRIVER: ClassVar[str] = "oracle.jdbc.driver.OracleDriver" _CHECK_QUERY: ClassVar[str] = "SELECT 1 FROM dual" @@ -229,7 +245,7 @@ def only_one_of_sid_or_service_name(cls, values): return values - class Dialect(JDBCConnection.Dialect): + class Dialect(JDBCDialect): @classmethod def _get_datetime_value_sql(cls, value: datetime) -> str: result = value.strftime("%Y-%m-%d %H:%M:%S") @@ -240,7 +256,7 @@ def _get_date_value_sql(cls, value: date) -> str: result = value.strftime("%Y-%m-%d") return f"TO_DATE('{result}', 'YYYY-MM-DD')" - class ReadOptions(JDBCConnection.ReadOptions): + class ReadOptions(JDBCReadOptions): @classmethod def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: return f"ora_hash({partition_column}, {num_partitions})" @@ -249,7 +265,15 @@ def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: return f"MOD({partition_column}, {num_partitions})" - ReadOptions.__doc__ = JDBCConnection.ReadOptions.__doc__ + @deprecated( + version="0.5.0", + reason="Please use 'ReadOptions' or 'WriteOptions' class instead. 
Will be removed in v1.0.0", + action="always", + category=UserWarning, + ) + class Options(ReadOptions, JDBCWriteOptions): + class Config: + prohibited_options = JDBCOptions.Config.prohibited_options @property def jdbc_url(self) -> str: @@ -272,7 +296,7 @@ def instance_url(self) -> str: def execute( self, statement: str, - options: Oracle.JDBCOptions | dict | None = None, # noqa: WPS437 + options: JDBCOptions | dict | None = None, # noqa: WPS437 ) -> DataFrame | None: statement = clear_statement(statement) @@ -323,7 +347,7 @@ def _get_compile_errors( type_name: str, schema: str, object_name: str, - options: Oracle.JDBCOptions, + options: JDBCOptions, ) -> list[tuple[ErrorPosition, str]]: """ Get compile errors for the object. @@ -393,7 +417,7 @@ def _build_error_message(self, aggregated_errors: OrderedDict[ErrorPosition, str def _handle_compile_errors( self, statement: str, - options: Oracle.JDBCOptions, + options: JDBCOptions, ) -> None: """ Oracle does not return compilation errors immediately. diff --git a/onetl/connection/db_connection/postgres.py b/onetl/connection/db_connection/postgres.py index b425c61ea..dfad6d77c 100644 --- a/onetl/connection/db_connection/postgres.py +++ b/onetl/connection/db_connection/postgres.py @@ -18,6 +18,8 @@ from datetime import date, datetime from typing import ClassVar +from deprecated import deprecated + from onetl._util.classproperty import classproperty from onetl.connection.db_connection.db_connection.dialect import DBDialect from onetl.connection.db_connection.dialect_mixins import ( @@ -32,11 +34,22 @@ SupportTableWithDBSchema, ) from onetl.connection.db_connection.jdbc_connection import JDBCConnection +from onetl.connection.db_connection.jdbc_connection.options import ( + JDBCReadOptions, + JDBCWriteOptions, +) +from onetl.connection.db_connection.jdbc_mixin.options import JDBCOptions from onetl.hooks import slot, support_hooks +from onetl.impl import GenericOptions # do not import PySpark here, as we allow user to use `Postgres.get_packages()` for creating Spark session +class PostgresExtra(GenericOptions): + class Config: + extra = "allow" + + @support_hooks class Postgres(JDBCConnection): """PostgreSQL JDBC connection. |support_hooks| @@ -130,6 +143,9 @@ class Postgres(JDBCConnection): database: str port: int = 5432 + extra: PostgresExtra = PostgresExtra() + + Extra = PostgresExtra DRIVER: ClassVar[str] = "org.postgresql.Driver" @@ -178,7 +194,7 @@ def _get_date_value_sql(cls, value: date) -> str: result = value.isoformat() return f"'{result}'::date" - class ReadOptions(JDBCConnection.ReadOptions): + class ReadOptions(JDBCReadOptions): # https://stackoverflow.com/a/9812029 @classmethod def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: @@ -188,7 +204,15 @@ def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: return f"{partition_column} % {num_partitions}" - ReadOptions.__doc__ = JDBCConnection.ReadOptions.__doc__ + @deprecated( + version="0.5.0", + reason="Please use 'ReadOptions' or 'WriteOptions' class instead. 
Will be removed in v1.0.0", + action="always", + category=UserWarning, + ) + class Options(ReadOptions, JDBCWriteOptions): + class Config: + prohibited_options = JDBCOptions.Config.prohibited_options @property def jdbc_url(self) -> str: @@ -202,7 +226,7 @@ def jdbc_url(self) -> str: def instance_url(self) -> str: return f"{super().instance_url}/{self.database}" - def _options_to_connection_properties(self, options: JDBCConnection.JDBCOptions): # noqa: WPS437 + def _options_to_connection_properties(self, options: JDBCOptions): # noqa: WPS437 # See https://github.com/pgjdbc/pgjdbc/pull/1252 # Since 42.2.9 Postgres JDBC Driver added new option readOnlyMode=transaction # Which is not a desired behavior, because `.fetch()` method should always be read-only diff --git a/onetl/connection/db_connection/teradata.py b/onetl/connection/db_connection/teradata.py index 4c04e6f6e..c304788a4 100644 --- a/onetl/connection/db_connection/teradata.py +++ b/onetl/connection/db_connection/teradata.py @@ -18,9 +18,18 @@ from datetime import date, datetime from typing import ClassVar, Optional +from deprecated import deprecated + from onetl._util.classproperty import classproperty from onetl.connection.db_connection.jdbc_connection import JDBCConnection +from onetl.connection.db_connection.jdbc_connection.dialect import JDBCDialect +from onetl.connection.db_connection.jdbc_connection.options import ( + JDBCReadOptions, + JDBCWriteOptions, +) +from onetl.connection.db_connection.jdbc_mixin.options import JDBCOptions from onetl.hooks import slot +from onetl.impl import GenericOptions # do not import PySpark here, as we allow user to use `Teradata.get_packages()` for creating Spark session @@ -131,7 +140,7 @@ class Teradata(JDBCConnection): """ - class Extra(JDBCConnection.Extra): + class Extra(GenericOptions): CHARSET: str = "UTF8" COLUMN_NAME: str = "ON" FLATTEN: str = "ON" @@ -139,6 +148,7 @@ class Extra(JDBCConnection.Extra): STRICT_NAMES: str = "OFF" class Config: + extra = "allow" prohibited_options = frozenset(("DATABASE", "DBS_PORT")) port: int = 1025 @@ -185,7 +195,7 @@ def jdbc_url(self) -> str: conn = ",".join(f"{k}={v}" for k, v in sorted(prop.items())) return f"jdbc:teradata://{self.host}/{conn}" - class Dialect(JDBCConnection.Dialect): + class Dialect(JDBCDialect): @classmethod def _get_datetime_value_sql(cls, value: datetime) -> str: result = value.isoformat() @@ -196,7 +206,7 @@ def _get_date_value_sql(cls, value: date) -> str: result = value.isoformat() return f"CAST('{result}' AS DATE)" - class ReadOptions(JDBCConnection.ReadOptions): + class ReadOptions(JDBCReadOptions): # https://docs.teradata.com/r/w4DJnG9u9GdDlXzsTXyItA/lkaegQT4wAakj~K_ZmW1Dg @classmethod def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: @@ -206,4 +216,12 @@ def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: return f"{partition_column} mod {num_partitions}" - ReadOptions.__doc__ = JDBCConnection.ReadOptions.__doc__ + @deprecated( + version="0.5.0", + reason="Please use 'ReadOptions' or 'WriteOptions' class instead. 
Will be removed in v1.0.0", + action="always", + category=UserWarning, + ) + class Options(ReadOptions, JDBCWriteOptions): + class Config: + prohibited_options = JDBCOptions.Config.prohibited_options diff --git a/setup.cfg b/setup.cfg index b2b8aab52..fa71f2403 100644 --- a/setup.cfg +++ b/setup.cfg @@ -310,7 +310,7 @@ per-file-ignores = onetl/connection/db_connection/mongodb.py: # WPS437 Found protected attribute usage: self.Dialect._ WPS437, - onetl/connection/db_connection/jdbc_mixin.py: + onetl/connection/db_connection/jdbc_mixin/connection.py: # too few type annotations TAE001, # WPS219 :Found too deep access level diff --git a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_clickhouse_reader_integration.py b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_clickhouse_reader_integration.py index bc693e89f..303f43d14 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_clickhouse_reader_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_clickhouse_reader_integration.py @@ -1,7 +1,7 @@ import pytest from onetl.connection import Clickhouse -from onetl.connection.db_connection.jdbc_connection import PartitioningMode +from onetl.connection.db_connection.jdbc_connection import JDBCPartitioningMode from onetl.db import DBReader pytestmark = pytest.mark.clickhouse @@ -43,8 +43,8 @@ def test_clickhouse_reader_snapshot_partitioning_mode_mod(spark, processing, loa reader = DBReader( connection=clickhouse, source=load_table_data.full_name, - options=clickhouse.ReadOptions( - partitioning_mode=PartitioningMode.mod, + options=Clickhouse.ReadOptions( + partitioning_mode=JDBCPartitioningMode.MOD, partition_column="id_int", num_partitions=5, ), @@ -73,8 +73,8 @@ def test_clickhouse_reader_snapshot_partitioning_mode_hash(spark, processing, lo reader = DBReader( connection=clickhouse, source=load_table_data.full_name, - options=clickhouse.ReadOptions( - partitioning_mode=PartitioningMode.hash, + options=Clickhouse.ReadOptions( + partitioning_mode=JDBCPartitioningMode.HASH, partition_column="text_string", num_partitions=5, ), diff --git a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_kafka_reader_integration.py b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_kafka_reader_integration.py index ce23a9a23..3294a6fa1 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_kafka_reader_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_kafka_reader_integration.py @@ -167,7 +167,7 @@ def test_kafka_reader_columns_and_types_with_headers(spark, kafka_processing, ka reader = DBReader( connection=kafka, source=topic, - options=kafka.ReadOptions(includeHeaders=True), + options=Kafka.ReadOptions(includeHeaders=True), ) df = reader.run() diff --git a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mssql_reader_integration.py b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mssql_reader_integration.py index a3098656b..b707ffb8d 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mssql_reader_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mssql_reader_integration.py @@ -1,7 +1,7 @@ import pytest from onetl.connection import MSSQL -from 
onetl.connection.db_connection.jdbc_connection import PartitioningMode +from onetl.connection.db_connection.jdbc_connection import JDBCPartitioningMode from onetl.db import DBReader pytestmark = pytest.mark.mssql @@ -45,8 +45,8 @@ def test_mssql_reader_snapshot_partitioning_mode_mod(spark, processing, load_tab reader = DBReader( connection=mssql, source=load_table_data.full_name, - options=mssql.ReadOptions( - partitioning_mode=PartitioningMode.mod, + options=MSSQL.ReadOptions( + partitioning_mode=JDBCPartitioningMode.MOD, partition_column="id_int", num_partitions=5, ), @@ -76,8 +76,8 @@ def test_mssql_reader_snapshot_partitioning_mode_hash(spark, processing, load_ta reader = DBReader( connection=mssql, source=load_table_data.full_name, - options=mssql.ReadOptions( - partitioning_mode=PartitioningMode.hash, + options=MSSQL.ReadOptions( + partitioning_mode=JDBCPartitioningMode.HASH, partition_column="text_string", num_partitions=5, ), diff --git a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mysql_reader_integration.py b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mysql_reader_integration.py index e05ce3826..d633ebf13 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mysql_reader_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mysql_reader_integration.py @@ -1,7 +1,7 @@ import pytest from onetl.connection import MySQL -from onetl.connection.db_connection.jdbc_connection import PartitioningMode +from onetl.connection.db_connection.jdbc_connection import JDBCPartitioningMode from onetl.db import DBReader pytestmark = pytest.mark.mysql @@ -44,8 +44,8 @@ def test_mysql_reader_snapshot_partitioning_mode_mod(spark, processing, load_tab reader = DBReader( connection=mysql, source=load_table_data.full_name, - options=mysql.ReadOptions( - partitioning_mode=PartitioningMode.mod, + options=MySQL.ReadOptions( + partitioning_mode=JDBCPartitioningMode.MOD, partition_column="id_int", num_partitions=5, ), @@ -74,8 +74,8 @@ def test_mysql_reader_snapshot_partitioning_mode_hash(spark, processing, load_ta reader = DBReader( connection=mysql, source=load_table_data.full_name, - options=mysql.ReadOptions( - partitioning_mode=PartitioningMode.hash, + options=MySQL.ReadOptions( + partitioning_mode=JDBCPartitioningMode.HASH, partition_column="text_string", num_partitions=5, ), diff --git a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_oracle_reader_integration.py b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_oracle_reader_integration.py index 02f4d5607..7b7aa5ae7 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_oracle_reader_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_oracle_reader_integration.py @@ -1,7 +1,7 @@ import pytest from onetl.connection import Oracle -from onetl.connection.db_connection.jdbc_connection import PartitioningMode +from onetl.connection.db_connection.jdbc_connection import JDBCPartitioningMode from onetl.db import DBReader pytestmark = pytest.mark.oracle @@ -45,8 +45,8 @@ def test_oracle_reader_snapshot_partitioning_mode_mod(spark, processing, load_ta reader = DBReader( connection=oracle, source=load_table_data.full_name, - options=oracle.ReadOptions( - partitioning_mode=PartitioningMode.mod, + options=Oracle.ReadOptions( + 
partitioning_mode=JDBCPartitioningMode.MOD, partition_column="id_int", num_partitions=5, ), @@ -76,8 +76,8 @@ def test_oracle_reader_snapshot_partitioning_mode_hash(spark, processing, load_t reader = DBReader( connection=oracle, source=load_table_data.full_name, - options=oracle.ReadOptions( - partitioning_mode=PartitioningMode.hash, + options=Oracle.ReadOptions( + partitioning_mode=JDBCPartitioningMode.HASH, partition_column="text_string", num_partitions=5, ), diff --git a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_postgres_reader_integration.py b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_postgres_reader_integration.py index 738c9b6e3..c40a51ef7 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_postgres_reader_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_postgres_reader_integration.py @@ -1,7 +1,7 @@ import pytest from onetl.connection import Postgres -from onetl.connection.db_connection.jdbc_connection import PartitioningMode +from onetl.connection.db_connection.jdbc_connection import JDBCPartitioningMode from onetl.db import DBReader pytestmark = pytest.mark.postgres @@ -43,8 +43,8 @@ def test_postgres_reader_snapshot_partitioning_mode_mod(spark, processing, load_ reader = DBReader( connection=postgres, source=load_table_data.full_name, - options=postgres.ReadOptions( - partitioning_mode=PartitioningMode.mod, + options=Postgres.ReadOptions( + partitioning_mode=JDBCPartitioningMode.MOD, partition_column="id_int", num_partitions=5, ), @@ -73,8 +73,8 @@ def test_postgres_reader_snapshot_partitioning_mode_hash(spark, processing, load reader = DBReader( connection=postgres, source=load_table_data.full_name, - options=postgres.ReadOptions( - partitioning_mode=PartitioningMode.hash, + options=Postgres.ReadOptions( + partitioning_mode=JDBCPartitioningMode.HASH, partition_column="text_string", num_partitions=5, ), diff --git a/tests/tests_unit/tests_db_connection_unit/test_db_options_unit.py b/tests/tests_unit/tests_db_connection_unit/test_db_options_unit.py index 72415207f..8cc50b45c 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_db_options_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_db_options_unit.py @@ -39,7 +39,7 @@ def test_db_options_connection_parameters_cannot_be_passed(options_class, arg, v (Hive.WriteOptions, "WriteOptions", {"if_exists": "replace_overlapping_partitions"}), (Hive.Options, "Options", {"if_exists": "replace_overlapping_partitions"}), (Postgres.ReadOptions, "ReadOptions", {"fetchsize": 10, "keytab": "a/b/c"}), - (Postgres.WriteOptions, "WriteOptions", {"if_exists": "replace_entire_table", "keytab": "a/b/c"}), + (Postgres.WriteOptions, "JDBCWriteOptions", {"if_exists": "replace_entire_table", "keytab": "a/b/c"}), (Postgres.Options, "Options", {"if_exists": "replace_entire_table", "keytab": "a/b/c"}), (Greenplum.ReadOptions, "ReadOptions", {"partitions": 10}), (Greenplum.WriteOptions, "WriteOptions", {"if_exists": "replace_entire_table"}), diff --git a/tests/tests_unit/tests_db_connection_unit/test_jdbc_options_unit.py b/tests/tests_unit/tests_db_connection_unit/test_jdbc_options_unit.py index 988bb8a27..985f43aae 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_jdbc_options_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_jdbc_options_unit.py @@ -49,7 +49,7 @@ def test_jdbc_read_write_options_populated_by_connection_class(arg, value): with 
pytest.raises(ValueError, match=error_msg): Postgres.ReadOptions.parse({arg: value}) - error_msg = rf"Options \['{arg}'\] are not allowed to use in a WriteOptions" + error_msg = rf"Options \['{arg}'\] are not allowed to use in a JDBCWriteOptions" with pytest.raises(ValueError, match=error_msg): Postgres.WriteOptions.parse({arg: value}) @@ -101,7 +101,7 @@ def test_jdbc_write_options_cannot_be_used_in_read_options(arg, value): ], ) def test_jdbc_read_options_cannot_be_used_in_write_options(arg, value): - error_msg = rf"Options \['{arg}'\] are not allowed to use in a WriteOptions" + error_msg = rf"Options \['{arg}'\] are not allowed to use in a JDBCWriteOptions" with pytest.raises(ValueError, match=error_msg): Postgres.WriteOptions.parse({arg: value}) From 2ad8db801aae484861872aead39cd1370b207923 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Fri, 18 Aug 2023 11:23:02 +0000 Subject: [PATCH 08/30] [DOP-8140] Split MongoDB.Dialect and .*Options to separated submodules --- docs/connection/db_connection/index.rst | 2 +- docs/connection/db_connection/mongodb.rst | 26 - .../db_connection/mongodb/connection.rst | 10 + .../db_connection/mongodb/index.rst | 17 + .../connection/db_connection/mongodb/read.rst | 25 + .../db_connection/mongodb/write.rst | 13 + .../db_connection/mongodb/__init__.py | 22 + .../{mongodb.py => mongodb/connection.py} | 446 +----------------- .../db_connection/mongodb/dialect.py | 204 ++++++++ .../db_connection/mongodb/options.py | 253 ++++++++++ setup.cfg | 3 - 11 files changed, 567 insertions(+), 454 deletions(-) delete mode 100644 docs/connection/db_connection/mongodb.rst create mode 100644 docs/connection/db_connection/mongodb/connection.rst create mode 100644 docs/connection/db_connection/mongodb/index.rst create mode 100644 docs/connection/db_connection/mongodb/read.rst create mode 100644 docs/connection/db_connection/mongodb/write.rst create mode 100644 onetl/connection/db_connection/mongodb/__init__.py rename onetl/connection/db_connection/{mongodb.py => mongodb/connection.py} (57%) create mode 100644 onetl/connection/db_connection/mongodb/dialect.py create mode 100644 onetl/connection/db_connection/mongodb/options.py diff --git a/docs/connection/db_connection/index.rst b/docs/connection/db_connection/index.rst index 3a4036379..d8e2b7c77 100644 --- a/docs/connection/db_connection/index.rst +++ b/docs/connection/db_connection/index.rst @@ -11,7 +11,7 @@ DB Connections Greenplum Kafka Hive - MongoDB + MongoDB MSSQL MySQL Oracle diff --git a/docs/connection/db_connection/mongodb.rst b/docs/connection/db_connection/mongodb.rst deleted file mode 100644 index 2bc42d804..000000000 --- a/docs/connection/db_connection/mongodb.rst +++ /dev/null @@ -1,26 +0,0 @@ -.. _mongo: - -MongoDB connection -===================== - -.. currentmodule:: onetl.connection.db_connection.mongodb - -.. autosummary:: - - MongoDB - MongoDB.ReadOptions - MongoDB.WriteOptions - MongoDB.PipelineOptions - -.. autoclass:: MongoDB - :members: get_packages, check, pipeline - -.. currentmodule:: onetl.connection.db_connection.mongodb.MongoDB - -.. autopydantic_model:: ReadOptions - -.. autopydantic_model:: WriteOptions - :members: mode - :member-order: bysource - -.. 
autopydantic_model:: PipelineOptions diff --git a/docs/connection/db_connection/mongodb/connection.rst b/docs/connection/db_connection/mongodb/connection.rst new file mode 100644 index 000000000..1d0504609 --- /dev/null +++ b/docs/connection/db_connection/mongodb/connection.rst @@ -0,0 +1,10 @@ +.. _mongodb-connection: + +MongoDB Connection +================== + +.. currentmodule:: onetl.connection.db_connection.mongodb.connection + +.. autoclass:: MongoDB + :members: get_packages, check + :member-order: bysource diff --git a/docs/connection/db_connection/mongodb/index.rst b/docs/connection/db_connection/mongodb/index.rst new file mode 100644 index 000000000..a863265a4 --- /dev/null +++ b/docs/connection/db_connection/mongodb/index.rst @@ -0,0 +1,17 @@ +.. _mongodb: + +MongoDB +======= + +.. toctree:: + :maxdepth: 1 + :caption: Connection + + connection + +.. toctree:: + :maxdepth: 1 + :caption: Operations + + read + write diff --git a/docs/connection/db_connection/mongodb/read.rst b/docs/connection/db_connection/mongodb/read.rst new file mode 100644 index 000000000..b3d51686a --- /dev/null +++ b/docs/connection/db_connection/mongodb/read.rst @@ -0,0 +1,25 @@ +.. _mongodb-read: + +Reading from MongoDB +==================== + +There are 2 ways of distributed data reading from MongoDB: + +* Using :obj:`DBReader ` with different :ref:`strategy` and :obj:`MongoDBReadOptions ` +* Using :obj:`MongoDB.pipeline ` with :obj:`MongoDBPipelineOptions ` + +.. currentmodule:: onetl.connection.db_connection.mongodb.connection + +.. automethod:: MongoDB.pipeline + +.. currentmodule:: onetl.connection.db_connection.mongodb.options + +.. autopydantic_model:: MongoDBReadOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false + +.. autopydantic_model:: MongoDBPipelineOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/mongodb/write.rst b/docs/connection/db_connection/mongodb/write.rst new file mode 100644 index 000000000..5fff32d70 --- /dev/null +++ b/docs/connection/db_connection/mongodb/write.rst @@ -0,0 +1,13 @@ +.. _mongodb-write: + +Writing to MongoDB +================== + +For writing data to MongoDB, use :obj:`DBWriter ` with options below. + +.. currentmodule:: onetl.connection.db_connection.mongodb.options + +.. autopydantic_model:: MongoDBWriteOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/onetl/connection/db_connection/mongodb/__init__.py b/onetl/connection/db_connection/mongodb/__init__.py new file mode 100644 index 000000000..0ec92b2d7 --- /dev/null +++ b/onetl/connection/db_connection/mongodb/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
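+
+# Re-export the public classes, so existing imports like
+# `from onetl.connection.db_connection.mongodb import MongoDB`
+# keep working after mongodb.py was split into a package.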
+ +from onetl.connection.db_connection.mongodb.connection import MongoDB, MongoDBExtra +from onetl.connection.db_connection.mongodb.dialect import MongoDBDialect +from onetl.connection.db_connection.mongodb.options import ( + MongoDBCollectionExistBehavior, + MongoDBPipelineOptions, + MongoDBReadOptions, + MongoDBWriteOptions, +) diff --git a/onetl/connection/db_connection/mongodb.py b/onetl/connection/db_connection/mongodb/connection.py similarity index 57% rename from onetl/connection/db_connection/mongodb.py rename to onetl/connection/db_connection/mongodb/connection.py index 629cd0ce4..98c579492 100644 --- a/onetl/connection/db_connection/mongodb.py +++ b/onetl/connection/db_connection/mongodb/connection.py @@ -14,32 +14,26 @@ from __future__ import annotations -import json import logging -import operator import warnings -from datetime import datetime -from enum import Enum -from typing import TYPE_CHECKING, Any, Callable, ClassVar, Dict, Iterable, Mapping +from typing import TYPE_CHECKING, Any from urllib import parse as parser from etl_entities.instance import Host -from pydantic import Field, SecretStr, root_validator, validator +from pydantic import SecretStr, validator from onetl._util.classproperty import classproperty from onetl._util.java import try_import_java_class from onetl._util.scala import get_default_scala_version from onetl._util.spark import get_spark_version from onetl._util.version import Version -from onetl.base.base_db_connection import BaseDBConnection from onetl.connection.db_connection.db_connection import DBConnection -from onetl.connection.db_connection.db_connection.dialect import DBDialect -from onetl.connection.db_connection.dialect_mixins import ( - SupportColumnsNone, - SupportDfSchemaStruct, - SupportHWMColumnStr, - SupportHWMExpressionNone, - SupportTableWithoutDBSchema, +from onetl.connection.db_connection.mongodb.dialect import MongoDBDialect +from onetl.connection.db_connection.mongodb.options import ( + MongoDBCollectionExistBehavior, + MongoDBPipelineOptions, + MongoDBReadOptions, + MongoDBWriteOptions, ) from onetl.exception import MISSING_JVM_CLASS_MSG from onetl.hooks import slot, support_hooks @@ -54,125 +48,9 @@ log = logging.getLogger(__name__) -_upper_level_operators = frozenset( # noqa: WPS527 - [ - "$addFields", - "$bucket", - "$bucketAuto", - "$changeStream", - "$collStats", - "$count", - "$currentOp", - "$densify", - "$documents", - "$facet", - "$fill", - "$geoNear", - "$graphLookup", - "$group", - "$indexStats", - "$limit", - "$listLocalSessions", - "$listSessions", - "$lookup", - "$merge", - "$out", - "$planCacheStats", - "$project", - "$redact", - "$replaceRoot", - "$replaceWith", - "$sample", - "$search", - "$searchMeta", - "$set", - "$setWindowFields", - "$shardedDataDistribution", - "$skip", - "$sort", - "$sortByCount", - "$unionWith", - "$unset", - "$unwind", - ], -) - - -class MongoDBCollectionExistBehavior(str, Enum): - APPEND = "append" - REPLACE_ENTIRE_COLLECTION = "replace_entire_collection" - - def __str__(self) -> str: - return str(self.value) - - @classmethod # noqa: WPS120 - def _missing_(cls, value: object): # noqa: WPS120 - if str(value) == "overwrite": - warnings.warn( - "Mode `overwrite` is deprecated since v0.9.0 and will be removed in v1.0.0. 
" - "Use `replace_entire_collection` instead", - category=UserWarning, - stacklevel=4, - ) - return cls.REPLACE_ENTIRE_COLLECTION - - -PIPELINE_PROHIBITED_OPTIONS = frozenset( - ( - "uri", - "database", - "collection", - "pipeline", - ), -) - -PROHIBITED_OPTIONS = frozenset( - ( - "uri", - "database", - "collection", - "pipeline", - "hint", - ), -) - -KNOWN_READ_OPTIONS = frozenset( - ( - "localThreshold", - "readPreference.name", - "readPreference.tagSets", - "readConcern.level", - "sampleSize", - "samplePoolSize", - "partitioner", - "partitionerOptions", - "registerSQLHelperFunctions", - "sql.inferschema.mapTypes.enabled", - "sql.inferschema.mapTypes.minimumKeys", - "sql.pipeline.includeNullFilters", - "sql.pipeline.includeFiltersAndProjections", - "pipeline", - "hint", - "collation", - "allowDiskUse", - "batchSize", - ), -) - -KNOWN_WRITE_OPTIONS = frozenset( - ( - "extendedBsonTypes", - "localThreshold", - "replaceDocument", - "maxBatchSize", - "writeConcern.w", - "writeConcern.journal", - "writeConcern.wTimeoutMS", - "shardKey", - "forceInsert", - "ordered", - ), -) +class MongoDBExtra(GenericOptions): + class Config: + extra = "allow" @support_hooks @@ -263,16 +141,18 @@ class MongoDB(DBConnection): ) """ - class Extra(GenericOptions): - class Config: - extra = "allow" - database: str host: Host user: str password: SecretStr port: int = 27017 - extra: Extra = Extra() + extra: MongoDBExtra = MongoDBExtra() + + Dialect = MongoDBDialect + ReadOptions = MongoDBReadOptions + WriteOptions = MongoDBWriteOptions + PipelineOptions = MongoDBPipelineOptions + Extra = MongoDBExtra @slot @classmethod @@ -358,295 +238,13 @@ def package_spark_3_4(cls) -> str: warnings.warn(msg, UserWarning, stacklevel=3) return "org.mongodb.spark:mongo-spark-connector_2.12:10.1.1" - class PipelineOptions(GenericOptions): - """Aggregation pipeline options for MongoDB connector. - - The only difference from :obj:`~ReadOptions` that it is allowed to pass the 'hint' parameter. - - .. note :: - - You can pass any value - `supported by connector `_, - even if it is not mentioned in this documentation. - - The set of supported options depends on connector version. See link above. - - .. warning:: - - Options ``uri``, ``database``, ``collection``, ``pipeline`` are populated from connection attributes, - and cannot be set in ``PipelineOptions`` class. - - Examples - -------- - - Pipeline options initialization - - .. code:: python - - MongoDB.PipelineOptions( - hint="{'_id': 1}", - ) - """ - - class Config: - prohibited_options = PIPELINE_PROHIBITED_OPTIONS - known_options = KNOWN_READ_OPTIONS - extra = "allow" - - class ReadOptions(GenericOptions): - """Reading options for MongoDB connector. - - .. note :: - - You can pass any value - `supported by connector `_, - even if it is not mentioned in this documentation. - - The set of supported options depends on connector version. See link above. - - .. warning:: - - Options ``uri``, ``database``, ``collection``, ``pipeline``, ``hint`` are populated from connection - attributes, and cannot be set in ``ReadOptions`` class. - - Examples - -------- - - Read options initialization - - .. code:: python - - MongoDB.ReadOptions( - batchSize=10000, - ) - """ - - class Config: - prohibited_options = PROHIBITED_OPTIONS - known_options = KNOWN_READ_OPTIONS - extra = "allow" - - class WriteOptions(GenericOptions): - """Writing options for MongoDB connector. - - .. note :: - - You can pass any value - `supported by connector `_, - even if it is not mentioned in this documentation. 
- - The set of supported options depends on connector version. See link above. - - .. warning:: - - Options ``uri``, ``database``, ``collection`` are populated from connection attributes, - and cannot be set in ``WriteOptions`` class. - - Examples - -------- - - Write options initialization - - .. code:: python - - options = MongoDB.WriteOptions( - if_exists="append", - sampleSize=500, - localThreshold=20, - ) - """ - - if_exists: MongoDBCollectionExistBehavior = Field(default=MongoDBCollectionExistBehavior.APPEND, alias="mode") - """Behavior of writing data into existing collection. - - Possible values: - * ``append`` (default) - Adds new objects into existing collection. - - .. dropdown:: Behavior in details - - * Collection does not exist - Collection is created using options provided by user - (``shardkey`` and others). - - * Collection exists - Data is appended to a collection. - - .. warning:: - - This mode does not check whether collection already contains - objects from dataframe, so duplicated objects can be created. - - * ``replace_entire_collection`` - **Collection is deleted and then created**. - - .. dropdown:: Behavior in details - - * Collection does not exist - Collection is created using options provided by user - (``shardkey`` and others). - - * Collection exists - Collection content is replaced with dataframe content. - - .. note:: - - ``error`` and ``ignore`` modes are not supported. - """ - - class Config: - prohibited_options = PROHIBITED_OPTIONS - known_options = KNOWN_WRITE_OPTIONS - extra = "allow" - - @root_validator(pre=True) - def mode_is_deprecated(cls, values): - if "mode" in values: - warnings.warn( - "Option `MongoDB.WriteOptions(mode=...)` is deprecated since v0.9.0 and will be removed in v1.0.0. " - "Use `MongoDB.WriteOptions(if_exists=...)` instead", - category=UserWarning, - stacklevel=3, - ) - return values - - class Dialect( # noqa: WPS215 - SupportTableWithoutDBSchema, - SupportHWMExpressionNone, - SupportColumnsNone, - SupportDfSchemaStruct, - SupportHWMColumnStr, - DBDialect, - ): - _compare_statements: ClassVar[Dict[Callable, str]] = { - operator.ge: "$gte", - operator.gt: "$gt", - operator.le: "$lte", - operator.lt: "$lt", - operator.eq: "$eq", - operator.ne: "$ne", - } - - @classmethod - def validate_where( - cls, - connection: BaseDBConnection, - where: Any, - ) -> dict | None: - if where is None: - return None - - if not isinstance(where, dict): - raise ValueError( - f"{connection.__class__.__name__} requires 'where' parameter type to be 'dict', " - f"got {where.__class__.__name__!r}", - ) - - for key in where: - cls._validate_top_level_keys_in_where_parameter(key) - return where - - @classmethod - def validate_hint( - cls, - connection: BaseDBConnection, - hint: Any, - ) -> dict | None: - if hint is None: - return None - - if not isinstance(hint, dict): - raise ValueError( - f"{connection.__class__.__name__} requires 'hint' parameter type to be 'dict', " - f"got {hint.__class__.__name__!r}", - ) - return hint - - @classmethod - def prepare_pipeline( - cls, - pipeline: Any, - ) -> Any: - """ - Prepares pipeline (list or dict) to MongoDB syntax, but without converting it to string. 
- """ - - if isinstance(pipeline, datetime): - return {"$date": pipeline.astimezone().isoformat()} - - if isinstance(pipeline, Mapping): - return {cls.prepare_pipeline(key): cls.prepare_pipeline(value) for key, value in pipeline.items()} - - if isinstance(pipeline, Iterable) and not isinstance(pipeline, str): - return [cls.prepare_pipeline(item) for item in pipeline] - - return pipeline - - @classmethod - def convert_to_str( - cls, - value: Any, - ) -> str: - """ - Converts the given dictionary, list or primitive to a string. - """ - - return json.dumps(cls.prepare_pipeline(value)) - - @classmethod - def _merge_conditions(cls, conditions: list[Any]) -> Any: - if len(conditions) == 1: - return conditions[0] - - return {"$and": conditions} - - @classmethod - def _get_compare_statement(cls, comparator: Callable, arg1: Any, arg2: Any) -> dict: - """ - Returns the comparison statement in MongoDB syntax: - - .. code:: - - { - "field": { - "$gt": "some_value", - } - } - """ - return { - arg1: { - cls._compare_statements[comparator]: arg2, - }, - } - - @classmethod - def _validate_top_level_keys_in_where_parameter(cls, key: str): - """ - Checks the 'where' parameter for illegal operators, such as ``$match``, ``$merge`` or ``$changeStream``. - - 'where' clause can contain only filtering operators, like ``{"col1" {"$eq": 1}}`` or ``{"$and": [...]}``. - """ - if key.startswith("$"): - if key == "$match": - raise ValueError( - "'$match' operator not allowed at the top level of the 'where' parameter dictionary. " - "This error most likely occurred due to the fact that you used the MongoDB format for the " - "pipeline {'$match': {'column': ...}}. In the onETL paradigm, you do not need to specify the " - "'$match' keyword, but write the filtering condition right away, like {'column': ...}", - ) - if key in _upper_level_operators: # noqa: WPS220 - raise ValueError( # noqa: WPS220 - f"An invalid parameter {key!r} was specified in the 'where' " - "field. You cannot use aggregations or 'groupBy' clauses in 'where'", - ) - @slot def pipeline( self, collection: str, pipeline: dict | list[dict], df_schema: StructType | None = None, - options: PipelineOptions | dict | None = None, + options: MongoDBPipelineOptions | dict | None = None, ): """ Execute a pipeline for a specific collection, and return DataFrame. 
|support_hooks| @@ -807,7 +405,7 @@ def get_min_max_bounds( expression: str | None = None, # noqa: U100 hint: dict | None = None, # noqa: U100 where: dict | None = None, - options: ReadOptions | dict | None = None, + options: MongoDBReadOptions | dict | None = None, ) -> tuple[Any, Any]: log.info("|Spark| Getting min and max values for column %r", column) @@ -853,7 +451,7 @@ def read_source_as_df( df_schema: StructType | None = None, start_from: Statement | None = None, end_at: Statement | None = None, - options: ReadOptions | dict | None = None, + options: MongoDBReadOptions | dict | None = None, ) -> DataFrame: read_options = self.ReadOptions.parse(options).dict(by_alias=True, exclude_none=True) final_where = self.Dialect._condition_assembler( @@ -894,7 +492,7 @@ def write_df_to_target( self, df: DataFrame, target: str, - options: WriteOptions | dict | None = None, + options: MongoDBWriteOptions | dict | None = None, ) -> None: write_options = self.WriteOptions.parse(options) write_options_dict = write_options.dict(by_alias=True, exclude_none=True, exclude={"if_exists"}) diff --git a/onetl/connection/db_connection/mongodb/dialect.py b/onetl/connection/db_connection/mongodb/dialect.py new file mode 100644 index 000000000..865288dea --- /dev/null +++ b/onetl/connection/db_connection/mongodb/dialect.py @@ -0,0 +1,204 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
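The ``pipeline`` signature above now accepts ``MongoDBPipelineOptions`` (still reachable as ``MongoDB.PipelineOptions`` through the alias added earlier). A short usage sketch, reusing the ``mongodb`` connection from the previous example; the collection name, aggregation stages and hint are illustrative:

.. code:: python

    df = mongodb.pipeline(
        collection="some_collection",
        # regular MongoDB aggregation pipeline syntax
        pipeline=[{"$match": {"amount": {"$gt": 100}}}],
        options=MongoDB.PipelineOptions(hint="{'_id': 1}"),
    )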
+ +from __future__ import annotations + +import json +import operator +from datetime import datetime +from typing import Any, Callable, ClassVar, Dict, Iterable, Mapping + +from onetl.base.base_db_connection import BaseDBConnection +from onetl.connection.db_connection.db_connection.dialect import DBDialect +from onetl.connection.db_connection.dialect_mixins import ( + SupportColumnsNone, + SupportDfSchemaStruct, + SupportHWMColumnStr, + SupportHWMExpressionNone, + SupportTableWithoutDBSchema, +) + +_upper_level_operators = frozenset( # noqa: WPS527 + [ + "$addFields", + "$bucket", + "$bucketAuto", + "$changeStream", + "$collStats", + "$count", + "$currentOp", + "$densify", + "$documents", + "$facet", + "$fill", + "$geoNear", + "$graphLookup", + "$group", + "$indexStats", + "$limit", + "$listLocalSessions", + "$listSessions", + "$lookup", + "$merge", + "$out", + "$planCacheStats", + "$project", + "$redact", + "$replaceRoot", + "$replaceWith", + "$sample", + "$search", + "$searchMeta", + "$set", + "$setWindowFields", + "$shardedDataDistribution", + "$skip", + "$sort", + "$sortByCount", + "$unionWith", + "$unset", + "$unwind", + ], +) + + +class MongoDBDialect( # noqa: WPS215 + SupportTableWithoutDBSchema, + SupportHWMExpressionNone, + SupportColumnsNone, + SupportDfSchemaStruct, + SupportHWMColumnStr, + DBDialect, +): + _compare_statements: ClassVar[Dict[Callable, str]] = { + operator.ge: "$gte", + operator.gt: "$gt", + operator.le: "$lte", + operator.lt: "$lt", + operator.eq: "$eq", + operator.ne: "$ne", + } + + @classmethod + def validate_where( + cls, + connection: BaseDBConnection, + where: Any, + ) -> dict | None: + if where is None: + return None + + if not isinstance(where, dict): + raise ValueError( + f"{connection.__class__.__name__} requires 'where' parameter type to be 'dict', " + f"got {where.__class__.__name__!r}", + ) + + for key in where: + cls._validate_top_level_keys_in_where_parameter(key) + return where + + @classmethod + def validate_hint( + cls, + connection: BaseDBConnection, + hint: Any, + ) -> dict | None: + if hint is None: + return None + + if not isinstance(hint, dict): + raise ValueError( + f"{connection.__class__.__name__} requires 'hint' parameter type to be 'dict', " + f"got {hint.__class__.__name__!r}", + ) + return hint + + @classmethod + def prepare_pipeline( + cls, + pipeline: Any, + ) -> Any: + """ + Prepares pipeline (list or dict) to MongoDB syntax, but without converting it to string. + """ + + if isinstance(pipeline, datetime): + return {"$date": pipeline.astimezone().isoformat()} + + if isinstance(pipeline, Mapping): + return {cls.prepare_pipeline(key): cls.prepare_pipeline(value) for key, value in pipeline.items()} + + if isinstance(pipeline, Iterable) and not isinstance(pipeline, str): + return [cls.prepare_pipeline(item) for item in pipeline] + + return pipeline + + @classmethod + def convert_to_str( + cls, + value: Any, + ) -> str: + """ + Converts the given dictionary, list or primitive to a string. + """ + + return json.dumps(cls.prepare_pipeline(value)) + + @classmethod + def _merge_conditions(cls, conditions: list[Any]) -> Any: + if len(conditions) == 1: + return conditions[0] + + return {"$and": conditions} + + @classmethod + def _get_compare_statement(cls, comparator: Callable, arg1: Any, arg2: Any) -> dict: + """ + Returns the comparison statement in MongoDB syntax: + + .. 
code:: + + { + "field": { + "$gt": "some_value", + } + } + """ + return { + arg1: { + cls._compare_statements[comparator]: arg2, + }, + } + + @classmethod + def _validate_top_level_keys_in_where_parameter(cls, key: str): + """ + Checks the 'where' parameter for illegal operators, such as ``$match``, ``$merge`` or ``$changeStream``. + + 'where' clause can contain only filtering operators, like ``{"col1" {"$eq": 1}}`` or ``{"$and": [...]}``. + """ + if key.startswith("$"): + if key == "$match": + raise ValueError( + "'$match' operator not allowed at the top level of the 'where' parameter dictionary. " + "This error most likely occurred due to the fact that you used the MongoDB format for the " + "pipeline {'$match': {'column': ...}}. In the onETL paradigm, you do not need to specify the " + "'$match' keyword, but write the filtering condition right away, like {'column': ...}", + ) + if key in _upper_level_operators: # noqa: WPS220 + raise ValueError( # noqa: WPS220 + f"An invalid parameter {key!r} was specified in the 'where' " + "field. You cannot use aggregations or 'groupBy' clauses in 'where'", + ) diff --git a/onetl/connection/db_connection/mongodb/options.py b/onetl/connection/db_connection/mongodb/options.py new file mode 100644 index 000000000..85f1935a3 --- /dev/null +++ b/onetl/connection/db_connection/mongodb/options.py @@ -0,0 +1,253 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import warnings +from enum import Enum + +from pydantic import Field, root_validator + +from onetl.impl import GenericOptions + +PIPELINE_PROHIBITED_OPTIONS = frozenset( + ( + "uri", + "database", + "collection", + "pipeline", + ), +) + +PROHIBITED_OPTIONS = frozenset( + ( + "uri", + "database", + "collection", + "pipeline", + "hint", + ), +) + +KNOWN_READ_OPTIONS = frozenset( + ( + "localThreshold", + "readPreference.name", + "readPreference.tagSets", + "readConcern.level", + "sampleSize", + "samplePoolSize", + "partitioner", + "partitionerOptions", + "registerSQLHelperFunctions", + "sql.inferschema.mapTypes.enabled", + "sql.inferschema.mapTypes.minimumKeys", + "sql.pipeline.includeNullFilters", + "sql.pipeline.includeFiltersAndProjections", + "pipeline", + "hint", + "collation", + "allowDiskUse", + "batchSize", + ), +) + +KNOWN_WRITE_OPTIONS = frozenset( + ( + "extendedBsonTypes", + "localThreshold", + "replaceDocument", + "maxBatchSize", + "writeConcern.w", + "writeConcern.journal", + "writeConcern.wTimeoutMS", + "shardKey", + "forceInsert", + "ordered", + ), +) + + +class MongoDBCollectionExistBehavior(str, Enum): + APPEND = "append" + REPLACE_ENTIRE_COLLECTION = "replace_entire_collection" + + def __str__(self) -> str: + return str(self.value) + + @classmethod # noqa: WPS120 + def _missing_(cls, value: object): # noqa: WPS120 + if str(value) == "overwrite": + warnings.warn( + "Mode `overwrite` is deprecated since v0.9.0 and will be removed in v1.0.0. 
" + "Use `replace_entire_collection` instead", + category=UserWarning, + stacklevel=4, + ) + return cls.REPLACE_ENTIRE_COLLECTION + + +class MongoDBPipelineOptions(GenericOptions): + """Aggregation pipeline options for MongoDB connector. + + The only difference from :obj:`MongoDBReadOptions` that it is allowed to pass the ``hint`` parameter. + + .. note :: + + You can pass any value + `supported by connector `_, + even if it is not mentioned in this documentation. + + The set of supported options depends on connector version. See link above. + + .. warning:: + + Options ``uri``, ``database``, ``collection``, ``pipeline`` are populated from connection attributes, + and cannot be set in ``PipelineOptions`` class. + + Examples + -------- + + Pipeline options initialization + + .. code:: python + + MongoDB.PipelineOptions( + hint="{'_id': 1}", + ) + """ + + class Config: + prohibited_options = PIPELINE_PROHIBITED_OPTIONS + known_options = KNOWN_READ_OPTIONS + extra = "allow" + + +class MongoDBReadOptions(GenericOptions): + """Reading options for MongoDB connector. + + .. note :: + + You can pass any value + `supported by connector `_, + even if it is not mentioned in this documentation. + + The set of supported options depends on connector version. See link above. + + .. warning:: + + Options ``uri``, ``database``, ``collection``, ``pipeline``, ``hint`` are populated from connection + attributes, and cannot be set in ``ReadOptions`` class. + + Examples + -------- + + Read options initialization + + .. code:: python + + MongoDB.ReadOptions( + batchSize=10000, + ) + """ + + class Config: + prohibited_options = PROHIBITED_OPTIONS + known_options = KNOWN_READ_OPTIONS + extra = "allow" + + +class MongoDBWriteOptions(GenericOptions): + """Writing options for MongoDB connector. + + .. note :: + + You can pass any value + `supported by connector `_, + even if it is not mentioned in this documentation. + + The set of supported options depends on connector version. See link above. + + .. warning:: + + Options ``uri``, ``database``, ``collection`` are populated from connection attributes, + and cannot be set in ``WriteOptions`` class. + + Examples + -------- + + Write options initialization + + .. code:: python + + options = MongoDB.WriteOptions( + if_exists="append", + sampleSize=500, + localThreshold=20, + ) + """ + + if_exists: MongoDBCollectionExistBehavior = Field(default=MongoDBCollectionExistBehavior.APPEND, alias="mode") + """Behavior of writing data into existing collection. + + Possible values: + * ``append`` (default) + Adds new objects into existing collection. + + .. dropdown:: Behavior in details + + * Collection does not exist + Collection is created using options provided by user + (``shardkey`` and others). + + * Collection exists + Data is appended to a collection. + + .. warning:: + + This mode does not check whether collection already contains + objects from dataframe, so duplicated objects can be created. + + * ``replace_entire_collection`` + **Collection is deleted and then created**. + + .. dropdown:: Behavior in details + + * Collection does not exist + Collection is created using options provided by user + (``shardkey`` and others). + + * Collection exists + Collection content is replaced with dataframe content. + + .. note:: + + ``error`` and ``ignore`` modes are not supported. 
+ """ + + class Config: + prohibited_options = PROHIBITED_OPTIONS + known_options = KNOWN_WRITE_OPTIONS + extra = "allow" + + @root_validator(pre=True) + def _mode_is_deprecated(cls, values): + if "mode" in values: + warnings.warn( + "Option `MongoDB.WriteOptions(mode=...)` is deprecated since v0.9.0 and will be removed in v1.0.0. " + "Use `MongoDB.WriteOptions(if_exists=...)` instead", + category=UserWarning, + stacklevel=3, + ) + return values diff --git a/setup.cfg b/setup.cfg index fa71f2403..af73444ce 100644 --- a/setup.cfg +++ b/setup.cfg @@ -307,9 +307,6 @@ per-file-ignores = *connection.py: # WPS437 Found protected attribute usage: spark._sc._gateway WPS437, - onetl/connection/db_connection/mongodb.py: -# WPS437 Found protected attribute usage: self.Dialect._ - WPS437, onetl/connection/db_connection/jdbc_mixin/connection.py: # too few type annotations TAE001, From 87cc783d06731a108a635fafe776c6a39f7b8359 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 22 Aug 2023 04:16:44 +0000 Subject: [PATCH 09/30] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/asottile/blacken-docs: 1.15.0 → 1.16.0](https://github.com/asottile/blacken-docs/compare/1.15.0...1.16.0) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ba0ab0b60..0145b90c6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -69,7 +69,7 @@ repos: - id: black language_version: python3 - repo: https://github.com/asottile/blacken-docs - rev: 1.15.0 + rev: 1.16.0 hooks: - id: blacken-docs - repo: meta From 7853f781c18781d21e55af2e216905478b70ea03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Fri, 18 Aug 2023 15:36:19 +0000 Subject: [PATCH 10/30] [DOP-8140] Split HDFS and HDFS.Slots to separated submodules --- docs/connection/file_connection/hdfs.rst | 20 - .../file_connection/hdfs/connection.rst | 9 + .../connection/file_connection/hdfs/index.rst | 16 + .../connection/file_connection/hdfs/slots.rst | 10 + docs/connection/file_connection/index.rst | 2 +- .../file_df_connection/spark_hdfs/slots.rst | 2 +- .../file_connection/hdfs/__init__.py | 16 + .../{hdfs.py => hdfs/connection.py} | 388 +++--------------- .../connection/file_connection/hdfs/slots.py | 286 +++++++++++++ .../spark_hdfs/connection.py | 2 +- setup.cfg | 2 +- .../test_hdfs_file_connection_integration.py | 8 +- 12 files changed, 408 insertions(+), 353 deletions(-) delete mode 100644 docs/connection/file_connection/hdfs.rst create mode 100644 docs/connection/file_connection/hdfs/connection.rst create mode 100644 docs/connection/file_connection/hdfs/index.rst create mode 100644 docs/connection/file_connection/hdfs/slots.rst create mode 100644 onetl/connection/file_connection/hdfs/__init__.py rename onetl/connection/file_connection/{hdfs.py => hdfs/connection.py} (68%) create mode 100644 onetl/connection/file_connection/hdfs/slots.py diff --git a/docs/connection/file_connection/hdfs.rst b/docs/connection/file_connection/hdfs.rst deleted file mode 100644 index 38752a467..000000000 --- a/docs/connection/file_connection/hdfs.rst +++ /dev/null @@ -1,20 +0,0 @@ -.. _hdfs: - -HDFS connection -=============== - -.. 
currentmodule:: onetl.connection.file_connection.hdfs - -.. autosummary:: - - HDFS - HDFS.Slots - -.. autoclass:: HDFS - :members: __init__, check, path_exists, is_file, is_dir, get_stat, resolve_dir, resolve_file, create_dir, remove_file, remove_dir, rename_dir, rename_file, list_dir, walk, download_file, upload_file - -.. currentmodule:: onetl.connection.file_connection.hdfs.HDFS - -.. autoclass:: Slots - :members: normalize_cluster_name, normalize_namenode_name, get_known_clusters, get_cluster_namenodes, get_current_cluster, get_webhdfs_port, is_namenode_active - :member-order: bysource diff --git a/docs/connection/file_connection/hdfs/connection.rst b/docs/connection/file_connection/hdfs/connection.rst new file mode 100644 index 000000000..7fd657571 --- /dev/null +++ b/docs/connection/file_connection/hdfs/connection.rst @@ -0,0 +1,9 @@ +.. _hdfs-connection: + +HDFS connection +=============== + +.. currentmodule:: onetl.connection.file_connection.hdfs.connection + +.. autoclass:: HDFS + :members: get_current, check, path_exists, is_file, is_dir, get_stat, resolve_dir, resolve_file, create_dir, remove_file, remove_dir, rename_dir, rename_file, list_dir, walk, download_file, upload_file diff --git a/docs/connection/file_connection/hdfs/index.rst b/docs/connection/file_connection/hdfs/index.rst new file mode 100644 index 000000000..a9d57a7a5 --- /dev/null +++ b/docs/connection/file_connection/hdfs/index.rst @@ -0,0 +1,16 @@ +.. _hdfs: + +HDFS +==== + +.. toctree:: + :maxdepth: 1 + :caption: Connection + + connection + +.. toctree:: + :maxdepth: 1 + :caption: For developers + + slots diff --git a/docs/connection/file_connection/hdfs/slots.rst b/docs/connection/file_connection/hdfs/slots.rst new file mode 100644 index 000000000..2128b328c --- /dev/null +++ b/docs/connection/file_connection/hdfs/slots.rst @@ -0,0 +1,10 @@ +.. _hdfs-slots: + +HDFS Slots +========== + +.. currentmodule:: onetl.connection.file_connection.hdfs.slots + +.. autoclass:: HDFSSlots + :members: normalize_cluster_name, normalize_namenode_host, get_known_clusters, get_cluster_namenodes, get_current_cluster, get_webhdfs_port, is_namenode_active + :member-order: bysource diff --git a/docs/connection/file_connection/index.rst b/docs/connection/file_connection/index.rst index c47ddf75c..2fc998c7f 100644 --- a/docs/connection/file_connection/index.rst +++ b/docs/connection/file_connection/index.rst @@ -9,7 +9,7 @@ File Connections FTP FTPS - HDFS + HDFS SFTP S3 Webdav diff --git a/docs/connection/file_df_connection/spark_hdfs/slots.rst b/docs/connection/file_df_connection/spark_hdfs/slots.rst index 6adb4e1f0..3797c54ca 100644 --- a/docs/connection/file_df_connection/spark_hdfs/slots.rst +++ b/docs/connection/file_df_connection/spark_hdfs/slots.rst @@ -6,5 +6,5 @@ Spark HDFS Slots .. currentmodule:: onetl.connection.file_df_connection.spark_hdfs.slots .. 
autoclass:: SparkHDFSSlots - :members: normalize_cluster_name, normalize_namenode_name, get_known_clusters, get_cluster_namenodes, get_current_cluster, get_ipc_port, is_namenode_active + :members: normalize_cluster_name, normalize_namenode_host, get_known_clusters, get_cluster_namenodes, get_current_cluster, get_ipc_port, is_namenode_active :member-order: bysource diff --git a/onetl/connection/file_connection/hdfs/__init__.py b/onetl/connection/file_connection/hdfs/__init__.py new file mode 100644 index 000000000..56be3211c --- /dev/null +++ b/onetl/connection/file_connection/hdfs/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from onetl.connection.file_connection.hdfs.connection import HDFS +from onetl.connection.file_connection.hdfs.slots import HDFSSlots diff --git a/onetl/connection/file_connection/hdfs.py b/onetl/connection/file_connection/hdfs/connection.py similarity index 68% rename from onetl/connection/file_connection/hdfs.py rename to onetl/connection/file_connection/hdfs/connection.py index 31a4c3012..20869909c 100644 --- a/onetl/connection/file_connection/hdfs.py +++ b/onetl/connection/file_connection/hdfs/connection.py @@ -25,6 +25,7 @@ from onetl.base import PathStatProtocol from onetl.connection.file_connection.file_connection import FileConnection +from onetl.connection.file_connection.hdfs.slots import HDFSSlots from onetl.connection.file_connection.mixins.rename_dir_mixin import RenameDirMixin from onetl.connection.kerberos_helpers import kinit from onetl.hooks import slot, support_hooks @@ -88,7 +89,8 @@ class HDFS(FileConnection, RenameDirMixin): Used for: * HWM and lineage (as instance name for file paths), if set. * Validation of ``host`` value, - if latter is passed and if some hooks are bound to :obj:`~slots.get_cluster_namenodes`. + if latter is passed and if some hooks are bound to + :obj:`Slots.get_cluster_namenodes ` .. warning: @@ -100,7 +102,8 @@ class HDFS(FileConnection, RenameDirMixin): Should be an active namenode (NOT standby). If value is not set, but there are some hooks bound to - :obj:`~slots.get_cluster_namenodes` and :obj:`~slots.is_namenode_active`, + :obj:`Slots.get_cluster_namenodes ` + and :obj:`Slots.is_namenode_active `, onETL will iterate over cluster namenodes to detect which one is active. .. warning: @@ -110,7 +113,8 @@ class HDFS(FileConnection, RenameDirMixin): webhdfs_port : int, default: ``50070`` Port of Hadoop namenode (WebHDFS protocol). - If omitted, but there are some hooks bound to :obj:`~slots.get_webhdfs_port` slot, + If omitted, but there are some hooks bound to + :obj:`Slots.get_webhdfs_port ` slot, onETL will try to detect port number for a specific ``cluster``. 
user : str, optional @@ -202,287 +206,74 @@ class HDFS(FileConnection, RenameDirMixin): ).check() """ - @support_hooks - class Slots: - """Slots that could be implemented by third-party plugins""" - - @slot - @staticmethod - def normalize_cluster_name(cluster: str) -> str | None: - """ - Normalize cluster name passed into HDFS constructor. - - If hooks didn't return anything, cluster name is left intact. - - Parameters - ---------- - cluster : :obj:`str` - Cluster name - - Returns - ------- - str | None - Normalized cluster name. - - If hook cannot be applied to a specific cluster, it should return ``None``. - - Examples - -------- - - .. code:: python - - from onetl.connection import HDFS - from onetl.hooks import hook - - - @HDFS.Slots.normalize_cluster_name.bind - @hook - def normalize_cluster_name(cluster: str) -> str: - return cluster.lower() - """ - - @slot - @staticmethod - def normalize_namenode_host(host: str, cluster: str | None) -> str | None: - """ - Normalize namenode host passed into HDFS constructor. - - If hooks didn't return anything, host is left intact. - - Parameters - ---------- - host : :obj:`str` - Namenode host (raw) - - cluster : :obj:`str` or :obj:`None` - Cluster name (normalized), if set - - Returns - ------- - str | None - Normalized namenode host name. - - If hook cannot be applied to a specific host name, it should return ``None``. - - Examples - -------- - - .. code:: python - - from onetl.connection import HDFS - from onetl.hooks import hook - - - @HDFS.Slots.normalize_namenode_host.bind - @hook - def normalize_namenode_host(host: str, cluster: str) -> str | None: - if cluster == "rnd-dwh": - if not host.endswith(".domain.com"): - # fix missing domain name - host += ".domain.com" - return host - - return None - """ - - @slot - @staticmethod - def get_known_clusters() -> set[str] | None: - """ - Return collection of known clusters. - - Cluster passed into HDFS constructor should be present in this list. - If hooks didn't return anything, no validation will be performed. - - Returns - ------- - set[str] | None - Collection of cluster names (in normalized form). - - If hook cannot be applied, it should return ``None``. - - Examples - -------- - - .. code:: python - - from onetl.connection import HDFS - from onetl.hooks import hook - - - @HDFS.Slots.get_known_clusters.bind - @hook - def get_known_clusters() -> str[str]: - return {"rnd-dwh", "rnd-prod"} - """ - - @slot - @staticmethod - def get_cluster_namenodes(cluster: str) -> set[str] | None: - """ - Return collection of known namenodes for the cluster. - - Namenode host passed into HDFS constructor should be present in this list. - If hooks didn't return anything, no validation will be performed. - - Parameters - ---------- - cluster : :obj:`str` - Cluster name (normalized) - - Returns - ------- - set[str] | None - Collection of host names (in normalized form). - - If hook cannot be applied, it should return ``None``. - - Examples - -------- - - .. code:: python - - from onetl.connection import HDFS - from onetl.hooks import hook - - - @HDFS.Slots.get_cluster_namenodes.bind - @hook - def get_cluster_namenodes(cluster: str) -> str[str] | None: - if cluster == "rnd-dwh": - return {"namenode1.domain.com", "namenode2.domain.com"} - return None - """ - - @slot - @staticmethod - def get_current_cluster() -> str | None: - """ - Get current cluster name. - - Used in :obj:`~get_current_cluster` to automatically fill up ``cluster`` attribute of a connection. 
- If hooks didn't return anything, calling the method above will raise an exception. - - Returns - ------- - str | None - Current cluster name (in normalized form). - - If hook cannot be applied, it should return ``None``. - - Examples - -------- - - .. code:: python - - from onetl.connection import HDFS - from onetl.hooks import hook - - - @HDFS.Slots.get_current_cluster.bind - @hook - def get_current_cluster() -> str: - # some magic here - return "rnd-dwh" - """ - - @slot - @staticmethod - def get_webhdfs_port(cluster: str) -> int | None: - """ - Get WebHDFS port number for a specific cluster. - - Used by constructor to automatically set port number if omitted. - - Parameters - ---------- - cluster : :obj:`str` - Cluster name (normalized) - - Returns - ------- - int | None - WebHDFS port number. - - If hook cannot be applied, it should return ``None``. - - Examples - -------- - - .. code:: python - - from onetl.connection import HDFS - from onetl.hooks import hook - - - @HDFS.Slots.get_webhdfs_port.bind - @hook - def get_webhdfs_port(cluster: str) -> int | None: - if cluster == "rnd-dwh": - return 50007 # Cloudera - return None - """ - - @slot - @staticmethod - def is_namenode_active(host: str, cluster: str | None) -> bool | None: - """ - Check whether a namenode of a specified cluster is active (=not standby) or not. + cluster: Optional[Cluster] = None + host: Optional[Host] = None + webhdfs_port: int = Field(alias="port", default=50070) + user: Optional[str] = None + password: Optional[SecretStr] = None + keytab: Optional[FilePath] = None + timeout: int = 10 - Used for: - * If HDFS connection is created without ``host`` + Slots = HDFSSlots + # TODO: remove in v1.0.0 + slots = Slots - Connector will iterate over :obj:`~get_cluster_namenodes` of a cluster to get active namenode, - and then use it instead of ``host`` attribute. + @slot + @classmethod + def get_current(cls, **kwargs): + """ + Create connection for current cluster. |support_hooks| - * If HDFS connection is created with ``host`` + Automatically sets up current cluster name as ``cluster``. - :obj:`~check` will determine whether this host is active. + .. note:: - Parameters - ---------- - host : :obj:`str` - Namenode host (normalized) + Can be used only if there are a some hooks bound to slot + :obj:`Slots.get_current_cluster ` - cluster : :obj:`str` or :obj:`None` - Cluster name (normalized), if set + Parameters + ---------- + user : str + password : str | None + keytab : str | None + timeout : int - Returns - ------- - bool | None - ``True`` if namenode is active, ``False`` if not. + See :obj:`~HDFS` constructor documentation. - If hook cannot be applied, it should return ``None``. + Examples + -------- - Examples - -------- + .. code:: python - .. 
code:: python + from onetl.connection import HDFS - from onetl.connection import HDFS - from onetl.hooks import hook + # injecting current cluster name via hooks mechanism + hdfs = HDFS.get_current(user="me", password="pass") + """ + log.info("|%s| Detecting current cluster...", cls.__name__) + current_cluster = cls.Slots.get_current_cluster() + if not current_cluster: + raise RuntimeError( + f"{cls.__name__}.get_current() can be used only if there are " + f"some hooks bound to {cls.__name__}.Slots.get_current_cluster", + ) - @HDFS.Slots.is_namenode_active.bind - @hook - def is_namenode_active(host: str, cluster: str | None) -> bool: - # some magic here - return True - """ + log.info("|%s| Got %r", cls.__name__, current_cluster) + return cls(cluster=current_cluster, **kwargs) - # TODO: remove in v1.0.0 - slots = Slots + @property + def instance_url(self) -> str: + if self.cluster: + return self.cluster + return f"hdfs://{self.host}:{self.webhdfs_port}" - cluster: Optional[Cluster] = None - host: Optional[Host] = None - webhdfs_port: int = Field(alias="port", default=50070) - user: Optional[str] = None - password: Optional[SecretStr] = None - keytab: Optional[FilePath] = None - timeout: int = 10 + @slot + def path_exists(self, path: os.PathLike | str) -> bool: + return self.client.status(os.fspath(path), strict=False) @validator("user", pre=True) - def validate_packages(cls, user): + def _validate_packages(cls, user): if user: try: from hdfs.ext.kerberos import KerberosClient as CheckForKerberosSupport @@ -507,7 +298,7 @@ def validate_packages(cls, user): return user @root_validator - def validate_cluster_or_hostname_set(cls, values): + def _validate_cluster_or_hostname_set(cls, values): host = values.get("host") cluster = values.get("cluster") @@ -517,7 +308,7 @@ def validate_cluster_or_hostname_set(cls, values): return values @validator("cluster") - def validate_cluster_name(cls, cluster): + def _validate_cluster_name(cls, cluster): log.debug("|%s| Normalizing cluster %r name ...", cls.__name__, cluster) validated_cluster = cls.Slots.normalize_cluster_name(cluster) or cluster if validated_cluster != cluster: @@ -533,7 +324,7 @@ def validate_cluster_name(cls, cluster): return validated_cluster @validator("host") - def validate_host_name(cls, host, values): + def _validate_host_name(cls, host, values): cluster = values.get("cluster") log.debug("|%s| Normalizing namenode %r ...", cls.__name__, host) @@ -553,7 +344,7 @@ def validate_host_name(cls, host, values): return namenode @validator("webhdfs_port", always=True) - def validate_port_number(cls, port, values): + def _validate_port_number(cls, port, values): cluster = values.get("cluster") if cluster: log.debug("|%s| Getting WebHDFS port of cluster %r ...", cls.__name__, cluster) @@ -565,7 +356,7 @@ def validate_port_number(cls, port, values): return port @root_validator - def validate_credentials(cls, values): + def _validate_credentials(cls, values): user = values.get("user") password = values.get("password") keytab = values.get("keytab") @@ -577,59 +368,6 @@ def validate_credentials(cls, values): return values - @slot - @classmethod - def get_current(cls, **kwargs): - """ - Create connection for current cluster. |support_hooks| - - Automatically sets up current cluster name as ``cluster``. - - .. note:: - - Can be used only if there are a some hooks bound to slot :obj:`~slots.get_current_cluster`. 
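Putting the two pieces together, a hook bound to ``get_current_cluster`` lets ``HDFS.get_current()`` fill in the cluster name automatically; the cluster name and credentials below are illustrative:

.. code:: python

    from onetl.connection import HDFS
    from onetl.hooks import hook


    @HDFS.Slots.get_current_cluster.bind
    @hook
    def get_current_cluster() -> str:
        return "rnd-dwh"


    # cluster name is taken from the hook above
    hdfs = HDFS.get_current(user="me", password="pass")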
- - Parameters - ---------- - user : str - password : str | None - keytab : str | None - timeout : int - - See :obj:`~HDFS` constructor documentation. - - Examples - -------- - - .. code:: python - - from onetl.connection import HDFS - - # injecting current cluster name via hooks mechanism - hdfs = HDFS.get_current(user="me", password="pass") - """ - - log.info("|%s| Detecting current cluster...", cls.__name__) - current_cluster = cls.Slots.get_current_cluster() - if not current_cluster: - raise RuntimeError( - f"{cls.__name__}.get_current() can be used only if there are " - f"some hooks bound to {cls.__name__}.Slots.get_current_cluster", - ) - - log.info("|%s| Got %r", cls.__name__, current_cluster) - return cls(cluster=current_cluster, **kwargs) - - @property - def instance_url(self) -> str: - if self.cluster: - return self.cluster - return f"hdfs://{self.host}:{self.webhdfs_port}" - - @slot - def path_exists(self, path: os.PathLike | str) -> bool: - return self.client.status(os.fspath(path), strict=False) - def _get_active_namenode(self) -> str: class_name = self.__class__.__name__ log.info("|%s| Detecting active namenode of cluster %r ...", class_name, self.cluster) diff --git a/onetl/connection/file_connection/hdfs/slots.py b/onetl/connection/file_connection/hdfs/slots.py new file mode 100644 index 000000000..c57e69af4 --- /dev/null +++ b/onetl/connection/file_connection/hdfs/slots.py @@ -0,0 +1,286 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from onetl.hooks import slot, support_hooks + + +@support_hooks +class HDFSSlots: + """Slots that could be implemented by third-party plugins""" + + @slot + @staticmethod + def normalize_cluster_name(cluster: str) -> str | None: + """ + Normalize cluster name passed into HDFS constructor. + + If hooks didn't return anything, cluster name is left intact. + + Parameters + ---------- + cluster : :obj:`str` + Cluster name + + Returns + ------- + str | None + Normalized cluster name. + + If hook cannot be applied to a specific cluster, it should return ``None``. + + Examples + -------- + + .. code:: python + + from onetl.connection import HDFS + from onetl.hooks import hook + + + @HDFS.Slots.normalize_cluster_name.bind + @hook + def normalize_cluster_name(cluster: str) -> str: + return cluster.lower() + """ + + @slot + @staticmethod + def normalize_namenode_host(host: str, cluster: str | None) -> str | None: + """ + Normalize namenode host passed into HDFS constructor. + + If hooks didn't return anything, host is left intact. + + Parameters + ---------- + host : :obj:`str` + Namenode host (raw) + + cluster : :obj:`str` or :obj:`None` + Cluster name (normalized), if set + + Returns + ------- + str | None + Normalized namenode host name. + + If hook cannot be applied to a specific host name, it should return ``None``. + + Examples + -------- + + .. 
code:: python + + from onetl.connection import HDFS + from onetl.hooks import hook + + + @HDFS.Slots.normalize_namenode_host.bind + @hook + def normalize_namenode_host(host: str, cluster: str) -> str | None: + if cluster == "rnd-dwh": + if not host.endswith(".domain.com"): + # fix missing domain name + host += ".domain.com" + return host + + return None + """ + + @slot + @staticmethod + def get_known_clusters() -> set[str] | None: + """ + Return collection of known clusters. + + Cluster passed into HDFS constructor should be present in this list. + If hooks didn't return anything, no validation will be performed. + + Returns + ------- + set[str] | None + Collection of cluster names (in normalized form). + + If hook cannot be applied, it should return ``None``. + + Examples + -------- + + .. code:: python + + from onetl.connection import HDFS + from onetl.hooks import hook + + + @HDFS.Slots.get_known_clusters.bind + @hook + def get_known_clusters() -> str[str]: + return {"rnd-dwh", "rnd-prod"} + """ + + @slot + @staticmethod + def get_cluster_namenodes(cluster: str) -> set[str] | None: + """ + Return collection of known namenodes for the cluster. + + Namenode host passed into HDFS constructor should be present in this list. + If hooks didn't return anything, no validation will be performed. + + Parameters + ---------- + cluster : :obj:`str` + Cluster name (normalized) + + Returns + ------- + set[str] | None + Collection of host names (in normalized form). + + If hook cannot be applied, it should return ``None``. + + Examples + -------- + + .. code:: python + + from onetl.connection import HDFS + from onetl.hooks import hook + + + @HDFS.Slots.get_cluster_namenodes.bind + @hook + def get_cluster_namenodes(cluster: str) -> str[str] | None: + if cluster == "rnd-dwh": + return {"namenode1.domain.com", "namenode2.domain.com"} + return None + """ + + @slot + @staticmethod + def get_current_cluster() -> str | None: + """ + Get current cluster name. + + Used in :obj:`~get_current_cluster` to automatically fill up ``cluster`` attribute of a connection. + If hooks didn't return anything, calling the method above will raise an exception. + + Returns + ------- + str | None + Current cluster name (in normalized form). + + If hook cannot be applied, it should return ``None``. + + Examples + -------- + + .. code:: python + + from onetl.connection import HDFS + from onetl.hooks import hook + + + @HDFS.Slots.get_current_cluster.bind + @hook + def get_current_cluster() -> str: + # some magic here + return "rnd-dwh" + """ + + @slot + @staticmethod + def get_webhdfs_port(cluster: str) -> int | None: + """ + Get WebHDFS port number for a specific cluster. + + Used by constructor to automatically set port number if omitted. + + Parameters + ---------- + cluster : :obj:`str` + Cluster name (normalized) + + Returns + ------- + int | None + WebHDFS port number. + + If hook cannot be applied, it should return ``None``. + + Examples + -------- + + .. code:: python + + from onetl.connection import HDFS + from onetl.hooks import hook + + + @HDFS.Slots.get_webhdfs_port.bind + @hook + def get_webhdfs_port(cluster: str) -> int | None: + if cluster == "rnd-dwh": + return 50007 # Cloudera + return None + """ + + @slot + @staticmethod + def is_namenode_active(host: str, cluster: str | None) -> bool | None: + """ + Check whether a namenode of a specified cluster is active (=not standby) or not. 
+ + Used for: + * If HDFS connection is created without ``host`` + + Connector will iterate over :obj:`~get_cluster_namenodes` of a cluster to get active namenode, + and then use it instead of ``host`` attribute. + + * If HDFS connection is created with ``host`` + + :obj:`~check` will determine whether this host is active. + + Parameters + ---------- + host : :obj:`str` + Namenode host (normalized) + + cluster : :obj:`str` or :obj:`None` + Cluster name (normalized), if set + + Returns + ------- + bool | None + ``True`` if namenode is active, ``False`` if not. + + If hook cannot be applied, it should return ``None``. + + Examples + -------- + + .. code:: python + + from onetl.connection import HDFS + from onetl.hooks import hook + + + @HDFS.Slots.is_namenode_active.bind + @hook + def is_namenode_active(host: str, cluster: str | None) -> bool: + # some magic here + return True + """ diff --git a/onetl/connection/file_df_connection/spark_hdfs/connection.py b/onetl/connection/file_df_connection/spark_hdfs/connection.py index 021230779..91ce7e66c 100644 --- a/onetl/connection/file_df_connection/spark_hdfs/connection.py +++ b/onetl/connection/file_df_connection/spark_hdfs/connection.py @@ -75,7 +75,7 @@ class SparkHDFS(SparkFileDFConnection): Supports only reading files as Spark DataFrame and writing DataFrame to files. Does NOT support file operations, like create, delete, rename, etc. For these operations, - use :obj:`HDFS ` connection. + use :obj:`HDFS ` connection. Parameters ---------- diff --git a/setup.cfg b/setup.cfg index af73444ce..a2caa1331 100644 --- a/setup.cfg +++ b/setup.cfg @@ -335,7 +335,7 @@ per-file-ignores = onetl/connection/file_connection/file_connection.py: # WPS220: Found too deep nesting WPS220, - onetl/connection/file_connection/hdfs.py: + onetl/connection/file_connection/hdfs/connection.py: # E800 Found commented out code E800, # F401 'hdfs.ext.kerberos.KerberosClient as CheckForKerberosSupport' imported but unused diff --git a/tests/tests_integration/tests_file_connection_integration/test_hdfs_file_connection_integration.py b/tests/tests_integration/tests_file_connection_integration/test_hdfs_file_connection_integration.py index a7600e166..ba742acdb 100644 --- a/tests/tests_integration/tests_file_connection_integration/test_hdfs_file_connection_integration.py +++ b/tests/tests_integration/tests_file_connection_integration/test_hdfs_file_connection_integration.py @@ -30,9 +30,9 @@ def test_hdfs_file_connection_check_anonymous(hdfs_file_connection, caplog): def test_hdfs_file_connection_check_with_keytab(mocker, hdfs_server, caplog, request, tmp_path_factory): from onetl.connection import HDFS - from onetl.connection.file_connection import hdfs + from onetl.connection.file_connection.hdfs import connection - mocker.patch.object(hdfs, "kinit") + mocker.patch.object(connection, "kinit") folder: Path = tmp_path_factory.mktemp("keytab") folder.mkdir(exist_ok=True, parents=True) @@ -63,9 +63,9 @@ def finalizer(): def test_hdfs_file_connection_check_with_password(mocker, hdfs_server, caplog): from onetl.connection import HDFS - from onetl.connection.file_connection import hdfs + from onetl.connection.file_connection.hdfs import connection - mocker.patch.object(hdfs, "kinit") + mocker.patch.object(connection, "kinit") hdfs = HDFS(host=hdfs_server.host, port=hdfs_server.webhdfs_port, user=getuser(), password="somepass") From 710c0b0ab60020c2e2375eb57c1a775a0fb1fbf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= 
=?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Fri, 18 Aug 2023 12:22:10 +0000 Subject: [PATCH 11/30] [DOP-8140] Split Hive.Dialect, .Slots and .*Options to separated submodules --- docs/connection/db_connection/hive.rst | 26 - .../db_connection/hive/connection.rst | 10 + .../connection/db_connection/hive/execute.rst | 8 + docs/connection/db_connection/hive/index.rst | 24 + docs/connection/db_connection/hive/read.rst | 13 + docs/connection/db_connection/hive/slots.rst | 10 + docs/connection/db_connection/hive/write.rst | 13 + docs/connection/db_connection/index.rst | 2 +- .../connection/db_connection/hive/__init__.py | 22 + .../{hive.py => hive/connection.py} | 498 ++---------------- .../connection/db_connection/hive/dialect.py | 39 ++ .../connection/db_connection/hive/options.py | 337 ++++++++++++ onetl/connection/db_connection/hive/slots.py | 120 +++++ .../spark_hdfs/connection.py | 3 +- setup.cfg | 3 - .../test_db_options_unit.py | 4 +- 16 files changed, 636 insertions(+), 496 deletions(-) delete mode 100644 docs/connection/db_connection/hive.rst create mode 100644 docs/connection/db_connection/hive/connection.rst create mode 100644 docs/connection/db_connection/hive/execute.rst create mode 100644 docs/connection/db_connection/hive/index.rst create mode 100644 docs/connection/db_connection/hive/read.rst create mode 100644 docs/connection/db_connection/hive/slots.rst create mode 100644 docs/connection/db_connection/hive/write.rst create mode 100644 onetl/connection/db_connection/hive/__init__.py rename onetl/connection/db_connection/{hive.py => hive/connection.py} (51%) create mode 100644 onetl/connection/db_connection/hive/dialect.py create mode 100644 onetl/connection/db_connection/hive/options.py create mode 100644 onetl/connection/db_connection/hive/slots.py diff --git a/docs/connection/db_connection/hive.rst b/docs/connection/db_connection/hive.rst deleted file mode 100644 index 7cb09ad53..000000000 --- a/docs/connection/db_connection/hive.rst +++ /dev/null @@ -1,26 +0,0 @@ -.. _hive: - -Hive connection -=============== - -.. currentmodule:: onetl.connection.db_connection.hive - -.. autosummary:: - - Hive - Hive.WriteOptions - Hive.Slots - -.. autoclass:: Hive - :members: get_current, check, sql, execute - :member-order: bysource - -.. currentmodule:: onetl.connection.db_connection.hive.Hive - -.. autopydantic_model:: WriteOptions - :members: mode, format, partition_by, bucket_by, sort_by, compression - :member-order: bysource - -.. autoclass:: Slots - :members: normalize_cluster_name, get_known_clusters, get_current_cluster - :member-order: bysource diff --git a/docs/connection/db_connection/hive/connection.rst b/docs/connection/db_connection/hive/connection.rst new file mode 100644 index 000000000..cbc51eac3 --- /dev/null +++ b/docs/connection/db_connection/hive/connection.rst @@ -0,0 +1,10 @@ +.. _hive-connection: + +Hive Connection +=============== + +.. currentmodule:: onetl.connection.db_connection.hive.connection + +.. autoclass:: Hive + :members: get_current, check + :member-order: bysource diff --git a/docs/connection/db_connection/hive/execute.rst b/docs/connection/db_connection/hive/execute.rst new file mode 100644 index 000000000..ae32e61d2 --- /dev/null +++ b/docs/connection/db_connection/hive/execute.rst @@ -0,0 +1,8 @@ +.. _hive-execute: + +Executing statements in Hive +============================ + +.. currentmodule:: onetl.connection.db_connection.hive.connection + +.. 
automethod:: Hive.execute diff --git a/docs/connection/db_connection/hive/index.rst b/docs/connection/db_connection/hive/index.rst new file mode 100644 index 000000000..9dd900b07 --- /dev/null +++ b/docs/connection/db_connection/hive/index.rst @@ -0,0 +1,24 @@ +.. _hive: + +Hive +==== + +.. toctree:: + :maxdepth: 1 + :caption: Connection + + connection + +.. toctree:: + :maxdepth: 1 + :caption: Operations + + read + write + execute + +.. toctree:: + :maxdepth: 1 + :caption: For developers + + slots diff --git a/docs/connection/db_connection/hive/read.rst b/docs/connection/db_connection/hive/read.rst new file mode 100644 index 000000000..a9961b4ab --- /dev/null +++ b/docs/connection/db_connection/hive/read.rst @@ -0,0 +1,13 @@ +.. _hive-read: + +Reading from Hive +================= + +There are 2 ways of distributed data reading from Hive: + +* Using :obj:`DBReader ` with different :ref:`strategy` +* Using :obj:`Hive.sql ` + +.. currentmodule:: onetl.connection.db_connection.hive.connection + +.. automethod:: Hive.sql diff --git a/docs/connection/db_connection/hive/slots.rst b/docs/connection/db_connection/hive/slots.rst new file mode 100644 index 000000000..f60dc34e5 --- /dev/null +++ b/docs/connection/db_connection/hive/slots.rst @@ -0,0 +1,10 @@ +.. _hive-slots: + +Hive Slots +========== + +.. currentmodule:: onetl.connection.db_connection.hive.slots + +.. autoclass:: HiveSlots + :members: normalize_cluster_name, get_known_clusters, get_current_cluster + :member-order: bysource diff --git a/docs/connection/db_connection/hive/write.rst b/docs/connection/db_connection/hive/write.rst new file mode 100644 index 000000000..70c9f3099 --- /dev/null +++ b/docs/connection/db_connection/hive/write.rst @@ -0,0 +1,13 @@ +.. _hive-write: + +Writing to Hive +=============== + +For writing data to Hive, use :obj:`DBWriter ` with options below. + +.. currentmodule:: onetl.connection.db_connection.hive.options + +.. autopydantic_model:: HiveWriteOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/index.rst b/docs/connection/db_connection/index.rst index d8e2b7c77..caa5e5a94 100644 --- a/docs/connection/db_connection/index.rst +++ b/docs/connection/db_connection/index.rst @@ -10,7 +10,7 @@ DB Connections Clickhouse Greenplum Kafka - Hive + Hive MongoDB MSSQL MySQL diff --git a/onetl/connection/db_connection/hive/__init__.py b/onetl/connection/db_connection/hive/__init__.py new file mode 100644 index 000000000..54a00f004 --- /dev/null +++ b/onetl/connection/db_connection/hive/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from onetl.connection.db_connection.hive.connection import Hive +from onetl.connection.db_connection.hive.dialect import HiveDialect +from onetl.connection.db_connection.hive.options import ( + HiveLegacyOptions, + HiveTableExistBehavior, + HiveWriteOptions, +) +from onetl.connection.db_connection.hive.slots import HiveSlots diff --git a/onetl/connection/db_connection/hive.py b/onetl/connection/db_connection/hive/connection.py similarity index 51% rename from onetl/connection/db_connection/hive.py rename to onetl/connection/db_connection/hive/connection.py index fe80d0c35..e8d5a916d 100644 --- a/onetl/connection/db_connection/hive.py +++ b/onetl/connection/db_connection/hive/connection.py @@ -15,33 +15,24 @@ from __future__ import annotations import logging -import warnings -from enum import Enum from textwrap import dedent -from typing import TYPE_CHECKING, Any, ClassVar, Iterable, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, ClassVar, Iterable, Tuple -from deprecated import deprecated from etl_entities.instance import Cluster -from pydantic import Field, root_validator, validator +from pydantic import validator from onetl._internal import clear_statement, get_sql_query from onetl._util.spark import inject_spark_param from onetl.connection.db_connection.db_connection import DBConnection -from onetl.connection.db_connection.db_connection.dialect import DBDialect -from onetl.connection.db_connection.dialect_mixins import ( - SupportColumnsList, - SupportDfSchemaNone, - SupportHintStr, - SupportHWMColumnStr, - SupportHWMExpressionStr, - SupportWhereStr, -) -from onetl.connection.db_connection.dialect_mixins.support_table_with_dbschema import ( - SupportTableWithDBSchema, +from onetl.connection.db_connection.hive.dialect import HiveDialect +from onetl.connection.db_connection.hive.options import ( + HiveLegacyOptions, + HiveTableExistBehavior, + HiveWriteOptions, ) +from onetl.connection.db_connection.hive.slots import HiveSlots from onetl.hooks import slot, support_hooks from onetl.hwm import Statement -from onetl.impl import GenericOptions from onetl.log import log_lines, log_with_indent if TYPE_CHECKING: @@ -52,44 +43,6 @@ log = logging.getLogger(__name__) -class HiveTableExistBehavior(str, Enum): - APPEND = "append" - REPLACE_ENTIRE_TABLE = "replace_entire_table" - REPLACE_OVERLAPPING_PARTITIONS = "replace_overlapping_partitions" - - def __str__(self): - return str(self.value) - - @classmethod # noqa: WPS120 - def _missing_(cls, value: object): # noqa: WPS120 - if str(value) == "overwrite": - warnings.warn( - "Mode `overwrite` is deprecated since v0.4.0 and will be removed in v1.0.0. " - "Use `replace_overlapping_partitions` instead", - category=UserWarning, - stacklevel=4, - ) - return cls.REPLACE_OVERLAPPING_PARTITIONS - - if str(value) == "overwrite_partitions": - warnings.warn( - "Mode `overwrite_partitions` is deprecated since v0.9.0 and will be removed in v1.0.0. " - "Use `replace_overlapping_partitions` instead", - category=UserWarning, - stacklevel=4, - ) - return cls.REPLACE_OVERLAPPING_PARTITIONS - - if str(value) == "overwrite_table": - warnings.warn( - "Mode `overwrite_table` is deprecated since v0.9.0 and will be removed in v1.0.0. " - "Use `replace_entire_table` instead", - category=UserWarning, - stacklevel=4, - ) - return cls.REPLACE_ENTIRE_TABLE - - @support_hooks class Hive(DBConnection): """Spark connection with Hive MetaStore support. 
|support_hooks| @@ -184,415 +137,17 @@ class Hive(DBConnection): hive = Hive(cluster="rnd-dwh", spark=spark).check() """ - class WriteOptions(GenericOptions): - """Hive source writing options. - - You can pass here key-value items which then will be converted to calls - of :obj:`pyspark.sql.readwriter.DataFrameWriter` methods. - - For example, ``Hive.WriteOptions(if_exists="append", partitionBy="reg_id")`` will - be converted to ``df.write.mode("append").partitionBy("reg_id")`` call, and so on. - - .. note:: - - You can pass any method and its value - `supported by Spark `_, - even if it is not mentioned in this documentation. **Option names should be in** ``camelCase``! - - The set of supported options depends on Spark version used. See link above. - - Examples - -------- - - Writing options initialization - - .. code:: python - - options = Hive.WriteOptions( - if_exists="append", - partitionBy="reg_id", - someNewOption="value", - ) - """ - - class Config: - known_options: frozenset = frozenset() - extra = "allow" - - if_exists: HiveTableExistBehavior = Field(default=HiveTableExistBehavior.APPEND, alias="mode") - """Behavior of writing data into existing table. - - Possible values: - * ``append`` (default) - Appends data into existing partition/table, or create partition/table if it does not exist. - - Same as Spark's ``df.write.insertInto(table, overwrite=False)``. - - .. dropdown:: Behavior in details - - * Table does not exist - Table is created using options provided by user (``format``, ``compression``, etc). - - * Table exists, but not partitioned, :obj:`~partition_by` is set - Data is appended to a table. Table is still not partitioned (DDL is unchanged). - - * Table exists and partitioned, but has different partitioning schema than :obj:`~partition_by` - Partition is created based on table's ``PARTITIONED BY (...)`` options. - Explicit :obj:`~partition_by` value is ignored. - - * Table exists and partitioned according :obj:`~partition_by`, but partition is present only in dataframe - Partition is created. - - * Table exists and partitioned according :obj:`~partition_by`, partition is present in both dataframe and table - Data is appended to existing partition. - - .. warning:: - - This mode does not check whether table already contains - rows from dataframe, so duplicated rows can be created. - - To implement deduplication, write data to staging table first, - and then perform some deduplication logic using :obj:`~sql`. - - * Table exists and partitioned according :obj:`~partition_by`, but partition is present only in table, not dataframe - Existing partition is left intact. - - * ``replace_overlapping_partitions`` - Overwrites data in the existing partition, or create partition/table if it does not exist. - - Same as Spark's ``df.write.insertInto(table, overwrite=True)`` + - ``spark.sql.sources.partitionOverwriteMode=dynamic``. - - .. dropdown:: Behavior in details - - * Table does not exist - Table is created using options provided by user (``format``, ``compression``, etc). - - * Table exists, but not partitioned, :obj:`~partition_by` is set - Data is **overwritten in all the table**. Table is still not partitioned (DDL is unchanged). - - * Table exists and partitioned, but has different partitioning schema than :obj:`~partition_by` - Partition is created based on table's ``PARTITIONED BY (...)`` options. - Explicit :obj:`~partition_by` value is ignored. 
- - * Table exists and partitioned according :obj:`~partition_by`, but partition is present only in dataframe - Partition is created. - - * Table exists and partitioned according :obj:`~partition_by`, partition is present in both dataframe and table - Existing partition **replaced** with data from dataframe. - - * Table exists and partitioned according :obj:`~partition_by`, but partition is present only in table, not dataframe - Existing partition is left intact. - - * ``replace_entire_table`` - **Recreates table** (via ``DROP + CREATE``), **deleting all existing data**. - **All existing partitions are dropped.** - - Same as Spark's ``df.write.saveAsTable(table, mode="overwrite")`` (NOT ``insertInto``)! - - .. warning:: - - Table is recreated using options provided by user (``format``, ``compression``, etc) - **instead of using original table options**. Be careful - - .. note:: - - ``error`` and ``ignore`` modes are not supported. - - .. note:: - - Unlike using pure Spark, config option ``spark.sql.sources.partitionOverwriteMode`` - does not affect behavior. - """ - - format: str = "orc" - """Format of files which should be used for storing table data. - - Examples: ``orc`` (default), ``parquet``, ``csv`` (NOT recommended) - - .. note:: - - It's better to use column-based formats like ``orc`` or ``parquet``, - not row-based (``csv``, ``json``) - - .. warning:: - - Used **only** while **creating new table**, or in case of ``if_exists=recreate_entire_table`` - """ - - partition_by: Optional[Union[List[str], str]] = Field(default=None, alias="partitionBy") - """ - List of columns should be used for data partitioning. ``None`` means partitioning is disabled. - - Each partition is a folder which contains only files with the specific column value, - like ``myschema.db/mytable/col1=value1``, ``myschema.db/mytable/col1=value2``, and so on. - - Multiple partitions columns means nested folder structure, like ``myschema.db/mytable/col1=val1/col2=val2``. - - If ``WHERE`` clause in the query contains expression like ``partition = value``, - Spark will scan only files in a specific partition. - - Examples: ``reg_id`` or ``["reg_id", "business_dt"]`` - - .. note:: - - Values should be scalars (integers, strings), - and either static (``countryId``) or incrementing (dates, years), with low - number of distinct values. - - Columns like ``userId`` or ``datetime``/``timestamp`` should **NOT** be used for partitioning. - - .. warning:: - - Used **only** while **creating new table**, or in case of ``if_exists=recreate_entire_table`` - """ - - bucket_by: Optional[Tuple[int, Union[List[str], str]]] = Field(default=None, alias="bucketBy") # noqa: WPS234 - """Number of buckets plus bucketing columns. ``None`` means bucketing is disabled. - - Each bucket is created as a set of files with name containing result of calculation ``hash(columns) mod num_buckets``. - - This allows to remove shuffle from queries containing ``GROUP BY`` or ``JOIN`` or using ``=`` / ``IN`` predicates - on specific columns. - - Examples: ``(10, "user_id")``, ``(10, ["user_id", "user_phone"])`` - - .. note:: - - Bucketing should be used on columns containing a lot of unique values, - like ``userId``. - - Columns like ``date`` should **NOT** be used for bucketing - because of too low number of unique values. - - .. 
warning:: - - It is recommended to use this option **ONLY** if you have a large table - (hundreds of Gb or more), which is used mostly for JOINs with other tables, - and you're inserting data using ``if_exists=overwrite_partitions`` or ``if_exists=recreate_entire_table``. - - Otherwise Spark will create a lot of small files - (one file for each bucket and each executor), drastically **decreasing** HDFS performance. - - .. warning:: - - Used **only** while **creating new table**, or in case of ``if_exists=recreate_entire_table`` - """ - - sort_by: Optional[Union[List[str], str]] = Field(default=None, alias="sortBy") - """Each file in a bucket will be sorted by these columns value. ``None`` means sorting is disabled. - - Examples: ``user_id`` or ``["user_id", "user_phone"]`` - - .. note:: - - Sorting columns should contain values which are used in ``ORDER BY`` clauses. - - .. warning:: - - Could be used only with :obj:`~bucket_by` option - - .. warning:: - - Used **only** while **creating new table**, or in case of ``if_exists=recreate_entire_table`` - """ - - compression: Optional[str] = None - """Compressing algorithm which should be used for compressing created files in HDFS. - ``None`` means compression is disabled. - - Examples: ``snappy``, ``zlib`` - - .. warning:: - - Used **only** while **creating new table**, or in case of ``if_exists=recreate_entire_table`` - """ - - @validator("sort_by") - def sort_by_cannot_be_used_without_bucket_by(cls, sort_by, values): - options = values.copy() - bucket_by = options.pop("bucket_by", None) - if sort_by and not bucket_by: - raise ValueError("`sort_by` option can only be used with non-empty `bucket_by`") - - return sort_by - - @root_validator - def partition_overwrite_mode_is_not_allowed(cls, values): - partition_overwrite_mode = values.get("partitionOverwriteMode") or values.get("partition_overwrite_mode") - if partition_overwrite_mode: - if partition_overwrite_mode == "static": - recommend_mode = "replace_entire_table" - else: - recommend_mode = "replace_overlapping_partitions" - raise ValueError( - f"`partitionOverwriteMode` option should be replaced with if_exists='{recommend_mode}'", - ) - - if values.get("insert_into") is not None or values.get("insertInto") is not None: - raise ValueError( - "`insertInto` option was removed in onETL 0.4.0, " - "now df.write.insertInto or df.write.saveAsTable is selected based on table existence", - ) - - return values - - @root_validator(pre=True) - def mode_is_deprecated(cls, values): - if "mode" in values: - warnings.warn( - "Option `Hive.WriteOptions(mode=...)` is deprecated since v0.9.0 and will be removed in v1.0.0. " - "Use `Hive.WriteOptions(if_exists=...)` instead", - category=UserWarning, - stacklevel=3, - ) - return values - - @deprecated( - version="0.5.0", - reason="Please use 'WriteOptions' class instead. Will be removed in v1.0.0", - action="always", - category=UserWarning, - ) - class Options(WriteOptions): - pass - - @support_hooks - class Slots: - """:ref:`Slots ` that could be implemented by third-party plugins.""" - - @slot - @staticmethod - def normalize_cluster_name(cluster: str) -> str | None: - """ - Normalize cluster name passed into Hive constructor. |support_hooks| - - If hooks didn't return anything, cluster name is left intact. - - Parameters - ---------- - cluster : :obj:`str` - Cluster name (raw) - - Returns - ------- - str | None - Normalized cluster name. - - If hook cannot be applied to a specific cluster, it should return ``None``. - - Examples - -------- - - .. 
code:: python - - from onetl.connection import Hive - from onetl.hooks import hook - - - @Hive.Slots.normalize_cluster_name.bind - @hook - def normalize_cluster_name(cluster: str) -> str: - return cluster.lower() - """ - - @slot - @staticmethod - def get_known_clusters() -> set[str] | None: - """ - Return collection of known clusters. |support_hooks| - - Cluster passed into Hive constructor should be present in this list. - If hooks didn't return anything, no validation will be performed. - - Returns - ------- - set[str] | None - Collection of cluster names (normalized). - - If hook cannot be applied, it should return ``None``. - - Examples - -------- - - .. code:: python - - from onetl.connection import Hive - from onetl.hooks import hook - - - @Hive.Slots.get_known_clusters.bind - @hook - def get_known_clusters() -> str[str]: - return {"rnd-dwh", "rnd-prod"} - """ - - @slot - @staticmethod - def get_current_cluster() -> str | None: - """ - Get current cluster name. |support_hooks| - - Used in :obj:`~check` method to verify that connection is created only from the same cluster. - If hooks didn't return anything, no validation will be performed. - - Returns - ------- - str | None - Current cluster name (normalized). - - If hook cannot be applied, it should return ``None``. - - Examples - -------- - - .. code:: python - - from onetl.connection import Hive - from onetl.hooks import hook - - - @Hive.Slots.get_current_cluster.bind - @hook - def get_current_cluster() -> str: - # some magic here - return "rnd-dwh" - """ + cluster: Cluster + Dialect = HiveDialect + WriteOptions = HiveWriteOptions + Options = HiveLegacyOptions + Slots = HiveSlots # TODO: remove in v1.0.0 - slots = Slots - - class Dialect( # noqa: WPS215 - SupportTableWithDBSchema, - SupportColumnsList, - SupportDfSchemaNone, - SupportWhereStr, - SupportHintStr, - SupportHWMExpressionStr, - SupportHWMColumnStr, - DBDialect, - ): - pass + slots = HiveSlots - cluster: Cluster _CHECK_QUERY: ClassVar[str] = "SELECT 1" - @validator("cluster") - def validate_cluster_name(cls, cluster): - log.debug("|%s| Normalizing cluster %r name ...", cls.__name__, cluster) - validated_cluster = cls.Slots.normalize_cluster_name(cluster) or cluster - if validated_cluster != cluster: - log.debug("|%s| Got %r", cls.__name__) - - log.debug("|%s| Checking if cluster %r is a known cluster ...", cls.__name__, validated_cluster) - known_clusters = cls.Slots.get_known_clusters() - if known_clusters and validated_cluster not in known_clusters: - raise ValueError( - f"Cluster {validated_cluster!r} is not in the known clusters list: {sorted(known_clusters)!r}", - ) - - return validated_cluster - @slot @classmethod def get_current(cls, spark: SparkSession): @@ -601,7 +156,8 @@ def get_current(cls, spark: SparkSession): .. note:: - Can be used only if there are some hooks bound :obj:`~slots.get_current_cluster` slot. + Can be used only if there are some hooks bound to + :obj:`Slots.get_current_cluster ` slot. 
Parameters ---------- @@ -770,7 +326,7 @@ def write_df_to_target( self, df: DataFrame, target: str, - options: WriteOptions | dict | None = None, + options: HiveWriteOptions | None = None, ) -> None: write_options = self.WriteOptions.parse(options) @@ -870,6 +426,22 @@ def get_min_max_bounds( return min_value, max_value + @validator("cluster") + def _validate_cluster_name(cls, cluster): + log.debug("|%s| Normalizing cluster %r name ...", cls.__name__, cluster) + validated_cluster = cls.Slots.normalize_cluster_name(cluster) or cluster + if validated_cluster != cluster: + log.debug("|%s| Got %r", cls.__name__) + + log.debug("|%s| Checking if cluster %r is a known cluster ...", cls.__name__, validated_cluster) + known_clusters = cls.Slots.get_known_clusters() + if known_clusters and validated_cluster not in known_clusters: + raise ValueError( + f"Cluster {validated_cluster!r} is not in the known clusters list: {sorted(known_clusters)!r}", + ) + + return validated_cluster + def _execute_sql(self, query: str) -> DataFrame: return self.spark.sql(query) @@ -923,7 +495,7 @@ def _insert_into( self, df: DataFrame, table: str, - options: WriteOptions | dict | None = None, + options: HiveWriteOptions | dict | None = None, ) -> None: write_options = self.WriteOptions.parse(options) @@ -955,7 +527,7 @@ def _save_as_table( self, df: DataFrame, table: str, - options: WriteOptions | dict | None = None, + options: HiveWriteOptions | dict | None = None, ) -> None: write_options = self.WriteOptions.parse(options) diff --git a/onetl/connection/db_connection/hive/dialect.py b/onetl/connection/db_connection/hive/dialect.py new file mode 100644 index 000000000..19362f496 --- /dev/null +++ b/onetl/connection/db_connection/hive/dialect.py @@ -0,0 +1,39 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from onetl.connection.db_connection.db_connection import DBDialect +from onetl.connection.db_connection.dialect_mixins import ( + SupportColumnsList, + SupportDfSchemaNone, + SupportHintStr, + SupportHWMColumnStr, + SupportHWMExpressionStr, + SupportTableWithDBSchema, + SupportWhereStr, +) + + +class HiveDialect( # noqa: WPS215 + SupportTableWithDBSchema, + SupportColumnsList, + SupportDfSchemaNone, + SupportWhereStr, + SupportHintStr, + SupportHWMExpressionStr, + SupportHWMColumnStr, + DBDialect, +): + pass diff --git a/onetl/connection/db_connection/hive/options.py b/onetl/connection/db_connection/hive/options.py new file mode 100644 index 000000000..c46b7882d --- /dev/null +++ b/onetl/connection/db_connection/hive/options.py @@ -0,0 +1,337 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import warnings +from enum import Enum +from typing import List, Optional, Tuple, Union + +from deprecated import deprecated +from pydantic import Field, root_validator, validator + +from onetl.impl import GenericOptions + + +class HiveTableExistBehavior(str, Enum): + APPEND = "append" + REPLACE_ENTIRE_TABLE = "replace_entire_table" + REPLACE_OVERLAPPING_PARTITIONS = "replace_overlapping_partitions" + + def __str__(self): + return str(self.value) + + @classmethod # noqa: WPS120 + def _missing_(cls, value: object): # noqa: WPS120 + if str(value) == "overwrite": + warnings.warn( + "Mode `overwrite` is deprecated since v0.4.0 and will be removed in v1.0.0. " + "Use `replace_overlapping_partitions` instead", + category=UserWarning, + stacklevel=4, + ) + return cls.REPLACE_OVERLAPPING_PARTITIONS + + if str(value) == "overwrite_partitions": + warnings.warn( + "Mode `overwrite_partitions` is deprecated since v0.9.0 and will be removed in v1.0.0. " + "Use `replace_overlapping_partitions` instead", + category=UserWarning, + stacklevel=4, + ) + return cls.REPLACE_OVERLAPPING_PARTITIONS + + if str(value) == "overwrite_table": + warnings.warn( + "Mode `overwrite_table` is deprecated since v0.9.0 and will be removed in v1.0.0. " + "Use `replace_entire_table` instead", + category=UserWarning, + stacklevel=4, + ) + return cls.REPLACE_ENTIRE_TABLE + + +class HiveWriteOptions(GenericOptions): + """Hive source writing options. + + You can pass here key-value items which then will be converted to calls + of :obj:`pyspark.sql.readwriter.DataFrameWriter` methods. + + For example, ``Hive.WriteOptions(if_exists="append", partitionBy="reg_id")`` will + be converted to ``df.write.mode("append").partitionBy("reg_id")`` call, and so on. + + .. note:: + + You can pass any method and its value + `supported by Spark `_, + even if it is not mentioned in this documentation. **Option names should be in** ``camelCase``! + + The set of supported options depends on Spark version used. See link above. + + Examples + -------- + + Writing options initialization + + .. code:: python + + options = Hive.WriteOptions( + if_exists="append", + partitionBy="reg_id", + someNewOption="value", + ) + """ + + class Config: + known_options: frozenset = frozenset() + extra = "allow" + + if_exists: HiveTableExistBehavior = Field(default=HiveTableExistBehavior.APPEND, alias="mode") + """Behavior of writing data into existing table. + + Possible values: + * ``append`` (default) + Appends data into existing partition/table, or create partition/table if it does not exist. + + Same as Spark's ``df.write.insertInto(table, overwrite=False)``. + + .. dropdown:: Behavior in details + + * Table does not exist + Table is created using options provided by user (``format``, ``compression``, etc). + + * Table exists, but not partitioned, :obj:`~partition_by` is set + Data is appended to a table. Table is still not partitioned (DDL is unchanged). 
+ + * Table exists and partitioned, but has different partitioning schema than :obj:`~partition_by` + Partition is created based on table's ``PARTITIONED BY (...)`` options. + Explicit :obj:`~partition_by` value is ignored. + + * Table exists and partitioned according :obj:`~partition_by`, but partition is present only in dataframe + Partition is created. + + * Table exists and partitioned according :obj:`~partition_by`, partition is present in both dataframe and table + Data is appended to existing partition. + + .. warning:: + + This mode does not check whether table already contains + rows from dataframe, so duplicated rows can be created. + + To implement deduplication, write data to staging table first, + and then perform some deduplication logic using :obj:`~sql`. + + * Table exists and partitioned according :obj:`~partition_by`, but partition is present only in table, not dataframe + Existing partition is left intact. + + * ``replace_overlapping_partitions`` + Overwrites data in the existing partition, or create partition/table if it does not exist. + + Same as Spark's ``df.write.insertInto(table, overwrite=True)`` + + ``spark.sql.sources.partitionOverwriteMode=dynamic``. + + .. dropdown:: Behavior in details + + * Table does not exist + Table is created using options provided by user (``format``, ``compression``, etc). + + * Table exists, but not partitioned, :obj:`~partition_by` is set + Data is **overwritten in all the table**. Table is still not partitioned (DDL is unchanged). + + * Table exists and partitioned, but has different partitioning schema than :obj:`~partition_by` + Partition is created based on table's ``PARTITIONED BY (...)`` options. + Explicit :obj:`~partition_by` value is ignored. + + * Table exists and partitioned according :obj:`~partition_by`, but partition is present only in dataframe + Partition is created. + + * Table exists and partitioned according :obj:`~partition_by`, partition is present in both dataframe and table + Existing partition **replaced** with data from dataframe. + + * Table exists and partitioned according :obj:`~partition_by`, but partition is present only in table, not dataframe + Existing partition is left intact. + + * ``replace_entire_table`` + **Recreates table** (via ``DROP + CREATE``), **deleting all existing data**. + **All existing partitions are dropped.** + + Same as Spark's ``df.write.saveAsTable(table, mode="overwrite")`` (NOT ``insertInto``)! + + .. warning:: + + Table is recreated using options provided by user (``format``, ``compression``, etc) + **instead of using original table options**. Be careful + + .. note:: + + ``error`` and ``ignore`` modes are not supported. + + .. note:: + + Unlike using pure Spark, config option ``spark.sql.sources.partitionOverwriteMode`` + does not affect behavior. + """ + + format: str = "orc" + """Format of files which should be used for storing table data. + + Examples: ``orc`` (default), ``parquet``, ``csv`` (NOT recommended) + + .. note:: + + It's better to use column-based formats like ``orc`` or ``parquet``, + not row-based (``csv``, ``json``) + + .. warning:: + + Used **only** while **creating new table**, or in case of ``if_exists=recreate_entire_table`` + """ + + partition_by: Optional[Union[List[str], str]] = Field(default=None, alias="partitionBy") + """ + List of columns should be used for data partitioning. ``None`` means partitioning is disabled. 
+ + Each partition is a folder which contains only files with the specific column value, + like ``myschema.db/mytable/col1=value1``, ``myschema.db/mytable/col1=value2``, and so on. + + Multiple partitions columns means nested folder structure, like ``myschema.db/mytable/col1=val1/col2=val2``. + + If ``WHERE`` clause in the query contains expression like ``partition = value``, + Spark will scan only files in a specific partition. + + Examples: ``reg_id`` or ``["reg_id", "business_dt"]`` + + .. note:: + + Values should be scalars (integers, strings), + and either static (``countryId``) or incrementing (dates, years), with low + number of distinct values. + + Columns like ``userId`` or ``datetime``/``timestamp`` should **NOT** be used for partitioning. + + .. warning:: + + Used **only** while **creating new table**, or in case of ``if_exists=recreate_entire_table`` + """ + + bucket_by: Optional[Tuple[int, Union[List[str], str]]] = Field(default=None, alias="bucketBy") # noqa: WPS234 + """Number of buckets plus bucketing columns. ``None`` means bucketing is disabled. + + Each bucket is created as a set of files with name containing result of calculation ``hash(columns) mod num_buckets``. + + This allows to remove shuffle from queries containing ``GROUP BY`` or ``JOIN`` or using ``=`` / ``IN`` predicates + on specific columns. + + Examples: ``(10, "user_id")``, ``(10, ["user_id", "user_phone"])`` + + .. note:: + + Bucketing should be used on columns containing a lot of unique values, + like ``userId``. + + Columns like ``date`` should **NOT** be used for bucketing + because of too low number of unique values. + + .. warning:: + + It is recommended to use this option **ONLY** if you have a large table + (hundreds of Gb or more), which is used mostly for JOINs with other tables, + and you're inserting data using ``if_exists=overwrite_partitions`` or ``if_exists=recreate_entire_table``. + + Otherwise Spark will create a lot of small files + (one file for each bucket and each executor), drastically **decreasing** HDFS performance. + + .. warning:: + + Used **only** while **creating new table**, or in case of ``if_exists=recreate_entire_table`` + """ + + sort_by: Optional[Union[List[str], str]] = Field(default=None, alias="sortBy") + """Each file in a bucket will be sorted by these columns value. ``None`` means sorting is disabled. + + Examples: ``user_id`` or ``["user_id", "user_phone"]`` + + .. note:: + + Sorting columns should contain values which are used in ``ORDER BY`` clauses. + + .. warning:: + + Could be used only with :obj:`~bucket_by` option + + .. warning:: + + Used **only** while **creating new table**, or in case of ``if_exists=recreate_entire_table`` + """ + + compression: Optional[str] = None + """Compressing algorithm which should be used for compressing created files in HDFS. + ``None`` means compression is disabled. + + Examples: ``snappy``, ``zlib`` + + .. 
warning:: + + Used **only** while **creating new table**, or in case of ``if_exists=recreate_entire_table`` + """ + + @validator("sort_by") + def _sort_by_cannot_be_used_without_bucket_by(cls, sort_by, values): + options = values.copy() + bucket_by = options.pop("bucket_by", None) + if sort_by and not bucket_by: + raise ValueError("`sort_by` option can only be used with non-empty `bucket_by`") + + return sort_by + + @root_validator + def _partition_overwrite_mode_is_not_allowed(cls, values): + partition_overwrite_mode = values.get("partitionOverwriteMode") or values.get("partition_overwrite_mode") + if partition_overwrite_mode: + if partition_overwrite_mode == "static": + recommend_mode = "replace_entire_table" + else: + recommend_mode = "replace_overlapping_partitions" + raise ValueError( + f"`partitionOverwriteMode` option should be replaced with if_exists='{recommend_mode}'", + ) + + if values.get("insert_into") is not None or values.get("insertInto") is not None: + raise ValueError( + "`insertInto` option was removed in onETL 0.4.0, " + "now df.write.insertInto or df.write.saveAsTable is selected based on table existence", + ) + + return values + + @root_validator(pre=True) + def _mode_is_deprecated(cls, values): + if "mode" in values: + warnings.warn( + "Option `Hive.WriteOptions(mode=...)` is deprecated since v0.9.0 and will be removed in v1.0.0. " + "Use `Hive.WriteOptions(if_exists=...)` instead", + category=UserWarning, + stacklevel=3, + ) + return values + + +@deprecated( + version="0.5.0", + reason="Please use 'WriteOptions' class instead. Will be removed in v1.0.0", + action="always", + category=UserWarning, +) +class HiveLegacyOptions(HiveWriteOptions): + pass diff --git a/onetl/connection/db_connection/hive/slots.py b/onetl/connection/db_connection/hive/slots.py new file mode 100644 index 000000000..4105b5901 --- /dev/null +++ b/onetl/connection/db_connection/hive/slots.py @@ -0,0 +1,120 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from onetl.hooks import slot, support_hooks + + +@support_hooks +class HiveSlots: + """:ref:`Slots ` that could be implemented by third-party plugins.""" + + @slot + @staticmethod + def normalize_cluster_name(cluster: str) -> str | None: + """ + Normalize cluster name passed into Hive constructor. |support_hooks| + + If hooks didn't return anything, cluster name is left intact. + + Parameters + ---------- + cluster : :obj:`str` + Cluster name (raw) + + Returns + ------- + str | None + Normalized cluster name. + + If hook cannot be applied to a specific cluster, it should return ``None``. + + Examples + -------- + + .. code:: python + + from onetl.connection import Hive + from onetl.hooks import hook + + + @Hive.Slots.normalize_cluster_name.bind + @hook + def normalize_cluster_name(cluster: str) -> str: + return cluster.lower() + """ + + @slot + @staticmethod + def get_known_clusters() -> set[str] | None: + """ + Return collection of known clusters. 
|support_hooks| + + Cluster passed into Hive constructor should be present in this list. + If hooks didn't return anything, no validation will be performed. + + Returns + ------- + set[str] | None + Collection of cluster names (normalized). + + If hook cannot be applied, it should return ``None``. + + Examples + -------- + + .. code:: python + + from onetl.connection import Hive + from onetl.hooks import hook + + + @Hive.Slots.get_known_clusters.bind + @hook + def get_known_clusters() -> str[str]: + return {"rnd-dwh", "rnd-prod"} + """ + + @slot + @staticmethod + def get_current_cluster() -> str | None: + """ + Get current cluster name. |support_hooks| + + Used in :obj:`~check` method to verify that connection is created only from the same cluster. + If hooks didn't return anything, no validation will be performed. + + Returns + ------- + str | None + Current cluster name (normalized). + + If hook cannot be applied, it should return ``None``. + + Examples + -------- + + .. code:: python + + from onetl.connection import Hive + from onetl.hooks import hook + + + @Hive.Slots.get_current_cluster.bind + @hook + def get_current_cluster() -> str: + # some magic here + return "rnd-dwh" + """ diff --git a/onetl/connection/file_df_connection/spark_hdfs/connection.py b/onetl/connection/file_df_connection/spark_hdfs/connection.py index 91ce7e66c..78083bbe7 100644 --- a/onetl/connection/file_df_connection/spark_hdfs/connection.py +++ b/onetl/connection/file_df_connection/spark_hdfs/connection.py @@ -85,7 +85,8 @@ class SparkHDFS(SparkFileDFConnection): Used for: * HWM and lineage (as instance name for file paths) * Validation of ``host`` value, - if latter is passed and if some hooks are bound to :obj:`~slots.get_cluster_namenodes`. + if latter is passed and if some hooks are bound to + :obj:`Slots.get_cluster_namenodes `. host : str, optional Hadoop namenode host. For example: ``namenode1.domain.com``. 
diff --git a/setup.cfg b/setup.cfg index a2caa1331..820572b9b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -314,9 +314,6 @@ per-file-ignores = WPS219, # WPS437: Found protected attribute usage: spark._jvm WPS437, - onetl/connection/db_connection/hive.py: -# WPS437 Found protected attribute usage: self.Dialect._ - WPS437, onetl/connection/db_connection/greenplum.py: # WPS437 Found protected attribute usage: self.Dialect._ WPS437, diff --git a/tests/tests_unit/tests_db_connection_unit/test_db_options_unit.py b/tests/tests_unit/tests_db_connection_unit/test_db_options_unit.py index 8cc50b45c..e98da8e09 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_db_options_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_db_options_unit.py @@ -36,8 +36,8 @@ def test_db_options_connection_parameters_cannot_be_passed(options_class, arg, v @pytest.mark.parametrize( "options_class, options_class_name, known_options", [ - (Hive.WriteOptions, "WriteOptions", {"if_exists": "replace_overlapping_partitions"}), - (Hive.Options, "Options", {"if_exists": "replace_overlapping_partitions"}), + (Hive.WriteOptions, "HiveWriteOptions", {"if_exists": "replace_overlapping_partitions"}), + (Hive.Options, "HiveLegacyOptions", {"if_exists": "replace_overlapping_partitions"}), (Postgres.ReadOptions, "ReadOptions", {"fetchsize": 10, "keytab": "a/b/c"}), (Postgres.WriteOptions, "JDBCWriteOptions", {"if_exists": "replace_entire_table", "keytab": "a/b/c"}), (Postgres.Options, "Options", {"if_exists": "replace_entire_table", "keytab": "a/b/c"}), From ae4b5c939199d094968fccc537b64f0c90f13510 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 21 Aug 2023 09:41:57 +0000 Subject: [PATCH 12/30] [DOP-8140] Split Greenplum, Greenplum.Dialect and .*Options to separated submodules --- .../db_connection/greenplum/connection.rst | 9 + .../db_connection/greenplum/execute.rst | 17 + .../db_connection/greenplum/greenplum.rst | 42 -- .../db_connection/greenplum/index.rst | 14 +- .../db_connection/greenplum/read.rst | 31 ++ .../db_connection/greenplum/write.rst | 13 + .../db_connection/greenplum/__init__.py | 21 + .../{greenplum.py => greenplum/connection.py} | 374 ++---------------- .../greenplum/connection_limit.py | 40 ++ .../db_connection/greenplum/dialect.py | 49 +++ .../db_connection/greenplum/options.py | 292 ++++++++++++++ setup.cfg | 3 - .../test_db_options_unit.py | 4 +- .../test_greenplum_unit.py | 51 +-- 14 files changed, 534 insertions(+), 426 deletions(-) create mode 100644 docs/connection/db_connection/greenplum/connection.rst create mode 100644 docs/connection/db_connection/greenplum/execute.rst delete mode 100644 docs/connection/db_connection/greenplum/greenplum.rst create mode 100644 docs/connection/db_connection/greenplum/read.rst create mode 100644 docs/connection/db_connection/greenplum/write.rst create mode 100644 onetl/connection/db_connection/greenplum/__init__.py rename onetl/connection/db_connection/{greenplum.py => greenplum/connection.py} (62%) create mode 100644 onetl/connection/db_connection/greenplum/connection_limit.py create mode 100644 onetl/connection/db_connection/greenplum/dialect.py create mode 100644 onetl/connection/db_connection/greenplum/options.py diff --git a/docs/connection/db_connection/greenplum/connection.rst b/docs/connection/db_connection/greenplum/connection.rst new file mode 100644 index 
000000000..59bca8421 --- /dev/null +++ b/docs/connection/db_connection/greenplum/connection.rst @@ -0,0 +1,9 @@ +.. _greenplum-connection: + +Greenplum connection +==================== + +.. currentmodule:: onetl.connection.db_connection.greenplum.connection + +.. autoclass:: Greenplum + :members: get_packages, check diff --git a/docs/connection/db_connection/greenplum/execute.rst b/docs/connection/db_connection/greenplum/execute.rst new file mode 100644 index 000000000..b0833b213 --- /dev/null +++ b/docs/connection/db_connection/greenplum/execute.rst @@ -0,0 +1,17 @@ +.. _greenplum-execute: + +Executing statements in Greenplum +================================== + +.. currentmodule:: onetl.connection.db_connection.greenplum.connection + +.. automethod:: Greenplum.fetch +.. automethod:: Greenplum.execute +.. automethod:: Greenplum.close + +.. currentmodule:: onetl.connection.db_connection.jdbc_mixin.options + +.. autopydantic_model:: JDBCOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/greenplum/greenplum.rst b/docs/connection/db_connection/greenplum/greenplum.rst deleted file mode 100644 index c8192a823..000000000 --- a/docs/connection/db_connection/greenplum/greenplum.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _greenplum: - -Greenplum connector -==================== - -.. currentmodule:: onetl.connection.db_connection.greenplum - -.. autosummary:: - - Greenplum - Greenplum.ReadOptions - Greenplum.WriteOptions - Greenplum.JDBCOptions - -.. note:: - - Unlike JDBC connectors, *Greenplum connector for Spark* does not support - executing **custom** SQL queries using ``.sql`` method, because this leads to sending - the result through *master* node which is really bad for cluster performance. - - To make distributed queries like ``JOIN`` **on Greenplum side**, you should create a temporary table, - populate it with the data you need (using ``.execute`` method to call ``INSERT INTO ... AS SELECT ...``), - and then read the data from this table using :obj:`DBReader `. - - In this case data will be read directly from segment nodes in a distributed way - -.. autoclass:: Greenplum - :members: get_packages, check, fetch, execute, close - -.. currentmodule:: onetl.connection.db_connection.greenplum.Greenplum - -.. autopydantic_model:: ReadOptions - :members: partition_column, num_partitions - :member-order: bysource - -.. autopydantic_model:: WriteOptions - :members: mode - :member-order: bysource - -.. autopydantic_model:: JDBCOptions - :members: query_timeout, fetchsize - :member-order: bysource diff --git a/docs/connection/db_connection/greenplum/index.rst b/docs/connection/db_connection/greenplum/index.rst index b99888c9f..a1d836766 100644 --- a/docs/connection/db_connection/greenplum/index.rst +++ b/docs/connection/db_connection/greenplum/index.rst @@ -1,11 +1,19 @@ .. _greenplum: -Greenplum connector +Greenplum ==================== .. toctree:: :maxdepth: 1 - :caption: Greenplum connector + :caption: Connection prerequisites - greenplum + connection + +.. toctree:: + :maxdepth: 1 + :caption: Options + + read + write + execute diff --git a/docs/connection/db_connection/greenplum/read.rst b/docs/connection/db_connection/greenplum/read.rst new file mode 100644 index 000000000..2640f7e6c --- /dev/null +++ b/docs/connection/db_connection/greenplum/read.rst @@ -0,0 +1,31 @@ +.. 
_greenplum-read: + +Reading from Greenplum +======================= + +For reading data from Greenplum, use :obj:`DBReader ` with options below. + +.. note:: + + Unlike JDBC connectors, *Greenplum connector for Spark* does not support + executing **custom** SQL queries using ``.sql`` method, because this leads to sending + the result through *master* node which is really bad for cluster performance. + + To make distributed queries like ``JOIN`` **on Greenplum side**, you should create a staging table, + populate it with the data you need (using ``.execute`` method to call ``INSERT INTO ... AS SELECT ...``), + then read the data from this table using :obj:`DBReader `, + and drop staging table after reading is finished. + + In this case data will be read directly from Greenplum segment nodes in a distributed way. + +.. warning:: + + Greenplum connection does **NOT** support reading data from views which does not have ``gp_segment_id`` column. + Either add this column to a view, or use stating table solution (see above). + +.. currentmodule:: onetl.connection.db_connection.greenplum.options + +.. autopydantic_model:: GreenplumReadOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/greenplum/write.rst b/docs/connection/db_connection/greenplum/write.rst new file mode 100644 index 000000000..aeb688ac5 --- /dev/null +++ b/docs/connection/db_connection/greenplum/write.rst @@ -0,0 +1,13 @@ +.. _greenplum-write: + +Writing to Greenplum +===================== + +For writing data to Greenplum, use :obj:`DBWriter ` with options below. + +.. currentmodule:: onetl.connection.db_connection.greenplum.options + +.. autopydantic_model:: GreenplumWriteOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/onetl/connection/db_connection/greenplum/__init__.py b/onetl/connection/db_connection/greenplum/__init__.py new file mode 100644 index 000000000..d080e8932 --- /dev/null +++ b/onetl/connection/db_connection/greenplum/__init__.py @@ -0,0 +1,21 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from onetl.connection.db_connection.greenplum.connection import Greenplum +from onetl.connection.db_connection.greenplum.dialect import GreenplumDialect +from onetl.connection.db_connection.greenplum.options import ( + GreenplumReadOptions, + GreenplumTableExistBehavior, + GreenplumWriteOptions, +) diff --git a/onetl/connection/db_connection/greenplum.py b/onetl/connection/db_connection/greenplum/connection.py similarity index 62% rename from onetl/connection/db_connection/greenplum.py rename to onetl/connection/db_connection/greenplum/connection.py index 369d75503..db29461a9 100644 --- a/onetl/connection/db_connection/greenplum.py +++ b/onetl/connection/db_connection/greenplum/connection.py @@ -18,13 +18,10 @@ import os import textwrap import warnings -from dataclasses import dataclass -from datetime import date, datetime -from enum import Enum -from typing import TYPE_CHECKING, Any, ClassVar, Optional +from typing import TYPE_CHECKING, Any, ClassVar from etl_entities.instance import Host -from pydantic import Field, root_validator, validator +from pydantic import validator from onetl._internal import get_sql_query from onetl._util.classproperty import classproperty @@ -32,15 +29,15 @@ from onetl._util.scala import get_default_scala_version from onetl._util.spark import get_executor_total_cores, get_spark_version from onetl._util.version import Version -from onetl.connection.db_connection.db_connection import DBConnection, DBDialect -from onetl.connection.db_connection.dialect_mixins import ( - SupportColumnsList, - SupportDfSchemaNone, - SupportHintNone, - SupportHWMColumnStr, - SupportHWMExpressionStr, - SupportTableWithDBSchema, - SupportWhereStr, +from onetl.connection.db_connection.db_connection import DBConnection +from onetl.connection.db_connection.greenplum.connection_limit import ( + GreenplumConnectionLimit, +) +from onetl.connection.db_connection.greenplum.dialect import GreenplumDialect +from onetl.connection.db_connection.greenplum.options import ( + GreenplumReadOptions, + GreenplumTableExistBehavior, + GreenplumWriteOptions, ) from onetl.connection.db_connection.jdbc_mixin import JDBCMixin from onetl.connection.db_connection.jdbc_mixin.options import JDBCOptions @@ -57,14 +54,6 @@ log = logging.getLogger(__name__) -# options from which are populated by Greenplum class methods -GENERIC_PROHIBITED_OPTIONS = frozenset( - ( - "dbschema", - "dbtable", - ), -) - EXTRA_OPTIONS = frozenset( ( "server.*", @@ -72,67 +61,15 @@ ), ) -WRITE_OPTIONS = frozenset( - ( - "mode", - "truncate", - "distributedBy", - "distributed_by", - "iteratorOptimization", - "iterator_optimization", - ), -) - -READ_OPTIONS = frozenset( - ( - "partitions", - "num_partitions", - "numPartitions", - "partitionColumn", - "partition_column", - ), -) - - -class GreenplumTableExistBehavior(str, Enum): - APPEND = "append" - REPLACE_ENTIRE_TABLE = "replace_entire_table" - def __str__(self) -> str: - return str(self.value) - - @classmethod # noqa: WPS120 - def _missing_(cls, value: object): # noqa: WPS120 - if str(value) == "overwrite": - warnings.warn( - "Mode `overwrite` is deprecated since v0.9.0 and will be removed in v1.0.0. 
" - "Use `replace_entire_table` instead", - category=UserWarning, - stacklevel=4, - ) - return cls.REPLACE_ENTIRE_TABLE +class GreenplumExtra(GenericOptions): + # avoid closing connections from server side + # while connector is moving data to executors before insert + tcpKeepAlive: str = "true" # noqa: N815 - -@dataclass -class ConnectionLimits: - maximum: int - reserved: int - occupied: int - - @property - def available(self) -> int: - return self.maximum - self.reserved - self.occupied - - @property - def summary(self) -> str: - return textwrap.dedent( - f""" - available connections: {self.available} - occupied: {self.occupied} - max: {self.maximum} ("max_connection" in postgresql.conf) - reserved: {self.reserved} ("superuser_reserved_connections" in postgresql.conf) - """, - ).strip() + class Config: + extra = "allow" + prohibited_options = JDBCOptions.Config.prohibited_options @support_hooks @@ -227,266 +164,15 @@ class Greenplum(JDBCMixin, DBConnection): ) """ - class Extra(GenericOptions): - # avoid closing connections from server side - # while connector is moving data to executors before insert - tcpKeepAlive: str = "true" # noqa: N815 - - class Config: - extra = "allow" - prohibited_options = JDBCOptions.Config.prohibited_options | GENERIC_PROHIBITED_OPTIONS - - class ReadOptions(JDBCOptions): - """Pivotal's Greenplum Spark connector reading options. - - .. note :: - - You can pass any value - `supported by connector `_, - even if it is not mentioned in this documentation. - - The set of supported options depends on connector version. See link above. - - .. warning:: - - Some options, like ``url``, ``dbtable``, ``server.*``, ``pool.*``, - etc are populated from connection attributes, and cannot be set in ``ReadOptions`` class - - Examples - -------- - - Read options initialization - - .. code:: python - - Greenplum.ReadOptions( - partition_column="reg_id", - num_partitions=10, - ) - """ - - class Config: - known_options = READ_OPTIONS - prohibited_options = ( - JDBCOptions.Config.prohibited_options | EXTRA_OPTIONS | GENERIC_PROHIBITED_OPTIONS | WRITE_OPTIONS - ) - - partition_column: Optional[str] = Field(alias="partitionColumn") - """Column used to parallelize reading from a table. - - .. warning:: - - You should not change this option, unless you know what you're doing - - Possible values: - * ``None`` (default): - Spark generates N jobs (where N == number of segments in Greenplum cluster), - each job is reading only data from a specific segment - (filtering data by ``gp_segment_id`` column). - - This is very effective way to fetch the data from a cluster. - - * table column - Allocate each executor a range of values from a specific column. - - .. note:: - Column type must be numeric. Other types are not supported. - - Spark generates for each executor an SQL query like: - - Executor 1: - - .. code:: sql - - SELECT ... FROM table - WHERE (partition_column >= lowerBound - OR partition_column IS NULL) - AND partition_column < (lower_bound + stride) - - Executor 2: - - .. code:: sql - - SELECT ... FROM table - WHERE partition_column >= (lower_bound + stride) - AND partition_column < (lower_bound + 2 * stride) - - ... - - Executor N: - - .. code:: sql - - SELECT ... FROM table - WHERE partition_column >= (lower_bound + (N-1) * stride) - AND partition_column <= upper_bound - - Where ``stride=(upper_bound - lower_bound) / num_partitions``, - ``lower_bound=MIN(partition_column)``, ``upper_bound=MAX(partition_column)``. - - .. 
note:: - - :obj:`~num_partitions` is used just to - calculate the partition stride, **NOT** for filtering the rows in table. - So all rows in the table will be returned (unlike *Incremental* :ref:`strategy`). - - .. note:: - - All queries are executed in parallel. To execute them sequentially, use *Batch* :ref:`strategy`. - - .. warning:: - - Both options :obj:`~partition_column` and :obj:`~num_partitions` should have a value, - or both should be ``None`` - - Examples - -------- - - Read data in 10 parallel jobs by range of values in ``id_column`` column: - - .. code:: python - - Greenplum.ReadOptions( - partition_column="id_column", - num_partitions=10, - ) - """ - - num_partitions: Optional[int] = Field(alias="partitions") - """Number of jobs created by Spark to read the table content in parallel. - - See documentation for :obj:`~partition_column` for more details - - .. warning:: - - By default connector uses number of segments in the Greenplum cluster. - You should not change this option, unless you know what you're doing - - .. warning:: - - Both options :obj:`~partition_column` and :obj:`~num_partitions` should have a value, - or both should be ``None`` - """ - - class WriteOptions(JDBCOptions): - """Pivotal's Greenplum Spark connector writing options. - - .. note :: - - You can pass any value - `supported by connector `_, - even if it is not mentioned in this documentation. - - The set of supported options depends on connector version. See link above. - - .. warning:: - - Some options, like ``url``, ``dbtable``, ``server.*``, ``pool.*``, - etc are populated from connection attributes, and cannot be set in ``WriteOptions`` class - - Examples - -------- - - Write options initialization - - .. code:: python - - options = Greenplum.WriteOptions( - if_exists="append", - truncate="false", - distributedBy="mycolumn", - ) - """ - - class Config: - known_options = WRITE_OPTIONS - prohibited_options = ( - JDBCOptions.Config.prohibited_options | EXTRA_OPTIONS | GENERIC_PROHIBITED_OPTIONS | READ_OPTIONS - ) - - if_exists: GreenplumTableExistBehavior = Field(default=GreenplumTableExistBehavior.APPEND, alias="mode") - """Behavior of writing data into existing table. - - Possible values: - * ``append`` (default) - Adds new rows into existing table. - - .. dropdown:: Behavior in details - - * Table does not exist - Table is created using options provided by user - (``distributedBy`` and others). - - * Table exists - Data is appended to a table. Table has the same DDL as before writing data. - - .. warning:: - - This mode does not check whether table already contains - rows from dataframe, so duplicated rows can be created. - - Also Spark does not support passing custom options to - insert statement, like ``ON CONFLICT``, so don't try to - implement deduplication using unique indexes or constraints. - - Instead, write to staging table and perform deduplication - using :obj:`~execute` method. - - * ``replace_entire_table`` - **Table is dropped and then created**. - - .. dropdown:: Behavior in details - - * Table does not exist - Table is created using options provided by user - (``distributedBy`` and others). - - * Table exists - Table content is replaced with dataframe content. - - After writing completed, target table could either have the same DDL as - before writing data (``truncate=True``), or can be recreated (``truncate=False``). - - .. note:: - - ``error`` and ``ignore`` modes are not supported. 
- """ - - @root_validator(pre=True) - def mode_is_deprecated(cls, values): - if "mode" in values: - warnings.warn( - "Option `Greenplum.WriteOptions(mode=...)` is deprecated since v0.9.0 and will be removed in v1.0.0. " - "Use `Greenplum.WriteOptions(if_exists=...)` instead", - category=UserWarning, - stacklevel=3, - ) - return values - - class Dialect( # noqa: WPS215 - SupportTableWithDBSchema, - SupportColumnsList, - SupportDfSchemaNone, - SupportWhereStr, - SupportHintNone, - SupportHWMExpressionStr, - SupportHWMColumnStr, - DBDialect, - ): - @classmethod - def _get_datetime_value_sql(cls, value: datetime) -> str: - result = value.isoformat() - return f"cast('{result}' as timestamp)" - - @classmethod - def _get_date_value_sql(cls, value: date) -> str: - result = value.isoformat() - return f"cast('{result}' as date)" - host: Host database: str port: int = 5432 - extra: Extra = Extra() + extra: GreenplumExtra = GreenplumExtra() + + Extra = GreenplumExtra + Dialect = GreenplumDialect + ReadOptions = GreenplumReadOptions + WriteOptions = GreenplumWriteOptions DRIVER: ClassVar[str] = "org.postgresql.Driver" CONNECTIONS_WARNING_LIMIT: ClassVar[int] = 31 @@ -593,7 +279,7 @@ def read_source_as_df( df_schema: StructType | None = None, start_from: Statement | None = None, end_at: Statement | None = None, - options: ReadOptions | dict | None = None, + options: GreenplumReadOptions | None = None, ) -> DataFrame: read_options = self.ReadOptions.parse(options).dict(by_alias=True, exclude_none=True) log.info("|%s| Executing SQL query (on executor):", self.__class__.__name__) @@ -618,7 +304,7 @@ def write_df_to_target( self, df: DataFrame, target: str, - options: WriteOptions | dict | None = None, + options: GreenplumWriteOptions | None = None, ) -> None: write_options = self.WriteOptions.parse(options) options_dict = write_options.dict(by_alias=True, exclude_none=True, exclude={"if_exists"}) @@ -639,7 +325,7 @@ def get_df_schema( self, source: str, columns: list[str] | None = None, - options: JDBCOptions | dict | None = None, + options: JDBCOptions | None = None, ) -> StructType: log.info("|%s| Fetching schema of table %r", self.__class__.__name__, source) @@ -662,7 +348,7 @@ def get_min_max_bounds( expression: str | None = None, hint: str | None = None, where: str | None = None, - options: JDBCOptions | dict | None = None, + options: JDBCOptions | None = None, ) -> tuple[Any, Any]: log.info("|Spark| Getting min and max values for column %r", column) @@ -781,11 +467,11 @@ def _get_occupied_connections_count(self) -> int: ) return int(result[0][0]) - def _get_connections_limits(self) -> ConnectionLimits: + def _get_connections_limits(self) -> GreenplumConnectionLimit: max_connections = int(self._get_server_setting("max_connections")) reserved_connections = int(self._get_server_setting("superuser_reserved_connections")) occupied_connections = self._get_occupied_connections_count() - return ConnectionLimits( + return GreenplumConnectionLimit( maximum=max_connections, reserved=reserved_connections, occupied=occupied_connections, diff --git a/onetl/connection/db_connection/greenplum/connection_limit.py b/onetl/connection/db_connection/greenplum/connection_limit.py new file mode 100644 index 000000000..7dec5ac26 --- /dev/null +++ b/onetl/connection/db_connection/greenplum/connection_limit.py @@ -0,0 +1,40 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import textwrap +from dataclasses import dataclass + + +@dataclass +class GreenplumConnectionLimit: + maximum: int + reserved: int + occupied: int + + @property + def available(self) -> int: + return self.maximum - self.reserved - self.occupied + + @property + def summary(self) -> str: + return textwrap.dedent( + f""" + available connections: {self.available} + occupied: {self.occupied} + max: {self.maximum} ("max_connection" in postgresql.conf) + reserved: {self.reserved} ("superuser_reserved_connections" in postgresql.conf) + """, + ).strip() diff --git a/onetl/connection/db_connection/greenplum/dialect.py b/onetl/connection/db_connection/greenplum/dialect.py new file mode 100644 index 000000000..ddf882273 --- /dev/null +++ b/onetl/connection/db_connection/greenplum/dialect.py @@ -0,0 +1,49 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from datetime import date, datetime + +from onetl.connection.db_connection.db_connection import DBDialect +from onetl.connection.db_connection.dialect_mixins import ( + SupportColumnsList, + SupportDfSchemaNone, + SupportHintNone, + SupportHWMColumnStr, + SupportHWMExpressionStr, + SupportTableWithDBSchema, + SupportWhereStr, +) + + +class GreenplumDialect( # noqa: WPS215 + SupportTableWithDBSchema, + SupportColumnsList, + SupportDfSchemaNone, + SupportWhereStr, + SupportHintNone, + SupportHWMExpressionStr, + SupportHWMColumnStr, + DBDialect, +): + @classmethod + def _get_datetime_value_sql(cls, value: datetime) -> str: + result = value.isoformat() + return f"cast('{result}' as timestamp)" + + @classmethod + def _get_date_value_sql(cls, value: date) -> str: + result = value.isoformat() + return f"cast('{result}' as date)" diff --git a/onetl/connection/db_connection/greenplum/options.py b/onetl/connection/db_connection/greenplum/options.py new file mode 100644 index 000000000..5a75db60f --- /dev/null +++ b/onetl/connection/db_connection/greenplum/options.py @@ -0,0 +1,292 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import warnings +from enum import Enum +from typing import Optional + +from pydantic import Field, root_validator + +from onetl.connection.db_connection.jdbc_mixin import JDBCOptions + +# options from which are populated by Greenplum class methods +GENERIC_PROHIBITED_OPTIONS = frozenset( + ( + "dbschema", + "dbtable", + ), +) + +WRITE_OPTIONS = frozenset( + ( + "mode", + "truncate", + "distributedBy", + "iteratorOptimization", + ), +) + +READ_OPTIONS = frozenset( + ( + "partitions", + "numPartitions", + "partitionColumn", + ), +) + + +class GreenplumTableExistBehavior(str, Enum): + APPEND = "append" + REPLACE_ENTIRE_TABLE = "replace_entire_table" + + def __str__(self) -> str: + return str(self.value) + + @classmethod # noqa: WPS120 + def _missing_(cls, value: object): # noqa: WPS120 + if str(value) == "overwrite": + warnings.warn( + "Mode `overwrite` is deprecated since v0.9.0 and will be removed in v1.0.0. " + "Use `replace_entire_table` instead", + category=UserWarning, + stacklevel=4, + ) + return cls.REPLACE_ENTIRE_TABLE + + +class GreenplumReadOptions(JDBCOptions): + """Pivotal's Greenplum Spark connector reading options. + + .. note :: + + You can pass any value + `supported by connector `_, + even if it is not mentioned in this documentation. + + The set of supported options depends on connector version. See link above. + + .. warning:: + + Some options, like ``url``, ``dbtable``, ``server.*``, ``pool.*``, + etc are populated from connection attributes, and cannot be set in ``ReadOptions`` class + + Examples + -------- + + Read options initialization + + .. code:: python + + Greenplum.ReadOptions( + partition_column="reg_id", + num_partitions=10, + ) + """ + + class Config: + known_options = READ_OPTIONS + prohibited_options = JDBCOptions.Config.prohibited_options | GENERIC_PROHIBITED_OPTIONS | WRITE_OPTIONS + + partition_column: Optional[str] = Field(alias="partitionColumn") + """Column used to parallelize reading from a table. + + .. warning:: + + You should not change this option, unless you know what you're doing + + Possible values: + * ``None`` (default): + Spark generates N jobs (where N == number of segments in Greenplum cluster), + each job is reading only data from a specific segment + (filtering data by ``gp_segment_id`` column). + + This is very effective way to fetch the data from a cluster. + + * table column + Allocate each executor a range of values from a specific column. + + .. note:: + Column type must be numeric. Other types are not supported. + + Spark generates for each executor an SQL query like: + + Executor 1: + + .. code:: sql + + SELECT ... FROM table + WHERE (partition_column >= lowerBound + OR partition_column IS NULL) + AND partition_column < (lower_bound + stride) + + Executor 2: + + .. code:: sql + + SELECT ... FROM table + WHERE partition_column >= (lower_bound + stride) + AND partition_column < (lower_bound + 2 * stride) + + ... + + Executor N: + + .. code:: sql + + SELECT ... FROM table + WHERE partition_column >= (lower_bound + (N-1) * stride) + AND partition_column <= upper_bound + + Where ``stride=(upper_bound - lower_bound) / num_partitions``, + ``lower_bound=MIN(partition_column)``, ``upper_bound=MAX(partition_column)``. + + .. note:: + + :obj:`~num_partitions` is used just to + calculate the partition stride, **NOT** for filtering the rows in table. 
+ So all rows in the table will be returned (unlike *Incremental* :ref:`strategy`). + + .. note:: + + All queries are executed in parallel. To execute them sequentially, use *Batch* :ref:`strategy`. + + .. warning:: + + Both options :obj:`~partition_column` and :obj:`~num_partitions` should have a value, + or both should be ``None`` + + Examples + -------- + + Read data in 10 parallel jobs by range of values in ``id_column`` column: + + .. code:: python + + Greenplum.ReadOptions( + partition_column="id_column", + num_partitions=10, + ) + """ + + num_partitions: Optional[int] = Field(alias="partitions") + """Number of jobs created by Spark to read the table content in parallel. + + See documentation for :obj:`~partition_column` for more details + + .. warning:: + + By default connector uses number of segments in the Greenplum cluster. + You should not change this option, unless you know what you're doing + + .. warning:: + + Both options :obj:`~partition_column` and :obj:`~num_partitions` should have a value, + or both should be ``None`` + """ + + +class GreenplumWriteOptions(JDBCOptions): + """Pivotal's Greenplum Spark connector writing options. + + .. note :: + + You can pass any value + `supported by connector `_, + even if it is not mentioned in this documentation. + + The set of supported options depends on connector version. See link above. + + .. warning:: + + Some options, like ``url``, ``dbtable``, ``server.*``, ``pool.*``, + etc are populated from connection attributes, and cannot be set in ``WriteOptions`` class + + Examples + -------- + + Write options initialization + + .. code:: python + + options = Greenplum.WriteOptions( + if_exists="append", + truncate="false", + distributedBy="mycolumn", + ) + """ + + class Config: + known_options = WRITE_OPTIONS + prohibited_options = JDBCOptions.Config.prohibited_options | GENERIC_PROHIBITED_OPTIONS | READ_OPTIONS + + if_exists: GreenplumTableExistBehavior = Field(default=GreenplumTableExistBehavior.APPEND, alias="mode") + """Behavior of writing data into existing table. + + Possible values: + * ``append`` (default) + Adds new rows into existing table. + + .. dropdown:: Behavior in details + + * Table does not exist + Table is created using options provided by user + (``distributedBy`` and others). + + * Table exists + Data is appended to a table. Table has the same DDL as before writing data. + + .. warning:: + + This mode does not check whether table already contains + rows from dataframe, so duplicated rows can be created. + + Also Spark does not support passing custom options to + insert statement, like ``ON CONFLICT``, so don't try to + implement deduplication using unique indexes or constraints. + + Instead, write to staging table and perform deduplication + using :obj:`~execute` method. + + * ``replace_entire_table`` + **Table is dropped and then created**. + + .. dropdown:: Behavior in details + + * Table does not exist + Table is created using options provided by user + (``distributedBy`` and others). + + * Table exists + Table content is replaced with dataframe content. + + After writing completed, target table could either have the same DDL as + before writing data (``truncate=True``), or can be recreated (``truncate=False``). + + .. note:: + + ``error`` and ``ignore`` modes are not supported. + """ + + @root_validator(pre=True) + def _mode_is_deprecated(cls, values): + if "mode" in values: + warnings.warn( + "Option `Greenplum.WriteOptions(mode=...)` is deprecated since v0.9.0 and will be removed in v1.0.0. 
" + "Use `Greenplum.WriteOptions(if_exists=...)` instead", + category=UserWarning, + stacklevel=3, + ) + return values diff --git a/setup.cfg b/setup.cfg index 820572b9b..6a799b4c5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -314,9 +314,6 @@ per-file-ignores = WPS219, # WPS437: Found protected attribute usage: spark._jvm WPS437, - onetl/connection/db_connection/greenplum.py: -# WPS437 Found protected attribute usage: self.Dialect._ - WPS437, onetl/connection/db_connection/kafka/connection.py: # WPS342: Found implicit raw string \\n WPS342, diff --git a/tests/tests_unit/tests_db_connection_unit/test_db_options_unit.py b/tests/tests_unit/tests_db_connection_unit/test_db_options_unit.py index e98da8e09..2d0dd60b4 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_db_options_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_db_options_unit.py @@ -41,8 +41,8 @@ def test_db_options_connection_parameters_cannot_be_passed(options_class, arg, v (Postgres.ReadOptions, "ReadOptions", {"fetchsize": 10, "keytab": "a/b/c"}), (Postgres.WriteOptions, "JDBCWriteOptions", {"if_exists": "replace_entire_table", "keytab": "a/b/c"}), (Postgres.Options, "Options", {"if_exists": "replace_entire_table", "keytab": "a/b/c"}), - (Greenplum.ReadOptions, "ReadOptions", {"partitions": 10}), - (Greenplum.WriteOptions, "WriteOptions", {"if_exists": "replace_entire_table"}), + (Greenplum.ReadOptions, "GreenplumReadOptions", {"partitions": 10}), + (Greenplum.WriteOptions, "GreenplumWriteOptions", {"if_exists": "replace_entire_table"}), ], ) def test_db_options_warn_for_unknown(options_class, options_class_name, known_options, caplog): diff --git a/tests/tests_unit/tests_db_connection_unit/test_greenplum_unit.py b/tests/tests_unit/tests_db_connection_unit/test_greenplum_unit.py index 6954186c1..5574a3393 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_greenplum_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_greenplum_unit.py @@ -176,60 +176,49 @@ def test_greenplum_write_options_default(): options = Greenplum.WriteOptions() assert options.if_exists == GreenplumTableExistBehavior.APPEND - assert options.query_timeout is None + + +@pytest.mark.parametrize( + "klass, name", + [ + (Greenplum.ReadOptions, "GreenplumReadOptions"), + (Greenplum.WriteOptions, "GreenplumWriteOptions"), + (Greenplum.JDBCOptions, "JDBCOptions"), + (Greenplum.Extra, "GreenplumExtra"), + ], +) +def test_greenplum_jdbc_options_populated_by_connection_class(klass, name): + error_msg = rf"Options \['driver', 'password', 'url', 'user'\] are not allowed to use in a {name}" + with pytest.raises(ValueError, match=error_msg): + klass(user="me", password="abc", driver="some.Class", url="jdbc:postgres://some/db") def test_greenplum_read_write_options_populated_by_connection_class(): - error_msg = r"Options \['dbschema', 'dbtable'\] are not allowed to use in a ReadOptions" + error_msg = r"Options \['dbschema', 'dbtable'\] are not allowed to use in a GreenplumReadOptions" with pytest.raises(ValueError, match=error_msg): Greenplum.ReadOptions(dbschema="myschema", dbtable="mytable") - error_msg = r"Options \['dbschema', 'dbtable'\] are not allowed to use in a WriteOptions" + error_msg = r"Options \['dbschema', 'dbtable'\] are not allowed to use in a GreenplumWriteOptions" with pytest.raises(ValueError, match=error_msg): Greenplum.WriteOptions(dbschema="myschema", dbtable="mytable") - error_msg = r"Options \['dbschema', 'dbtable'\] are not allowed to use in a Extra" - with pytest.raises(ValueError, match=error_msg): - 
Greenplum.Extra(dbschema="myschema", dbtable="mytable") - # JDBCOptions does not have such restriction options = Greenplum.JDBCOptions(dbschema="myschema", dbtable="mytable") assert options.dbschema == "myschema" assert options.dbtable == "mytable" -@pytest.mark.parametrize( - "options_class", - [ - Greenplum.ReadOptions, - Greenplum.WriteOptions, - ], -) -@pytest.mark.parametrize( - "arg, value", - [ - ("server.port", 8000), - ("pool.maxSize", "40"), - ], -) -def test_greenplum_read_write_options_prohibited(arg, value, options_class): - with pytest.raises(ValueError, match=rf"Options \['{arg}'\] are not allowed to use in a {options_class.__name__}"): - options_class.parse({arg: value}) - - @pytest.mark.parametrize( "arg, value", [ ("mode", "append"), ("truncate", "true"), ("distributedBy", "abc"), - ("distributed_by", "abc"), ("iteratorOptimization", "true"), - ("iterator_optimization", "true"), ], ) def test_greenplum_write_options_cannot_be_used_in_read_options(arg, value): - error_msg = rf"Options \['{arg}'\] are not allowed to use in a ReadOptions" + error_msg = rf"Options \['{arg}'\] are not allowed to use in a GreenplumReadOptions" with pytest.raises(ValueError, match=error_msg): Greenplum.ReadOptions.parse({arg: value}) @@ -238,14 +227,12 @@ def test_greenplum_write_options_cannot_be_used_in_read_options(arg, value): "arg, value", [ ("partitions", 10), - ("num_partitions", 10), ("numPartitions", 10), ("partitionColumn", "abc"), - ("partition_column", "abc"), ], ) def test_greenplum_read_options_cannot_be_used_in_write_options(arg, value): - error_msg = rf"Options \['{arg}'\] are not allowed to use in a WriteOptions" + error_msg = rf"Options \['{arg}'\] are not allowed to use in a GreenplumWriteOptions" with pytest.raises(ValueError, match=error_msg): Greenplum.WriteOptions.parse({arg: value}) From b7a7916baa5ecd149f434f9e4061e23063ed0ae3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 21 Aug 2023 08:34:28 +0000 Subject: [PATCH 13/30] [DOP-8140] Split Clickhouse and Clickhouse.Dialect to separated submodules --- docs/changelog/next_release/133.bugfix.rst | 1 + docs/connection/db_connection/clickhouse.rst | 30 ------ .../db_connection/clickhouse/connection.rst | 9 ++ .../db_connection/clickhouse/execute.rst | 17 ++++ .../db_connection/clickhouse/index.rst | 18 ++++ .../db_connection/clickhouse/read.rst | 22 +++++ .../db_connection/clickhouse/write.rst | 13 +++ docs/connection/db_connection/index.rst | 12 +-- docs/connection/db_connection/mssql.rst | 30 ------ .../db_connection/mssql/connection.rst | 9 ++ .../db_connection/mssql/execute.rst | 17 ++++ docs/connection/db_connection/mssql/index.rst | 18 ++++ docs/connection/db_connection/mssql/read.rst | 22 +++++ docs/connection/db_connection/mssql/write.rst | 13 +++ docs/connection/db_connection/mysql.rst | 30 ------ .../db_connection/mysql/connection.rst | 9 ++ .../db_connection/mysql/execute.rst | 17 ++++ docs/connection/db_connection/mysql/index.rst | 18 ++++ docs/connection/db_connection/mysql/read.rst | 22 +++++ docs/connection/db_connection/mysql/write.rst | 13 +++ docs/connection/db_connection/oracle.rst | 30 ------ .../db_connection/oracle/connection.rst | 9 ++ .../db_connection/oracle/execute.rst | 17 ++++ .../connection/db_connection/oracle/index.rst | 18 ++++ docs/connection/db_connection/oracle/read.rst | 22 +++++ 
.../connection/db_connection/oracle/write.rst | 13 +++ docs/connection/db_connection/postgres.rst | 30 ------ .../db_connection/postgres/connection.rst | 9 ++ .../db_connection/postgres/execute.rst | 17 ++++ .../db_connection/postgres/index.rst | 18 ++++ .../db_connection/postgres/read.rst | 22 +++++ .../db_connection/postgres/write.rst | 13 +++ docs/connection/db_connection/teradata.rst | 30 ------ .../db_connection/teradata/connection.rst | 9 ++ .../db_connection/teradata/execute.rst | 17 ++++ .../db_connection/teradata/index.rst | 18 ++++ .../db_connection/teradata/read.rst | 22 +++++ .../db_connection/teradata/write.rst | 13 +++ .../db_connection/clickhouse/__init__.py | 19 ++++ .../connection.py} | 41 +------- .../db_connection/clickhouse/dialect.py | 39 ++++++++ .../db_connection/db_connection/dialect.py | 4 + .../db_connection/greenplum/connection.py | 4 +- .../db_connection/hive/connection.py | 4 +- .../connection/db_connection/hive/dialect.py | 4 +- .../jdbc_connection/connection.py | 77 +++++++++------ .../db_connection/jdbc_connection/dialect.py | 12 ++- .../db_connection/jdbc_connection/options.py | 38 +++----- .../db_connection/mssql/__init__.py | 16 ++++ .../{mssql.py => mssql/connection.py} | 57 +++-------- .../connection/db_connection/mssql/dialect.py | 40 ++++++++ .../db_connection/mysql/__init__.py | 16 ++++ .../{mysql.py => mysql/connection.py} | 60 +++--------- .../connection/db_connection/mysql/dialect.py | 43 +++++++++ .../db_connection/oracle/__init__.py | 16 ++++ .../{oracle.py => oracle/connection.py} | 95 +++++++++---------- .../db_connection/oracle/dialect.py | 39 ++++++++ .../db_connection/postgres/__init__.py | 16 ++++ .../{postgres.py => postgres/connection.py} | 61 +----------- .../db_connection/postgres/dialect.py | 41 ++++++++ .../db_connection/teradata/__init__.py | 16 ++++ .../{teradata.py => teradata/connection.py} | 69 ++++---------- .../db_connection/teradata/dialect.py | 40 ++++++++ .../test_clickhouse_reader_integration.py | 45 +++------ .../test_mssql_reader_integration.py | 46 +++------ .../test_mysql_reader_integration.py | 45 +++------ .../test_oracle_reader_integration.py | 46 +++------ .../test_postgres_reader_integration.py | 45 +++------ .../test_db_options_unit.py | 4 +- .../test_jdbc_options_unit.py | 4 +- .../test_mssql_unit.py | 2 +- .../test_teradata_unit.py | 5 +- 72 files changed, 1061 insertions(+), 715 deletions(-) create mode 100644 docs/changelog/next_release/133.bugfix.rst delete mode 100644 docs/connection/db_connection/clickhouse.rst create mode 100644 docs/connection/db_connection/clickhouse/connection.rst create mode 100644 docs/connection/db_connection/clickhouse/execute.rst create mode 100644 docs/connection/db_connection/clickhouse/index.rst create mode 100644 docs/connection/db_connection/clickhouse/read.rst create mode 100644 docs/connection/db_connection/clickhouse/write.rst delete mode 100644 docs/connection/db_connection/mssql.rst create mode 100644 docs/connection/db_connection/mssql/connection.rst create mode 100644 docs/connection/db_connection/mssql/execute.rst create mode 100644 docs/connection/db_connection/mssql/index.rst create mode 100644 docs/connection/db_connection/mssql/read.rst create mode 100644 docs/connection/db_connection/mssql/write.rst delete mode 100644 docs/connection/db_connection/mysql.rst create mode 100644 docs/connection/db_connection/mysql/connection.rst create mode 100644 docs/connection/db_connection/mysql/execute.rst create mode 100644 docs/connection/db_connection/mysql/index.rst create 
mode 100644 docs/connection/db_connection/mysql/read.rst create mode 100644 docs/connection/db_connection/mysql/write.rst delete mode 100644 docs/connection/db_connection/oracle.rst create mode 100644 docs/connection/db_connection/oracle/connection.rst create mode 100644 docs/connection/db_connection/oracle/execute.rst create mode 100644 docs/connection/db_connection/oracle/index.rst create mode 100644 docs/connection/db_connection/oracle/read.rst create mode 100644 docs/connection/db_connection/oracle/write.rst delete mode 100644 docs/connection/db_connection/postgres.rst create mode 100644 docs/connection/db_connection/postgres/connection.rst create mode 100644 docs/connection/db_connection/postgres/execute.rst create mode 100644 docs/connection/db_connection/postgres/index.rst create mode 100644 docs/connection/db_connection/postgres/read.rst create mode 100644 docs/connection/db_connection/postgres/write.rst delete mode 100644 docs/connection/db_connection/teradata.rst create mode 100644 docs/connection/db_connection/teradata/connection.rst create mode 100644 docs/connection/db_connection/teradata/execute.rst create mode 100644 docs/connection/db_connection/teradata/index.rst create mode 100644 docs/connection/db_connection/teradata/read.rst create mode 100644 docs/connection/db_connection/teradata/write.rst create mode 100644 onetl/connection/db_connection/clickhouse/__init__.py rename onetl/connection/db_connection/{clickhouse.py => clickhouse/connection.py} (78%) create mode 100644 onetl/connection/db_connection/clickhouse/dialect.py create mode 100644 onetl/connection/db_connection/mssql/__init__.py rename onetl/connection/db_connection/{mssql.py => mssql/connection.py} (79%) create mode 100644 onetl/connection/db_connection/mssql/dialect.py create mode 100644 onetl/connection/db_connection/mysql/__init__.py rename onetl/connection/db_connection/{mysql.py => mysql/connection.py} (72%) create mode 100644 onetl/connection/db_connection/mysql/dialect.py create mode 100644 onetl/connection/db_connection/oracle/__init__.py rename onetl/connection/db_connection/{oracle.py => oracle/connection.py} (89%) create mode 100644 onetl/connection/db_connection/oracle/dialect.py create mode 100644 onetl/connection/db_connection/postgres/__init__.py rename onetl/connection/db_connection/{postgres.py => postgres/connection.py} (73%) create mode 100644 onetl/connection/db_connection/postgres/dialect.py create mode 100644 onetl/connection/db_connection/teradata/__init__.py rename onetl/connection/db_connection/{teradata.py => teradata/connection.py} (74%) create mode 100644 onetl/connection/db_connection/teradata/dialect.py diff --git a/docs/changelog/next_release/133.bugfix.rst b/docs/changelog/next_release/133.bugfix.rst new file mode 100644 index 000000000..37068bc5b --- /dev/null +++ b/docs/changelog/next_release/133.bugfix.rst @@ -0,0 +1 @@ +Fix reading data from Oracle with ``partitioningMode="range"`` without explicitly set ``lowerBound`` / ``upperBound``. diff --git a/docs/connection/db_connection/clickhouse.rst b/docs/connection/db_connection/clickhouse.rst deleted file mode 100644 index 9c358c653..000000000 --- a/docs/connection/db_connection/clickhouse.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. _clickhouse: - -Clickhouse connection -===================== - -.. currentmodule:: onetl.connection.db_connection.clickhouse - -.. autosummary:: - - Clickhouse - Clickhouse.ReadOptions - Clickhouse.WriteOptions - Clickhouse.JDBCOptions - -.. 
autoclass:: Clickhouse - :members: get_packages, check, sql, fetch, execute - -.. currentmodule:: onetl.connection.db_connection.clickhouse.Clickhouse - -.. autopydantic_model:: ReadOptions - :members: fetchsize, partitioning_mode, partition_column, num_partitions, lower_bound, upper_bound, session_init_statement - :member-order: bysource - -.. autopydantic_model:: WriteOptions - :members: mode, batchsize, isolation_level, query_timeout - :member-order: bysource - -.. autopydantic_model:: JDBCOptions - :members: query_timeout, fetchsize - :member-order: bysource diff --git a/docs/connection/db_connection/clickhouse/connection.rst b/docs/connection/db_connection/clickhouse/connection.rst new file mode 100644 index 000000000..862c503e5 --- /dev/null +++ b/docs/connection/db_connection/clickhouse/connection.rst @@ -0,0 +1,9 @@ +.. _clickhouse-connection: + +Clickhouse connection +===================== + +.. currentmodule:: onetl.connection.db_connection.clickhouse.connection + +.. autoclass:: Clickhouse + :members: get_packages, check diff --git a/docs/connection/db_connection/clickhouse/execute.rst b/docs/connection/db_connection/clickhouse/execute.rst new file mode 100644 index 000000000..f43eb3995 --- /dev/null +++ b/docs/connection/db_connection/clickhouse/execute.rst @@ -0,0 +1,17 @@ +.. _clickhouse-execute: + +Executing statements in Clickhouse +================================== + +.. currentmodule:: onetl.connection.db_connection.clickhouse.connection + +.. automethod:: Clickhouse.fetch +.. automethod:: Clickhouse.execute +.. automethod:: Clickhouse.close + +.. currentmodule:: onetl.connection.db_connection.jdbc_mixin.options + +.. autopydantic_model:: JDBCOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/clickhouse/index.rst b/docs/connection/db_connection/clickhouse/index.rst new file mode 100644 index 000000000..1e0d1de65 --- /dev/null +++ b/docs/connection/db_connection/clickhouse/index.rst @@ -0,0 +1,18 @@ +.. _clickhouse: + +Clickhouse +========== + +.. toctree:: + :maxdepth: 1 + :caption: Connection + + connection + +.. toctree:: + :maxdepth: 1 + :caption: Operations + + read + write + execute diff --git a/docs/connection/db_connection/clickhouse/read.rst b/docs/connection/db_connection/clickhouse/read.rst new file mode 100644 index 000000000..a2c07733c --- /dev/null +++ b/docs/connection/db_connection/clickhouse/read.rst @@ -0,0 +1,22 @@ +.. _clickhouse-read: + +Reading from Clickhouse +======================= + +There are 2 ways of distributed data reading from Clickhouse: + +* Using :obj:`DBReader ` with different :ref:`strategy` +* Using :obj:`Clickhouse.sql ` + +Both methods accept :obj:`JDBCReadOptions ` + +.. currentmodule:: onetl.connection.db_connection.clickhouse.connection + +.. automethod:: Clickhouse.sql + +.. currentmodule:: onetl.connection.db_connection.jdbc_connection.options + +.. autopydantic_model:: JDBCReadOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/clickhouse/write.rst b/docs/connection/db_connection/clickhouse/write.rst new file mode 100644 index 000000000..97cc95d36 --- /dev/null +++ b/docs/connection/db_connection/clickhouse/write.rst @@ -0,0 +1,13 @@ +.. _clickhouse-write: + +Writing to Clickhouse +===================== + +For writing data to Clickhouse, use :obj:`DBWriter ` with options below. + +.. 
currentmodule:: onetl.connection.db_connection.jdbc_connection.options + +.. autopydantic_model:: JDBCWriteOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/index.rst b/docs/connection/db_connection/index.rst index caa5e5a94..429aaed71 100644 --- a/docs/connection/db_connection/index.rst +++ b/docs/connection/db_connection/index.rst @@ -7,13 +7,13 @@ DB Connections :maxdepth: 1 :caption: DB Connections - Clickhouse + Clickhouse Greenplum Kafka Hive MongoDB - MSSQL - MySQL - Oracle - Postgres - Teradata + MSSQL + MySQL + Oracle + Postgres + Teradata diff --git a/docs/connection/db_connection/mssql.rst b/docs/connection/db_connection/mssql.rst deleted file mode 100644 index 5b3941b04..000000000 --- a/docs/connection/db_connection/mssql.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. _mssql: - -MSSQL connection -================ - -.. currentmodule:: onetl.connection.db_connection.mssql - -.. autosummary:: - - MSSQL - MSSQL.ReadOptions - MSSQL.WriteOptions - MSSQL.JDBCOptions - -.. autoclass:: MSSQL - :members: get_packages, check, sql, fetch, execute, close - -.. currentmodule:: onetl.connection.db_connection.mssql.MSSQL - -.. autopydantic_model:: ReadOptions - :members: fetchsize, partitioning_mode, partition_column, num_partitions, lower_bound, upper_bound, session_init_statement - :member-order: bysource - -.. autopydantic_model:: WriteOptions - :members: mode, batchsize, isolation_level, query_timeout - :member-order: bysource - -.. autopydantic_model:: JDBCOptions - :members: query_timeout, fetchsize - :member-order: bysource diff --git a/docs/connection/db_connection/mssql/connection.rst b/docs/connection/db_connection/mssql/connection.rst new file mode 100644 index 000000000..15fe31260 --- /dev/null +++ b/docs/connection/db_connection/mssql/connection.rst @@ -0,0 +1,9 @@ +.. _mssql-connection: + +MSSQL connection +================ + +.. currentmodule:: onetl.connection.db_connection.mssql.connection + +.. autoclass:: MSSQL + :members: get_packages, check diff --git a/docs/connection/db_connection/mssql/execute.rst b/docs/connection/db_connection/mssql/execute.rst new file mode 100644 index 000000000..bed53fec5 --- /dev/null +++ b/docs/connection/db_connection/mssql/execute.rst @@ -0,0 +1,17 @@ +.. _mssql-execute: + +Executing statements in MSSQL +============================= + +.. currentmodule:: onetl.connection.db_connection.mssql.connection + +.. automethod:: MSSQL.fetch +.. automethod:: MSSQL.execute +.. automethod:: MSSQL.close + +.. currentmodule:: onetl.connection.db_connection.jdbc_mixin.options + +.. autopydantic_model:: JDBCOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/mssql/index.rst b/docs/connection/db_connection/mssql/index.rst new file mode 100644 index 000000000..5e511e83e --- /dev/null +++ b/docs/connection/db_connection/mssql/index.rst @@ -0,0 +1,18 @@ +.. _mssql: + +MSSQL +===== + +.. toctree:: + :maxdepth: 1 + :caption: Connection + + connection + +.. toctree:: + :maxdepth: 1 + :caption: Operations + + read + write + execute diff --git a/docs/connection/db_connection/mssql/read.rst b/docs/connection/db_connection/mssql/read.rst new file mode 100644 index 000000000..3a336f823 --- /dev/null +++ b/docs/connection/db_connection/mssql/read.rst @@ -0,0 +1,22 @@ +.. 
_mssql-read: + +Reading from MSSQL +================== + +There are 2 ways of distributed data reading from MSSQL: + +* Using :obj:`DBReader ` with different :ref:`strategy` +* Using :obj:`MSSQL.sql ` + +Both methods accept :obj:`JDBCReadOptions ` + +.. currentmodule:: onetl.connection.db_connection.mssql.connection + +.. automethod:: MSSQL.sql + +.. currentmodule:: onetl.connection.db_connection.jdbc_connection.options + +.. autopydantic_model:: JDBCReadOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/mssql/write.rst b/docs/connection/db_connection/mssql/write.rst new file mode 100644 index 000000000..c8a5e5906 --- /dev/null +++ b/docs/connection/db_connection/mssql/write.rst @@ -0,0 +1,13 @@ +.. _mssql-write: + +Writing to MSSQL +================ + +For writing data to MSSQL, use :obj:`DBWriter ` with options below. + +.. currentmodule:: onetl.connection.db_connection.jdbc_connection.options + +.. autopydantic_model:: JDBCWriteOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/mysql.rst b/docs/connection/db_connection/mysql.rst deleted file mode 100644 index 85f59403d..000000000 --- a/docs/connection/db_connection/mysql.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. _mysql: - -MySQL connection -================= - -.. currentmodule:: onetl.connection.db_connection.mysql - -.. autosummary:: - - MySQL - MySQL.ReadOptions - MySQL.WriteOptions - MySQL.JDBCOptions - -.. autoclass:: MySQL - :members: get_packages, check, sql, fetch, execute, close - -.. currentmodule:: onetl.connection.db_connection.mysql.MySQL - -.. autopydantic_model:: ReadOptions - :members: fetchsize, partitioning_mode, partition_column, num_partitions, lower_bound, upper_bound, session_init_statement - :member-order: bysource - -.. autopydantic_model:: WriteOptions - :members: mode, batchsize, isolation_level, query_timeout - :member-order: bysource - -.. autopydantic_model:: JDBCOptions - :members: query_timeout, fetchsize - :member-order: bysource diff --git a/docs/connection/db_connection/mysql/connection.rst b/docs/connection/db_connection/mysql/connection.rst new file mode 100644 index 000000000..cfeb00206 --- /dev/null +++ b/docs/connection/db_connection/mysql/connection.rst @@ -0,0 +1,9 @@ +.. _mysql-connection: + +MySQL connection +================ + +.. currentmodule:: onetl.connection.db_connection.mysql.connection + +.. autoclass:: MySQL + :members: get_packages, check diff --git a/docs/connection/db_connection/mysql/execute.rst b/docs/connection/db_connection/mysql/execute.rst new file mode 100644 index 000000000..ec5d01482 --- /dev/null +++ b/docs/connection/db_connection/mysql/execute.rst @@ -0,0 +1,17 @@ +.. _mysql-execute: + +Executing statements in MySQL +============================= + +.. currentmodule:: onetl.connection.db_connection.mysql.connection + +.. automethod:: MySQL.fetch +.. automethod:: MySQL.execute +.. automethod:: MySQL.close + +.. currentmodule:: onetl.connection.db_connection.jdbc_mixin.options + +.. autopydantic_model:: JDBCOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/mysql/index.rst b/docs/connection/db_connection/mysql/index.rst new file mode 100644 index 000000000..e221165cd --- /dev/null +++ b/docs/connection/db_connection/mysql/index.rst @@ -0,0 +1,18 @@ +.. _mysql: + +MySQL +===== + +.. 
toctree:: + :maxdepth: 1 + :caption: Connection + + connection + +.. toctree:: + :maxdepth: 1 + :caption: Operations + + read + write + execute diff --git a/docs/connection/db_connection/mysql/read.rst b/docs/connection/db_connection/mysql/read.rst new file mode 100644 index 000000000..6a6960532 --- /dev/null +++ b/docs/connection/db_connection/mysql/read.rst @@ -0,0 +1,22 @@ +.. _mysql-read: + +Reading from MySQL +================== + +There are 2 ways of distributed data reading from MySQL: + +* Using :obj:`DBReader ` with different :ref:`strategy` +* Using :obj:`MySQL.sql ` + +Both methods accept :obj:`JDBCReadOptions ` + +.. currentmodule:: onetl.connection.db_connection.mysql.connection + +.. automethod:: MySQL.sql + +.. currentmodule:: onetl.connection.db_connection.jdbc_connection.options + +.. autopydantic_model:: JDBCReadOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/mysql/write.rst b/docs/connection/db_connection/mysql/write.rst new file mode 100644 index 000000000..67f13cf1b --- /dev/null +++ b/docs/connection/db_connection/mysql/write.rst @@ -0,0 +1,13 @@ +.. _mysql-write: + +Writing to MySQL +================ + +For writing data to MySQL, use :obj:`DBWriter ` with options below. + +.. currentmodule:: onetl.connection.db_connection.jdbc_connection.options + +.. autopydantic_model:: JDBCWriteOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/oracle.rst b/docs/connection/db_connection/oracle.rst deleted file mode 100644 index 45b3f0b84..000000000 --- a/docs/connection/db_connection/oracle.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. _oracle: - -Oracle connection -================== - -.. currentmodule:: onetl.connection.db_connection.oracle - -.. autosummary:: - - Oracle - Oracle.ReadOptions - Oracle.WriteOptions - Oracle.JDBCOptions - -.. autoclass:: Oracle - :members: get_packages, check, sql, fetch, execute, close - -.. currentmodule:: onetl.connection.db_connection.oracle.Oracle - -.. autopydantic_model:: ReadOptions - :members: fetchsize, partitioning_mode, partition_column, num_partitions, lower_bound, upper_bound, session_init_statement - :member-order: bysource - -.. autopydantic_model:: WriteOptions - :members: mode, batchsize, isolation_level, query_timeout - :member-order: bysource - -.. autopydantic_model:: JDBCOptions - :members: query_timeout, fetchsize - :member-order: bysource diff --git a/docs/connection/db_connection/oracle/connection.rst b/docs/connection/db_connection/oracle/connection.rst new file mode 100644 index 000000000..25e544823 --- /dev/null +++ b/docs/connection/db_connection/oracle/connection.rst @@ -0,0 +1,9 @@ +.. _oracle-connection: + +Oracle connection +================= + +.. currentmodule:: onetl.connection.db_connection.oracle.connection + +.. autoclass:: Oracle + :members: get_packages, check diff --git a/docs/connection/db_connection/oracle/execute.rst b/docs/connection/db_connection/oracle/execute.rst new file mode 100644 index 000000000..24ea689a4 --- /dev/null +++ b/docs/connection/db_connection/oracle/execute.rst @@ -0,0 +1,17 @@ +.. _oracle-execute: + +Executing statements in Oracle +============================== + +.. currentmodule:: onetl.connection.db_connection.oracle.connection + +.. automethod:: Oracle.fetch +.. automethod:: Oracle.execute +.. automethod:: Oracle.close + +.. currentmodule:: onetl.connection.db_connection.jdbc_mixin.options + +.. 
autopydantic_model:: JDBCOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/oracle/index.rst b/docs/connection/db_connection/oracle/index.rst new file mode 100644 index 000000000..519250fb5 --- /dev/null +++ b/docs/connection/db_connection/oracle/index.rst @@ -0,0 +1,18 @@ +.. _oracle: + +Oracle +====== + +.. toctree:: + :maxdepth: 1 + :caption: Connection + + connection + +.. toctree:: + :maxdepth: 1 + :caption: Operations + + read + write + execute diff --git a/docs/connection/db_connection/oracle/read.rst b/docs/connection/db_connection/oracle/read.rst new file mode 100644 index 000000000..ffd393e6e --- /dev/null +++ b/docs/connection/db_connection/oracle/read.rst @@ -0,0 +1,22 @@ +.. _oracle-read: + +Reading from Oracle +=================== + +There are 2 ways of distributed data reading from Oracle: + +* Using :obj:`DBReader ` with different :ref:`strategy` +* Using :obj:`Oracle.sql ` + +Both methods accept :obj:`JDBCReadOptions ` + +.. currentmodule:: onetl.connection.db_connection.oracle.connection + +.. automethod:: Oracle.sql + +.. currentmodule:: onetl.connection.db_connection.jdbc_connection.options + +.. autopydantic_model:: JDBCReadOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/oracle/write.rst b/docs/connection/db_connection/oracle/write.rst new file mode 100644 index 000000000..78c57d915 --- /dev/null +++ b/docs/connection/db_connection/oracle/write.rst @@ -0,0 +1,13 @@ +.. _oracle-write: + +Writing to Oracle +================= + +For writing data to Oracle, use :obj:`DBWriter ` with options below. + +.. currentmodule:: onetl.connection.db_connection.jdbc_connection.options + +.. autopydantic_model:: JDBCWriteOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/postgres.rst b/docs/connection/db_connection/postgres.rst deleted file mode 100644 index 605a462d7..000000000 --- a/docs/connection/db_connection/postgres.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. _postgres: - -Postgres connection -==================== - -.. currentmodule:: onetl.connection.db_connection.postgres - -.. autosummary:: - - Postgres - Postgres.ReadOptions - Postgres.WriteOptions - Postgres.JDBCOptions - -.. autoclass:: Postgres - :members: get_packages, check, sql, fetch, execute, close - -.. currentmodule:: onetl.connection.db_connection.postgres.Postgres - -.. autopydantic_model:: ReadOptions - :members: fetchsize, partitioning_mode, partition_column, num_partitions, lower_bound, upper_bound, session_init_statement - :member-order: bysource - -.. autopydantic_model:: WriteOptions - :members: mode, batchsize, isolation_level, query_timeout - :member-order: bysource - -.. autopydantic_model:: JDBCOptions - :members: query_timeout, fetchsize - :member-order: bysource diff --git a/docs/connection/db_connection/postgres/connection.rst b/docs/connection/db_connection/postgres/connection.rst new file mode 100644 index 000000000..517bcd5f2 --- /dev/null +++ b/docs/connection/db_connection/postgres/connection.rst @@ -0,0 +1,9 @@ +.. _postgres-connection: + +Postgres connection +=================== + +.. currentmodule:: onetl.connection.db_connection.postgres.connection + +.. 
autoclass:: Postgres + :members: get_packages, check diff --git a/docs/connection/db_connection/postgres/execute.rst b/docs/connection/db_connection/postgres/execute.rst new file mode 100644 index 000000000..042b97197 --- /dev/null +++ b/docs/connection/db_connection/postgres/execute.rst @@ -0,0 +1,17 @@ +.. _postgres-execute: + +Executing statements in Postgres +================================ + +.. currentmodule:: onetl.connection.db_connection.postgres.connection + +.. automethod:: Postgres.fetch +.. automethod:: Postgres.execute +.. automethod:: Postgres.close + +.. currentmodule:: onetl.connection.db_connection.jdbc_mixin.options + +.. autopydantic_model:: JDBCOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/postgres/index.rst b/docs/connection/db_connection/postgres/index.rst new file mode 100644 index 000000000..f5376ee93 --- /dev/null +++ b/docs/connection/db_connection/postgres/index.rst @@ -0,0 +1,18 @@ +.. _postgres: + +Postgres +======== + +.. toctree:: + :maxdepth: 1 + :caption: Connection + + connection + +.. toctree:: + :maxdepth: 1 + :caption: Operations + + read + write + execute diff --git a/docs/connection/db_connection/postgres/read.rst b/docs/connection/db_connection/postgres/read.rst new file mode 100644 index 000000000..737a731db --- /dev/null +++ b/docs/connection/db_connection/postgres/read.rst @@ -0,0 +1,22 @@ +.. _postgres-read: + +Reading from Postgres +===================== + +There are 2 ways of distributed data reading from Postgres: + +* Using :obj:`DBReader ` with different :ref:`strategy` +* Using :obj:`Postgres.sql ` + +Both methods accept :obj:`JDBCReadOptions ` + +.. currentmodule:: onetl.connection.db_connection.postgres.connection + +.. automethod:: Postgres.sql + +.. currentmodule:: onetl.connection.db_connection.jdbc_connection.options + +.. autopydantic_model:: JDBCReadOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/postgres/write.rst b/docs/connection/db_connection/postgres/write.rst new file mode 100644 index 000000000..db96b4ec8 --- /dev/null +++ b/docs/connection/db_connection/postgres/write.rst @@ -0,0 +1,13 @@ +.. _postgres-write: + +Writing to Postgres +=================== + +For writing data to Postgres, use :obj:`DBWriter ` with options below. + +.. currentmodule:: onetl.connection.db_connection.jdbc_connection.options + +.. autopydantic_model:: JDBCWriteOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/teradata.rst b/docs/connection/db_connection/teradata.rst deleted file mode 100644 index cca8ea3cb..000000000 --- a/docs/connection/db_connection/teradata.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. _teradata: - -Teradata connection -==================== - -.. currentmodule:: onetl.connection.db_connection.teradata - -.. autosummary:: - - Teradata - Teradata.ReadOptions - Teradata.WriteOptions - Teradata.JDBCOptions - -.. autoclass:: Teradata - :members: get_packages, check, sql, fetch, execute, close - -.. currentmodule:: onetl.connection.db_connection.teradata.Teradata - -.. autopydantic_model:: ReadOptions - :members: fetchsize, partitioning_mode, partition_column, num_partitions, lower_bound, upper_bound, session_init_statement - :member-order: bysource - -.. 
autopydantic_model:: WriteOptions - :members: mode, batchsize, isolation_level, query_timeout - :member-order: bysource - -.. autopydantic_model:: JDBCOptions - :members: query_timeout, fetchsize - :member-order: bysource diff --git a/docs/connection/db_connection/teradata/connection.rst b/docs/connection/db_connection/teradata/connection.rst new file mode 100644 index 000000000..0e70dda34 --- /dev/null +++ b/docs/connection/db_connection/teradata/connection.rst @@ -0,0 +1,9 @@ +.. _teradata-connection: + +Teradata connection +=================== + +.. currentmodule:: onetl.connection.db_connection.teradata.connection + +.. autoclass:: Teradata + :members: get_packages, check diff --git a/docs/connection/db_connection/teradata/execute.rst b/docs/connection/db_connection/teradata/execute.rst new file mode 100644 index 000000000..80853f919 --- /dev/null +++ b/docs/connection/db_connection/teradata/execute.rst @@ -0,0 +1,17 @@ +.. _teradata-execute: + +Executing statements in Teradata +================================ + +.. currentmodule:: onetl.connection.db_connection.teradata.connection + +.. automethod:: Teradata.fetch +.. automethod:: Teradata.execute +.. automethod:: Teradata.close + +.. currentmodule:: onetl.connection.db_connection.jdbc_mixin.options + +.. autopydantic_model:: JDBCOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/teradata/index.rst b/docs/connection/db_connection/teradata/index.rst new file mode 100644 index 000000000..2f6d6636d --- /dev/null +++ b/docs/connection/db_connection/teradata/index.rst @@ -0,0 +1,18 @@ +.. _teradata: + +Teradata +======== + +.. toctree:: + :maxdepth: 1 + :caption: Connection + + connection + +.. toctree:: + :maxdepth: 1 + :caption: Operations + + read + write + execute diff --git a/docs/connection/db_connection/teradata/read.rst b/docs/connection/db_connection/teradata/read.rst new file mode 100644 index 000000000..b23c42a59 --- /dev/null +++ b/docs/connection/db_connection/teradata/read.rst @@ -0,0 +1,22 @@ +.. _teradata-read: + +Reading from Teradata +===================== + +There are 2 ways of distributed data reading from Teradata: + +* Using :obj:`DBReader ` with different :ref:`strategy` +* Using :obj:`Teradata.sql ` + +Both methods accept :obj:`JDBCReadOptions ` + +.. currentmodule:: onetl.connection.db_connection.teradata.connection + +.. automethod:: Teradata.sql + +.. currentmodule:: onetl.connection.db_connection.jdbc_connection.options + +.. autopydantic_model:: JDBCReadOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/teradata/write.rst b/docs/connection/db_connection/teradata/write.rst new file mode 100644 index 000000000..7e5cbef40 --- /dev/null +++ b/docs/connection/db_connection/teradata/write.rst @@ -0,0 +1,13 @@ +.. _teradata-write: + +Writing to Teradata +=================== + +For writing data to Teradata, use :obj:`DBWriter ` with options below. + +.. currentmodule:: onetl.connection.db_connection.jdbc_connection.options + +.. 
autopydantic_model:: JDBCWriteOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/onetl/connection/db_connection/clickhouse/__init__.py b/onetl/connection/db_connection/clickhouse/__init__.py new file mode 100644 index 000000000..0fbebdd70 --- /dev/null +++ b/onetl/connection/db_connection/clickhouse/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from onetl.connection.db_connection.clickhouse.connection import ( + Clickhouse, + ClickhouseExtra, +) +from onetl.connection.db_connection.clickhouse.dialect import ClickhouseDialect diff --git a/onetl/connection/db_connection/clickhouse.py b/onetl/connection/db_connection/clickhouse/connection.py similarity index 78% rename from onetl/connection/db_connection/clickhouse.py rename to onetl/connection/db_connection/clickhouse/connection.py index defa63eb1..dc6acf163 100644 --- a/onetl/connection/db_connection/clickhouse.py +++ b/onetl/connection/db_connection/clickhouse/connection.py @@ -16,20 +16,12 @@ import logging import warnings -from datetime import date, datetime from typing import ClassVar, Optional -from deprecated import deprecated - from onetl._util.classproperty import classproperty +from onetl.connection.db_connection.clickhouse.dialect import ClickhouseDialect from onetl.connection.db_connection.jdbc_connection import JDBCConnection -from onetl.connection.db_connection.jdbc_connection.dialect import JDBCDialect -from onetl.connection.db_connection.jdbc_connection.options import ( - JDBCReadOptions, - JDBCWriteOptions, -) from onetl.connection.db_connection.jdbc_mixin import JDBCStatementType -from onetl.connection.db_connection.jdbc_mixin.options import JDBCOptions from onetl.hooks import slot, support_hooks from onetl.impl import GenericOptions @@ -142,6 +134,7 @@ class Clickhouse(JDBCConnection): extra: ClickhouseExtra = ClickhouseExtra() Extra = ClickhouseExtra + Dialect = ClickhouseDialect DRIVER: ClassVar[str] = "ru.yandex.clickhouse.ClickHouseDriver" @@ -180,36 +173,6 @@ def jdbc_url(self) -> str: return f"jdbc:clickhouse://{self.host}:{self.port}?{parameters}".rstrip("?") - class Dialect(JDBCDialect): - @classmethod - def _get_datetime_value_sql(cls, value: datetime) -> str: - result = value.strftime("%Y-%m-%d %H:%M:%S") - return f"CAST('{result}' AS DateTime)" - - @classmethod - def _get_date_value_sql(cls, value: date) -> str: - result = value.strftime("%Y-%m-%d") - return f"CAST('{result}' AS Date)" - - class ReadOptions(JDBCReadOptions): - @classmethod - def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: - return f"modulo(halfMD5({partition_column}), {num_partitions})" - - @classmethod - def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: - return f"{partition_column} % {num_partitions}" - - @deprecated( - version="0.5.0", - reason="Please use 'ReadOptions' or 'WriteOptions' class instead. 
Will be removed in v1.0.0", - action="always", - category=UserWarning, - ) - class Options(ReadOptions, JDBCWriteOptions): - class Config: - prohibited_options = JDBCOptions.Config.prohibited_options - @staticmethod def _build_statement( statement: str, diff --git a/onetl/connection/db_connection/clickhouse/dialect.py b/onetl/connection/db_connection/clickhouse/dialect.py new file mode 100644 index 000000000..56fe44b33 --- /dev/null +++ b/onetl/connection/db_connection/clickhouse/dialect.py @@ -0,0 +1,39 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from datetime import date, datetime + +from onetl.connection.db_connection.jdbc_connection import JDBCDialect + + +class ClickhouseDialect(JDBCDialect): + @classmethod + def _get_datetime_value_sql(cls, value: datetime) -> str: + result = value.strftime("%Y-%m-%d %H:%M:%S") + return f"CAST('{result}' AS DateTime)" + + @classmethod + def _get_date_value_sql(cls, value: date) -> str: + result = value.strftime("%Y-%m-%d") + return f"CAST('{result}' AS Date)" + + @classmethod + def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: + return f"modulo(halfMD5({partition_column}), {num_partitions})" + + @classmethod + def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: + return f"{partition_column} % {num_partitions}" diff --git a/onetl/connection/db_connection/db_connection/dialect.py b/onetl/connection/db_connection/db_connection/dialect.py index 0d118fbee..5c9472189 100644 --- a/onetl/connection/db_connection/db_connection/dialect.py +++ b/onetl/connection/db_connection/db_connection/dialect.py @@ -32,6 +32,10 @@ class DBDialect(BaseDBDialect): operator.ne: "{} != {}", } + @classmethod + def _escape_column(cls, value: str) -> str: + return f'"{value}"' + @classmethod def _expression_with_alias(cls, expression: str, alias: str) -> str: return f"{expression} AS {alias}" diff --git a/onetl/connection/db_connection/greenplum/connection.py b/onetl/connection/db_connection/greenplum/connection.py index db29461a9..0b7dcc504 100644 --- a/onetl/connection/db_connection/greenplum/connection.py +++ b/onetl/connection/db_connection/greenplum/connection.py @@ -359,11 +359,11 @@ def get_min_max_bounds( columns=[ self.Dialect._expression_with_alias( self.Dialect._get_min_value_sql(expression or column), - "min", + self.Dialect._escape_column("min"), ), self.Dialect._expression_with_alias( self.Dialect._get_max_value_sql(expression or column), - "max", + self.Dialect._escape_column("max"), ), ], where=where, diff --git a/onetl/connection/db_connection/hive/connection.py b/onetl/connection/db_connection/hive/connection.py index e8d5a916d..bb727d65c 100644 --- a/onetl/connection/db_connection/hive/connection.py +++ b/onetl/connection/db_connection/hive/connection.py @@ -401,11 +401,11 @@ def get_min_max_bounds( columns=[ self.Dialect._expression_with_alias( self.Dialect._get_min_value_sql(expression or column), - 
"min", + self.Dialect._escape_column("min"), ), self.Dialect._expression_with_alias( self.Dialect._get_max_value_sql(expression or column), - "max", + self.Dialect._escape_column("max"), ), ], where=where, diff --git a/onetl/connection/db_connection/hive/dialect.py b/onetl/connection/db_connection/hive/dialect.py index 19362f496..b74279ec8 100644 --- a/onetl/connection/db_connection/hive/dialect.py +++ b/onetl/connection/db_connection/hive/dialect.py @@ -36,4 +36,6 @@ class HiveDialect( # noqa: WPS215 SupportHWMColumnStr, DBDialect, ): - pass + @classmethod + def _escape_column(cls, value: str) -> str: + return f"`{value}`" diff --git a/onetl/connection/db_connection/jdbc_connection/connection.py b/onetl/connection/db_connection/jdbc_connection/connection.py index 3fc754c72..5c58bdee4 100644 --- a/onetl/connection/db_connection/jdbc_connection/connection.py +++ b/onetl/connection/db_connection/jdbc_connection/connection.py @@ -22,7 +22,10 @@ from onetl._internal import clear_statement, get_sql_query from onetl.connection.db_connection.db_connection import DBConnection +from onetl.connection.db_connection.jdbc_connection.dialect import JDBCDialect from onetl.connection.db_connection.jdbc_connection.options import ( + JDBCLegacyOptions, + JDBCPartitioningMode, JDBCReadOptions, JDBCTableExistBehavior, JDBCWriteOptions, @@ -57,8 +60,10 @@ class JDBCConnection(JDBCMixin, DBConnection): host: Host port: int + Dialect = JDBCDialect ReadOptions = JDBCReadOptions WriteOptions = JDBCWriteOptions + Options = JDBCLegacyOptions @property def instance_url(self) -> str: @@ -159,24 +164,37 @@ def read_source_as_df( table=source, where=where, hint=hint, - options=self.ReadOptions.parse(options).copy(exclude={"if_exists", "partitioning_mode"}), + options=self.ReadOptions.parse(options), ) - # hack to avoid column name verification - # in the spark, the expression in the partitioning of the column must - # have the same name as the field in the table ( 2.4 version ) - # https://github.com/apache/spark/pull/21379 - new_columns = columns or ["*"] - alias = "x" + secrets.token_hex(5) + alias: str | None = None if read_options.partition_column: - aliased = self.Dialect._expression_with_alias(read_options.partition_column, alias) - read_options = read_options.copy(update={"partition_column": alias}) - new_columns.append(aliased) + if read_options.partitioning_mode == JDBCPartitioningMode.MOD: + partition_column = self.Dialect._get_partition_column_mod( + read_options.partition_column, + read_options.num_partitions, + ) + elif read_options.partitioning_mode == JDBCPartitioningMode.HASH: + partition_column = self.Dialect._get_partition_column_hash( + read_options.partition_column, + read_options.num_partitions, + ) + else: + partition_column = read_options.partition_column + + # hack to avoid column name verification + # in the spark, the expression in the partitioning of the column must + # have the same name as the field in the table ( 2.4 version ) + # https://github.com/apache/spark/pull/21379 + alias = "generated_" + secrets.token_hex(5) + alias_escaped = self.Dialect._escape_column(alias) + aliased_column = self.Dialect._expression_with_alias(partition_column, alias_escaped) + read_options = read_options.copy(update={"partition_column": alias_escaped}) + new_columns.append(aliased_column) where = self.Dialect._condition_assembler(condition=where, start_from=start_from, end_at=end_at) - query = get_sql_query( table=source, columns=new_columns, @@ -185,8 +203,7 @@ def read_source_as_df( ) result = 
self.sql(query, read_options) - - if read_options.partition_column: + if alias: result = result.drop(alias) return result @@ -256,7 +273,6 @@ def options_to_jdbc_params( ) result["properties"].pop("partitioningMode", None) - return result @slot @@ -278,11 +294,11 @@ def get_min_max_bounds( columns=[ self.Dialect._expression_with_alias( self.Dialect._get_min_value_sql(expression or column), - "min", + self.Dialect._escape_column("min"), ), self.Dialect._expression_with_alias( self.Dialect._get_max_value_sql(expression or column), - "max", + self.Dialect._escape_column("max"), ), ], where=where, @@ -324,21 +340,20 @@ def _exclude_partition_options( def _set_lower_upper_bound( self, table: str, - hint: str | None = None, - where: str | None = None, - options: JDBCReadOptions | None = None, + hint: str | None, + where: str | None, + options: JDBCReadOptions, ) -> JDBCReadOptions: """ Determine values of upperBound and lowerBound options """ - read_options = self.ReadOptions.parse(options) - if not read_options.partition_column: - return read_options + if not options.partition_column: + return options missing_values: list[str] = [] - is_missed_lower_bound = read_options.lower_bound is None - is_missed_upper_bound = read_options.upper_bound is None + is_missed_lower_bound = options.lower_bound is None + is_missed_upper_bound = options.upper_bound is None if is_missed_lower_bound: missing_values.append("lowerBound") @@ -347,30 +362,30 @@ def _set_lower_upper_bound( missing_values.append("upperBound") if not missing_values: - return read_options + return options log.warning( "|Spark| Passed numPartitions = %d, but values %r are not set. " "They will be detected automatically based on values in partitionColumn %r", - read_options.num_partitions, + options.num_partitions, missing_values, - read_options.partition_column, + options.partition_column, ) min_partition_value, max_partition_value = self.get_min_max_bounds( source=table, - column=read_options.partition_column, + column=options.partition_column, where=where, hint=hint, options=options, ) # The sessionInitStatement parameter is removed because it only needs to be applied once. - return read_options.copy( + return options.copy( exclude={"session_init_statement"}, update={ - "lower_bound": read_options.lower_bound if not is_missed_lower_bound else min_partition_value, - "upper_bound": read_options.upper_bound if not is_missed_upper_bound else max_partition_value, + "lower_bound": options.lower_bound if not is_missed_lower_bound else min_partition_value, + "upper_bound": options.upper_bound if not is_missed_upper_bound else max_partition_value, }, ) diff --git a/onetl/connection/db_connection/jdbc_connection/dialect.py b/onetl/connection/db_connection/jdbc_connection/dialect.py index c794dcad4..ae49738ec 100644 --- a/onetl/connection/db_connection/jdbc_connection/dialect.py +++ b/onetl/connection/db_connection/jdbc_connection/dialect.py @@ -14,6 +14,8 @@ from __future__ import annotations +from abc import abstractmethod + from onetl.connection.db_connection.db_connection import DBDialect from onetl.connection.db_connection.dialect_mixins import ( SupportColumnsList, @@ -36,4 +38,12 @@ class JDBCDialect( # noqa: WPS215 SupportHWMColumnStr, DBDialect, ): - pass + @classmethod + @abstractmethod + def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: + ... + + @classmethod + @abstractmethod + def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: + ... 
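
With the partitioning expressions now owned by the dialect, ``read_source_as_df`` resolves the expression itself and hides it behind a generated, escaped alias which is dropped from the resulting DataFrame. A rough, illustrative sketch of that flow — the column name and partition count are hypothetical, and the Postgres dialect defined later in this patch is assumed:

    from onetl.connection import Postgres

    # partition_column stays untouched in the options; the SQL expression
    # is produced by the dialect only when the read is performed
    options = Postgres.ReadOptions(
        partitioning_mode="hash",
        partition_column="text_string",
        num_partitions=5,
    )

    # Inside read_source_as_df (simplified, values shortened for illustration):
    #   expression = PostgresDialect._get_partition_column_hash("text_string", 5)
    #   # -> ('x'||right(md5('text_string'), 16))::bit(32)::bigint % 5
    #   alias = "generated_" + secrets.token_hex(5)   # e.g. "generated_1a2b3c4d5e"
    #   # selected as:  <expression> AS "generated_1a2b3c4d5e"
    #   # the helper column is dropped from the DataFrame after reading
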
diff --git a/onetl/connection/db_connection/jdbc_connection/options.py b/onetl/connection/db_connection/jdbc_connection/options.py index 4a5608f22..c998055fe 100644 --- a/onetl/connection/db_connection/jdbc_connection/options.py +++ b/onetl/connection/db_connection/jdbc_connection/options.py @@ -15,10 +15,10 @@ from __future__ import annotations import warnings -from abc import abstractmethod from enum import Enum from typing import Optional +from deprecated import deprecated from pydantic import Field, PositiveInt, root_validator from onetl._internal import to_camel @@ -354,7 +354,7 @@ class Config: """ @root_validator - def partitioning_mode_actions(cls, values): + def _partitioning_mode_actions(cls, values): mode = values["partitioning_mode"] num_partitions = values.get("num_partitions") partition_column = values.get("partition_column") @@ -373,33 +373,10 @@ def partitioning_mode_actions(cls, values): if mode == JDBCPartitioningMode.RANGE: return values - if mode == JDBCPartitioningMode.HASH: - values["partition_column"] = cls._get_partition_column_hash( - partition_column=partition_column, - num_partitions=num_partitions, - ) - - if mode == JDBCPartitioningMode.MOD: - values["partition_column"] = cls._get_partition_column_mod( - partition_column=partition_column, - num_partitions=num_partitions, - ) - values["lower_bound"] = lower_bound if lower_bound is not None else 0 values["upper_bound"] = upper_bound if upper_bound is not None else num_partitions - return values - @classmethod - @abstractmethod - def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: - ... - - @classmethod - @abstractmethod - def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: - ... - class JDBCWriteOptions(JDBCOptions): """Spark JDBC writing options. @@ -521,3 +498,14 @@ def _mode_is_deprecated(cls, values): stacklevel=3, ) return values + + +@deprecated( + version="0.5.0", + reason="Please use 'ReadOptions' or 'WriteOptions' class instead. Will be removed in v1.0.0", + action="always", + category=UserWarning, +) +class JDBCLegacyOptions(JDBCReadOptions, JDBCWriteOptions): + class Config: + prohibited_options = JDBCOptions.Config.prohibited_options diff --git a/onetl/connection/db_connection/mssql/__init__.py b/onetl/connection/db_connection/mssql/__init__.py new file mode 100644 index 000000000..efd6e7072 --- /dev/null +++ b/onetl/connection/db_connection/mssql/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from onetl.connection.db_connection.mssql.connection import MSSQL, MSSQLExtra +from onetl.connection.db_connection.mssql.dialect import MSSQLDialect diff --git a/onetl/connection/db_connection/mssql.py b/onetl/connection/db_connection/mssql/connection.py similarity index 79% rename from onetl/connection/db_connection/mssql.py rename to onetl/connection/db_connection/mssql/connection.py index a54391e78..49fc825d9 100644 --- a/onetl/connection/db_connection/mssql.py +++ b/onetl/connection/db_connection/mssql/connection.py @@ -15,26 +15,24 @@ from __future__ import annotations import warnings -from datetime import date, datetime from typing import ClassVar -from deprecated import deprecated - from onetl._util.classproperty import classproperty from onetl._util.version import Version from onetl.connection.db_connection.jdbc_connection import JDBCConnection -from onetl.connection.db_connection.jdbc_connection.dialect import JDBCDialect -from onetl.connection.db_connection.jdbc_connection.options import ( - JDBCReadOptions, - JDBCWriteOptions, -) -from onetl.connection.db_connection.jdbc_mixin.options import JDBCOptions +from onetl.connection.db_connection.mssql.dialect import MSSQLDialect from onetl.hooks import slot, support_hooks from onetl.impl import GenericOptions # do not import PySpark here, as we allow user to use `MSSQL.get_packages()` for creating Spark session +class MSSQLExtra(GenericOptions): + class Config: + extra = "allow" + prohibited_options = frozenset(("databaseName",)) + + @support_hooks class MSSQL(JDBCConnection): """MSSQL JDBC connection. |support_hooks| @@ -170,14 +168,12 @@ class MSSQL(JDBCConnection): """ - class Extra(GenericOptions): - class Config: - extra = "allow" - prohibited_options = frozenset(("databaseName",)) - database: str port: int = 1433 - extra: Extra = Extra() + extra: MSSQLExtra = MSSQLExtra() + + Extra = MSSQLExtra + Dialect = MSSQLDialect DRIVER: ClassVar[str] = "com.microsoft.sqlserver.jdbc.SQLServerDriver" _CHECK_QUERY: ClassVar[str] = "SELECT 1 AS field" @@ -221,37 +217,6 @@ def package(cls) -> str: warnings.warn(msg, UserWarning, stacklevel=3) return "com.microsoft.sqlserver:mssql-jdbc:12.2.0.jre8" - class Dialect(JDBCDialect): - @classmethod - def _get_datetime_value_sql(cls, value: datetime) -> str: - result = value.isoformat() - return f"CAST('{result}' AS datetime2)" - - @classmethod - def _get_date_value_sql(cls, value: date) -> str: - result = value.isoformat() - return f"CAST('{result}' AS date)" - - class ReadOptions(JDBCReadOptions): - # https://docs.microsoft.com/ru-ru/sql/t-sql/functions/hashbytes-transact-sql?view=sql-server-ver16 - @classmethod - def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: - return f"CONVERT(BIGINT, HASHBYTES ( 'SHA' , {partition_column} )) % {num_partitions}" - - @classmethod - def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: - return f"{partition_column} % {num_partitions}" - - @deprecated( - version="0.5.0", - reason="Please use 'ReadOptions' or 'WriteOptions' class instead. 
Will be removed in v1.0.0", - action="always", - category=UserWarning, - ) - class Options(ReadOptions, JDBCWriteOptions): - class Config: - prohibited_options = JDBCOptions.Config.prohibited_options - @property def jdbc_url(self) -> str: prop = self.extra.dict(by_alias=True) diff --git a/onetl/connection/db_connection/mssql/dialect.py b/onetl/connection/db_connection/mssql/dialect.py new file mode 100644 index 000000000..95e4ff022 --- /dev/null +++ b/onetl/connection/db_connection/mssql/dialect.py @@ -0,0 +1,40 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from datetime import date, datetime + +from onetl.connection.db_connection.jdbc_connection import JDBCDialect + + +class MSSQLDialect(JDBCDialect): + @classmethod + def _get_datetime_value_sql(cls, value: datetime) -> str: + result = value.isoformat() + return f"CAST('{result}' AS datetime2)" + + @classmethod + def _get_date_value_sql(cls, value: date) -> str: + result = value.isoformat() + return f"CAST('{result}' AS date)" + + # https://docs.microsoft.com/ru-ru/sql/t-sql/functions/hashbytes-transact-sql?view=sql-server-ver16 + @classmethod + def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: + return f"CONVERT(BIGINT, HASHBYTES ( 'SHA' , {partition_column} )) % {num_partitions}" + + @classmethod + def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: + return f"{partition_column} % {num_partitions}" diff --git a/onetl/connection/db_connection/mysql/__init__.py b/onetl/connection/db_connection/mysql/__init__.py new file mode 100644 index 000000000..ba7337b23 --- /dev/null +++ b/onetl/connection/db_connection/mysql/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from onetl.connection.db_connection.mysql.connection import MySQL, MySQLExtra +from onetl.connection.db_connection.mysql.dialect import MySQLDialect diff --git a/onetl/connection/db_connection/mysql.py b/onetl/connection/db_connection/mysql/connection.py similarity index 72% rename from onetl/connection/db_connection/mysql.py rename to onetl/connection/db_connection/mysql/connection.py index 3988c04aa..868731eaf 100644 --- a/onetl/connection/db_connection/mysql.py +++ b/onetl/connection/db_connection/mysql/connection.py @@ -15,25 +15,25 @@ from __future__ import annotations import warnings -from datetime import date, datetime from typing import ClassVar, Optional -from deprecated import deprecated - from onetl._util.classproperty import classproperty from onetl.connection.db_connection.jdbc_connection import JDBCConnection -from onetl.connection.db_connection.jdbc_connection.dialect import JDBCDialect -from onetl.connection.db_connection.jdbc_connection.options import ( - JDBCReadOptions, - JDBCWriteOptions, -) -from onetl.connection.db_connection.jdbc_mixin.options import JDBCOptions +from onetl.connection.db_connection.mysql.dialect import MySQLDialect from onetl.hooks import slot, support_hooks from onetl.impl.generic_options import GenericOptions # do not import PySpark here, as we allow user to use `MySQL.get_packages()` for creating Spark session +class MySQLExtra(GenericOptions): + useUnicode: str = "yes" # noqa: N815 + characterEncoding: str = "UTF-8" # noqa: N815 + + class Config: + extra = "allow" + + @support_hooks class MySQL(JDBCConnection): """MySQL JDBC connection. |support_hooks| @@ -125,16 +125,12 @@ class MySQL(JDBCConnection): """ - class Extra(GenericOptions): - useUnicode: str = "yes" # noqa: N815 - characterEncoding: str = "UTF-8" # noqa: N815 - - class Config: - extra = "allow" - port: int = 3306 database: Optional[str] = None - extra: Extra = Extra() + extra: MySQLExtra = MySQLExtra() + + Extra = MySQLExtra + Dialect = MySQLDialect DRIVER: ClassVar[str] = "com.mysql.cj.jdbc.Driver" @@ -172,33 +168,3 @@ def jdbc_url(self): return f"jdbc:mysql://{self.host}:{self.port}/{self.database}?{parameters}" return f"jdbc:mysql://{self.host}:{self.port}?{parameters}" - - class Dialect(JDBCDialect): - @classmethod - def _get_datetime_value_sql(cls, value: datetime) -> str: - result = value.strftime("%Y-%m-%d %H:%M:%S.%f") - return f"STR_TO_DATE('{result}', '%Y-%m-%d %H:%i:%s.%f')" - - @classmethod - def _get_date_value_sql(cls, value: date) -> str: - result = value.strftime("%Y-%m-%d") - return f"STR_TO_DATE('{result}', '%Y-%m-%d')" - - class ReadOptions(JDBCReadOptions): - @classmethod - def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: - return f"MOD(CONV(CONV(RIGHT(MD5({partition_column}), 16),16, 2), 2, 10), {num_partitions})" - - @classmethod - def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: - return f"MOD({partition_column}, {num_partitions})" - - @deprecated( - version="0.5.0", - reason="Please use 'ReadOptions' or 'WriteOptions' class instead. 
Will be removed in v1.0.0", - action="always", - category=UserWarning, - ) - class Options(ReadOptions, JDBCWriteOptions): - class Config: - prohibited_options = JDBCOptions.Config.prohibited_options diff --git a/onetl/connection/db_connection/mysql/dialect.py b/onetl/connection/db_connection/mysql/dialect.py new file mode 100644 index 000000000..b3cd70a55 --- /dev/null +++ b/onetl/connection/db_connection/mysql/dialect.py @@ -0,0 +1,43 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from datetime import date, datetime + +from onetl.connection.db_connection.jdbc_connection import JDBCDialect + + +class MySQLDialect(JDBCDialect): + @classmethod + def _escape_column(cls, value: str) -> str: + return f"`{value}`" + + @classmethod + def _get_datetime_value_sql(cls, value: datetime) -> str: + result = value.strftime("%Y-%m-%d %H:%M:%S.%f") + return f"STR_TO_DATE('{result}', '%Y-%m-%d %H:%i:%s.%f')" + + @classmethod + def _get_date_value_sql(cls, value: date) -> str: + result = value.strftime("%Y-%m-%d") + return f"STR_TO_DATE('{result}', '%Y-%m-%d')" + + @classmethod + def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: + return f"MOD(CONV(CONV(RIGHT(MD5({partition_column}), 16),16, 2), 2, 10), {num_partitions})" + + @classmethod + def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: + return f"MOD({partition_column}, {num_partitions})" diff --git a/onetl/connection/db_connection/oracle/__init__.py b/onetl/connection/db_connection/oracle/__init__.py new file mode 100644 index 000000000..79b1b9278 --- /dev/null +++ b/onetl/connection/db_connection/oracle/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from onetl.connection.db_connection.oracle.connection import Oracle, OracleExtra +from onetl.connection.db_connection.oracle.dialect import OracleDialect diff --git a/onetl/connection/db_connection/oracle.py b/onetl/connection/db_connection/oracle/connection.py similarity index 89% rename from onetl/connection/db_connection/oracle.py rename to onetl/connection/db_connection/oracle/connection.py index eed81a52a..69d7e2c5b 100644 --- a/onetl/connection/db_connection/oracle.py +++ b/onetl/connection/db_connection/oracle/connection.py @@ -20,23 +20,19 @@ import warnings from collections import OrderedDict from dataclasses import dataclass -from datetime import date, datetime +from decimal import Decimal from textwrap import indent -from typing import TYPE_CHECKING, ClassVar, Optional +from typing import TYPE_CHECKING, Any, ClassVar, Optional -from deprecated import deprecated from pydantic import root_validator from onetl._internal import clear_statement from onetl._util.classproperty import classproperty from onetl._util.version import Version from onetl.connection.db_connection.jdbc_connection import JDBCConnection -from onetl.connection.db_connection.jdbc_connection.dialect import JDBCDialect -from onetl.connection.db_connection.jdbc_connection.options import ( - JDBCReadOptions, - JDBCWriteOptions, -) +from onetl.connection.db_connection.jdbc_connection.options import JDBCReadOptions from onetl.connection.db_connection.jdbc_mixin.options import JDBCOptions +from onetl.connection.db_connection.oracle.dialect import OracleDialect from onetl.hooks import slot, support_hooks from onetl.impl import GenericOptions from onetl.log import BASE_LOG_INDENT, log_lines @@ -189,6 +185,7 @@ class Oracle(JDBCConnection): extra: OracleExtra = OracleExtra() Extra = OracleExtra + Dialect = OracleDialect DRIVER: ClassVar[str] = "oracle.jdbc.driver.OracleDriver" _CHECK_QUERY: ClassVar[str] = "SELECT 1 FROM dual" @@ -232,49 +229,6 @@ def package(cls) -> str: warnings.warn(msg, UserWarning, stacklevel=3) return "com.oracle.database.jdbc:ojdbc8:23.2.0.0" - @root_validator - def only_one_of_sid_or_service_name(cls, values): - sid = values.get("sid") - service_name = values.get("service_name") - - if sid and service_name: - raise ValueError("Only one of parameters ``sid``, ``service_name`` can be set, got both") - - if not sid and not service_name: - raise ValueError("One of parameters ``sid``, ``service_name`` should be set, got none") - - return values - - class Dialect(JDBCDialect): - @classmethod - def _get_datetime_value_sql(cls, value: datetime) -> str: - result = value.strftime("%Y-%m-%d %H:%M:%S") - return f"TO_DATE('{result}', 'YYYY-MM-DD HH24:MI:SS')" - - @classmethod - def _get_date_value_sql(cls, value: date) -> str: - result = value.strftime("%Y-%m-%d") - return f"TO_DATE('{result}', 'YYYY-MM-DD')" - - class ReadOptions(JDBCReadOptions): - @classmethod - def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: - return f"ora_hash({partition_column}, {num_partitions})" - - @classmethod - def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: - return f"MOD({partition_column}, {num_partitions})" - - @deprecated( - version="0.5.0", - reason="Please use 'ReadOptions' or 'WriteOptions' class instead. 
Will be removed in v1.0.0", - action="always", - category=UserWarning, - ) - class Options(ReadOptions, JDBCWriteOptions): - class Config: - prohibited_options = JDBCOptions.Config.prohibited_options - @property def jdbc_url(self) -> str: extra = self.extra.dict(by_alias=True) @@ -292,6 +246,32 @@ def instance_url(self) -> str: return f"{super().instance_url}/{self.service_name}" + @slot + def get_min_max_bounds( + self, + source: str, + column: str, + expression: str | None = None, + hint: str | None = None, + where: str | None = None, + options: JDBCReadOptions | None = None, + ) -> tuple[Any, Any]: + min_value, max_value = super().get_min_max_bounds( + source=source, + column=column, + expression=expression, + hint=hint, + where=where, + options=options, + ) + # Oracle does not have Integer type, only Numeric, which is represented as Decimal in Python + # If number does not have decimal part, convert it to integer to use as lowerBound/upperBound + if isinstance(min_value, Decimal) and min_value == round(min_value): + min_value = int(min_value) + if isinstance(max_value, Decimal) and max_value == round(max_value): + max_value = int(max_value) + return min_value, max_value + @slot def execute( self, @@ -318,6 +298,19 @@ def execute( log.info("|%s| Execution succeeded, nothing returned", self.__class__.__name__) return df + @root_validator + def _only_one_of_sid_or_service_name(cls, values): + sid = values.get("sid") + service_name = values.get("service_name") + + if sid and service_name: + raise ValueError("Only one of parameters ``sid``, ``service_name`` can be set, got both") + + if not sid and not service_name: + raise ValueError("One of parameters ``sid``, ``service_name`` should be set, got none") + + return values + def _parse_create_statement(self, statement: str) -> tuple[str, str, str] | None: """ Parses ``CREATE ... type_name [schema.]object_name ...`` statement diff --git a/onetl/connection/db_connection/oracle/dialect.py b/onetl/connection/db_connection/oracle/dialect.py new file mode 100644 index 000000000..fb3fa715d --- /dev/null +++ b/onetl/connection/db_connection/oracle/dialect.py @@ -0,0 +1,39 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +from datetime import date, datetime + +from onetl.connection.db_connection.jdbc_connection import JDBCDialect + + +class OracleDialect(JDBCDialect): + @classmethod + def _get_datetime_value_sql(cls, value: datetime) -> str: + result = value.strftime("%Y-%m-%d %H:%M:%S") + return f"TO_DATE('{result}', 'YYYY-MM-DD HH24:MI:SS')" + + @classmethod + def _get_date_value_sql(cls, value: date) -> str: + result = value.strftime("%Y-%m-%d") + return f"TO_DATE('{result}', 'YYYY-MM-DD')" + + @classmethod + def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: + return f"ora_hash({partition_column}, {num_partitions})" + + @classmethod + def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: + return f"MOD({partition_column}, {num_partitions})" diff --git a/onetl/connection/db_connection/postgres/__init__.py b/onetl/connection/db_connection/postgres/__init__.py new file mode 100644 index 000000000..42bad7d54 --- /dev/null +++ b/onetl/connection/db_connection/postgres/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from onetl.connection.db_connection.postgres.connection import Postgres, PostgresExtra +from onetl.connection.db_connection.postgres.dialect import PostgresDialect diff --git a/onetl/connection/db_connection/postgres.py b/onetl/connection/db_connection/postgres/connection.py similarity index 73% rename from onetl/connection/db_connection/postgres.py rename to onetl/connection/db_connection/postgres/connection.py index dfad6d77c..eb07a68f6 100644 --- a/onetl/connection/db_connection/postgres.py +++ b/onetl/connection/db_connection/postgres/connection.py @@ -15,30 +15,12 @@ from __future__ import annotations import warnings -from datetime import date, datetime from typing import ClassVar -from deprecated import deprecated - from onetl._util.classproperty import classproperty -from onetl.connection.db_connection.db_connection.dialect import DBDialect -from onetl.connection.db_connection.dialect_mixins import ( - SupportColumnsList, - SupportDfSchemaNone, - SupportHintNone, - SupportHWMColumnStr, - SupportHWMExpressionStr, - SupportWhereStr, -) -from onetl.connection.db_connection.dialect_mixins.support_table_with_dbschema import ( - SupportTableWithDBSchema, -) from onetl.connection.db_connection.jdbc_connection import JDBCConnection -from onetl.connection.db_connection.jdbc_connection.options import ( - JDBCReadOptions, - JDBCWriteOptions, -) from onetl.connection.db_connection.jdbc_mixin.options import JDBCOptions +from onetl.connection.db_connection.postgres.dialect import PostgresDialect from onetl.hooks import slot, support_hooks from onetl.impl import GenericOptions @@ -146,6 +128,7 @@ class Postgres(JDBCConnection): extra: PostgresExtra = PostgresExtra() Extra = PostgresExtra + Dialect = PostgresDialect DRIVER: ClassVar[str] = "org.postgresql.Driver" @@ -174,46 +157,6 @@ def package(cls) -> str: warnings.warn(msg, 
UserWarning, stacklevel=3) return "org.postgresql:postgresql:42.6.0" - class Dialect( # noqa: WPS215 - SupportTableWithDBSchema, - SupportColumnsList, - SupportDfSchemaNone, - SupportWhereStr, - SupportHWMExpressionStr, - SupportHWMColumnStr, - SupportHintNone, - DBDialect, - ): - @classmethod - def _get_datetime_value_sql(cls, value: datetime) -> str: - result = value.isoformat() - return f"'{result}'::timestamp" - - @classmethod - def _get_date_value_sql(cls, value: date) -> str: - result = value.isoformat() - return f"'{result}'::date" - - class ReadOptions(JDBCReadOptions): - # https://stackoverflow.com/a/9812029 - @classmethod - def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: - return f"('x'||right(md5('{partition_column}'), 16))::bit(32)::bigint % {num_partitions}" - - @classmethod - def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: - return f"{partition_column} % {num_partitions}" - - @deprecated( - version="0.5.0", - reason="Please use 'ReadOptions' or 'WriteOptions' class instead. Will be removed in v1.0.0", - action="always", - category=UserWarning, - ) - class Options(ReadOptions, JDBCWriteOptions): - class Config: - prohibited_options = JDBCOptions.Config.prohibited_options - @property def jdbc_url(self) -> str: extra = self.extra.dict(by_alias=True) diff --git a/onetl/connection/db_connection/postgres/dialect.py b/onetl/connection/db_connection/postgres/dialect.py new file mode 100644 index 000000000..05a44471e --- /dev/null +++ b/onetl/connection/db_connection/postgres/dialect.py @@ -0,0 +1,41 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +from datetime import date, datetime + +from onetl.connection.db_connection.dialect_mixins import SupportHintNone +from onetl.connection.db_connection.jdbc_connection import JDBCDialect + + +class PostgresDialect(SupportHintNone, JDBCDialect): + @classmethod + def _get_datetime_value_sql(cls, value: datetime) -> str: + result = value.isoformat() + return f"'{result}'::timestamp" + + @classmethod + def _get_date_value_sql(cls, value: date) -> str: + result = value.isoformat() + return f"'{result}'::date" + + # https://stackoverflow.com/a/9812029 + @classmethod + def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: + return f"('x'||right(md5('{partition_column}'), 16))::bit(32)::bigint % {num_partitions}" + + @classmethod + def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: + return f"{partition_column} % {num_partitions}" diff --git a/onetl/connection/db_connection/teradata/__init__.py b/onetl/connection/db_connection/teradata/__init__.py new file mode 100644 index 000000000..9a70a22f8 --- /dev/null +++ b/onetl/connection/db_connection/teradata/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from onetl.connection.db_connection.teradata.connection import Teradata, TeradataExtra +from onetl.connection.db_connection.teradata.dialect import TeradataDialect diff --git a/onetl/connection/db_connection/teradata.py b/onetl/connection/db_connection/teradata/connection.py similarity index 74% rename from onetl/connection/db_connection/teradata.py rename to onetl/connection/db_connection/teradata/connection.py index c304788a4..7e730f9eb 100644 --- a/onetl/connection/db_connection/teradata.py +++ b/onetl/connection/db_connection/teradata/connection.py @@ -15,25 +15,29 @@ from __future__ import annotations import warnings -from datetime import date, datetime from typing import ClassVar, Optional -from deprecated import deprecated - from onetl._util.classproperty import classproperty from onetl.connection.db_connection.jdbc_connection import JDBCConnection -from onetl.connection.db_connection.jdbc_connection.dialect import JDBCDialect -from onetl.connection.db_connection.jdbc_connection.options import ( - JDBCReadOptions, - JDBCWriteOptions, -) -from onetl.connection.db_connection.jdbc_mixin.options import JDBCOptions +from onetl.connection.db_connection.teradata.dialect import TeradataDialect from onetl.hooks import slot from onetl.impl import GenericOptions # do not import PySpark here, as we allow user to use `Teradata.get_packages()` for creating Spark session +class TeradataExtra(GenericOptions): + CHARSET: str = "UTF8" + COLUMN_NAME: str = "ON" + FLATTEN: str = "ON" + MAYBENULL: str = "ON" + STRICT_NAMES: str = "OFF" + + class Config: + extra = "allow" + prohibited_options = frozenset(("DATABASE", "DBS_PORT")) + + class Teradata(JDBCConnection): """Teradata JDBC connection. 
|support_hooks| @@ -140,20 +144,12 @@ class Teradata(JDBCConnection): """ - class Extra(GenericOptions): - CHARSET: str = "UTF8" - COLUMN_NAME: str = "ON" - FLATTEN: str = "ON" - MAYBENULL: str = "ON" - STRICT_NAMES: str = "OFF" - - class Config: - extra = "allow" - prohibited_options = frozenset(("DATABASE", "DBS_PORT")) - port: int = 1025 database: Optional[str] = None - extra: Extra = Extra() + extra: TeradataExtra = TeradataExtra() + + Extra = TeradataExtra + Dialect = TeradataDialect DRIVER: ClassVar[str] = "com.teradata.jdbc.TeraDriver" _CHECK_QUERY: ClassVar[str] = "SELECT 1 AS check_result" @@ -194,34 +190,3 @@ def jdbc_url(self) -> str: conn = ",".join(f"{k}={v}" for k, v in sorted(prop.items())) return f"jdbc:teradata://{self.host}/{conn}" - - class Dialect(JDBCDialect): - @classmethod - def _get_datetime_value_sql(cls, value: datetime) -> str: - result = value.isoformat() - return f"CAST('{result}' AS TIMESTAMP)" - - @classmethod - def _get_date_value_sql(cls, value: date) -> str: - result = value.isoformat() - return f"CAST('{result}' AS DATE)" - - class ReadOptions(JDBCReadOptions): - # https://docs.teradata.com/r/w4DJnG9u9GdDlXzsTXyItA/lkaegQT4wAakj~K_ZmW1Dg - @classmethod - def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: - return f"HASHAMP(HASHBUCKET(HASHROW({partition_column}))) mod {num_partitions}" - - @classmethod - def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: - return f"{partition_column} mod {num_partitions}" - - @deprecated( - version="0.5.0", - reason="Please use 'ReadOptions' or 'WriteOptions' class instead. Will be removed in v1.0.0", - action="always", - category=UserWarning, - ) - class Options(ReadOptions, JDBCWriteOptions): - class Config: - prohibited_options = JDBCOptions.Config.prohibited_options diff --git a/onetl/connection/db_connection/teradata/dialect.py b/onetl/connection/db_connection/teradata/dialect.py new file mode 100644 index 000000000..c449debc6 --- /dev/null +++ b/onetl/connection/db_connection/teradata/dialect.py @@ -0,0 +1,40 @@ +# Copyright 2023 MTS (Mobile Telesystems) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +from datetime import date, datetime + +from onetl.connection.db_connection.jdbc_connection import JDBCDialect + + +class TeradataDialect(JDBCDialect): + @classmethod + def _get_datetime_value_sql(cls, value: datetime) -> str: + result = value.isoformat() + return f"CAST('{result}' AS TIMESTAMP)" + + @classmethod + def _get_date_value_sql(cls, value: date) -> str: + result = value.isoformat() + return f"CAST('{result}' AS DATE)" + + # https://docs.teradata.com/r/w4DJnG9u9GdDlXzsTXyItA/lkaegQT4wAakj~K_ZmW1Dg + @classmethod + def _get_partition_column_hash(cls, partition_column: str, num_partitions: int) -> str: + return f"HASHAMP(HASHBUCKET(HASHROW({partition_column}))) mod {num_partitions}" + + @classmethod + def _get_partition_column_mod(cls, partition_column: str, num_partitions: int) -> str: + return f"{partition_column} mod {num_partitions}" diff --git a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_clickhouse_reader_integration.py b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_clickhouse_reader_integration.py index 303f43d14..643187e22 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_clickhouse_reader_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_clickhouse_reader_integration.py @@ -1,7 +1,6 @@ import pytest from onetl.connection import Clickhouse -from onetl.connection.db_connection.jdbc_connection import JDBCPartitioningMode from onetl.db import DBReader pytestmark = pytest.mark.clickhouse @@ -30,7 +29,15 @@ def test_clickhouse_reader_snapshot(spark, processing, load_table_data): ) -def test_clickhouse_reader_snapshot_partitioning_mode_mod(spark, processing, load_table_data): +@pytest.mark.parametrize( + "mode, column", + [ + ("range", "id_int"), + ("hash", "text_string"), + ("mod", "id_int"), + ], +) +def test_clickhouse_reader_snapshot_partitioning_mode(mode, column, spark, processing, load_table_data): clickhouse = Clickhouse( host=processing.host, port=processing.port, @@ -44,8 +51,8 @@ def test_clickhouse_reader_snapshot_partitioning_mode_mod(spark, processing, loa connection=clickhouse, source=load_table_data.full_name, options=Clickhouse.ReadOptions( - partitioning_mode=JDBCPartitioningMode.MOD, - partition_column="id_int", + partitioning_mode=mode, + partition_column=column, num_partitions=5, ), ) @@ -59,35 +66,7 @@ def test_clickhouse_reader_snapshot_partitioning_mode_mod(spark, processing, loa order_by="id_int", ) - -def test_clickhouse_reader_snapshot_partitioning_mode_hash(spark, processing, load_table_data): - clickhouse = Clickhouse( - host=processing.host, - port=processing.port, - user=processing.user, - password=processing.password, - database=processing.database, - spark=spark, - ) - - reader = DBReader( - connection=clickhouse, - source=load_table_data.full_name, - options=Clickhouse.ReadOptions( - partitioning_mode=JDBCPartitioningMode.HASH, - partition_column="text_string", - num_partitions=5, - ), - ) - - table_df = reader.run() - - processing.assert_equal_df( - schema=load_table_data.schema, - table=load_table_data.table, - df=table_df, - order_by="id_int", - ) + assert table_df.rdd.getNumPartitions() == 5 def test_clickhouse_reader_snapshot_without_set_database(spark, processing, load_table_data): diff --git a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mssql_reader_integration.py 
b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mssql_reader_integration.py index b707ffb8d..12ace609d 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mssql_reader_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mssql_reader_integration.py @@ -1,7 +1,6 @@ import pytest from onetl.connection import MSSQL -from onetl.connection.db_connection.jdbc_connection import JDBCPartitioningMode from onetl.db import DBReader pytestmark = pytest.mark.mssql @@ -31,7 +30,15 @@ def test_mssql_reader_snapshot(spark, processing, load_table_data): ) -def test_mssql_reader_snapshot_partitioning_mode_mod(spark, processing, load_table_data): +@pytest.mark.parametrize( + "mode, column", + [ + ("range", "id_int"), + ("hash", "text_string"), + ("mod", "id_int"), + ], +) +def test_mssql_reader_snapshot_partitioning_mode(mode, column, spark, processing, load_table_data): mssql = MSSQL( host=processing.host, port=processing.port, @@ -46,8 +53,8 @@ def test_mssql_reader_snapshot_partitioning_mode_mod(spark, processing, load_tab connection=mssql, source=load_table_data.full_name, options=MSSQL.ReadOptions( - partitioning_mode=JDBCPartitioningMode.MOD, - partition_column="id_int", + partitioning_mode=mode, + partition_column=column, num_partitions=5, ), ) @@ -61,36 +68,7 @@ def test_mssql_reader_snapshot_partitioning_mode_mod(spark, processing, load_tab order_by="id_int", ) - -def test_mssql_reader_snapshot_partitioning_mode_hash(spark, processing, load_table_data): - mssql = MSSQL( - host=processing.host, - port=processing.port, - user=processing.user, - password=processing.password, - database=processing.database, - spark=spark, - extra={"trustServerCertificate": "true"}, - ) - - reader = DBReader( - connection=mssql, - source=load_table_data.full_name, - options=MSSQL.ReadOptions( - partitioning_mode=JDBCPartitioningMode.HASH, - partition_column="text_string", - num_partitions=5, - ), - ) - - table_df = reader.run() - - processing.assert_equal_df( - schema=load_table_data.schema, - table=load_table_data.table, - df=table_df, - order_by="id_int", - ) + assert table_df.rdd.getNumPartitions() == 5 def test_mssql_reader_snapshot_with_columns(spark, processing, load_table_data): diff --git a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mysql_reader_integration.py b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mysql_reader_integration.py index d633ebf13..e0865866a 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mysql_reader_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_mysql_reader_integration.py @@ -1,7 +1,6 @@ import pytest from onetl.connection import MySQL -from onetl.connection.db_connection.jdbc_connection import JDBCPartitioningMode from onetl.db import DBReader pytestmark = pytest.mark.mysql @@ -31,7 +30,15 @@ def test_mysql_reader_snapshot(spark, processing, load_table_data): ) -def test_mysql_reader_snapshot_partitioning_mode_mod(spark, processing, load_table_data): +@pytest.mark.parametrize( + "mode, column", + [ + ("range", "id_int"), + ("hash", "text_string"), + ("mod", "id_int"), + ], +) +def test_mysql_reader_snapshot_partitioning_mode(mode, column, spark, processing, load_table_data): mysql = MySQL( host=processing.host, port=processing.port, @@ -45,8 +52,8 @@ def 
test_mysql_reader_snapshot_partitioning_mode_mod(spark, processing, load_tab connection=mysql, source=load_table_data.full_name, options=MySQL.ReadOptions( - partitioning_mode=JDBCPartitioningMode.MOD, - partition_column="id_int", + partitioning_mode=mode, + partition_column=column, num_partitions=5, ), ) @@ -60,35 +67,7 @@ def test_mysql_reader_snapshot_partitioning_mode_mod(spark, processing, load_tab order_by="id_int", ) - -def test_mysql_reader_snapshot_partitioning_mode_hash(spark, processing, load_table_data): - mysql = MySQL( - host=processing.host, - port=processing.port, - user=processing.user, - password=processing.password, - database=processing.database, - spark=spark, - ) - - reader = DBReader( - connection=mysql, - source=load_table_data.full_name, - options=MySQL.ReadOptions( - partitioning_mode=JDBCPartitioningMode.HASH, - partition_column="text_string", - num_partitions=5, - ), - ) - - table_df = reader.run() - - processing.assert_equal_df( - schema=load_table_data.schema, - table=load_table_data.table, - df=table_df, - order_by="id_int", - ) + assert table_df.rdd.getNumPartitions() == 5 def test_mysql_reader_snapshot_with_not_set_database(spark, processing, load_table_data): diff --git a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_oracle_reader_integration.py b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_oracle_reader_integration.py index 7b7aa5ae7..b379923ef 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_oracle_reader_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_oracle_reader_integration.py @@ -1,7 +1,6 @@ import pytest from onetl.connection import Oracle -from onetl.connection.db_connection.jdbc_connection import JDBCPartitioningMode from onetl.db import DBReader pytestmark = pytest.mark.oracle @@ -31,7 +30,15 @@ def test_oracle_reader_snapshot(spark, processing, load_table_data): ) -def test_oracle_reader_snapshot_partitioning_mode_mod(spark, processing, load_table_data): +@pytest.mark.parametrize( + "mode, column", + [ + ("range", "id_int"), + ("hash", "text_string"), + ("mod", "id_int"), + ], +) +def test_oracle_reader_snapshot_partitioning_mode(mode, column, spark, processing, load_table_data): oracle = Oracle( host=processing.host, port=processing.port, @@ -46,8 +53,8 @@ def test_oracle_reader_snapshot_partitioning_mode_mod(spark, processing, load_ta connection=oracle, source=load_table_data.full_name, options=Oracle.ReadOptions( - partitioning_mode=JDBCPartitioningMode.MOD, - partition_column="id_int", + partitioning_mode=mode, + partition_column=column, num_partitions=5, ), ) @@ -61,36 +68,7 @@ def test_oracle_reader_snapshot_partitioning_mode_mod(spark, processing, load_ta order_by="id_int", ) - -def test_oracle_reader_snapshot_partitioning_mode_hash(spark, processing, load_table_data): - oracle = Oracle( - host=processing.host, - port=processing.port, - user=processing.user, - password=processing.password, - spark=spark, - sid=processing.sid, - service_name=processing.service_name, - ) - - reader = DBReader( - connection=oracle, - source=load_table_data.full_name, - options=Oracle.ReadOptions( - partitioning_mode=JDBCPartitioningMode.HASH, - partition_column="text_string", - num_partitions=5, - ), - ) - - table_df = reader.run() - - processing.assert_equal_df( - schema=load_table_data.schema, - table=load_table_data.table, - df=table_df, - order_by="id_int", - ) + assert 
table_df.rdd.getNumPartitions() == 5 def test_oracle_reader_snapshot_with_columns(spark, processing, load_table_data): diff --git a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_postgres_reader_integration.py b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_postgres_reader_integration.py index c40a51ef7..617eba903 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_postgres_reader_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_postgres_reader_integration.py @@ -1,7 +1,6 @@ import pytest from onetl.connection import Postgres -from onetl.connection.db_connection.jdbc_connection import JDBCPartitioningMode from onetl.db import DBReader pytestmark = pytest.mark.postgres @@ -30,7 +29,15 @@ def test_postgres_reader_snapshot(spark, processing, load_table_data): ) -def test_postgres_reader_snapshot_partitioning_mode_mod(spark, processing, load_table_data): +@pytest.mark.parametrize( + "mode, column", + [ + ("range", "id_int"), + ("hash", "text_string"), + ("mod", "id_int"), + ], +) +def test_postgres_reader_snapshot_partitioning_mode(mode, column, spark, processing, load_table_data): postgres = Postgres( host=processing.host, port=processing.port, @@ -44,8 +51,8 @@ def test_postgres_reader_snapshot_partitioning_mode_mod(spark, processing, load_ connection=postgres, source=load_table_data.full_name, options=Postgres.ReadOptions( - partitioning_mode=JDBCPartitioningMode.MOD, - partition_column="id_int", + partitioning_mode=mode, + partition_column=column, num_partitions=5, ), ) @@ -59,35 +66,7 @@ def test_postgres_reader_snapshot_partitioning_mode_mod(spark, processing, load_ order_by="id_int", ) - -def test_postgres_reader_snapshot_partitioning_mode_hash(spark, processing, load_table_data): - postgres = Postgres( - host=processing.host, - port=processing.port, - user=processing.user, - password=processing.password, - database=processing.database, - spark=spark, - ) - - reader = DBReader( - connection=postgres, - source=load_table_data.full_name, - options=Postgres.ReadOptions( - partitioning_mode=JDBCPartitioningMode.HASH, - partition_column="text_string", - num_partitions=5, - ), - ) - - table_df = reader.run() - - processing.assert_equal_df( - schema=load_table_data.schema, - table=load_table_data.table, - df=table_df, - order_by="id_int", - ) + assert table_df.rdd.getNumPartitions() == 5 def test_postgres_reader_snapshot_with_columns(spark, processing, load_table_data): diff --git a/tests/tests_unit/tests_db_connection_unit/test_db_options_unit.py b/tests/tests_unit/tests_db_connection_unit/test_db_options_unit.py index 2d0dd60b4..8e51d0a89 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_db_options_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_db_options_unit.py @@ -38,9 +38,9 @@ def test_db_options_connection_parameters_cannot_be_passed(options_class, arg, v [ (Hive.WriteOptions, "HiveWriteOptions", {"if_exists": "replace_overlapping_partitions"}), (Hive.Options, "HiveLegacyOptions", {"if_exists": "replace_overlapping_partitions"}), - (Postgres.ReadOptions, "ReadOptions", {"fetchsize": 10, "keytab": "a/b/c"}), + (Postgres.ReadOptions, "JDBCReadOptions", {"fetchsize": 10, "keytab": "a/b/c"}), (Postgres.WriteOptions, "JDBCWriteOptions", {"if_exists": "replace_entire_table", "keytab": "a/b/c"}), - (Postgres.Options, "Options", {"if_exists": "replace_entire_table", "keytab": "a/b/c"}), + (Postgres.Options, 
"JDBCLegacyOptions", {"if_exists": "replace_entire_table", "keytab": "a/b/c"}), (Greenplum.ReadOptions, "GreenplumReadOptions", {"partitions": 10}), (Greenplum.WriteOptions, "GreenplumWriteOptions", {"if_exists": "replace_entire_table"}), ], diff --git a/tests/tests_unit/tests_db_connection_unit/test_jdbc_options_unit.py b/tests/tests_unit/tests_db_connection_unit/test_jdbc_options_unit.py index 985f43aae..ae81402cc 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_jdbc_options_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_jdbc_options_unit.py @@ -45,7 +45,7 @@ def test_jdbc_options_default(): ], ) def test_jdbc_read_write_options_populated_by_connection_class(arg, value): - error_msg = rf"Options \['{arg}'\] are not allowed to use in a ReadOptions" + error_msg = rf"Options \['{arg}'\] are not allowed to use in a JDBCReadOptions" with pytest.raises(ValueError, match=error_msg): Postgres.ReadOptions.parse({arg: value}) @@ -73,7 +73,7 @@ def test_jdbc_read_write_options_populated_by_connection_class(arg, value): ], ) def test_jdbc_write_options_cannot_be_used_in_read_options(arg, value): - error_msg = rf"Options \['{arg}'\] are not allowed to use in a ReadOptions" + error_msg = rf"Options \['{arg}'\] are not allowed to use in a JDBCReadOptions" with pytest.raises(ValueError, match=error_msg): Postgres.ReadOptions.parse({arg: value}) diff --git a/tests/tests_unit/tests_db_connection_unit/test_mssql_unit.py b/tests/tests_unit/tests_db_connection_unit/test_mssql_unit.py index 6846b94c6..e6cd8eb89 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_mssql_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_mssql_unit.py @@ -110,7 +110,7 @@ def test_mssql_with_extra(spark_mock): def test_mssql_with_extra_prohibited(spark_mock): - with pytest.raises(ValueError, match=r"Options \['databaseName'\] are not allowed to use in a Extra"): + with pytest.raises(ValueError, match=r"Options \['databaseName'\] are not allowed to use in a MSSQLExtra"): MSSQL( host="some_host", user="user", diff --git a/tests/tests_unit/tests_db_connection_unit/test_teradata_unit.py b/tests/tests_unit/tests_db_connection_unit/test_teradata_unit.py index 92d5b7f2c..1daf14dc4 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_teradata_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_teradata_unit.py @@ -115,7 +115,10 @@ def test_teradata_with_extra(spark_mock): def test_teradata_with_extra_prohibited(spark_mock): - with pytest.raises(ValueError, match=r"Options \['DATABASE', 'DBS_PORT'\] are not allowed to use in a Extra"): + with pytest.raises( + ValueError, + match=r"Options \['DATABASE', 'DBS_PORT'\] are not allowed to use in a TeradataExtra", + ): Teradata( host="some_host", user="user", From 5f2c4fb11710d08904b74127dca0b3129e87e6cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Wed, 30 Aug 2023 08:37:43 +0000 Subject: [PATCH 14/30] [DOP-8140] Update documentation --- docs/connection/db_connection/greenplum/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/connection/db_connection/greenplum/index.rst b/docs/connection/db_connection/greenplum/index.rst index a1d836766..b4ff40331 100644 --- a/docs/connection/db_connection/greenplum/index.rst +++ b/docs/connection/db_connection/greenplum/index.rst @@ -12,7 +12,7 @@ Greenplum .. 
toctree:: :maxdepth: 1 - :caption: Options + :caption: Operations read write From 58f28b2ea45e9b9e916001f365075294788be5ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Wed, 30 Aug 2023 08:40:27 +0000 Subject: [PATCH 15/30] [DOP-8140] Update documentation --- onetl/connection/file_df_connection/spark_s3/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onetl/connection/file_df_connection/spark_s3/connection.py b/onetl/connection/file_df_connection/spark_s3/connection.py index 44a2955ef..0fd72a0ca 100644 --- a/onetl/connection/file_df_connection/spark_s3/connection.py +++ b/onetl/connection/file_df_connection/spark_s3/connection.py @@ -66,7 +66,7 @@ class SparkS3(SparkFileDFConnection): .. warning:: - See :spark-s3-troubleshooting` guide. + See :ref:`spark-s3-troubleshooting` guide. .. warning:: From 1400e00da4b195ebcc2dcdd5573dad7153fa222a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Wed, 30 Aug 2023 11:39:58 +0000 Subject: [PATCH 16/30] Add link to Spark issue to fix_pyspark_df --- tests/util/to_pandas.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/util/to_pandas.py b/tests/util/to_pandas.py index 3b3e3d9b2..142a024d6 100644 --- a/tests/util/to_pandas.py +++ b/tests/util/to_pandas.py @@ -23,6 +23,8 @@ def fix_pyspark_df(df: SparkDataFrame) -> SparkDataFrame: TypeError: Casting to unit-less dtype 'datetime64' is not supported. Pass e.g. 'datetime64[ns]' instead. This method converts dates and timestamps to strings, to convert them back to original type later. 
+ + TODO: remove after https://issues.apache.org/jira/browse/SPARK-43194 """ from pyspark.sql.functions import date_format from pyspark.sql.types import DateType, TimestampType From 0d5e3cea272cfd3d5f315c69b81a59cb7c68e143 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Wed, 30 Aug 2023 12:45:59 +0000 Subject: [PATCH 17/30] Update CodeCov config --- codecov.yml | 83 +---------------------------------------------------- 1 file changed, 1 insertion(+), 82 deletions(-) diff --git a/codecov.yml b/codecov.yml index 6087b0edf..7291c7aad 100644 --- a/codecov.yml +++ b/codecov.yml @@ -2,86 +2,5 @@ coverage: status: project: default: - target: 93% + target: 94% threshold: 1% - -flags: - core: - paths: - - onetl/base/base_connection.py - - onetl/hwm/** - - onetl/impl/*model*.py - - onetl/impl/*options*.py - - onetl/strategy/** - - onetl/hooks/** - - onetl/plugins/** - - onetl/exception.py - - onetl/log.py - - onetl/_internal.py - db: - paths: - - onetl/base/*db*.py - - onetl/base/*df*.py - - onetl/core/db*/** - - onetl/db_connection/db_connection.py - - onetl/db_connection/dialect_mixins/** - - onetl/db_connection/jdbc*.py - clickhouse: - paths: - - onetl/db_connection/clickhouse.py - greenplum: - paths: - - onetl/db_connection/greenplum.py - carryforward: true # if someone creates pull request from a fork, do not fail if Greenplum coverage is 0% - hive: - paths: - - onetl/db_connection/hive.py - mongodb: - paths: - - onetl/db_connection/mongodb.py - mssql: - paths: - - onetl/db_connection/mongodb.py - mysql: - paths: - - onetl/db_connection/mongodb.py - oracle: - paths: - - onetl/db_connection/oracle.py - postgres: - paths: - - onetl/db_connection/postgres.py - teradata: - paths: - - onetl/db_connection/teradata.py - file: - paths: - - onetl/base/*file*.py - - onetl/base/*path*.py - - onetl/base/contains_exception.py - - onetl/core/file*/** - - onetl/core/kerberos_helpers.py - - onetl/file_connection/file_connection.py - - onetl/impl/*path*.py - - onetl/impl/*file*.py - - onetl/impl/*directory*.py - ftp: - paths: - - onetl/file_connection/ftp.py - ftps: - paths: - - onetl/file_connection/ftps.py - hdfs: - paths: - - onetl/file_connection/hdfs.py - s3: - paths: - - onetl/file_connection/s3.py - sftp: - paths: - - onetl/file_connection/sftp.py - webdav: - paths: - - onetl/file_connection/webdav.py - nightly: - joined: false From fde6b3055fa32ba5249a888df8ec9fbf4d644c67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Wed, 30 Aug 2023 10:41:00 +0000 Subject: [PATCH 18/30] [DOP-8414] Improve Kafka documentation --- README.rst | 2 +- docs/changelog/next_release/136.bugfix.rst | 1 + .../next_release/136.improvement.rst | 1 + docs/connection/db_connection/kafka/index.rst | 11 ++- docs/connection/db_connection/kafka/read.rst | 72 +++++++++++++++++++ .../db_connection/kafka/read_options.rst | 11 --- docs/connection/db_connection/kafka/write.rst | 64 +++++++++++++++++ .../db_connection/kafka/write_options.rst | 11 --- .../file_df/file_df_reader/file_df_reader.rst | 4 +- docs/file_df/file_df_reader/index.rst | 6 +- .../file_df/file_df_writer/file_df_writer.rst | 4 +- docs/file_df/file_df_writer/index.rst | 6 +- .../db_connection/kafka/connection.py | 15 ++-- 
.../db_connection/kafka/kafka_ssl_protocol.py | 28 ++++---- .../connection/db_connection/kafka/options.py | 13 +--- .../test_kafka_reader_integration.py | 9 +-- 16 files changed, 183 insertions(+), 75 deletions(-) create mode 100644 docs/changelog/next_release/136.bugfix.rst create mode 100644 docs/changelog/next_release/136.improvement.rst create mode 100644 docs/connection/db_connection/kafka/read.rst delete mode 100644 docs/connection/db_connection/kafka/read_options.rst create mode 100644 docs/connection/db_connection/kafka/write.rst delete mode 100644 docs/connection/db_connection/kafka/write_options.rst diff --git a/README.rst b/README.rst index a98368684..1e39a0734 100644 --- a/README.rst +++ b/README.rst @@ -577,7 +577,7 @@ Read files directly from S3 path, convert them to dataframe, transform it and th ], ) - # Initialize file reader + # Initialize file df reader reader = FileDFReader( connection=spark_s3, source_path="/remote/tests/Report", # path on S3 there *.csv files are located diff --git a/docs/changelog/next_release/136.bugfix.rst b/docs/changelog/next_release/136.bugfix.rst new file mode 100644 index 000000000..04d880e5e --- /dev/null +++ b/docs/changelog/next_release/136.bugfix.rst @@ -0,0 +1 @@ +Update Kafka documentation with SSLProtocol usage. diff --git a/docs/changelog/next_release/136.improvement.rst b/docs/changelog/next_release/136.improvement.rst new file mode 100644 index 000000000..54489df60 --- /dev/null +++ b/docs/changelog/next_release/136.improvement.rst @@ -0,0 +1 @@ +Add notes about reading and writing to Kafka to documentation diff --git a/docs/connection/db_connection/kafka/index.rst b/docs/connection/db_connection/kafka/index.rst index 7edec8d10..a02aaecdc 100644 --- a/docs/connection/db_connection/kafka/index.rst +++ b/docs/connection/db_connection/kafka/index.rst @@ -5,11 +5,9 @@ Kafka .. toctree:: :maxdepth: 1 - :caption: Connection & options + :caption: Connection connection - read_options - write_options .. toctree:: :maxdepth: 1 @@ -26,6 +24,13 @@ Kafka kerberos_auth scram_auth +.. toctree:: + :maxdepth: 1 + :caption: Operations + + read + write + .. toctree:: :maxdepth: 1 :caption: For developers diff --git a/docs/connection/db_connection/kafka/read.rst b/docs/connection/db_connection/kafka/read.rst new file mode 100644 index 000000000..a19c5e57b --- /dev/null +++ b/docs/connection/db_connection/kafka/read.rst @@ -0,0 +1,72 @@ +.. _kafka-read: + +Reading from Kafka +================== + +For reading data from Kafka, use :obj:`DBReader ` with specific options (see below). + +.. warning:: + + Currently, Kafka does not support :ref:`strategy`. You can only read the whole topic. + +.. note:: + + Unlike other connection classes, Kafka always return dataframe with fixed schema + (see `documentation `_): + + .. dropdown:: DataFrame Schema + + .. 
code:: python + + from pyspark.sql.types import ( + ArrayType, + BinaryType, + IntegerType, + LongType, + StringType, + StructField, + StructType, + TimestampType, + ) + + schema = StructType( + [ + StructField("value", BinaryType(), nullable=True), + StructField("key", BinaryType(), nullable=True), + StructField("topic", StringType(), nullable=False), + StructField("partition", IntegerType(), nullable=False), + StructField("offset", LongType(), nullable=False), + StructField("timestamp", TimestampType(), nullable=False), + StructField("timestampType", IntegerType(), nullable=False), + # this field is returned only with ``include_headers=True`` + StructField( + "headers", + ArrayType( + StructType( + [ + StructField("key", StringType(), nullable=False), + StructField("value", BinaryType(), nullable=True), + ], + ), + ), + nullable=True, + ), + ], + ) + +.. warning:: + + Columns: + + * ``value`` + * ``key`` + * ``headers[*].value`` + + are always returned as raw bytes. If they contain values of custom type, these values should be deserialized manually. + +.. currentmodule:: onetl.connection.db_connection.kafka.options + +.. autopydantic_model:: KafkaReadOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/kafka/read_options.rst b/docs/connection/db_connection/kafka/read_options.rst deleted file mode 100644 index 37071c27d..000000000 --- a/docs/connection/db_connection/kafka/read_options.rst +++ /dev/null @@ -1,11 +0,0 @@ -.. _kafka-read-options: - -Kafka ReadOptions -================= - -.. currentmodule:: onetl.connection.db_connection.kafka.options - -.. autopydantic_model:: KafkaReadOptions - :member-order: bysource - :model-show-field-summary: false - :field-show-constraints: false diff --git a/docs/connection/db_connection/kafka/write.rst b/docs/connection/db_connection/kafka/write.rst new file mode 100644 index 000000000..eb04ecccb --- /dev/null +++ b/docs/connection/db_connection/kafka/write.rst @@ -0,0 +1,64 @@ +.. _kafka-write: + +Writing to Kafka +================ + +For writing data to Kafka, use :obj:`DBWriter ` with specific options (see below). + +.. note:: + + Unlike other connection classes, Kafka only accepts dataframe with fixed schema + (see `documentation `_): + + .. dropdown:: DataFrame Schema + + .. code:: python + + from pyspark.sql.types import ( + ArrayType, + BinaryType, + IntegerType, + StringType, + StructField, + StructType, + ) + + schema = StructType( + [ + # mandatory fields: + StructField("value", BinaryType(), nullable=True), + # optional fields, can be omitted: + StructField("key", BinaryType(), nullable=True), + StructField("partition", IntegerType(), nullable=True), + StructField( + "headers", + ArrayType( + StructType( + [ + StructField("key", StringType(), nullable=False), + StructField("value", BinaryType(), nullable=True), + ], + ), + ), + nullable=True, + ), + ], + ) + You cannot pass dataframe with other column names or types. + +.. warning:: + + Columns: + + * ``value`` + * ``key`` + * ``headers[*].value`` + + can only be string or raw bytes. If they contain values of custom type, these values should be serialized manually. + +.. currentmodule:: onetl.connection.db_connection.kafka.options + +.. 
autopydantic_model:: KafkaWriteOptions + :member-order: bysource + :model-show-field-summary: false + :field-show-constraints: false diff --git a/docs/connection/db_connection/kafka/write_options.rst b/docs/connection/db_connection/kafka/write_options.rst deleted file mode 100644 index a3b678951..000000000 --- a/docs/connection/db_connection/kafka/write_options.rst +++ /dev/null @@ -1,11 +0,0 @@ -.. _kafka-write-options: - -Kafka WriteOptions -================== - -.. currentmodule:: onetl.connection.db_connection.kafka.options - -.. autopydantic_model:: KafkaWriteOptions - :member-order: bysource - :model-show-field-summary: false - :field-show-constraints: false diff --git a/docs/file_df/file_df_reader/file_df_reader.rst b/docs/file_df/file_df_reader/file_df_reader.rst index d0c3db6d7..f9e2ef739 100644 --- a/docs/file_df/file_df_reader/file_df_reader.rst +++ b/docs/file_df/file_df_reader/file_df_reader.rst @@ -1,7 +1,7 @@ .. _file-df-reader: -File Reader -=========== +FileDF Reader +============= .. currentmodule:: onetl.file.file_df_reader.file_df_reader diff --git a/docs/file_df/file_df_reader/index.rst b/docs/file_df/file_df_reader/index.rst index c135b618d..e151c7376 100644 --- a/docs/file_df/file_df_reader/index.rst +++ b/docs/file_df/file_df_reader/index.rst @@ -1,11 +1,11 @@ .. _file-df-reader-root: -File Reader -=============== +FileDF Reader +============= .. toctree:: :maxdepth: 1 - :caption: File Reader + :caption: FileDF Reader file_df_reader options diff --git a/docs/file_df/file_df_writer/file_df_writer.rst b/docs/file_df/file_df_writer/file_df_writer.rst index 73f46bdb6..b7482cd26 100644 --- a/docs/file_df/file_df_writer/file_df_writer.rst +++ b/docs/file_df/file_df_writer/file_df_writer.rst @@ -1,7 +1,7 @@ .. _file-df-writer: -File Writer -=========== +FileDF Writer +============= .. currentmodule:: onetl.file.file_df_writer.file_df_writer diff --git a/docs/file_df/file_df_writer/index.rst b/docs/file_df/file_df_writer/index.rst index 22c02c09b..e9b74ee0f 100644 --- a/docs/file_df/file_df_writer/index.rst +++ b/docs/file_df/file_df_writer/index.rst @@ -1,11 +1,11 @@ .. _file-df-writer-root: -File Writer -=============== +FileDF Writer +============= .. 
toctree:: :maxdepth: 1 - :caption: File Writer + :caption: FileDF Writer file_df_writer options diff --git a/onetl/connection/db_connection/kafka/connection.py b/onetl/connection/db_connection/kafka/connection.py index d49630139..54e283617 100644 --- a/onetl/connection/db_connection/kafka/connection.py +++ b/onetl/connection/db_connection/kafka/connection.py @@ -192,14 +192,12 @@ class Kafka(DBConnection): kafka = Kafka( addresses=["mybroker:9092", "anotherbroker:9092"], cluster="my-cluster", - protocol=( - Kafka.SSLProtocol( - keystore_type="PEM", - keystore_certificate_chain=Path("path/to/user.crt").read_text(), - keystore_key=Path("path/to/user.key").read_text(), - truststore_type="PEM", - truststore_certificates=Path("/path/to/server.crt").read_text(), - ), + protocol=Kafka.SSLProtocol( + keystore_type="PEM", + keystore_certificate_chain=Path("path/to/user.crt").read_text(), + keystore_key=Path("path/to/user.key").read_text(), + truststore_type="PEM", + truststore_certificates=Path("/path/to/server.crt").read_text(), ), auth=Kafka.ScramAuth( user="me", @@ -357,6 +355,7 @@ def get_df_schema( ], ), ), + nullable=True, ), ], ) diff --git a/onetl/connection/db_connection/kafka/kafka_ssl_protocol.py b/onetl/connection/db_connection/kafka/kafka_ssl_protocol.py index b5a73c357..600d56876 100644 --- a/onetl/connection/db_connection/kafka/kafka_ssl_protocol.py +++ b/onetl/connection/db_connection/kafka/kafka_ssl_protocol.py @@ -48,28 +48,24 @@ class KafkaSSLProtocol(KafkaProtocol, GenericOptions): from pathlib import Path # Just read existing files located on host, and pass key and certificates as strings - protocol = ( - Kafka.SSLProtocol( - keystore_type="PEM", - keystore_certificate_chain=Path("path/to/user.crt").read_text(), - keystore_key=Path("path/to/user.key").read_text(), - truststore_type="PEM", - truststore_certificates=Path("/path/to/server.crt").read_text(), - ), + protocol = Kafka.SSLProtocol( + keystore_type="PEM", + keystore_certificate_chain=Path("path/to/user.crt").read_text(), + keystore_key=Path("path/to/user.key").read_text(), + truststore_type="PEM", + truststore_certificates=Path("/path/to/server.crt").read_text(), ) Pass PEM key and certificates as raw strings: .. 
code:: python - protocol = ( - Kafka.SSLProtocol( - keystore_type="PEM", - keystore_certificate_chain="-----BEGIN CERTIFICATE-----\\nMIIDZjC...\\n-----END CERTIFICATE-----", - keystore_key="-----BEGIN PRIVATE KEY-----\\nMIIEvg..\\n-----END PRIVATE KEY-----", - truststore_type="PEM", - truststore_certificates="-----BEGIN CERTIFICATE-----\\nMICC...\\n-----END CERTIFICATE-----", - ), + protocol = Kafka.SSLProtocol( + keystore_type="PEM", + keystore_certificate_chain="-----BEGIN CERTIFICATE-----\\nMIIDZjC...\\n-----END CERTIFICATE-----", + keystore_key="-----BEGIN PRIVATE KEY-----\\nMIIEvg..\\n-----END PRIVATE KEY-----", + truststore_type="PEM", + truststore_certificates="-----BEGIN CERTIFICATE-----\\nMICC...\\n-----END CERTIFICATE-----", ) Pass custom options: diff --git a/onetl/connection/db_connection/kafka/options.py b/onetl/connection/db_connection/kafka/options.py index 88f5a15b6..5e3a3b142 100644 --- a/onetl/connection/db_connection/kafka/options.py +++ b/onetl/connection/db_connection/kafka/options.py @@ -88,9 +88,8 @@ class KafkaReadOptions(GenericOptions): * ``startingTimestamp`` * ``subscribe`` * ``subscribePattern`` - * ``topic`` - populated from connection attributes, and cannot be set in ``KafkaReadOptions`` class and be overridden + are populated from connection attributes, and cannot be set in ``KafkaReadOptions`` class and be overridden by the user to avoid issues. Examples @@ -126,18 +125,10 @@ class KafkaWriteOptions(GenericOptions): .. warning:: Options: - * ``assign`` - * ``endingOffsets`` - * ``endingOffsetsByTimestamp`` * ``kafka.*`` - * ``startingOffsets`` - * ``startingOffsetsByTimestamp`` - * ``startingTimestamp`` - * ``subscribe`` - * ``subscribePattern`` * ``topic`` - populated from connection attributes, and cannot be set in ``KafkaWriteOptions`` class and be overridden + are populated from connection attributes, and cannot be set in ``KafkaWriteOptions`` class and be overridden by the user to avoid issues. 
Examples diff --git a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_kafka_reader_integration.py b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_kafka_reader_integration.py index 3294a6fa1..56e76067e 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_kafka_reader_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_kafka_reader_integration.py @@ -22,10 +22,10 @@ def dataframe_schema(): return StructType( [ - StructField("id_int", LongType(), True), - StructField("text_string", StringType(), True), - StructField("hwm_int", LongType(), True), - StructField("float_value", FloatType(), True), + StructField("id_int", LongType(), nullable=True), + StructField("text_string", StringType(), nullable=True), + StructField("hwm_int", LongType(), nullable=True), + StructField("float_value", FloatType(), nullable=True), ], ) @@ -88,6 +88,7 @@ def kafka_schema_with_headers(): ], ), ), + nullable=True, ), ], ) From 1d6e0a4997c96f60e2c5f0ed0020993944474ddc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 4 Sep 2023 12:34:55 +0000 Subject: [PATCH 19/30] [DOP-8511] Remove restriction for sphinx version --- requirements/docs.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements/docs.txt b/requirements/docs.txt index 463d9c398..3e8e1e0e8 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -3,8 +3,7 @@ furo importlib-resources<6 numpydoc pygments-csv-lexer -# https://github.com/pradyunsg/furo/discussions/693 -sphinx<7.2.0 +sphinx sphinx-copybutton sphinx-design sphinx-tabs From be00697248a87b657a1d65edc3153929cc3cc99d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Fri, 1 Sep 2023 14:02:05 +0000 Subject: [PATCH 20/30] [DOP-8511] Check if Kafka topic exists before read --- docs/changelog/next_release/138.bugfix.rst | 1 + .../db_connection/kafka/connection.py | 3 +++ .../test_kafka_reader_integration.py | 21 ++++++++++++++++--- 3 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 docs/changelog/next_release/138.bugfix.rst diff --git a/docs/changelog/next_release/138.bugfix.rst b/docs/changelog/next_release/138.bugfix.rst new file mode 100644 index 000000000..4ea6c3a5c --- /dev/null +++ b/docs/changelog/next_release/138.bugfix.rst @@ -0,0 +1 @@ +Raise exception if someone tries to read data from Kafka topic which does not exist. 
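The connection change that follows makes reads fail fast when the requested topic is missing. A minimal sketch of the resulting behaviour, assuming ``kafka`` is an already-configured ``Kafka`` connection and ``missing_topic`` is only a placeholder name:

.. code:: python

    from onetl.db import DBReader

    # `kafka` is assumed to be an existing Kafka connection object
    reader = DBReader(connection=kafka, source="missing_topic")

    try:
        df = reader.run()
    except ValueError as err:
        # e.g. "Topic 'missing_topic' doesn't exist"
        print(err)
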
diff --git a/onetl/connection/db_connection/kafka/connection.py b/onetl/connection/db_connection/kafka/connection.py index 54e283617..6cb97a1c8 100644 --- a/onetl/connection/db_connection/kafka/connection.py +++ b/onetl/connection/db_connection/kafka/connection.py @@ -269,6 +269,9 @@ def read_source_as_df( options: KafkaReadOptions = KafkaReadOptions(), # noqa: B008, WPS404 ) -> DataFrame: log.info("|%s| Reading data from topic %r", self.__class__.__name__, source) + if source not in self._get_topics(): + raise ValueError(f"Topic {source!r} doesn't exist") + result_options = {f"kafka.{key}": value for key, value in self._get_connection_properties().items()} result_options.update(options.dict(by_alias=True, exclude_none=True)) result_options["subscribe"] = source diff --git a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_kafka_reader_integration.py b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_kafka_reader_integration.py index 56e76067e..6f5d3e545 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_kafka_reader_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_reader_integration/test_kafka_reader_integration.py @@ -113,10 +113,8 @@ def create_kafka_data(spark): def test_kafka_reader(spark, kafka_processing, schema): - # Arrange topic, processing, expected_df = kafka_processing - # Act kafka = Kafka( spark=spark, addresses=[f"{processing.host}:{processing.port}"], @@ -129,7 +127,6 @@ def test_kafka_reader(spark, kafka_processing, schema): ) df = reader.run() - # Assert processing.assert_equal_df(processing.json_deserialize(df, df_schema=schema), other_frame=expected_df) @@ -174,3 +171,21 @@ def test_kafka_reader_columns_and_types_with_headers(spark, kafka_processing, ka df = reader.run() assert df.schema == kafka_schema_with_headers + + +def test_kafka_reader_topic_does_not_exist(spark, kafka_processing): + _, processing, _ = kafka_processing + + kafka = Kafka( + spark=spark, + addresses=[f"{processing.host}:{processing.port}"], + cluster="cluster", + ) + + reader = DBReader( + connection=kafka, + source="missing", + ) + + with pytest.raises(ValueError, match="Topic 'missing' doesn't exist"): + reader.run() From f98cc29fb9e337907cc9a4e82dfaeb5dc1201a57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 4 Sep 2023 14:38:34 +0000 Subject: [PATCH 21/30] [DOP-8511] Fix Greenplum connector link --- docs/connection/db_connection/greenplum/prerequisites.rst | 2 +- onetl/connection/db_connection/greenplum/connection.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/connection/db_connection/greenplum/prerequisites.rst b/docs/connection/db_connection/greenplum/prerequisites.rst index 166a936aa..964d9cdcf 100644 --- a/docs/connection/db_connection/greenplum/prerequisites.rst +++ b/docs/connection/db_connection/greenplum/prerequisites.rst @@ -30,7 +30,7 @@ Downloading Pivotal package --------------------------- To use Greenplum connector you should download connector ``.jar`` file from -`Pivotal website `_ +`Pivotal website `_ and then pass it to Spark session. There are several ways to do that. 
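The prerequisites page above notes there are several ways to pass the downloaded ``.jar`` to the Spark session. One possible sketch, where the jar path is only a placeholder and not the real artifact name:

.. code:: python

    from pyspark.sql import SparkSession

    spark = (
        SparkSession.builder
        .appName("onetl-greenplum")
        # placeholder path to the connector jar downloaded from the Pivotal website
        .config("spark.jars", "/path/to/greenplum-connector.jar")
        .getOrCreate()
    )
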
diff --git a/onetl/connection/db_connection/greenplum/connection.py b/onetl/connection/db_connection/greenplum/connection.py index 0b7dcc504..780848614 100644 --- a/onetl/connection/db_connection/greenplum/connection.py +++ b/onetl/connection/db_connection/greenplum/connection.py @@ -77,7 +77,7 @@ class Greenplum(JDBCMixin, DBConnection): """Greenplum connection. |support_hooks| Based on package ``io.pivotal:greenplum-spark:2.1.4`` - (`Pivotal connector for Spark `_). + (`Pivotal connector for Spark `_). .. warning:: From 6e736542935414313be64114ab3caecb508bf601 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 5 Sep 2023 04:39:13 +0000 Subject: [PATCH 22/30] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/PyCQA/autoflake: v2.2.0 → v2.2.1](https://github.com/PyCQA/autoflake/compare/v2.2.0...v2.2.1) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0145b90c6..fd0c89d6b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -77,7 +77,7 @@ repos: - id: check-hooks-apply - id: check-useless-excludes - repo: https://github.com/PyCQA/autoflake - rev: v2.2.0 + rev: v2.2.1 hooks: - id: autoflake args: From 0c55572286f8bc3dbd19403523b373e3d622d741 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 4 Sep 2023 12:50:20 +0000 Subject: [PATCH 23/30] [DOP-8511] Allow to pass Kafka topic name in format 'some.topic.name' --- docs/changelog/next_release/139.bugfix.rst | 1 + .../db_connection/dialect_mixins/__init__.py | 8 +++---- .../dialect_mixins/support_name_any.py | 11 +++++++++ ...ma.py => support_name_with_schema_only.py} | 7 +++--- .../support_table_with_dbschema.py | 17 ------------- .../db_connection/greenplum/dialect.py | 4 ++-- .../connection/db_connection/hive/dialect.py | 4 ++-- .../db_connection/jdbc_connection/dialect.py | 4 ++-- .../connection/db_connection/kafka/dialect.py | 4 ++-- .../db_connection/mongodb/dialect.py | 4 ++-- requirements/core.txt | 2 +- .../test_clickhouse_reader_unit.py | 2 +- .../test_greenplum_reader_unit.py | 2 +- .../test_kafka_reader_unit.py | 24 ------------------- .../test_mongodb_reader_unit.py | 18 -------------- .../test_mssql_reader_unit.py | 2 +- .../test_mysql_reader_unit.py | 2 +- .../test_oracle_reader_unit.py | 2 +- .../test_postgres_reader_unit.py | 2 +- .../test_teradata_reader_unit.py | 2 +- .../test_clickhouse_writer_unit.py | 2 +- .../test_greenplum_writer_unit.py | 2 +- .../test_hive_writer_unit.py | 2 +- .../test_mongodb_writer_unit.py | 22 ----------------- .../test_mssql_writer_unit.py | 2 +- .../test_mysql_writer_unit.py | 2 +- .../test_oracle_writer_unit.py | 2 +- .../test_postgres_writer_unit.py | 2 +- .../test_teradata_writer_unit.py | 2 +- 29 files changed, 46 insertions(+), 114 deletions(-) create mode 100644 docs/changelog/next_release/139.bugfix.rst create mode 100644 onetl/connection/db_connection/dialect_mixins/support_name_any.py rename onetl/connection/db_connection/dialect_mixins/{support_table_without_dbschema.py => support_name_with_schema_only.py} (60%) delete mode 100644 onetl/connection/db_connection/dialect_mixins/support_table_with_dbschema.py delete mode 100644 
tests/tests_unit/test_db/test_db_writer_unit/test_mongodb_writer_unit.py diff --git a/docs/changelog/next_release/139.bugfix.rst b/docs/changelog/next_release/139.bugfix.rst new file mode 100644 index 000000000..a29b5f44d --- /dev/null +++ b/docs/changelog/next_release/139.bugfix.rst @@ -0,0 +1 @@ +Allow to pass Kafka topics with name like ``some.topic.name`` to DBReader. Same for MongoDB collections. diff --git a/onetl/connection/db_connection/dialect_mixins/__init__.py b/onetl/connection/db_connection/dialect_mixins/__init__.py index 1eb0d7f17..4d889b276 100644 --- a/onetl/connection/db_connection/dialect_mixins/__init__.py +++ b/onetl/connection/db_connection/dialect_mixins/__init__.py @@ -25,11 +25,11 @@ from onetl.connection.db_connection.dialect_mixins.support_hwm_expression_str import ( SupportHWMExpressionStr, ) -from onetl.connection.db_connection.dialect_mixins.support_table_with_dbschema import ( - SupportTableWithDBSchema, +from onetl.connection.db_connection.dialect_mixins.support_name_any import ( + SupportNameAny, ) -from onetl.connection.db_connection.dialect_mixins.support_table_without_dbschema import ( - SupportTableWithoutDBSchema, +from onetl.connection.db_connection.dialect_mixins.support_name_with_schema_only import ( + SupportNameWithSchemaOnly, ) from onetl.connection.db_connection.dialect_mixins.support_where_none import ( SupportWhereNone, diff --git a/onetl/connection/db_connection/dialect_mixins/support_name_any.py b/onetl/connection/db_connection/dialect_mixins/support_name_any.py new file mode 100644 index 000000000..8ecb34fd6 --- /dev/null +++ b/onetl/connection/db_connection/dialect_mixins/support_name_any.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from etl_entities import Table + +from onetl.base import BaseDBConnection + + +class SupportNameAny: + @classmethod + def validate_name(cls, connection: BaseDBConnection, value: Table) -> Table: + return value diff --git a/onetl/connection/db_connection/dialect_mixins/support_table_without_dbschema.py b/onetl/connection/db_connection/dialect_mixins/support_name_with_schema_only.py similarity index 60% rename from onetl/connection/db_connection/dialect_mixins/support_table_without_dbschema.py rename to onetl/connection/db_connection/dialect_mixins/support_name_with_schema_only.py index 456181411..eb374ca3a 100644 --- a/onetl/connection/db_connection/dialect_mixins/support_table_without_dbschema.py +++ b/onetl/connection/db_connection/dialect_mixins/support_name_with_schema_only.py @@ -5,11 +5,12 @@ from onetl.base import BaseDBConnection -class SupportTableWithoutDBSchema: +class SupportNameWithSchemaOnly: @classmethod def validate_name(cls, connection: BaseDBConnection, value: Table) -> Table: - if value.db is not None: + if value.name.count(".") != 1: raise ValueError( - f"Table name should be passed in `mytable` format (not `myschema.mytable`), got '{value}'", + f"Name should be passed in `schema.name` format, got '{value}'", ) + return value diff --git a/onetl/connection/db_connection/dialect_mixins/support_table_with_dbschema.py b/onetl/connection/db_connection/dialect_mixins/support_table_with_dbschema.py deleted file mode 100644 index 3fa81ad54..000000000 --- a/onetl/connection/db_connection/dialect_mixins/support_table_with_dbschema.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import annotations - -from etl_entities import Table - -from onetl.base import BaseDBConnection - - -class SupportTableWithDBSchema: - @classmethod - def validate_name(cls, connection: BaseDBConnection, value: 
Table) -> Table: - if value.db is None: - # Same error text as in etl_entites.Table value error. - raise ValueError( - f"Table name should be passed in `schema.name` format, got '{value}'", - ) - - return value diff --git a/onetl/connection/db_connection/greenplum/dialect.py b/onetl/connection/db_connection/greenplum/dialect.py index ddf882273..a998811aa 100644 --- a/onetl/connection/db_connection/greenplum/dialect.py +++ b/onetl/connection/db_connection/greenplum/dialect.py @@ -23,13 +23,13 @@ SupportHintNone, SupportHWMColumnStr, SupportHWMExpressionStr, - SupportTableWithDBSchema, + SupportNameWithSchemaOnly, SupportWhereStr, ) class GreenplumDialect( # noqa: WPS215 - SupportTableWithDBSchema, + SupportNameWithSchemaOnly, SupportColumnsList, SupportDfSchemaNone, SupportWhereStr, diff --git a/onetl/connection/db_connection/hive/dialect.py b/onetl/connection/db_connection/hive/dialect.py index b74279ec8..552e66559 100644 --- a/onetl/connection/db_connection/hive/dialect.py +++ b/onetl/connection/db_connection/hive/dialect.py @@ -21,13 +21,13 @@ SupportHintStr, SupportHWMColumnStr, SupportHWMExpressionStr, - SupportTableWithDBSchema, + SupportNameWithSchemaOnly, SupportWhereStr, ) class HiveDialect( # noqa: WPS215 - SupportTableWithDBSchema, + SupportNameWithSchemaOnly, SupportColumnsList, SupportDfSchemaNone, SupportWhereStr, diff --git a/onetl/connection/db_connection/jdbc_connection/dialect.py b/onetl/connection/db_connection/jdbc_connection/dialect.py index ae49738ec..790a0c300 100644 --- a/onetl/connection/db_connection/jdbc_connection/dialect.py +++ b/onetl/connection/db_connection/jdbc_connection/dialect.py @@ -23,13 +23,13 @@ SupportHintStr, SupportHWMColumnStr, SupportHWMExpressionStr, - SupportTableWithDBSchema, + SupportNameWithSchemaOnly, SupportWhereStr, ) class JDBCDialect( # noqa: WPS215 - SupportTableWithDBSchema, + SupportNameWithSchemaOnly, SupportColumnsList, SupportDfSchemaNone, SupportWhereStr, diff --git a/onetl/connection/db_connection/kafka/dialect.py b/onetl/connection/db_connection/kafka/dialect.py index d6cc9bf56..e8c35ccaa 100644 --- a/onetl/connection/db_connection/kafka/dialect.py +++ b/onetl/connection/db_connection/kafka/dialect.py @@ -25,7 +25,7 @@ SupportDfSchemaNone, SupportHintNone, SupportHWMExpressionNone, - SupportTableWithoutDBSchema, + SupportNameAny, SupportWhereNone, ) @@ -37,7 +37,7 @@ class KafkaDialect( # noqa: WPS215 SupportDfSchemaNone, SupportHintNone, SupportWhereNone, - SupportTableWithoutDBSchema, + SupportNameAny, SupportHWMExpressionNone, DBDialect, ): diff --git a/onetl/connection/db_connection/mongodb/dialect.py b/onetl/connection/db_connection/mongodb/dialect.py index 865288dea..d3d388f72 100644 --- a/onetl/connection/db_connection/mongodb/dialect.py +++ b/onetl/connection/db_connection/mongodb/dialect.py @@ -26,7 +26,7 @@ SupportDfSchemaStruct, SupportHWMColumnStr, SupportHWMExpressionNone, - SupportTableWithoutDBSchema, + SupportNameAny, ) _upper_level_operators = frozenset( # noqa: WPS527 @@ -74,7 +74,7 @@ class MongoDBDialect( # noqa: WPS215 - SupportTableWithoutDBSchema, + SupportNameAny, SupportHWMExpressionNone, SupportColumnsNone, SupportDfSchemaStruct, diff --git a/requirements/core.txt b/requirements/core.txt index edd3095a6..96c8959e5 100644 --- a/requirements/core.txt +++ b/requirements/core.txt @@ -1,5 +1,5 @@ deprecated -etl-entities>=1.3,<1.4 +etl-entities>=1.4,<1.5 evacuator>=1.0,<1.1 frozendict humanize diff --git a/tests/tests_unit/test_db/test_db_reader_unit/test_clickhouse_reader_unit.py 
b/tests/tests_unit/test_db/test_db_reader_unit/test_clickhouse_reader_unit.py index 6a46f25c1..ba16f231b 100644 --- a/tests/tests_unit/test_db/test_db_reader_unit/test_clickhouse_reader_unit.py +++ b/tests/tests_unit/test_db/test_db_reader_unit/test_clickhouse_reader_unit.py @@ -39,7 +39,7 @@ def test_clickhouse_reader_snapshot_error_pass_df_schema(spark_mock): def test_clickhouse_reader_wrong_table_name(spark_mock, table): clickhouse = Clickhouse(host="some_host", user="user", database="database", password="passwd", spark=spark_mock) - with pytest.raises(ValueError, match="Table name should be passed in `schema.name` format"): + with pytest.raises(ValueError, match="Name should be passed in `schema.name` format"): DBReader( connection=clickhouse, table=table, # Required format: table="shema.table" diff --git a/tests/tests_unit/test_db/test_db_reader_unit/test_greenplum_reader_unit.py b/tests/tests_unit/test_db/test_db_reader_unit/test_greenplum_reader_unit.py index 57d60f8a2..3770bcf19 100644 --- a/tests/tests_unit/test_db/test_db_reader_unit/test_greenplum_reader_unit.py +++ b/tests/tests_unit/test_db/test_db_reader_unit/test_greenplum_reader_unit.py @@ -40,7 +40,7 @@ def test_greenplum_reader_snapshot_error_pass_df_schema(spark_mock): def test_greenplum_reader_wrong_table_name(spark_mock, table): greenplum = Greenplum(host="some_host", user="user", database="database", password="passwd", spark=spark_mock) - with pytest.raises(ValueError, match="Table name should be passed in `schema.name` format"): + with pytest.raises(ValueError, match="Name should be passed in `schema.name` format"): DBReader( connection=greenplum, table=table, # Required format: table="shema.table" diff --git a/tests/tests_unit/test_db/test_db_reader_unit/test_kafka_reader_unit.py b/tests/tests_unit/test_db/test_db_reader_unit/test_kafka_reader_unit.py index a6493c57a..5fd228f5b 100644 --- a/tests/tests_unit/test_db/test_db_reader_unit/test_kafka_reader_unit.py +++ b/tests/tests_unit/test_db/test_db_reader_unit/test_kafka_reader_unit.py @@ -29,30 +29,6 @@ def df_schema(): ) -def test_kafka_reader_invalid_table(spark_mock): - kafka = Kafka( - addresses=["localhost:9092"], - cluster="my_cluster", - spark=spark_mock, - ) - with pytest.raises( - ValueError, - match="Table name should be passed in `mytable` format", - ): - DBReader( - connection=kafka, - table="schema.table", # Includes schema. Required format: table="table" - ) - with pytest.raises( - ValueError, - match="Table name should be passed in `schema.name` format", - ): - DBReader( - connection=kafka, - table="schema.table.subtable", # Includes subtable. Required format: table="table" - ) - - def test_kafka_reader_unsupported_parameters(spark_mock, df_schema): kafka = Kafka( addresses=["localhost:9092"], diff --git a/tests/tests_unit/test_db/test_db_reader_unit/test_mongodb_reader_unit.py b/tests/tests_unit/test_db/test_db_reader_unit/test_mongodb_reader_unit.py index bd92a8638..2a164c16d 100644 --- a/tests/tests_unit/test_db/test_db_reader_unit/test_mongodb_reader_unit.py +++ b/tests/tests_unit/test_db/test_db_reader_unit/test_mongodb_reader_unit.py @@ -28,24 +28,6 @@ def df_schema(): ) -def test_mongodb_reader_with_dbschema(spark_mock): - mongo = MongoDB( - host="host", - user="user", - password="password", - database="database", - spark=spark_mock, - ) - with pytest.raises( - ValueError, - match="Table name should be passed in `mytable` format", - ): - DBReader( - connection=mongo, - table="schema.table", # Includes schema. 
Required format: table="table" - ) - - def test_mongodb_reader_wrong_hint_type(spark_mock, df_schema): mongo = MongoDB( host="host", diff --git a/tests/tests_unit/test_db/test_db_reader_unit/test_mssql_reader_unit.py b/tests/tests_unit/test_db/test_db_reader_unit/test_mssql_reader_unit.py index febcf8d8c..c8c483447 100644 --- a/tests/tests_unit/test_db/test_db_reader_unit/test_mssql_reader_unit.py +++ b/tests/tests_unit/test_db/test_db_reader_unit/test_mssql_reader_unit.py @@ -40,7 +40,7 @@ def test_mssql_reader_snapshot_error_pass_df_schema(spark_mock): def test_mssql_reader_wrong_table_name(spark_mock, table): mssql = MSSQL(host="some_host", user="user", database="database", password="passwd", spark=spark_mock) - with pytest.raises(ValueError, match="Table name should be passed in `schema.name` format"): + with pytest.raises(ValueError, match="Name should be passed in `schema.name` format"): DBReader( connection=mssql, table=table, # Required format: table="shema.table" diff --git a/tests/tests_unit/test_db/test_db_reader_unit/test_mysql_reader_unit.py b/tests/tests_unit/test_db/test_db_reader_unit/test_mysql_reader_unit.py index 7e5848544..19c4c662f 100644 --- a/tests/tests_unit/test_db/test_db_reader_unit/test_mysql_reader_unit.py +++ b/tests/tests_unit/test_db/test_db_reader_unit/test_mysql_reader_unit.py @@ -40,7 +40,7 @@ def test_mysql_reader_snapshot_error_pass_df_schema(spark_mock): def test_mysql_reader_wrong_table_name(spark_mock, table): mysql = MySQL(host="some_host", user="user", database="database", password="passwd", spark=spark_mock) - with pytest.raises(ValueError, match="Table name should be passed in `schema.name` format"): + with pytest.raises(ValueError, match="Name should be passed in `schema.name` format"): DBReader( connection=mysql, table=table, # Required format: table="shema.table" diff --git a/tests/tests_unit/test_db/test_db_reader_unit/test_oracle_reader_unit.py b/tests/tests_unit/test_db/test_db_reader_unit/test_oracle_reader_unit.py index 3fa67b130..1011f17ec 100644 --- a/tests/tests_unit/test_db/test_db_reader_unit/test_oracle_reader_unit.py +++ b/tests/tests_unit/test_db/test_db_reader_unit/test_oracle_reader_unit.py @@ -39,7 +39,7 @@ def test_oracle_reader_error_df_schema(spark_mock): @pytest.mark.parametrize("table", ["table", "table.table.table"]) def test_oracle_reader_wrong_table_name(spark_mock, table): oracle = Oracle(host="some_host", user="user", sid="sid", password="passwd", spark=spark_mock) - with pytest.raises(ValueError, match="Table name should be passed in `schema.name` format"): + with pytest.raises(ValueError, match="Name should be passed in `schema.name` format"): DBReader( connection=oracle, table=table, # Required format: table="shema.table" diff --git a/tests/tests_unit/test_db/test_db_reader_unit/test_postgres_reader_unit.py b/tests/tests_unit/test_db/test_db_reader_unit/test_postgres_reader_unit.py index 1b15437e8..c541e33cd 100644 --- a/tests/tests_unit/test_db/test_db_reader_unit/test_postgres_reader_unit.py +++ b/tests/tests_unit/test_db/test_db_reader_unit/test_postgres_reader_unit.py @@ -40,7 +40,7 @@ def test_postgres_reader_snapshot_error_pass_df_schema(spark_mock): def test_postgres_reader_wrong_table_name(spark_mock, table): postgres = Postgres(host="some_host", user="user", database="database", password="passwd", spark=spark_mock) - with pytest.raises(ValueError, match="Table name should be passed in `schema.name` format"): + with pytest.raises(ValueError, match="Name should be passed in `schema.name` format"): DBReader( 
connection=postgres, table=table, # Required format: table="shema.table" diff --git a/tests/tests_unit/test_db/test_db_reader_unit/test_teradata_reader_unit.py b/tests/tests_unit/test_db/test_db_reader_unit/test_teradata_reader_unit.py index bc5140d27..262897fde 100644 --- a/tests/tests_unit/test_db/test_db_reader_unit/test_teradata_reader_unit.py +++ b/tests/tests_unit/test_db/test_db_reader_unit/test_teradata_reader_unit.py @@ -40,7 +40,7 @@ def test_teradata_reader_snapshot_error_pass_df_schema(spark_mock): def test_teradata_reader_wrong_table_name(spark_mock, table): teradata = Teradata(host="some_host", user="user", database="database", password="passwd", spark=spark_mock) - with pytest.raises(ValueError, match="Table name should be passed in `schema.name` format"): + with pytest.raises(ValueError, match="Name should be passed in `schema.name` format"): DBReader( connection=teradata, table=table, # Required format: table="shema.table" diff --git a/tests/tests_unit/test_db/test_db_writer_unit/test_clickhouse_writer_unit.py b/tests/tests_unit/test_db/test_db_writer_unit/test_clickhouse_writer_unit.py index c82ff4af4..eb4b34028 100644 --- a/tests/tests_unit/test_db/test_db_writer_unit/test_clickhouse_writer_unit.py +++ b/tests/tests_unit/test_db/test_db_writer_unit/test_clickhouse_writer_unit.py @@ -10,7 +10,7 @@ def test_clickhouse_writer_wrong_table_name(spark_mock, table): clickhouse = Clickhouse(host="some_host", user="user", database="database", password="passwd", spark=spark_mock) - with pytest.raises(ValueError, match="Table name should be passed in `schema.name` format"): + with pytest.raises(ValueError, match="Name should be passed in `schema.name` format"): DBWriter( connection=clickhouse, table=table, # Required format: table="shema.table" diff --git a/tests/tests_unit/test_db/test_db_writer_unit/test_greenplum_writer_unit.py b/tests/tests_unit/test_db/test_db_writer_unit/test_greenplum_writer_unit.py index 6b2c63e0a..fb3614cdd 100644 --- a/tests/tests_unit/test_db/test_db_writer_unit/test_greenplum_writer_unit.py +++ b/tests/tests_unit/test_db/test_db_writer_unit/test_greenplum_writer_unit.py @@ -10,7 +10,7 @@ def test_greenplum_writer_wrong_table_name(spark_mock, table): greenplum = Greenplum(host="some_host", user="user", database="database", password="passwd", spark=spark_mock) - with pytest.raises(ValueError, match="Table name should be passed in `schema.name` format"): + with pytest.raises(ValueError, match="Name should be passed in `schema.name` format"): DBWriter( connection=greenplum, table=table, # Required format: table="shema.table" diff --git a/tests/tests_unit/test_db/test_db_writer_unit/test_hive_writer_unit.py b/tests/tests_unit/test_db/test_db_writer_unit/test_hive_writer_unit.py index f480d4b6a..2fbfdb573 100644 --- a/tests/tests_unit/test_db/test_db_writer_unit/test_hive_writer_unit.py +++ b/tests/tests_unit/test_db/test_db_writer_unit/test_hive_writer_unit.py @@ -10,7 +10,7 @@ def test_hive_writer_wrong_table_name(spark_mock, table): hive = Hive(cluster="rnd-dwh", spark=spark_mock) - with pytest.raises(ValueError, match="Table name should be passed in `schema.name` format"): + with pytest.raises(ValueError, match="Name should be passed in `schema.name` format"): DBWriter( connection=hive, table=table, # Required format: table="shema.table" diff --git a/tests/tests_unit/test_db/test_db_writer_unit/test_mongodb_writer_unit.py b/tests/tests_unit/test_db/test_db_writer_unit/test_mongodb_writer_unit.py deleted file mode 100644 index 99ce4218a..000000000 --- 
a/tests/tests_unit/test_db/test_db_writer_unit/test_mongodb_writer_unit.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest - -from onetl.connection import MongoDB -from onetl.db import DBWriter - -pytestmark = pytest.mark.mongodb - - -def test_mongodb_writer_wrong_table_name(spark_mock): - mongo = MongoDB( - host="host", - user="user", - password="password", - database="database", - spark=spark_mock, - ) - - with pytest.raises(ValueError, match="Table name should be passed in `mytable` format"): - DBWriter( - connection=mongo, - table="schema.table", # Includes schema. Required format: table="table" - ) diff --git a/tests/tests_unit/test_db/test_db_writer_unit/test_mssql_writer_unit.py b/tests/tests_unit/test_db/test_db_writer_unit/test_mssql_writer_unit.py index 57de4faaf..44618ff11 100644 --- a/tests/tests_unit/test_db/test_db_writer_unit/test_mssql_writer_unit.py +++ b/tests/tests_unit/test_db/test_db_writer_unit/test_mssql_writer_unit.py @@ -10,7 +10,7 @@ def test_mssql_writer_wrong_table_name(spark_mock, table): mssql = MSSQL(host="some_host", user="user", database="database", password="passwd", spark=spark_mock) - with pytest.raises(ValueError, match="Table name should be passed in `schema.name` format"): + with pytest.raises(ValueError, match="Name should be passed in `schema.name` format"): DBWriter( connection=mssql, table=table, # Required format: table="shema.table" diff --git a/tests/tests_unit/test_db/test_db_writer_unit/test_mysql_writer_unit.py b/tests/tests_unit/test_db/test_db_writer_unit/test_mysql_writer_unit.py index a35754457..8eb54f397 100644 --- a/tests/tests_unit/test_db/test_db_writer_unit/test_mysql_writer_unit.py +++ b/tests/tests_unit/test_db/test_db_writer_unit/test_mysql_writer_unit.py @@ -10,7 +10,7 @@ def test_mysql_writer_wrong_table_name(spark_mock, table): mysql = MySQL(host="some_host", user="user", database="database", password="passwd", spark=spark_mock) - with pytest.raises(ValueError, match="Table name should be passed in `schema.name` format"): + with pytest.raises(ValueError, match="Name should be passed in `schema.name` format"): DBWriter( connection=mysql, table=table, # Required format: table="shema.table" diff --git a/tests/tests_unit/test_db/test_db_writer_unit/test_oracle_writer_unit.py b/tests/tests_unit/test_db/test_db_writer_unit/test_oracle_writer_unit.py index 2a734e6e1..63668cacf 100644 --- a/tests/tests_unit/test_db/test_db_writer_unit/test_oracle_writer_unit.py +++ b/tests/tests_unit/test_db/test_db_writer_unit/test_oracle_writer_unit.py @@ -10,7 +10,7 @@ def test_oracle_writer_wrong_table_name(spark_mock, table): oracle = Oracle(host="some_host", user="user", sid="sid", password="passwd", spark=spark_mock) - with pytest.raises(ValueError, match="Table name should be passed in `schema.name` format"): + with pytest.raises(ValueError, match="Name should be passed in `schema.name` format"): DBWriter( connection=oracle, table=table, # Required format: table="shema.table" diff --git a/tests/tests_unit/test_db/test_db_writer_unit/test_postgres_writer_unit.py b/tests/tests_unit/test_db/test_db_writer_unit/test_postgres_writer_unit.py index db17a0119..d7322b5f1 100644 --- a/tests/tests_unit/test_db/test_db_writer_unit/test_postgres_writer_unit.py +++ b/tests/tests_unit/test_db/test_db_writer_unit/test_postgres_writer_unit.py @@ -10,7 +10,7 @@ def test_postgres_writer_wrong_table_name(spark_mock, table): postgres = Postgres(host="some_host", user="user", database="database", password="passwd", spark=spark_mock) - with pytest.raises(ValueError, 
match="Table name should be passed in `schema.name` format"): + with pytest.raises(ValueError, match="Name should be passed in `schema.name` format"): DBWriter( connection=postgres, table=table, # Required format: table="shema.table" diff --git a/tests/tests_unit/test_db/test_db_writer_unit/test_teradata_writer_unit.py b/tests/tests_unit/test_db/test_db_writer_unit/test_teradata_writer_unit.py index f13bb1957..6b9434ba7 100644 --- a/tests/tests_unit/test_db/test_db_writer_unit/test_teradata_writer_unit.py +++ b/tests/tests_unit/test_db/test_db_writer_unit/test_teradata_writer_unit.py @@ -10,7 +10,7 @@ def test_teradata_writer_wrong_table_name(spark_mock, table): teradata = Teradata(host="some_host", user="user", database="database", password="passwd", spark=spark_mock) - with pytest.raises(ValueError, match="Table name should be passed in `schema.name` format"): + with pytest.raises(ValueError, match="Name should be passed in `schema.name` format"): DBWriter( connection=teradata, table=table, # Required format: table="shema.table" From ea32421e6c1e39effb1cefbfb879021541e6966c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Tue, 5 Sep 2023 08:07:18 +0000 Subject: [PATCH 24/30] [DOP-8511] Update checkout action from v3 to v4 --- .github/workflows/changelog.yml | 2 +- .github/workflows/codeql-analysis.yml | 4 ++-- .github/workflows/dev-release.yml | 2 +- .github/workflows/get-matrix.yml | 2 +- .github/workflows/nightly.yml | 2 +- .github/workflows/release.yml | 2 +- .github/workflows/test-clickhouse.yml | 2 +- .github/workflows/test-core.yml | 2 +- .github/workflows/test-ftp.yml | 2 +- .github/workflows/test-ftps.yml | 2 +- .github/workflows/test-greenplum.yml | 2 +- .github/workflows/test-hdfs.yml | 2 +- .github/workflows/test-hive.yml | 2 +- .github/workflows/test-kafka.yml | 2 +- .github/workflows/test-local-fs.yml | 2 +- .github/workflows/test-mongodb.yml | 2 +- .github/workflows/test-mssql.yml | 2 +- .github/workflows/test-mysql.yml | 2 +- .github/workflows/test-oracle.yml | 2 +- .github/workflows/test-postgres.yml | 2 +- .github/workflows/test-s3.yml | 2 +- .github/workflows/test-sftp.yml | 2 +- .github/workflows/test-teradata.yml | 2 +- .github/workflows/test-webdav.yml | 2 +- .github/workflows/tests.yml | 2 +- 25 files changed, 26 insertions(+), 26 deletions(-) diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml index 2490b579a..a3d39f471 100644 --- a/.github/workflows/changelog.yml +++ b/.github/workflows/changelog.yml @@ -20,7 +20,7 @@ jobs: if: "!contains(github.event.pull_request.labels.*.name, 'ci:skip-changelog') && github.event.pull_request.user.login != 'pre-commit-ci[bot]' && github.event.pull_request.user.login != 'dependabot[bot]'" steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 0 diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 22bdc6b84..434905856 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -27,7 +27,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python ${{ env.DEFAULT_PYTHON }} uses: actions/setup-python@v4 @@ -84,7 +84,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python ${{ 
env.DEFAULT_PYTHON }} uses: actions/setup-python@v4 diff --git a/.github/workflows/dev-release.yml b/.github/workflows/dev-release.yml index fc10af392..76f362733 100644 --- a/.github/workflows/dev-release.yml +++ b/.github/workflows/dev-release.yml @@ -29,7 +29,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 0 diff --git a/.github/workflows/get-matrix.yml b/.github/workflows/get-matrix.yml index d487e64e9..fd7e24aae 100644 --- a/.github/workflows/get-matrix.yml +++ b/.github/workflows/get-matrix.yml @@ -72,7 +72,7 @@ jobs: matrix-webdav: ${{ toJson(fromJson(steps.matrix-webdav.outputs.result)[steps.key-webdav.outputs.key]) }} steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 2 diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index c8bd83c70..3286c5d34 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -342,7 +342,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python ${{ env.DEFAULT_PYTHON }} uses: actions/setup-python@v4 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 39ed60801..e0d227ea9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -23,7 +23,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 0 diff --git a/.github/workflows/test-clickhouse.yml b/.github/workflows/test-clickhouse.yml index bb36c5b0d..42170ed2e 100644 --- a/.github/workflows/test-clickhouse.yml +++ b/.github/workflows/test-clickhouse.yml @@ -40,7 +40,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v3 diff --git a/.github/workflows/test-core.yml b/.github/workflows/test-core.yml index 6d8cc93bd..0ed807efa 100644 --- a/.github/workflows/test-core.yml +++ b/.github/workflows/test-core.yml @@ -26,7 +26,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python ${{ inputs.python-version }} uses: actions/setup-python@v4 diff --git a/.github/workflows/test-ftp.yml b/.github/workflows/test-ftp.yml index 000e200a6..4ed90ec89 100644 --- a/.github/workflows/test-ftp.yml +++ b/.github/workflows/test-ftp.yml @@ -23,7 +23,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python ${{ inputs.python-version }} uses: actions/setup-python@v4 diff --git a/.github/workflows/test-ftps.yml b/.github/workflows/test-ftps.yml index f67abe96f..741bd7a0e 100644 --- a/.github/workflows/test-ftps.yml +++ b/.github/workflows/test-ftps.yml @@ -23,7 +23,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python ${{ inputs.python-version }} uses: actions/setup-python@v4 diff --git a/.github/workflows/test-greenplum.yml b/.github/workflows/test-greenplum.yml index 0a7812ec9..67026aab0 100644 --- a/.github/workflows/test-greenplum.yml +++ b/.github/workflows/test-greenplum.yml @@ -42,7 +42,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v3 diff --git a/.github/workflows/test-hdfs.yml b/.github/workflows/test-hdfs.yml index da4cbdc57..e48f4dd0a 100644 --- 
a/.github/workflows/test-hdfs.yml +++ b/.github/workflows/test-hdfs.yml @@ -29,7 +29,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v3 diff --git a/.github/workflows/test-hive.yml b/.github/workflows/test-hive.yml index 939fee079..cce56d675 100644 --- a/.github/workflows/test-hive.yml +++ b/.github/workflows/test-hive.yml @@ -26,7 +26,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v3 diff --git a/.github/workflows/test-kafka.yml b/.github/workflows/test-kafka.yml index ffd7c388d..b6641557b 100644 --- a/.github/workflows/test-kafka.yml +++ b/.github/workflows/test-kafka.yml @@ -73,7 +73,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v3 diff --git a/.github/workflows/test-local-fs.yml b/.github/workflows/test-local-fs.yml index 57873a5c9..f23beeb49 100644 --- a/.github/workflows/test-local-fs.yml +++ b/.github/workflows/test-local-fs.yml @@ -26,7 +26,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v3 diff --git a/.github/workflows/test-mongodb.yml b/.github/workflows/test-mongodb.yml index 0d9d80cca..38086671e 100644 --- a/.github/workflows/test-mongodb.yml +++ b/.github/workflows/test-mongodb.yml @@ -38,7 +38,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v3 diff --git a/.github/workflows/test-mssql.yml b/.github/workflows/test-mssql.yml index b84c2f7f1..d3b2a21a8 100644 --- a/.github/workflows/test-mssql.yml +++ b/.github/workflows/test-mssql.yml @@ -41,7 +41,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v3 diff --git a/.github/workflows/test-mysql.yml b/.github/workflows/test-mysql.yml index c7d4937b5..16745e904 100644 --- a/.github/workflows/test-mysql.yml +++ b/.github/workflows/test-mysql.yml @@ -40,7 +40,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v3 diff --git a/.github/workflows/test-oracle.yml b/.github/workflows/test-oracle.yml index 213d555cd..dcd51b42a 100644 --- a/.github/workflows/test-oracle.yml +++ b/.github/workflows/test-oracle.yml @@ -43,7 +43,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v3 diff --git a/.github/workflows/test-postgres.yml b/.github/workflows/test-postgres.yml index 819d3f533..30c91dfca 100644 --- a/.github/workflows/test-postgres.yml +++ b/.github/workflows/test-postgres.yml @@ -39,7 +39,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v3 diff --git a/.github/workflows/test-s3.yml b/.github/workflows/test-s3.yml index f2005b15c..99a51269e 100644 --- a/.github/workflows/test-s3.yml +++ b/.github/workflows/test-s3.yml @@ -40,7 +40,7 @@ jobs: steps: - name: Checkout code - uses: 
actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v3 diff --git a/.github/workflows/test-sftp.yml b/.github/workflows/test-sftp.yml index e22f2301c..bd630710b 100644 --- a/.github/workflows/test-sftp.yml +++ b/.github/workflows/test-sftp.yml @@ -33,7 +33,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python ${{ inputs.python-version }} uses: actions/setup-python@v4 diff --git a/.github/workflows/test-teradata.yml b/.github/workflows/test-teradata.yml index 31ec3712b..20ef294b7 100644 --- a/.github/workflows/test-teradata.yml +++ b/.github/workflows/test-teradata.yml @@ -26,7 +26,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Java ${{ inputs.java-version }} uses: actions/setup-java@v3 diff --git a/.github/workflows/test-webdav.yml b/.github/workflows/test-webdav.yml index f2e6acf5c..fda365489 100644 --- a/.github/workflows/test-webdav.yml +++ b/.github/workflows/test-webdav.yml @@ -23,7 +23,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python ${{ inputs.python-version }} uses: actions/setup-python@v4 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f027fb99d..44125d701 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -325,7 +325,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python ${{ env.DEFAULT_PYTHON }} uses: actions/setup-python@v4 From ebbeed2de8b944a7dc309ee875fc8c0dd1492d06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Tue, 5 Sep 2023 08:28:49 +0000 Subject: [PATCH 25/30] [DOP-8511] Update CHANGELOG --- docs/changelog/next_release/+.improvement.rst | 4 ++++ docs/changelog/next_release/136.improvement.rst | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/next_release/+.improvement.rst delete mode 100644 docs/changelog/next_release/136.improvement.rst diff --git a/docs/changelog/next_release/+.improvement.rst b/docs/changelog/next_release/+.improvement.rst new file mode 100644 index 000000000..864a91694 --- /dev/null +++ b/docs/changelog/next_release/+.improvement.rst @@ -0,0 +1,4 @@ +Improve documentation: + +* Add notes about reading and writing to database connections documentation +* Add notes about executing statements in JDBC and Greenplum connections diff --git a/docs/changelog/next_release/136.improvement.rst b/docs/changelog/next_release/136.improvement.rst deleted file mode 100644 index 54489df60..000000000 --- a/docs/changelog/next_release/136.improvement.rst +++ /dev/null @@ -1 +0,0 @@ -Add notes about reading and writing to Kafka to documentation From 2117f92c23e9206f0f1fb843668486df531791e8 Mon Sep 17 00:00:00 2001 From: Maxim Liksakov <67663774+maxim-lixakov@users.noreply.github.com> Date: Wed, 6 Sep 2023 12:34:18 +0300 Subject: [PATCH 26/30] [DOP-8528] Allow modes "ignore" and "error" in GreenplumWriteOptions (#142) * [DOP-8528] - update modes in GreenplumWriteOptions * [DOP-8528] - updated tests * [DOP-8528] - add changelog * [DOP-8528] - renamed changelog/feature file --- CONTRIBUTING.rst | 2 +- docs/changelog/next_release/142.feature.rst | 1 + 
.../db_connection/greenplum/connection.py | 6 +- .../db_connection/greenplum/options.py | 67 ++++++++++----- .../test_greenplum_writer_integration.py | 82 +++++++++++++++++-- .../test_greenplum_unit.py | 18 +++- 6 files changed, 142 insertions(+), 34 deletions(-) create mode 100644 docs/changelog/next_release/142.feature.rst diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index c8e1fb52c..4dc2d824f 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -170,7 +170,7 @@ Without docker-compose To run Greenplum tests, you should: - * Download `Pivotal connector for Spark `_ + * Download `Pivotal connector for Spark `_ * Either move it to ``~/.ivy2/jars/``, or pass file path to ``CLASSPATH`` * Set environment variable ``ONETL_DB_WITH_GREENPLUM=true`` to enable adding connector to Spark session diff --git a/docs/changelog/next_release/142.feature.rst b/docs/changelog/next_release/142.feature.rst new file mode 100644 index 000000000..453fb814a --- /dev/null +++ b/docs/changelog/next_release/142.feature.rst @@ -0,0 +1 @@ +Add ``ignore`` and ``error`` writing modes in ``Greenplum.WriteOptions`` diff --git a/onetl/connection/db_connection/greenplum/connection.py b/onetl/connection/db_connection/greenplum/connection.py index 780848614..2a3a8952f 100644 --- a/onetl/connection/db_connection/greenplum/connection.py +++ b/onetl/connection/db_connection/greenplum/connection.py @@ -312,7 +312,11 @@ def write_df_to_target( self._check_expected_jobs_number(df, action="write") log.info("|%s| Saving data to a table %r", self.__class__.__name__, target) - mode = "overwrite" if write_options.if_exists == GreenplumTableExistBehavior.REPLACE_ENTIRE_TABLE else "append" + mode = ( + "overwrite" + if write_options.if_exists == GreenplumTableExistBehavior.REPLACE_ENTIRE_TABLE + else write_options.if_exists.value + ) df.write.format("greenplum").options( **self._connector_params(target), **options_dict, diff --git a/onetl/connection/db_connection/greenplum/options.py b/onetl/connection/db_connection/greenplum/options.py index 5a75db60f..86785155e 100644 --- a/onetl/connection/db_connection/greenplum/options.py +++ b/onetl/connection/db_connection/greenplum/options.py @@ -50,6 +50,8 @@ class GreenplumTableExistBehavior(str, Enum): APPEND = "append" + IGNORE = "ignore" + ERROR = "error" REPLACE_ENTIRE_TABLE = "replace_entire_table" def __str__(self) -> str: @@ -241,43 +243,64 @@ class Config: .. dropdown:: Behavior in details - * Table does not exist - Table is created using options provided by user - (``distributedBy`` and others). + * Table does not exist + Table is created using options provided by user + (``distributedBy`` and others). - * Table exists - Data is appended to a table. Table has the same DDL as before writing data. + * Table exists + Data is appended to a table. Table has the same DDL as before writing data. - .. warning:: + .. warning:: - This mode does not check whether table already contains - rows from dataframe, so duplicated rows can be created. + This mode does not check whether table already contains + rows from dataframe, so duplicated rows can be created. - Also Spark does not support passing custom options to - insert statement, like ``ON CONFLICT``, so don't try to - implement deduplication using unique indexes or constraints. + Also Spark does not support passing custom options to + insert statement, like ``ON CONFLICT``, so don't try to + implement deduplication using unique indexes or constraints. 
- Instead, write to staging table and perform deduplication - using :obj:`~execute` method. + Instead, write to staging table and perform deduplication + using :obj:`~execute` method. * ``replace_entire_table`` **Table is dropped and then created**. .. dropdown:: Behavior in details - * Table does not exist - Table is created using options provided by user - (``distributedBy`` and others). + * Table does not exist + Table is created using options provided by user + (``distributedBy`` and others). - * Table exists - Table content is replaced with dataframe content. + * Table exists + Table content is replaced with dataframe content. - After writing completed, target table could either have the same DDL as - before writing data (``truncate=True``), or can be recreated (``truncate=False``). + After writing completed, target table could either have the same DDL as + before writing data (``truncate=True``), or can be recreated (``truncate=False``). - .. note:: + * ``ignore`` + Ignores the write operation if the table already exists. + + .. dropdown:: Behavior in details + + * Table does not exist + Table is created using options provided by user + (``distributedBy`` and others). + + * Table exists + The write operation is ignored, and no data is written to the table. + + * ``error`` + Raises an error if the table already exists. + + .. dropdown:: Behavior in details + + * Table does not exist + Table is created using options provided by user + (``distributedBy`` and others). + + * Table exists + An error is raised, and no data is written to the table. - ``error`` and ``ignore`` modes are not supported. """ @root_validator(pre=True) diff --git a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_greenplum_writer_integration.py b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_greenplum_writer_integration.py index 7899b3257..c97105a44 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_greenplum_writer_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_greenplum_writer_integration.py @@ -6,7 +6,17 @@ pytestmark = pytest.mark.greenplum -def test_greenplum_writer_snapshot(spark, processing, prepare_schema_table): +@pytest.mark.parametrize( + "options", + [ + {}, + {"if_exists": "append"}, + {"if_exists": "replace_entire_table"}, + {"if_exists": "error"}, + {"if_exists": "ignore"}, + ], +) +def test_greenplum_writer_snapshot(spark, processing, get_schema_table, options): df = processing.create_spark_df(spark=spark) greenplum = Greenplum( @@ -21,20 +31,21 @@ def test_greenplum_writer_snapshot(spark, processing, prepare_schema_table): writer = DBWriter( connection=greenplum, - target=prepare_schema_table.full_name, + target=get_schema_table.full_name, + options=Greenplum.WriteOptions(**options), ) writer.run(df) processing.assert_equal_df( - schema=prepare_schema_table.schema, - table=prepare_schema_table.table, + schema=get_schema_table.schema, + table=get_schema_table.table, df=df, order_by="id_int", ) -def test_greenplum_writer_mode_append(spark, processing, prepare_schema_table): +def test_greenplum_writer_if_exists_append(spark, processing, prepare_schema_table): df = processing.create_spark_df(spark=spark, min_id=1, max_id=1500) df1 = df[df.id_int < 1001] df2 = df[df.id_int > 1000] @@ -66,7 +77,7 @@ def test_greenplum_writer_mode_append(spark, processing, prepare_schema_table): ) -def test_greenplum_writer_mode(spark, processing, 
prepare_schema_table): +def test_greenplum_writer_if_exists_overwrite(spark, processing, prepare_schema_table): df = processing.create_spark_df(spark=spark, min_id=1, max_id=1500) df1 = df[df.id_int < 1001] df2 = df[df.id_int > 1000] @@ -96,3 +107,62 @@ def test_greenplum_writer_mode(spark, processing, prepare_schema_table): df=df2, order_by="id_int", ) + + +def test_greenplum_writer_if_exists_error(spark, processing, prepare_schema_table): + from py4j.java_gateway import Py4JJavaError + + df = processing.create_spark_df(spark=spark, min_id=1, max_id=1500) + + greenplum = Greenplum( + host=processing.host, + port=processing.port, + user=processing.user, + password=processing.password, + database=processing.database, + spark=spark, + extra=processing.extra, + ) + + writer = DBWriter( + connection=greenplum, + target=prepare_schema_table.full_name, + options=Greenplum.WriteOptions(if_exists="error"), + ) + + with pytest.raises( + Py4JJavaError, + match=f'Table "{prepare_schema_table.schema}"."{prepare_schema_table.table}"' + f" exists, and SaveMode.ErrorIfExists was specified", + ): + writer.run(df) + + +def test_greenplum_writer_if_exists_ignore(spark, processing, prepare_schema_table): + df = processing.create_spark_df(spark=spark, min_id=1, max_id=1500) + + greenplum = Greenplum( + host=processing.host, + port=processing.port, + user=processing.user, + password=processing.password, + database=processing.database, + spark=spark, + extra=processing.extra, + ) + + writer = DBWriter( + connection=greenplum, + target=prepare_schema_table.full_name, + options=Greenplum.WriteOptions(if_exists="ignore"), + ) + + writer.run(df) # The write operation is ignored + + empty_df = spark.createDataFrame([], df.schema) + + processing.assert_equal_df( + schema=prepare_schema_table.schema, + table=prepare_schema_table.table, + df=empty_df, + ) diff --git a/tests/tests_unit/tests_db_connection_unit/test_greenplum_unit.py b/tests/tests_unit/tests_db_connection_unit/test_greenplum_unit.py index 5574a3393..f54eec0a4 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_greenplum_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_greenplum_unit.py @@ -243,6 +243,8 @@ def test_greenplum_read_options_cannot_be_used_in_write_options(arg, value): ({}, GreenplumTableExistBehavior.APPEND), ({"if_exists": "append"}, GreenplumTableExistBehavior.APPEND), ({"if_exists": "replace_entire_table"}, GreenplumTableExistBehavior.REPLACE_ENTIRE_TABLE), + ({"if_exists": "error"}, GreenplumTableExistBehavior.ERROR), + ({"if_exists": "ignore"}, GreenplumTableExistBehavior.IGNORE), ], ) def test_greenplum_write_options_if_exists(options, value): @@ -270,6 +272,18 @@ def test_greenplum_write_options_if_exists(options, value): "Mode `overwrite` is deprecated since v0.9.0 and will be removed in v1.0.0. " "Use `replace_entire_table` instead", ), + ( + {"mode": "ignore"}, + GreenplumTableExistBehavior.IGNORE, + "Option `Greenplum.WriteOptions(mode=...)` is deprecated since v0.9.0 and will be removed in v1.0.0. " + "Use `Greenplum.WriteOptions(if_exists=...)` instead", + ), + ( + {"mode": "error"}, + GreenplumTableExistBehavior.ERROR, + "Option `Greenplum.WriteOptions(mode=...)` is deprecated since v0.9.0 and will be removed in v1.0.0. 
" + "Use `Greenplum.WriteOptions(if_exists=...)` instead", + ), ], ) def test_greenplum_write_options_mode_deprecated(options, value, message): @@ -281,10 +295,6 @@ def test_greenplum_write_options_mode_deprecated(options, value, message): @pytest.mark.parametrize( "options", [ - # disallowed modes - {"mode": "error"}, - {"mode": "ignore"}, - # wrong mode {"mode": "wrong_mode"}, ], ) From 93636741917705d9f59d51081b9e89e0b28e6fa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 4 Sep 2023 15:34:09 +0000 Subject: [PATCH 27/30] [DOP-8571] Make log messages more consistent --- README.rst | 6 +++--- docker-compose.yml | 4 ++-- docs/hooks/design.rst | 2 +- .../db_connection/db_connection/connection.py | 5 ++--- .../connection/db_connection/greenplum/connection.py | 6 +++--- onetl/connection/db_connection/hive/connection.py | 12 ++++++------ .../db_connection/jdbc_connection/connection.py | 9 +++++---- .../db_connection/jdbc_mixin/connection.py | 2 +- onetl/connection/db_connection/kafka/connection.py | 11 +++++------ onetl/connection/db_connection/mongodb/connection.py | 6 +++--- onetl/connection/file_connection/file_connection.py | 5 ++--- onetl/connection/file_connection/hdfs/connection.py | 6 +++--- .../file_df_connection/spark_file_df_connection.py | 9 ++++----- .../file_df_connection/spark_hdfs/connection.py | 6 +++--- onetl/file/file_downloader/file_downloader.py | 2 +- onetl/file/file_mover/file_mover.py | 2 +- onetl/file/file_uploader/file_uploader.py | 2 +- .../test_spark_hdfs_integration.py | 2 +- .../test_spark_local_fs_integration.py | 2 +- .../test_spark_s3_integration.py | 2 +- .../test_clickhouse_integration.py | 2 +- .../test_greenplum_integration.py | 2 +- .../test_hive_integration.py | 2 +- .../test_kafka_integration.py | 6 +++--- .../test_mongodb_integration.py | 2 +- .../test_mssql_integration.py | 2 +- .../test_mysql_integration.py | 2 +- .../test_oracle_integration.py | 2 +- .../test_postgres_integration.py | 2 +- .../test_teradata_integration.py | 2 +- .../test_ftp_file_connection_integration.py | 4 ++-- .../test_ftps_file_connection_integration.py | 4 ++-- .../test_hdfs_file_connection_integration.py | 6 +++--- .../test_s3_file_connection_integration.py | 2 +- .../test_sftp_file_connection_integration.py | 2 +- .../test_webdav_file_connection_integration.py | 2 +- 36 files changed, 71 insertions(+), 74 deletions(-) diff --git a/README.rst b/README.rst index 1e39a0734..13b280830 100644 --- a/README.rst +++ b/README.rst @@ -321,7 +321,7 @@ Read data from MSSQL, transform & write to Hive. extra={"ApplicationIntent": "ReadOnly"}, ).check() - # >>> INFO:|MSSQL| Connection is available. + # >>> INFO:|MSSQL| Connection is available # Initialize DB reader reader = DBReader( @@ -408,7 +408,7 @@ Download files from SFTP & upload them to HDFS. password="somepassword", ).check() - # >>> INFO:|SFTP| Connection is available. + # >>> INFO:|SFTP| Connection is available # Initialize downloader file_downloader = FileDownloader( @@ -546,7 +546,7 @@ Read files directly from S3 path, convert them to dataframe, transform it and th spark=spark, ).check() - # >>> INFO:|SparkS3| Connection is available. 
+ # >>> INFO:|SparkS3| Connection is available # Describe file format and parsing options csv = CSV( diff --git a/docker-compose.yml b/docker-compose.yml index d6f796d31..a08d8fc38 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -33,7 +33,7 @@ services: - onetl clickhouse: - image: ${CLICKHOUSE_IMAGE:-clickhouse/clickhouse-server:latest} + image: ${CLICKHOUSE_IMAGE:-clickhouse/clickhouse-server:latest-alpine} restart: unless-stopped ports: - 8123:8123 @@ -101,7 +101,7 @@ services: - onetl postgres: - image: ${POSTGRES_IMAGE:-postgres:15.2} + image: ${POSTGRES_IMAGE:-postgres:15.2-alpine} restart: unless-stopped env_file: .env.dependencies ports: diff --git a/docs/hooks/design.rst b/docs/hooks/design.rst index 1b5266e8b..b05f9d64e 100644 --- a/docs/hooks/design.rst +++ b/docs/hooks/design.rst @@ -696,7 +696,7 @@ But most of logs are emitted with even lower level ``NOTICE``, to make output le NOTICE |Hooks| Calling hook 'mymodule.callback1' (1/2) NOTICE |Hooks| Hook is finished with returning non-None result NOTICE |Hooks| Calling hook 'mymodule.callback2' (2/2) - NOTICE |Hooks| This is a context manager, entering... + NOTICE |Hooks| This is a context manager, entering ... NOTICE |Hooks| Calling original method 'MyClass.method' NOTICE |Hooks| Method call is finished NOTICE |Hooks| Method call result (*NOT* None) will be replaced with result of hook 'mymodule.callback1' diff --git a/onetl/connection/db_connection/db_connection/connection.py b/onetl/connection/db_connection/db_connection/connection.py index 14c5d4f9d..315f5b17c 100644 --- a/onetl/connection/db_connection/db_connection/connection.py +++ b/onetl/connection/db_connection/db_connection/connection.py @@ -49,8 +49,7 @@ def _forward_refs(cls) -> dict[str, type]: return refs def _log_parameters(self): - log.info("|Spark| Using connection parameters:") - log_with_indent(log, "type = %s", self.__class__.__name__) + log.info("|%s| Using connection parameters:", self.__class__.__name__) parameters = self.dict(exclude_none=True, exclude={"spark"}) - for attr, value in sorted(parameters.items()): + for attr, value in parameters.items(): log_with_indent(log, "%s = %r", attr, value) diff --git a/onetl/connection/db_connection/greenplum/connection.py b/onetl/connection/db_connection/greenplum/connection.py index 2a3a8952f..c0a7e3ef1 100644 --- a/onetl/connection/db_connection/greenplum/connection.py +++ b/onetl/connection/db_connection/greenplum/connection.py @@ -331,7 +331,7 @@ def get_df_schema( columns: list[str] | None = None, options: JDBCOptions | None = None, ) -> StructType: - log.info("|%s| Fetching schema of table %r", self.__class__.__name__, source) + log.info("|%s| Fetching schema of table %r ...", self.__class__.__name__, source) query = get_sql_query(source, columns=columns, where="1=0", compact=True) jdbc_options = self.JDBCOptions.parse(options).copy(update={"fetchsize": 0}) @@ -354,7 +354,7 @@ def get_min_max_bounds( where: str | None = None, options: JDBCOptions | None = None, ) -> tuple[Any, Any]: - log.info("|Spark| Getting min and max values for column %r", column) + log.info("|%s| Getting min and max values for column %r ...", self.__class__.__name__, column) jdbc_options = self.JDBCOptions.parse(options).copy(update={"fetchsize": 1}) @@ -381,7 +381,7 @@ def get_min_max_bounds( min_value = row["min"] max_value = row["max"] - log.info("|Spark| Received values:") + log.info("|%s| Received values:", self.__class__.__name__) log_with_indent(log, "MIN(%r) = %r", column, min_value) log_with_indent(log, "MAX(%r) = 
%r", column, max_value) diff --git a/onetl/connection/db_connection/hive/connection.py b/onetl/connection/db_connection/hive/connection.py index bb727d65c..dda14a411 100644 --- a/onetl/connection/db_connection/hive/connection.py +++ b/onetl/connection/db_connection/hive/connection.py @@ -208,7 +208,7 @@ def check(self): try: self._execute_sql(self._CHECK_QUERY) - log.info("|%s| Connection is available.", self.__class__.__name__) + log.info("|%s| Connection is available", self.__class__.__name__) except Exception as e: log.exception("|%s| Connection is unavailable", self.__class__.__name__) raise RuntimeError("Connection is unavailable") from e @@ -375,7 +375,7 @@ def get_df_schema( source: str, columns: list[str] | None = None, ) -> StructType: - log.info("|%s| Fetching schema of table table %r", self.__class__.__name__, source) + log.info("|%s| Fetching schema of table table %r ...", self.__class__.__name__, source) query = get_sql_query(source, columns=columns, where="1=0", compact=True) log.debug("|%s| Executing SQL query:", self.__class__.__name__) @@ -394,7 +394,7 @@ def get_min_max_bounds( hint: str | None = None, where: str | None = None, ) -> Tuple[Any, Any]: - log.info("|Spark| Getting min and max values for column %r", column) + log.info("|%s| Getting min and max values for column %r ...", self.__class__.__name__, column) sql_text = get_sql_query( table=source, @@ -420,7 +420,7 @@ def get_min_max_bounds( min_value = row["min"] max_value = row["max"] - log.info("|Spark| Received values:") + log.info("|%s| Received values:", self.__class__.__name__) log_with_indent(log, "MIN(%s) = %r", column, min_value) log_with_indent(log, "MAX(%s) = %r", column, max_value) @@ -428,12 +428,12 @@ def get_min_max_bounds( @validator("cluster") def _validate_cluster_name(cls, cluster): - log.debug("|%s| Normalizing cluster %r name ...", cls.__name__, cluster) + log.debug("|%s| Normalizing cluster %r name...", cls.__name__, cluster) validated_cluster = cls.Slots.normalize_cluster_name(cluster) or cluster if validated_cluster != cluster: log.debug("|%s| Got %r", cls.__name__) - log.debug("|%s| Checking if cluster %r is a known cluster ...", cls.__name__, validated_cluster) + log.debug("|%s| Checking if cluster %r is a known cluster...", cls.__name__, validated_cluster) known_clusters = cls.Slots.get_known_clusters() if known_clusters and validated_cluster not in known_clusters: raise ValueError( diff --git a/onetl/connection/db_connection/jdbc_connection/connection.py b/onetl/connection/db_connection/jdbc_connection/connection.py index 5c58bdee4..8246e9d19 100644 --- a/onetl/connection/db_connection/jdbc_connection/connection.py +++ b/onetl/connection/db_connection/jdbc_connection/connection.py @@ -230,7 +230,7 @@ def get_df_schema( columns: list[str] | None = None, options: JDBCReadOptions | None = None, ) -> StructType: - log.info("|%s| Fetching schema of table %r", self.__class__.__name__, source) + log.info("|%s| Fetching schema of table %r ...", self.__class__.__name__, source) query = get_sql_query(source, columns=columns, where="1=0", compact=True) read_options = self._exclude_partition_options(self.ReadOptions.parse(options), fetchsize=0) @@ -285,7 +285,7 @@ def get_min_max_bounds( where: str | None = None, options: JDBCReadOptions | None = None, ) -> tuple[Any, Any]: - log.info("|Spark| Getting min and max values for column %r", column) + log.info("|%s| Getting min and max values for column %r ...", self.__class__.__name__, column) read_options = 
self._exclude_partition_options(self.ReadOptions.parse(options), fetchsize=1) @@ -313,7 +313,7 @@ def get_min_max_bounds( min_value = row["min"] max_value = row["max"] - log.info("|Spark| Received values:") + log.info("|%s| Received values:", self.__class__.__name__) log_with_indent(log, "MIN(%s) = %r", column, min_value) log_with_indent(log, "MAX(%s) = %r", column, max_value) @@ -365,8 +365,9 @@ def _set_lower_upper_bound( return options log.warning( - "|Spark| Passed numPartitions = %d, but values %r are not set. " + "|%s| Passed numPartitions = %d, but values %r are not set. " "They will be detected automatically based on values in partitionColumn %r", + self.__class__.__name__, options.num_partitions, missing_values, options.partition_column, diff --git a/onetl/connection/db_connection/jdbc_mixin/connection.py b/onetl/connection/db_connection/jdbc_mixin/connection.py index c02fb82f1..ff76e5e34 100644 --- a/onetl/connection/db_connection/jdbc_mixin/connection.py +++ b/onetl/connection/db_connection/jdbc_mixin/connection.py @@ -138,7 +138,7 @@ def check(self): try: self._query_optional_on_driver(self._CHECK_QUERY, self.JDBCOptions(fetchsize=1)) # type: ignore - log.info("|%s| Connection is available.", self.__class__.__name__) + log.info("|%s| Connection is available", self.__class__.__name__) except Exception as e: log.exception("|%s| Connection is unavailable", self.__class__.__name__) raise RuntimeError("Connection is unavailable") from e diff --git a/onetl/connection/db_connection/kafka/connection.py b/onetl/connection/db_connection/kafka/connection.py index 6cb97a1c8..0298830b3 100644 --- a/onetl/connection/db_connection/kafka/connection.py +++ b/onetl/connection/db_connection/kafka/connection.py @@ -250,7 +250,7 @@ def check(self): try: self._get_topics() - log.info("|%s| Connection is available.", self.__class__.__name__) + log.info("|%s| Connection is available", self.__class__.__name__) except Exception as e: log.exception("|%s| Connection is unavailable", self.__class__.__name__) raise RuntimeError("Connection is unavailable") from e @@ -471,12 +471,12 @@ def _get_addresses_by_cluster(cls, values): @validator("cluster") def _validate_cluster_name(cls, cluster): - log.debug("|%s| Normalizing cluster %r name ...", cls.__name__, cluster) + log.debug("|%s| Normalizing cluster %r name...", cls.__name__, cluster) validated_cluster = cls.Slots.normalize_cluster_name(cluster) or cluster if validated_cluster != cluster: log.debug("|%s| Got %r", cls.__name__, validated_cluster) - log.debug("|%s| Checking if cluster %r is a known cluster ...", cls.__name__, validated_cluster) + log.debug("|%s| Checking if cluster %r is a known cluster...", cls.__name__, validated_cluster) known_clusters = cls.Slots.get_known_clusters() if known_clusters and validated_cluster not in known_clusters: raise ValueError( @@ -489,7 +489,7 @@ def _validate_cluster_name(cls, cluster): def _validate_addresses(cls, value, values): cluster = values.get("cluster") - log.debug("|%s| Normalizing addresses %r names ...", cls.__name__, value) + log.debug("|%s| Normalizing addresses %r names...", cls.__name__, value) validated_addresses = [cls.Slots.normalize_address(address, cluster) or address for address in value] if validated_addresses != value: @@ -564,8 +564,7 @@ def _get_topics(self, timeout: int = 10) -> set[str]: return set(topics) def _log_parameters(self): - log.info("|Spark| Using connection parameters:") - log_with_indent(log, "type = %s", self.__class__.__name__) + log.info("|%s| Using connection 
parameters:", self.__class__.__name__) log_with_indent(log, "cluster = %r", self.cluster) log_collection(log, "addresses", self.addresses, max_items=10) log_with_indent(log, "protocol = %r", self.protocol) diff --git a/onetl/connection/db_connection/mongodb/connection.py b/onetl/connection/db_connection/mongodb/connection.py index 98c579492..16320ebca 100644 --- a/onetl/connection/db_connection/mongodb/connection.py +++ b/onetl/connection/db_connection/mongodb/connection.py @@ -390,7 +390,7 @@ def check(self): jvm = self.spark._jvm # type: ignore client = jvm.com.mongodb.client.MongoClients.create(self.connection_url) list(client.listDatabaseNames().iterator()) - log.info("|%s| Connection is available.", self.__class__.__name__) + log.info("|%s| Connection is available", self.__class__.__name__) except Exception as e: log.exception("|%s| Connection is unavailable", self.__class__.__name__) raise RuntimeError("Connection is unavailable") from e @@ -407,7 +407,7 @@ def get_min_max_bounds( where: dict | None = None, options: MongoDBReadOptions | dict | None = None, ) -> tuple[Any, Any]: - log.info("|Spark| Getting min and max values for column %r", column) + log.info("|%s| Getting min and max values for column %r ...", self.__class__.__name__, column) read_options = self.ReadOptions.parse(options).dict(by_alias=True, exclude_none=True) @@ -435,7 +435,7 @@ def get_min_max_bounds( min_value = row["min"] max_value = row["max"] - log.info("|Spark| Received values:") + log.info("|%s| Received values:", self.__class__.__name__) log_with_indent(log, "MIN(%s) = %r", column, min_value) log_with_indent(log, "MAX(%s) = %r", column, max_value) diff --git a/onetl/connection/file_connection/file_connection.py b/onetl/connection/file_connection/file_connection.py index 914e989e2..b92805dec 100644 --- a/onetl/connection/file_connection/file_connection.py +++ b/onetl/connection/file_connection/file_connection.py @@ -719,10 +719,9 @@ def _extract_stat_from_entry(self, top: RemotePath, entry) -> PathStatProtocol: """ def _log_parameters(self): - log.info("|onETL| Using connection parameters:") - log_with_indent(log, "type = %s", self.__class__.__name__) + log.info("|%s| Using connection parameters:", self.__class__.__name__) parameters = self.dict(exclude_none=True) - for attr, value in sorted(parameters.items()): + for attr, value in parameters.items(): if isinstance(value, os.PathLike): log_with_indent(log, "%s = %s", attr, path_repr(value)) else: diff --git a/onetl/connection/file_connection/hdfs/connection.py b/onetl/connection/file_connection/hdfs/connection.py index 20869909c..2419aae2f 100644 --- a/onetl/connection/file_connection/hdfs/connection.py +++ b/onetl/connection/file_connection/hdfs/connection.py @@ -309,12 +309,12 @@ def _validate_cluster_or_hostname_set(cls, values): @validator("cluster") def _validate_cluster_name(cls, cluster): - log.debug("|%s| Normalizing cluster %r name ...", cls.__name__, cluster) + log.debug("|%s| Normalizing cluster %r name...", cls.__name__, cluster) validated_cluster = cls.Slots.normalize_cluster_name(cluster) or cluster if validated_cluster != cluster: log.debug("|%s| Got %r", cls.__name__, validated_cluster) - log.debug("|%s| Checking if cluster %r is a known cluster ...", cls.__name__, validated_cluster) + log.debug("|%s| Checking if cluster %r is a known cluster...", cls.__name__, validated_cluster) known_clusters = cls.Slots.get_known_clusters() if known_clusters and validated_cluster not in known_clusters: raise ValueError( @@ -327,7 +327,7 @@ def 
_validate_cluster_name(cls, cluster): def _validate_host_name(cls, host, values): cluster = values.get("cluster") - log.debug("|%s| Normalizing namenode %r ...", cls.__name__, host) + log.debug("|%s| Normalizing namenode %r host...", cls.__name__, host) namenode = cls.Slots.normalize_namenode_host(host, cluster) or host if namenode != host: log.debug("|%s| Got %r", cls.__name__, namenode) diff --git a/onetl/connection/file_df_connection/spark_file_df_connection.py b/onetl/connection/file_df_connection/spark_file_df_connection.py index 206cf9e27..96d51e4c9 100644 --- a/onetl/connection/file_df_connection/spark_file_df_connection.py +++ b/onetl/connection/file_df_connection/spark_file_df_connection.py @@ -58,7 +58,7 @@ def check(self): try: fs = self._get_spark_fs() fs.exists(path) - log.info("|%s| Connection is available.", self.__class__.__name__) + log.info("|%s| Connection is available", self.__class__.__name__) except Exception as e: raise RuntimeError("Connection is unavailable") from e return self @@ -81,7 +81,7 @@ def read_files_as_df( if root: log.info("|%s| Reading data from '%s' ...", self.__class__.__name__, root) else: - log.info("|%s| Reading data ...", self.__class__.__name__) + log.info("|%s| Reading data...", self.__class__.__name__) reader: DataFrameReader = self.spark.read with ExitStack() as stack: @@ -183,8 +183,7 @@ def _forward_refs(cls) -> dict[str, type]: return refs def _log_parameters(self): - log.info("|Spark| Using connection parameters:") - log_with_indent(log, "type = %s", self.__class__.__name__) + log.info("|%s| Using connection parameters:", self.__class__.__name__) parameters = self.dict(exclude_none=True, exclude={"spark"}) - for attr, value in sorted(parameters.items()): + for attr, value in parameters.items(): log_with_indent(log, "%s = %r", attr, value) diff --git a/onetl/connection/file_df_connection/spark_hdfs/connection.py b/onetl/connection/file_df_connection/spark_hdfs/connection.py index 78083bbe7..04bdfae48 100644 --- a/onetl/connection/file_df_connection/spark_hdfs/connection.py +++ b/onetl/connection/file_df_connection/spark_hdfs/connection.py @@ -271,12 +271,12 @@ def get_current(cls, spark: SparkSession): @validator("cluster") def _validate_cluster_name(cls, cluster): - log.debug("|%s| Normalizing cluster %r name ...", cls.__name__, cluster) + log.debug("|%s| Normalizing cluster %r name...", cls.__name__, cluster) validated_cluster = cls.Slots.normalize_cluster_name(cluster) or cluster if validated_cluster != cluster: log.debug("|%s| Got %r", cls.__name__, validated_cluster) - log.debug("|%s| Checking if cluster %r is a known cluster ...", cls.__name__, validated_cluster) + log.debug("|%s| Checking if cluster %r is a known cluster...", cls.__name__, validated_cluster) known_clusters = cls.Slots.get_known_clusters() if known_clusters and validated_cluster not in known_clusters: raise ValueError( @@ -289,7 +289,7 @@ def _validate_cluster_name(cls, cluster): def _validate_host_name(cls, host, values): cluster = values.get("cluster") - log.debug("|%s| Normalizing namenode %r ...", cls.__name__, host) + log.debug("|%s| Normalizing namenode %r host...", cls.__name__, host) namenode = cls.Slots.normalize_namenode_host(host, cluster) or host if namenode != host: log.debug("|%s| Got %r", cls.__name__, namenode) diff --git a/onetl/file/file_downloader/file_downloader.py b/onetl/file/file_downloader/file_downloader.py index eb067f9a1..4a51cc74b 100644 --- a/onetl/file/file_downloader/file_downloader.py +++ b/onetl/file/file_downloader/file_downloader.py 
@@ -636,7 +636,7 @@ def _download_files( log.info("|%s| Files to be downloaded:", self.__class__.__name__) log_lines(log, str(files)) log_with_indent(log, "") - log.info("|%s| Starting the download process ...", self.__class__.__name__) + log.info("|%s| Starting the download process...", self.__class__.__name__) self._create_dirs(to_download) diff --git a/onetl/file/file_mover/file_mover.py b/onetl/file/file_mover/file_mover.py index 633ce0814..7d27eb8a8 100644 --- a/onetl/file/file_mover/file_mover.py +++ b/onetl/file/file_mover/file_mover.py @@ -444,7 +444,7 @@ def _move_files( log.info("|%s| Files to be moved:", self.__class__.__name__) log_lines(log, str(files)) log_with_indent(log, "") - log.info("|%s| Starting the move process ...", self.__class__.__name__) + log.info("|%s| Starting the move process...", self.__class__.__name__) self._create_dirs(to_move) diff --git a/onetl/file/file_uploader/file_uploader.py b/onetl/file/file_uploader/file_uploader.py index 4fa32c670..e9e8c550a 100644 --- a/onetl/file/file_uploader/file_uploader.py +++ b/onetl/file/file_uploader/file_uploader.py @@ -460,7 +460,7 @@ def _upload_files(self, to_upload: UPLOAD_ITEMS_TYPE) -> UploadResult: log.info("|%s| Files to be uploaded:", self.__class__.__name__) log_lines(log, str(files)) log_with_indent(log, "") - log.info("|%s| Starting the upload process ...", self.__class__.__name__) + log.info("|%s| Starting the upload process...", self.__class__.__name__) self._create_dirs(to_upload) diff --git a/tests/tests_integration/test_file_df_connection_integration/test_spark_hdfs_integration.py b/tests/tests_integration/test_file_df_connection_integration/test_spark_hdfs_integration.py index e145fcc7f..6d6c90a46 100644 --- a/tests/tests_integration/test_file_df_connection_integration/test_spark_hdfs_integration.py +++ b/tests/tests_integration/test_file_df_connection_integration/test_spark_hdfs_integration.py @@ -15,7 +15,7 @@ def test_spark_hdfs_check(hdfs_file_df_connection, caplog): with caplog.at_level(logging.INFO): assert hdfs.check() == hdfs - assert "type = SparkHDFS" in caplog.text + assert "|SparkHDFS|" in caplog.text assert f"cluster = '{hdfs.cluster}'" in caplog.text assert f"host = '{hdfs.host}'" in caplog.text assert f"ipc_port = {hdfs.ipc_port}" in caplog.text diff --git a/tests/tests_integration/test_file_df_connection_integration/test_spark_local_fs_integration.py b/tests/tests_integration/test_file_df_connection_integration/test_spark_local_fs_integration.py index c5d433b75..d14f9fcb2 100644 --- a/tests/tests_integration/test_file_df_connection_integration/test_spark_local_fs_integration.py +++ b/tests/tests_integration/test_file_df_connection_integration/test_spark_local_fs_integration.py @@ -13,6 +13,6 @@ def test_spark_local_fs_check(spark, caplog): with caplog.at_level(logging.INFO): assert local_fs.check() == local_fs - assert "type = SparkLocalFS" in caplog.text + assert "|SparkLocalFS|" in caplog.text assert "Connection is available" in caplog.text diff --git a/tests/tests_integration/test_file_df_connection_integration/test_spark_s3_integration.py b/tests/tests_integration/test_file_df_connection_integration/test_spark_s3_integration.py index 15fa6d5a4..cd10e67ba 100644 --- a/tests/tests_integration/test_file_df_connection_integration/test_spark_s3_integration.py +++ b/tests/tests_integration/test_file_df_connection_integration/test_spark_s3_integration.py @@ -15,7 +15,7 @@ def test_spark_s3_check(s3_file_df_connection, caplog): with caplog.at_level(logging.INFO): assert s3.check() == s3 - 
assert "type = SparkS3" in caplog.text + assert "|SparkS3|" in caplog.text assert f"host = '{s3.host}'" in caplog.text assert f"port = {s3.port}" in caplog.text assert f"protocol = '{s3.protocol}'" in caplog.text diff --git a/tests/tests_integration/tests_db_connection_integration/test_clickhouse_integration.py b/tests/tests_integration/tests_db_connection_integration/test_clickhouse_integration.py index 96671563a..134afdd7b 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_clickhouse_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_clickhouse_integration.py @@ -27,7 +27,7 @@ def test_clickhouse_connection_check(spark, processing, caplog): with caplog.at_level(logging.INFO): assert clickhouse.check() == clickhouse - assert "type = Clickhouse" in caplog.text + assert "|Clickhouse|" in caplog.text assert f"host = '{processing.host}'" in caplog.text assert f"port = {processing.port}" in caplog.text assert f"database = '{processing.database}'" in caplog.text diff --git a/tests/tests_integration/tests_db_connection_integration/test_greenplum_integration.py b/tests/tests_integration/tests_db_connection_integration/test_greenplum_integration.py index 8a31d828e..59c1b57ad 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_greenplum_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_greenplum_integration.py @@ -27,7 +27,7 @@ def test_greenplum_connection_check(spark, processing, caplog): with caplog.at_level(logging.INFO): assert greenplum.check() == greenplum - assert "type = Greenplum" in caplog.text + assert "|Greenplum|" in caplog.text assert f"host = '{processing.host}'" in caplog.text assert f"port = {processing.port}" in caplog.text assert f"user = '{processing.user}'" in caplog.text diff --git a/tests/tests_integration/tests_db_connection_integration/test_hive_integration.py b/tests/tests_integration/tests_db_connection_integration/test_hive_integration.py index 19ae816f9..b06d85755 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_hive_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_hive_integration.py @@ -20,7 +20,7 @@ def test_hive_check(spark, caplog): with caplog.at_level(logging.INFO): assert hive.check() == hive - assert "type = Hive" in caplog.text + assert "|Hive|" in caplog.text assert "spark = " not in caplog.text assert "Connection is available" in caplog.text diff --git a/tests/tests_integration/tests_db_connection_integration/test_kafka_integration.py b/tests/tests_integration/tests_db_connection_integration/test_kafka_integration.py index 5081307cf..aaeb75977 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_kafka_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_kafka_integration.py @@ -20,7 +20,7 @@ def test_kafka_check_plaintext_anonymous(spark, caplog): with caplog.at_level(logging.INFO): assert kafka.check() == kafka - assert "type = Kafka" in caplog.text + assert "|Kafka|" in caplog.text assert "addresses = [" in caplog.text assert f"'{kafka_processing.host}:{kafka_processing.port}'" in caplog.text assert "cluster = 'cluster'" in caplog.text @@ -48,7 +48,7 @@ def test_kafka_check_plaintext_basic_auth(spark, caplog): with caplog.at_level(logging.INFO): assert kafka.check() == kafka - assert "type = Kafka" in caplog.text + assert "|Kafka|" in caplog.text assert "addresses = [" in caplog.text assert 
f"'{kafka_processing.host}:{kafka_processing.sasl_port}'" in caplog.text assert "cluster = 'cluster'" in caplog.text @@ -78,7 +78,7 @@ def test_kafka_check_plaintext_scram_auth(digest, spark, caplog): with caplog.at_level(logging.INFO): assert kafka.check() == kafka - assert "type = Kafka" in caplog.text + assert "|Kafka|" in caplog.text assert "addresses = [" in caplog.text assert f"'{kafka_processing.host}:{kafka_processing.sasl_port}'" in caplog.text assert "cluster = 'cluster'" in caplog.text diff --git a/tests/tests_integration/tests_db_connection_integration/test_mongodb_integration.py b/tests/tests_integration/tests_db_connection_integration/test_mongodb_integration.py index 4ea0221bf..532f3b773 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_mongodb_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_mongodb_integration.py @@ -20,7 +20,7 @@ def test_mongodb_connection_check(spark, processing, caplog): with caplog.at_level(logging.INFO): assert mongo.check() == mongo - assert "type = MongoDB" in caplog.text + assert "|MongoDB|" in caplog.text assert f"host = '{processing.host}'" in caplog.text assert f"port = {processing.port}" in caplog.text assert f"database = '{processing.database}'" in caplog.text diff --git a/tests/tests_integration/tests_db_connection_integration/test_mssql_integration.py b/tests/tests_integration/tests_db_connection_integration/test_mssql_integration.py index 45a732711..c217eea36 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_mssql_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_mssql_integration.py @@ -27,7 +27,7 @@ def test_mssql_connection_check(spark, processing, caplog): with caplog.at_level(logging.INFO): assert mssql.check() == mssql - assert "type = MSSQL" in caplog.text + assert "|MSSQL|" in caplog.text assert f"host = '{processing.host}'" in caplog.text assert f"port = {processing.port}" in caplog.text assert f"database = '{processing.database}'" in caplog.text diff --git a/tests/tests_integration/tests_db_connection_integration/test_mysql_integration.py b/tests/tests_integration/tests_db_connection_integration/test_mysql_integration.py index d96b1f397..87fc4f58d 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_mysql_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_mysql_integration.py @@ -26,7 +26,7 @@ def test_mysql_connection_check(spark, processing, caplog): with caplog.at_level(logging.INFO): assert mysql.check() == mysql - assert "type = MySQL" in caplog.text + assert "|MySQL|" in caplog.text assert f"host = '{processing.host}'" in caplog.text assert f"port = {processing.port}" in caplog.text assert f"database = '{processing.database}'" in caplog.text diff --git a/tests/tests_integration/tests_db_connection_integration/test_oracle_integration.py b/tests/tests_integration/tests_db_connection_integration/test_oracle_integration.py index d6fdad022..74b9497d9 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_oracle_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_oracle_integration.py @@ -28,7 +28,7 @@ def test_oracle_connection_check(spark, processing, caplog): with caplog.at_level(logging.INFO): assert oracle.check() == oracle - assert "type = Oracle" in caplog.text + assert "|Oracle|" in caplog.text assert f"host = '{processing.host}'" in caplog.text assert f"port = {processing.port}" in caplog.text assert "database" 
not in caplog.text diff --git a/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py b/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py index f53ec976f..e9d719f93 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py @@ -26,7 +26,7 @@ def test_postgres_connection_check(spark, processing, caplog): with caplog.at_level(logging.INFO): assert postgres.check() == postgres - assert "type = Postgres" in caplog.text + assert "|Postgres|" in caplog.text assert f"host = '{processing.host}'" in caplog.text assert f"port = {processing.port}" in caplog.text assert f"user = '{processing.user}'" in caplog.text diff --git a/tests/tests_integration/tests_db_connection_integration/test_teradata_integration.py b/tests/tests_integration/tests_db_connection_integration/test_teradata_integration.py index 26ca462f1..118b1336f 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_teradata_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_teradata_integration.py @@ -29,7 +29,7 @@ def test_teradata_connection_check(spark, mocker, caplog): with caplog.at_level(logging.INFO): assert teradata.check() == teradata - assert "type = Teradata" in caplog.text + assert "|Teradata|" in caplog.text assert f"host = '{host}'" in caplog.text assert f"port = {port}" in caplog.text assert f"database = '{database}" in caplog.text diff --git a/tests/tests_integration/tests_file_connection_integration/test_ftp_file_connection_integration.py b/tests/tests_integration/tests_file_connection_integration/test_ftp_file_connection_integration.py index df99d72d9..d66f2dd31 100644 --- a/tests/tests_integration/tests_file_connection_integration/test_ftp_file_connection_integration.py +++ b/tests/tests_integration/tests_file_connection_integration/test_ftp_file_connection_integration.py @@ -10,7 +10,7 @@ def test_ftp_file_connection_check_success(ftp_file_connection, caplog): with caplog.at_level(logging.INFO): assert ftp.check() == ftp - assert "type = FTP" in caplog.text + assert "|FTP|" in caplog.text assert f"host = '{ftp.host}'" in caplog.text assert f"port = {ftp.port}" in caplog.text assert f"user = '{ftp.user}'" in caplog.text @@ -28,7 +28,7 @@ def test_ftp_file_connection_check_anonymous(ftp_server, caplog): with caplog.at_level(logging.INFO): assert anonymous.check() == anonymous - assert "type = FTP" in caplog.text + assert "|FTP|" in caplog.text assert f"host = '{anonymous.host}'" in caplog.text assert f"port = {anonymous.port}" in caplog.text assert "user = " not in caplog.text diff --git a/tests/tests_integration/tests_file_connection_integration/test_ftps_file_connection_integration.py b/tests/tests_integration/tests_file_connection_integration/test_ftps_file_connection_integration.py index f3756801e..a504a002b 100644 --- a/tests/tests_integration/tests_file_connection_integration/test_ftps_file_connection_integration.py +++ b/tests/tests_integration/tests_file_connection_integration/test_ftps_file_connection_integration.py @@ -10,7 +10,7 @@ def test_ftps_file_connection_check_success(ftps_file_connection, caplog): with caplog.at_level(logging.INFO): assert ftps.check() == ftps - assert "type = FTPS" in caplog.text + assert "|FTPS|" in caplog.text assert f"host = '{ftps.host}'" in caplog.text assert f"port = {ftps.port}" in caplog.text assert f"user = '{ftps.user}'" in caplog.text 
@@ -28,7 +28,7 @@ def test_ftps_file_connection_check_anonymous(ftps_server, caplog): with caplog.at_level(logging.INFO): assert anonymous.check() == anonymous - assert "type = FTP" in caplog.text + assert "|FTPS|" in caplog.text assert f"host = '{anonymous.host}'" in caplog.text assert f"port = {anonymous.port}" in caplog.text assert "user = " not in caplog.text diff --git a/tests/tests_integration/tests_file_connection_integration/test_hdfs_file_connection_integration.py b/tests/tests_integration/tests_file_connection_integration/test_hdfs_file_connection_integration.py index ba742acdb..febf024ed 100644 --- a/tests/tests_integration/tests_file_connection_integration/test_hdfs_file_connection_integration.py +++ b/tests/tests_integration/tests_file_connection_integration/test_hdfs_file_connection_integration.py @@ -17,7 +17,7 @@ def test_hdfs_file_connection_check_anonymous(hdfs_file_connection, caplog): with caplog.at_level(logging.INFO): assert hdfs.check() == hdfs - assert "type = HDFS" in caplog.text + assert "|HDFS|" in caplog.text assert f"host = '{hdfs.host}'" in caplog.text assert f"webhdfs_port = {hdfs.webhdfs_port}" in caplog.text assert "timeout = 10" in caplog.text @@ -50,7 +50,7 @@ def finalizer(): with caplog.at_level(logging.INFO): assert hdfs.check() - assert "type = HDFS" in caplog.text + assert "|HDFS|" in caplog.text assert f"host = '{hdfs.host}'" in caplog.text assert f"webhdfs_port = {hdfs.webhdfs_port}" in caplog.text assert f"user = '{hdfs.user}'" in caplog.text @@ -72,7 +72,7 @@ def test_hdfs_file_connection_check_with_password(mocker, hdfs_server, caplog): with caplog.at_level(logging.INFO): assert hdfs.check() - assert "type = HDFS" in caplog.text + assert "|HDFS|" in caplog.text assert f"host = '{hdfs.host}'" in caplog.text assert f"webhdfs_port = {hdfs.webhdfs_port}" in caplog.text assert "timeout = 10" in caplog.text diff --git a/tests/tests_integration/tests_file_connection_integration/test_s3_file_connection_integration.py b/tests/tests_integration/tests_file_connection_integration/test_s3_file_connection_integration.py index 6f9276919..e6eceeb70 100644 --- a/tests/tests_integration/tests_file_connection_integration/test_s3_file_connection_integration.py +++ b/tests/tests_integration/tests_file_connection_integration/test_s3_file_connection_integration.py @@ -11,7 +11,7 @@ def test_s3_file_connection_check_success(caplog, s3_file_connection): with caplog.at_level(logging.INFO): assert s3.check() == s3 - assert "type = S3" in caplog.text + assert "|S3|" in caplog.text assert f"host = '{s3.host}'" in caplog.text assert f"port = {s3.port}" in caplog.text assert f"protocol = '{s3.protocol}'" in caplog.text diff --git a/tests/tests_integration/tests_file_connection_integration/test_sftp_file_connection_integration.py b/tests/tests_integration/tests_file_connection_integration/test_sftp_file_connection_integration.py index 70faf498e..c14f7fddd 100644 --- a/tests/tests_integration/tests_file_connection_integration/test_sftp_file_connection_integration.py +++ b/tests/tests_integration/tests_file_connection_integration/test_sftp_file_connection_integration.py @@ -10,7 +10,7 @@ def test_sftp_file_connection_check_success(sftp_file_connection, caplog): with caplog.at_level(logging.INFO): assert sftp.check() == sftp - assert "type = SFTP" in caplog.text + assert "|SFTP|" in caplog.text assert f"host = '{sftp.host}'" in caplog.text assert f"port = {sftp.port}" in caplog.text assert f"user = '{sftp.user}'" in caplog.text diff --git 
a/tests/tests_integration/tests_file_connection_integration/test_webdav_file_connection_integration.py b/tests/tests_integration/tests_file_connection_integration/test_webdav_file_connection_integration.py index 3a91523ce..781621349 100644 --- a/tests/tests_integration/tests_file_connection_integration/test_webdav_file_connection_integration.py +++ b/tests/tests_integration/tests_file_connection_integration/test_webdav_file_connection_integration.py @@ -10,7 +10,7 @@ def test_webdav_file_connection_check_success(webdav_file_connection, caplog): with caplog.at_level(logging.INFO): assert webdav.check() == webdav - assert "type = WebDAV" in caplog.text + assert "|WebDAV|" in caplog.text assert f"host = '{webdav.host}'" in caplog.text assert f"port = {webdav.port}" in caplog.text assert f"protocol = '{webdav.protocol}'" in caplog.text From f48448ca635e878e33b48a6e5a5d02bf6d29ae7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Tue, 5 Sep 2023 08:47:32 +0000 Subject: [PATCH 28/30] [DOP-8208] Fix validation of includeHeaders --- docs/changelog/next_release/131.bugfix.rst | 2 + .../next_release/131.improvement.rst | 1 + .../db_connection/kafka/connection.py | 28 +++--- .../connection/db_connection/kafka/options.py | 32 ++++--- .../test_kafka_writer_integration.py | 90 +++++++++++-------- .../test_kafka_unit.py | 76 +++++++++------- 6 files changed, 137 insertions(+), 92 deletions(-) create mode 100644 docs/changelog/next_release/131.bugfix.rst create mode 100644 docs/changelog/next_release/131.improvement.rst diff --git a/docs/changelog/next_release/131.bugfix.rst b/docs/changelog/next_release/131.bugfix.rst new file mode 100644 index 000000000..6b70fc3e5 --- /dev/null +++ b/docs/changelog/next_release/131.bugfix.rst @@ -0,0 +1,2 @@ +Fixed validation of the ``headers`` column when writing to Kafka with the default ``Kafka.WriteOptions()`` - the default value was ``False``, +but instead of raising an exception, the column value was just ignored. diff --git a/docs/changelog/next_release/131.improvement.rst b/docs/changelog/next_release/131.improvement.rst new file mode 100644 index 000000000..5bea833a5 --- /dev/null +++ b/docs/changelog/next_release/131.improvement.rst @@ -0,0 +1 @@ +Improve validation messages while writing a dataframe to Kafka.
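For context before the Kafka diffs below: a minimal usage sketch of the new ``include_headers`` flag, based on the tests added in this patch. The broker address, topic name, ``spark`` session and ``df`` DataFrame are placeholder assumptions, and import paths assume onETL 0.9:

.. code:: python

    from onetl.connection import Kafka
    from onetl.db import DBWriter

    kafka = Kafka(
        addresses=["some.broker:9092"],  # placeholder address
        cluster="cluster",
        spark=spark,  # an existing SparkSession is assumed
    )

    writer = DBWriter(
        connection=kafka,
        table="some_topic",  # placeholder topic name
        # "headers" column can only be written with include_headers=True (requires Spark 3.x)
        options=Kafka.WriteOptions(if_exists="append", include_headers=True),
    )

    # df is assumed to be a Spark DataFrame with a required "value" column;
    # "key", "partition" and "headers" are optional, a "topic" column is allowed
    # but overridden with the target topic, any other column raises ValueError
    writer.run(df)

With the default ``Kafka.WriteOptions()`` (``include_headers=False``), passing a DataFrame containing a ``headers`` column now raises ``ValueError`` instead of silently dropping the column, as exercised by the test changes further down.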
diff --git a/onetl/connection/db_connection/kafka/connection.py b/onetl/connection/db_connection/kafka/connection.py index 0298830b3..bb754a79d 100644 --- a/onetl/connection/db_connection/kafka/connection.py +++ b/onetl/connection/db_connection/kafka/connection.py @@ -287,21 +287,27 @@ def write_df_to_target( options: KafkaWriteOptions = KafkaWriteOptions(), # noqa: B008, WPS404 ) -> None: # Check that the DataFrame doesn't contain any columns not in the schema - schema: StructType = self.get_df_schema(target) - required_columns = [field.name for field in schema.fields if not field.nullable] - optional_columns = [field.name for field in schema.fields if field.nullable] - schema_field_names = {field.name for field in schema.fields} - df_column_names = set(df.columns) - if not df_column_names.issubset(schema_field_names): - invalid_columns = df_column_names - schema_field_names + required_columns = {"value"} + optional_columns = {"key", "partition", "headers"} + allowed_columns = required_columns | optional_columns | {"topic"} + df_columns = set(df.columns) + if not df_columns.issubset(allowed_columns): + invalid_columns = df_columns - allowed_columns raise ValueError( - f"Invalid column names: {invalid_columns}. Expected columns: {required_columns} (required)," - f" {optional_columns} (optional)", + f"Invalid column names: {sorted(invalid_columns)}. " + f"Expected columns: {sorted(required_columns)} (required)," + f" {sorted(optional_columns)} (optional)", ) # Check that the DataFrame doesn't contain a 'headers' column with includeHeaders=False - if not getattr(options, "includeHeaders", True) and "headers" in df.columns: - raise ValueError("Cannot write 'headers' column with kafka.WriteOptions(includeHeaders=False)") + if not options.include_headers and "headers" in df.columns: + raise ValueError("Cannot write 'headers' column with kafka.WriteOptions(include_headers=False)") + + spark_version = get_spark_version(self.spark) + if options.include_headers and spark_version.major < 3: + raise ValueError( + f"kafka.WriteOptions(include_headers=True) requires Spark 3.x, got {spark_version}", + ) if "topic" in df.columns: log.warning("The 'topic' column in the DataFrame will be overridden with value %r", target) diff --git a/onetl/connection/db_connection/kafka/options.py b/onetl/connection/db_connection/kafka/options.py index 5e3a3b142..6d7e4ef4c 100644 --- a/onetl/connection/db_connection/kafka/options.py +++ b/onetl/connection/db_connection/kafka/options.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + from enum import Enum from pydantic import Field, root_validator @@ -34,14 +36,6 @@ ), ) -KNOWN_READ_WRITE_OPTIONS = frozenset( - ( - # not adding this to class itself because headers support was added to Spark only in 3.0 - # https://issues.apache.org/jira/browse/SPARK-23539 - "includeHeaders", - ), -) - KNOWN_READ_OPTIONS = frozenset( ( "failOnDataLoss", @@ -100,14 +94,21 @@ class KafkaReadOptions(GenericOptions): .. code:: python options = Kafka.ReadOptions( + include_headers=False, minPartitions=50, - includeHeaders=True, ) """ + include_headers: bool = Field(default=False, alias="includeHeaders") + """ + If ``True``, add ``headers`` column to output DataFrame. + + If ``False``, column will not be added. 
+ """ + class Config: prohibited_options = PROHIBITED_OPTIONS - known_options = KNOWN_READ_OPTIONS | KNOWN_READ_WRITE_OPTIONS + known_options = KNOWN_READ_OPTIONS extra = "allow" @@ -140,7 +141,7 @@ class KafkaWriteOptions(GenericOptions): options = Kafka.WriteOptions( if_exists="append", - includeHeaders=False, + include_headers=True, ) """ @@ -154,9 +155,16 @@ class KafkaWriteOptions(GenericOptions): * ``error`` - Raises an error if topic already exists. """ + include_headers: bool = Field(default=False, alias="includeHeaders") + """ + If ``True``, ``headers`` column from dataframe can be written to Kafka (requires Kafka 2.0+). + + If ``False`` and dataframe contains ``headers`` column, an exception will be raised. + """ + class Config: prohibited_options = PROHIBITED_OPTIONS | KNOWN_READ_OPTIONS - known_options = KNOWN_READ_WRITE_OPTIONS + known_options: frozenset[str] = frozenset() extra = "allow" @root_validator(pre=True) diff --git a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_kafka_writer_integration.py b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_kafka_writer_integration.py index 8a9427983..b35bfabad 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_kafka_writer_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_kafka_writer_integration.py @@ -1,3 +1,4 @@ +import contextlib import logging import re import secrets @@ -97,7 +98,7 @@ def test_kafka_writer_no_value_column_error(spark, kafka_processing, kafka_spark from pyspark.sql.utils import AnalysisException topic, processing = kafka_processing - df = kafka_spark_df.drop("value") + df = kafka_spark_df.select("key") kafka = Kafka( spark=spark, @@ -114,13 +115,28 @@ def test_kafka_writer_no_value_column_error(spark, kafka_processing, kafka_spark writer.run(df) -def test_kafka_writer_invalid_column_error(spark, kafka_processing, kafka_spark_df): +@pytest.mark.parametrize( + "column, value", + [ + ("offset", 0), + ("timestamp", 10000), + ("timestampType", 1), + ("unknown", "str"), + ], +) +def test_kafka_writer_invalid_column_error( + column, + value, + spark, + kafka_processing, + kafka_spark_df, +): from pyspark.sql.functions import lit topic, processing = kafka_processing # Add an unexpected column to the DataFrame - df = kafka_spark_df.withColumn("invalid_column", lit("invalid_value")) + df = kafka_spark_df.withColumn(column, lit(value)) kafka = Kafka( spark=spark, @@ -134,32 +150,13 @@ def test_kafka_writer_invalid_column_error(spark, kafka_processing, kafka_spark_ ) error_msg = ( - "Invalid column names: {'invalid_column'}. Expected columns: ['value'] (required), " - "['key', 'topic', 'partition', 'offset', 'timestamp', 'timestampType', 'headers'] (optional)" + f"Invalid column names: ['{column}']. 
" + "Expected columns: ['value'] (required), ['headers', 'key', 'partition'] (optional)" ) with pytest.raises(ValueError, match=re.escape(error_msg)): writer.run(df) -def test_kafka_writer_with_include_headers_error(spark, kafka_processing, kafka_spark_df): - topic, processing = kafka_processing - - kafka = Kafka( - spark=spark, - addresses=[f"{processing.host}:{processing.port}"], - cluster="cluster", - ) - - writer = DBWriter( - connection=kafka, - table=topic, - options=kafka.WriteOptions(includeHeaders=False), - ) - - with pytest.raises(ValueError, match="Cannot write 'headers' column"): - writer.run(kafka_spark_df) - - def test_kafka_writer_key_column(spark, kafka_processing, kafka_spark_df): topic, processing = kafka_processing df = kafka_spark_df.select("value", "key") @@ -185,6 +182,7 @@ def test_kafka_writer_topic_column(spark, kafka_processing, caplog, kafka_spark_ from pyspark.sql.functions import lit topic, processing = kafka_processing + original_df = kafka_spark_df.select("value") kafka = Kafka( spark=spark, @@ -196,12 +194,10 @@ def test_kafka_writer_topic_column(spark, kafka_processing, caplog, kafka_spark_ connection=kafka, table=topic, ) - writer.run(kafka_spark_df) - + writer.run(original_df) assert processing.topic_exists(topic) - df = kafka_spark_df.withColumn("topic", lit("other_topic")) - + df = original_df.withColumn("topic", lit("other_topic")) with caplog.at_level(logging.WARNING): writer.run(df) assert f"The 'topic' column in the DataFrame will be overridden with value '{topic}'" in caplog.text @@ -234,7 +230,10 @@ def test_kafka_writer_partition_column(spark, kafka_processing, kafka_spark_df): def test_kafka_writer_headers(spark, kafka_processing, kafka_spark_df): if get_spark_version(spark).major < 3: - pytest.skip("Spark 3.x or later is required to write/read 'headers' from Kafka messages") + msg = f"kafka.WriteOptions(include_headers=True) requires Spark 3.x, got {spark.version}" + context_manager = pytest.raises(ValueError, match=re.escape(msg)) + else: + context_manager = contextlib.nullcontext() topic, processing = kafka_processing @@ -247,19 +246,39 @@ def test_kafka_writer_headers(spark, kafka_processing, kafka_spark_df): writer = DBWriter( connection=kafka, table=topic, - options=kafka.WriteOptions(includeHeaders=True), + options=kafka.WriteOptions(include_headers=True), ) df = kafka_spark_df.select("value", "headers") - writer.run(df) + with context_manager: + writer.run(df) + + pd_df = processing.get_expected_df(topic, num_messages=kafka_spark_df.count()) + + processing.assert_equal_df( + df, + other_frame=pd_df.drop(columns=["key", "partition", "topic"], axis=1), + ) - pd_df = processing.get_expected_df(topic, num_messages=kafka_spark_df.count()) - processing.assert_equal_df( - df, - other_frame=pd_df.drop(columns=["key", "partition", "topic"], axis=1), +def test_kafka_writer_headers_without_include_headers_fail(spark, kafka_processing, kafka_spark_df): + topic, processing = kafka_processing + + kafka = Kafka( + spark=spark, + addresses=[f"{processing.host}:{processing.port}"], + cluster="cluster", ) + writer = DBWriter( + connection=kafka, + table=topic, + options=kafka.WriteOptions(include_headers=False), + ) + + with pytest.raises(ValueError, match="Cannot write 'headers' column"): + writer.run(kafka_spark_df) + def test_kafka_writer_mode(spark, kafka_processing, kafka_spark_df): from pyspark.sql.functions import lit @@ -276,7 +295,6 @@ def test_kafka_writer_mode(spark, kafka_processing, kafka_spark_df): writer = DBWriter( connection=kafka, 
table=topic, - options=kafka.WriteOptions(includeHeaders=True), ) writer.run(df) diff --git a/tests/tests_unit/tests_db_connection_unit/test_kafka_unit.py b/tests/tests_unit/tests_db_connection_unit/test_kafka_unit.py index d47270e39..1524d8ad7 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_kafka_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_kafka_unit.py @@ -71,7 +71,7 @@ def test_kafka_missing_package(spark_no_packages): @pytest.mark.parametrize( - "arg, value", + "option, value", [ ("assign", "assign_value"), ("subscribe", "subscribe_value"), @@ -87,17 +87,36 @@ def test_kafka_missing_package(spark_no_packages): ("topic", "topic_value"), ], ) -def test_kafka_prohibited_options_error(arg, value): - error_msg = rf"Options \['{arg}'\] are not allowed to use in a KafkaReadOptions" - with pytest.raises(ValueError, match=error_msg): - Kafka.ReadOptions.parse({arg: value}) - error_msg = rf"Options \['{arg}'\] are not allowed to use in a KafkaWriteOptions" +@pytest.mark.parametrize( + "options_class, class_name", + [ + (Kafka.ReadOptions, "KafkaReadOptions"), + (Kafka.WriteOptions, "KafkaWriteOptions"), + ], +) +def test_kafka_options_prohibited(option, value, options_class, class_name): + error_msg = rf"Options \['{option}'\] are not allowed to use in a {class_name}" with pytest.raises(ValueError, match=error_msg): - Kafka.WriteOptions.parse({arg: value}) + options_class.parse({option: value}) + + +@pytest.mark.parametrize( + "options_class, class_name", + [ + (Kafka.ReadOptions, "KafkaReadOptions"), + (Kafka.WriteOptions, "KafkaWriteOptions"), + ], +) +def test_kafka_options_unknown(caplog, options_class, class_name): + with caplog.at_level(logging.WARNING): + options = options_class(unknown="abc") + assert options.unknown == "abc" + + assert f"Options ['unknown'] are not known by {class_name}, are you sure they are valid?" 
in caplog.text @pytest.mark.parametrize( - "arg, value", + "option, value", [ ("failOnDataLoss", "false"), ("kafkaConsumer.pollTimeoutMs", "30000"), @@ -108,30 +127,21 @@ def test_kafka_prohibited_options_error(arg, value): ("maxTriggerDelay", "2000"), ("minPartitions", "2"), ("groupIdPrefix", "testPrefix"), - ("includeHeaders", "true"), ], ) -def test_kafka_allowed_read_options_no_error(arg, value): - try: - Kafka.ReadOptions.parse({arg: value}) - except ValidationError: - pytest.fail("ValidationError for ReadOptions raised unexpectedly!") +def test_kafka_read_options_allowed(option, value): + options = Kafka.ReadOptions.parse({option: value}) + assert getattr(options, option) == value -@pytest.mark.parametrize( - "arg, value", - [ - ("includeHeaders", "true"), - ], -) -def test_kafka_allowed_write_options_no_error(arg, value): - try: - Kafka.WriteOptions.parse({arg: value}) - except ValidationError: - pytest.fail("ValidationError for Write options raised unexpectedly!") +@pytest.mark.parametrize("value", [True, False]) +@pytest.mark.parametrize("options_class", [Kafka.ReadOptions, Kafka.WriteOptions]) +def test_kafka_options_include_headers(options_class, value): + options = options_class(includeHeaders=value) + assert options.include_headers == value -def test_kafka_basic_auth(spark_mock): +def test_kafka_basic_auth_get_jaas_conf(spark_mock): conn = Kafka( spark=spark_mock, cluster="some_cluster", @@ -237,7 +247,7 @@ def test_kafka_empty_cluster(spark_mock): @pytest.mark.parametrize( - "arg, value", + "option, value", [ ("bootstrap.servers", "kafka.bootstrap.servers_value"), ("security.protocol", "ssl"), @@ -251,23 +261,23 @@ def test_kafka_empty_cluster(spark_mock): ("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer"), ], ) -def test_kafka_invalid_extras(arg, value): +def test_kafka_invalid_extras(option, value): msg = re.escape("are not allowed to use in a KafkaExtra") with pytest.raises(ValueError, match=msg): - KafkaExtra.parse({arg: value}) + KafkaExtra.parse({option: value}) with pytest.raises(ValueError, match=msg): - KafkaExtra.parse({"kafka." + arg: value}) + KafkaExtra.parse({"kafka." 
+ option: value}) @pytest.mark.parametrize( - "arg, value", + "option, value", [ ("kafka.group.id", "group_id"), ("group.id", "group_id"), ], ) -def test_kafka_valid_extras(arg, value): - extra_dict = KafkaExtra.parse({arg: value}).dict() +def test_kafka_valid_extras(option, value): + extra_dict = KafkaExtra.parse({option: value}).dict() assert extra_dict["group.id"] == value From 5241724863626fe6e80ece180ec33b465a1a54ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Wed, 6 Sep 2023 12:28:49 +0000 Subject: [PATCH 29/30] [DOP-8571] Make log messages more consistent --- .../db_connection/greenplum/connection.py | 2 +- .../connection/db_connection/hive/connection.py | 16 ++++++++-------- .../db_connection/jdbc_connection/connection.py | 2 +- .../db_connection/jdbc_mixin/connection.py | 2 +- .../connection/db_connection/kafka/connection.py | 6 +++--- .../db_connection/mongodb/connection.py | 2 +- .../file_connection/file_connection.py | 2 +- .../spark_file_df_connection.py | 4 ++-- .../test_spark_hdfs_integration.py | 2 +- .../test_spark_local_fs_integration.py | 2 +- .../test_spark_s3_integration.py | 2 +- .../test_hive_writer_integration.py | 12 ++++++------ .../test_clickhouse_integration.py | 2 +- .../test_greenplum_integration.py | 2 +- .../test_hive_integration.py | 2 +- .../test_kafka_integration.py | 6 +++--- .../test_mongodb_integration.py | 2 +- .../test_mssql_integration.py | 2 +- .../test_mysql_integration.py | 2 +- .../test_oracle_integration.py | 2 +- .../test_postgres_integration.py | 2 +- .../test_teradata_integration.py | 2 +- .../test_ftp_file_connection_integration.py | 4 ++-- .../test_ftps_file_connection_integration.py | 4 ++-- .../test_hdfs_file_connection_integration.py | 6 +++--- .../test_s3_file_connection_integration.py | 2 +- .../test_sftp_file_connection_integration.py | 2 +- .../test_webdav_file_connection_integration.py | 2 +- 28 files changed, 49 insertions(+), 49 deletions(-) diff --git a/onetl/connection/db_connection/greenplum/connection.py b/onetl/connection/db_connection/greenplum/connection.py index c0a7e3ef1..99de7d90c 100644 --- a/onetl/connection/db_connection/greenplum/connection.py +++ b/onetl/connection/db_connection/greenplum/connection.py @@ -340,7 +340,7 @@ def get_df_schema( log_lines(log, query, level=logging.DEBUG) df = self._query_on_driver(query, jdbc_options) - log.info("|%s| Schema fetched", self.__class__.__name__) + log.info("|%s| Schema fetched.", self.__class__.__name__) return df.schema diff --git a/onetl/connection/db_connection/hive/connection.py b/onetl/connection/db_connection/hive/connection.py index dda14a411..d0bc08d29 100644 --- a/onetl/connection/db_connection/hive/connection.py +++ b/onetl/connection/db_connection/hive/connection.py @@ -208,7 +208,7 @@ def check(self): try: self._execute_sql(self._CHECK_QUERY) - log.info("|%s| Connection is available", self.__class__.__name__) + log.info("|%s| Connection is available.", self.__class__.__name__) except Exception as e: log.exception("|%s| Connection is unavailable", self.__class__.__name__) raise RuntimeError("Connection is unavailable") from e @@ -382,7 +382,7 @@ def get_df_schema( log_lines(log, query, level=logging.DEBUG) df = self._execute_sql(query) - log.info("|%s| Schema fetched", self.__class__.__name__) + log.info("|%s| Schema fetched.", self.__class__.__name__) return df.schema @slot @@ -499,8 
+499,6 @@ def _insert_into( ) -> None: write_options = self.WriteOptions.parse(options) - log.info("|%s| Inserting data into existing table %r", self.__class__.__name__, table) - unsupported_options = write_options.dict(by_alias=True, exclude_unset=True, exclude={"if_exists"}) if unsupported_options: log.warning( @@ -519,9 +517,11 @@ def _insert_into( # so using a workaround with patching Spark config and then setting up the previous value with inject_spark_param(self.spark.conf, PARTITION_OVERWRITE_MODE_PARAM, "dynamic"): overwrite = write_options.if_exists != HiveTableExistBehavior.APPEND + + log.info("|%s| Inserting data into existing table %r ...", self.__class__.__name__, table) writer.insertInto(table, overwrite=overwrite) - log.info("|%s| Data is successfully inserted into table %r", self.__class__.__name__, table) + log.info("|%s| Data is successfully inserted into table %r.", self.__class__.__name__, table) def _save_as_table( self, @@ -531,8 +531,6 @@ def _save_as_table( ) -> None: write_options = self.WriteOptions.parse(options) - log.info("|%s| Saving data to a table %r", self.__class__.__name__, table) - writer = df.write for method, value in write_options.dict(by_alias=True, exclude_none=True, exclude={"if_exists"}).items(): # is the arguments that will be passed to the @@ -546,6 +544,8 @@ def _save_as_table( writer = writer.option(method, value) mode = "append" if write_options.if_exists == HiveTableExistBehavior.APPEND else "overwrite" + + log.info("|%s| Saving data to a table %r ...", self.__class__.__name__, table) writer.mode(mode).saveAsTable(table) - log.info("|%s| Table %r is successfully created", self.__class__.__name__, table) + log.info("|%s| Table %r is successfully created.", self.__class__.__name__, table) diff --git a/onetl/connection/db_connection/jdbc_connection/connection.py b/onetl/connection/db_connection/jdbc_connection/connection.py index 8246e9d19..3eb83f538 100644 --- a/onetl/connection/db_connection/jdbc_connection/connection.py +++ b/onetl/connection/db_connection/jdbc_connection/connection.py @@ -239,7 +239,7 @@ def get_df_schema( log_lines(log, query, level=logging.DEBUG) df = self._query_on_driver(query, read_options) - log.info("|%s| Schema fetched", self.__class__.__name__) + log.info("|%s| Schema fetched.", self.__class__.__name__) return df.schema diff --git a/onetl/connection/db_connection/jdbc_mixin/connection.py b/onetl/connection/db_connection/jdbc_mixin/connection.py index ff76e5e34..c02fb82f1 100644 --- a/onetl/connection/db_connection/jdbc_mixin/connection.py +++ b/onetl/connection/db_connection/jdbc_mixin/connection.py @@ -138,7 +138,7 @@ def check(self): try: self._query_optional_on_driver(self._CHECK_QUERY, self.JDBCOptions(fetchsize=1)) # type: ignore - log.info("|%s| Connection is available", self.__class__.__name__) + log.info("|%s| Connection is available.", self.__class__.__name__) except Exception as e: log.exception("|%s| Connection is unavailable", self.__class__.__name__) raise RuntimeError("Connection is unavailable") from e diff --git a/onetl/connection/db_connection/kafka/connection.py b/onetl/connection/db_connection/kafka/connection.py index bb754a79d..3aa8f0fd2 100644 --- a/onetl/connection/db_connection/kafka/connection.py +++ b/onetl/connection/db_connection/kafka/connection.py @@ -250,7 +250,7 @@ def check(self): try: self._get_topics() - log.info("|%s| Connection is available", self.__class__.__name__) + log.info("|%s| Connection is available.", self.__class__.__name__) except Exception as e: log.exception("|%s| 
Connection is unavailable", self.__class__.__name__) raise RuntimeError("Connection is unavailable") from e @@ -276,7 +276,7 @@ def read_source_as_df( result_options.update(options.dict(by_alias=True, exclude_none=True)) result_options["subscribe"] = source df = self.spark.read.format("kafka").options(**result_options).load() - log.info("|%s| Dataframe is successfully created", self.__class__.__name__) + log.info("|%s| Dataframe is successfully created.", self.__class__.__name__) return df @slot @@ -325,7 +325,7 @@ def write_df_to_target( log.info("|%s| Saving data to a topic %r", self.__class__.__name__, target) df.write.format("kafka").mode(mode).options(**write_options).save() - log.info("|%s| Data is successfully written to topic %r", self.__class__.__name__, target) + log.info("|%s| Data is successfully written to topic %r.", self.__class__.__name__, target) @slot def get_df_schema( diff --git a/onetl/connection/db_connection/mongodb/connection.py b/onetl/connection/db_connection/mongodb/connection.py index 16320ebca..8e6110f14 100644 --- a/onetl/connection/db_connection/mongodb/connection.py +++ b/onetl/connection/db_connection/mongodb/connection.py @@ -390,7 +390,7 @@ def check(self): jvm = self.spark._jvm # type: ignore client = jvm.com.mongodb.client.MongoClients.create(self.connection_url) list(client.listDatabaseNames().iterator()) - log.info("|%s| Connection is available", self.__class__.__name__) + log.info("|%s| Connection is available.", self.__class__.__name__) except Exception as e: log.exception("|%s| Connection is unavailable", self.__class__.__name__) raise RuntimeError("Connection is unavailable") from e diff --git a/onetl/connection/file_connection/file_connection.py b/onetl/connection/file_connection/file_connection.py index b92805dec..39e27f2c6 100644 --- a/onetl/connection/file_connection/file_connection.py +++ b/onetl/connection/file_connection/file_connection.py @@ -129,7 +129,7 @@ def check(self): try: self.list_dir("/") - log.info("|%s| Connection is available", self.__class__.__name__) + log.info("|%s| Connection is available.", self.__class__.__name__) except (RuntimeError, ValueError): # left validation errors intact log.exception("|%s| Connection is unavailable", self.__class__.__name__) diff --git a/onetl/connection/file_df_connection/spark_file_df_connection.py b/onetl/connection/file_df_connection/spark_file_df_connection.py index 96d51e4c9..7c1994182 100644 --- a/onetl/connection/file_df_connection/spark_file_df_connection.py +++ b/onetl/connection/file_df_connection/spark_file_df_connection.py @@ -58,7 +58,7 @@ def check(self): try: fs = self._get_spark_fs() fs.exists(path) - log.info("|%s| Connection is available", self.__class__.__name__) + log.info("|%s| Connection is available.", self.__class__.__name__) except Exception as e: raise RuntimeError("Connection is unavailable") from e return self @@ -138,7 +138,7 @@ def write_df_as_files( url = self._convert_to_url(path) writer.save(url) - log.info("|%s| Data is successfully saved to '%s'", self.__class__.__name__, path) + log.info("|%s| Data is successfully saved to '%s'.", self.__class__.__name__, path) @abstractmethod def _convert_to_url(self, path: PurePathProtocol) -> str: diff --git a/tests/tests_integration/test_file_df_connection_integration/test_spark_hdfs_integration.py b/tests/tests_integration/test_file_df_connection_integration/test_spark_hdfs_integration.py index 6d6c90a46..3b47df0d0 100644 --- a/tests/tests_integration/test_file_df_connection_integration/test_spark_hdfs_integration.py 
+++ b/tests/tests_integration/test_file_df_connection_integration/test_spark_hdfs_integration.py @@ -20,7 +20,7 @@ def test_spark_hdfs_check(hdfs_file_df_connection, caplog): assert f"host = '{hdfs.host}'" in caplog.text assert f"ipc_port = {hdfs.ipc_port}" in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text def test_spark_hdfs_file_connection_check_failed(spark): diff --git a/tests/tests_integration/test_file_df_connection_integration/test_spark_local_fs_integration.py b/tests/tests_integration/test_file_df_connection_integration/test_spark_local_fs_integration.py index d14f9fcb2..1f9751e45 100644 --- a/tests/tests_integration/test_file_df_connection_integration/test_spark_local_fs_integration.py +++ b/tests/tests_integration/test_file_df_connection_integration/test_spark_local_fs_integration.py @@ -15,4 +15,4 @@ def test_spark_local_fs_check(spark, caplog): assert "|SparkLocalFS|" in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text diff --git a/tests/tests_integration/test_file_df_connection_integration/test_spark_s3_integration.py b/tests/tests_integration/test_file_df_connection_integration/test_spark_s3_integration.py index cd10e67ba..c1c9d9b97 100644 --- a/tests/tests_integration/test_file_df_connection_integration/test_spark_s3_integration.py +++ b/tests/tests_integration/test_file_df_connection_integration/test_spark_s3_integration.py @@ -27,7 +27,7 @@ def test_spark_s3_check(s3_file_df_connection, caplog): assert "extra = {" in caplog.text assert "'path.style.access': True" in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text def test_spark_s3_check_failed(spark, s3_server): diff --git a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_hive_writer_integration.py b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_hive_writer_integration.py index c6a43bb24..44553539b 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_hive_writer_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_hive_writer_integration.py @@ -346,8 +346,8 @@ def test_hive_writer_insert_into_append(spark, processing, get_schema_table, ori with caplog.at_level(logging.INFO): writer2.run(df1.union(df3)) - assert f"|Hive| Inserting data into existing table '{get_schema_table.full_name}'" in caplog.text - assert f"|Hive| Data is successfully inserted into table '{get_schema_table.full_name}'" in caplog.text + assert f"|Hive| Inserting data into existing table '{get_schema_table.full_name}' ..." in caplog.text + assert f"|Hive| Data is successfully inserted into table '{get_schema_table.full_name}'." in caplog.text new_ddl = hive.sql(f"SHOW CREATE TABLE {get_schema_table.full_name}").collect()[0][0] @@ -405,8 +405,8 @@ def test_hive_writer_insert_into_replace_entire_table( writer2.run(df2.select(*reversed(df2.columns))) # unlike other modes, this creates new table - assert f"|Hive| Saving data to a table '{get_schema_table.full_name}'" in caplog.text - assert f"|Hive| Table '{get_schema_table.full_name}' is successfully created" in caplog.text + assert f"|Hive| Saving data to a table '{get_schema_table.full_name}' ..." in caplog.text + assert f"|Hive| Table '{get_schema_table.full_name}' is successfully created." 
in caplog.text new_ddl = hive.sql(f"SHOW CREATE TABLE {get_schema_table.full_name}").collect()[0][0] @@ -456,8 +456,8 @@ def test_hive_writer_insert_into_replace_overlapping_partitions_in_non_partition with caplog.at_level(logging.INFO): writer2.run(df2_reversed) - assert f"|Hive| Inserting data into existing table '{get_schema_table.full_name}'" in caplog.text - assert f"|Hive| Data is successfully inserted into table '{get_schema_table.full_name}'" in caplog.text + assert f"|Hive| Inserting data into existing table '{get_schema_table.full_name}' ..." in caplog.text + assert f"|Hive| Data is successfully inserted into table '{get_schema_table.full_name}'." in caplog.text new_ddl = hive.sql(f"SHOW CREATE TABLE {get_schema_table.full_name}").collect()[0][0] diff --git a/tests/tests_integration/tests_db_connection_integration/test_clickhouse_integration.py b/tests/tests_integration/tests_db_connection_integration/test_clickhouse_integration.py index 134afdd7b..410a6a02d 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_clickhouse_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_clickhouse_integration.py @@ -37,7 +37,7 @@ def test_clickhouse_connection_check(spark, processing, caplog): assert "package = " not in caplog.text assert "spark = " not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text def test_clickhouse_connection_check_fail(spark): diff --git a/tests/tests_integration/tests_db_connection_integration/test_greenplum_integration.py b/tests/tests_integration/tests_db_connection_integration/test_greenplum_integration.py index 59c1b57ad..91a52bb9d 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_greenplum_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_greenplum_integration.py @@ -39,7 +39,7 @@ def test_greenplum_connection_check(spark, processing, caplog): assert "package = " not in caplog.text assert "spark = " not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text def test_greenplum_connection_check_fail(spark): diff --git a/tests/tests_integration/tests_db_connection_integration/test_hive_integration.py b/tests/tests_integration/tests_db_connection_integration/test_hive_integration.py index b06d85755..8a90578cb 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_hive_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_hive_integration.py @@ -23,7 +23,7 @@ def test_hive_check(spark, caplog): assert "|Hive|" in caplog.text assert "spark = " not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text @pytest.mark.parametrize("suffix", ["", ";"]) diff --git a/tests/tests_integration/tests_db_connection_integration/test_kafka_integration.py b/tests/tests_integration/tests_db_connection_integration/test_kafka_integration.py index aaeb75977..b07d6b99e 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_kafka_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_kafka_integration.py @@ -28,7 +28,7 @@ def test_kafka_check_plaintext_anonymous(spark, caplog): assert "auth = None" in caplog.text assert "extra = {}" in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." 
in caplog.text def test_kafka_check_plaintext_basic_auth(spark, caplog): @@ -56,7 +56,7 @@ def test_kafka_check_plaintext_basic_auth(spark, caplog): assert f"auth = KafkaBasicAuth(user='{kafka_processing.user}', password=SecretStr('**********'))" in caplog.text assert "extra = {}" in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text @pytest.mark.parametrize("digest", ["SHA-256", "SHA-512"]) @@ -89,7 +89,7 @@ def test_kafka_check_plaintext_scram_auth(digest, spark, caplog): ) assert "extra = {}" in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text def test_kafka_check_error(spark): diff --git a/tests/tests_integration/tests_db_connection_integration/test_mongodb_integration.py b/tests/tests_integration/tests_db_connection_integration/test_mongodb_integration.py index 532f3b773..6283df1a1 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_mongodb_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_mongodb_integration.py @@ -31,7 +31,7 @@ def test_mongodb_connection_check(spark, processing, caplog): assert "package = " not in caplog.text assert "spark = " not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text def test_mongodb_connection_check_fail(processing, spark): diff --git a/tests/tests_integration/tests_db_connection_integration/test_mssql_integration.py b/tests/tests_integration/tests_db_connection_integration/test_mssql_integration.py index c217eea36..96e23183b 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_mssql_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_mssql_integration.py @@ -39,7 +39,7 @@ def test_mssql_connection_check(spark, processing, caplog): assert "package = " not in caplog.text assert "spark = " not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text def test_mssql_connection_check_fail(spark): diff --git a/tests/tests_integration/tests_db_connection_integration/test_mysql_integration.py b/tests/tests_integration/tests_db_connection_integration/test_mysql_integration.py index 87fc4f58d..dad717b1c 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_mysql_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_mysql_integration.py @@ -37,7 +37,7 @@ def test_mysql_connection_check(spark, processing, caplog): assert "package = " not in caplog.text assert "spark = " not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text def test_mysql_connection_check_fail(spark): diff --git a/tests/tests_integration/tests_db_connection_integration/test_oracle_integration.py b/tests/tests_integration/tests_db_connection_integration/test_oracle_integration.py index 74b9497d9..40e813279 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_oracle_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_oracle_integration.py @@ -46,7 +46,7 @@ def test_oracle_connection_check(spark, processing, caplog): assert "package = " not in caplog.text assert "spark = " not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." 
in caplog.text def test_oracle_connection_check_fail(spark): diff --git a/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py b/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py index e9d719f93..9f2d2253b 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py @@ -38,7 +38,7 @@ def test_postgres_connection_check(spark, processing, caplog): assert "package = " not in caplog.text assert "spark = " not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text def test_postgres_connection_check_fail(spark): diff --git a/tests/tests_integration/tests_db_connection_integration/test_teradata_integration.py b/tests/tests_integration/tests_db_connection_integration/test_teradata_integration.py index 118b1336f..f3224025e 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_teradata_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_teradata_integration.py @@ -40,7 +40,7 @@ def test_teradata_connection_check(spark, mocker, caplog): assert "package = " not in caplog.text assert "spark = " not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text def test_teradata_connection_check_fail(spark): diff --git a/tests/tests_integration/tests_file_connection_integration/test_ftp_file_connection_integration.py b/tests/tests_integration/tests_file_connection_integration/test_ftp_file_connection_integration.py index d66f2dd31..87e69227b 100644 --- a/tests/tests_integration/tests_file_connection_integration/test_ftp_file_connection_integration.py +++ b/tests/tests_integration/tests_file_connection_integration/test_ftp_file_connection_integration.py @@ -17,7 +17,7 @@ def test_ftp_file_connection_check_success(ftp_file_connection, caplog): assert "password = SecretStr('**********')" in caplog.text assert ftp.password.get_secret_value() not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text def test_ftp_file_connection_check_anonymous(ftp_server, caplog): @@ -34,7 +34,7 @@ def test_ftp_file_connection_check_anonymous(ftp_server, caplog): assert "user = " not in caplog.text assert "password = " not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text def test_ftp_file_connection_check_failed(ftp_server): diff --git a/tests/tests_integration/tests_file_connection_integration/test_ftps_file_connection_integration.py b/tests/tests_integration/tests_file_connection_integration/test_ftps_file_connection_integration.py index a504a002b..e82d4b47a 100644 --- a/tests/tests_integration/tests_file_connection_integration/test_ftps_file_connection_integration.py +++ b/tests/tests_integration/tests_file_connection_integration/test_ftps_file_connection_integration.py @@ -17,7 +17,7 @@ def test_ftps_file_connection_check_success(ftps_file_connection, caplog): assert "password = SecretStr('**********')" in caplog.text assert ftps.password.get_secret_value() not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." 
in caplog.text def test_ftps_file_connection_check_anonymous(ftps_server, caplog): @@ -34,7 +34,7 @@ def test_ftps_file_connection_check_anonymous(ftps_server, caplog): assert "user = " not in caplog.text assert "password = " not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text def test_ftps_file_connection_check_failed(ftps_server): diff --git a/tests/tests_integration/tests_file_connection_integration/test_hdfs_file_connection_integration.py b/tests/tests_integration/tests_file_connection_integration/test_hdfs_file_connection_integration.py index febf024ed..f92a1917a 100644 --- a/tests/tests_integration/tests_file_connection_integration/test_hdfs_file_connection_integration.py +++ b/tests/tests_integration/tests_file_connection_integration/test_hdfs_file_connection_integration.py @@ -25,7 +25,7 @@ def test_hdfs_file_connection_check_anonymous(hdfs_file_connection, caplog): assert "keytab =" not in caplog.text assert "password =" not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text def test_hdfs_file_connection_check_with_keytab(mocker, hdfs_server, caplog, request, tmp_path_factory): @@ -58,7 +58,7 @@ def finalizer(): assert f"keytab = '{keytab}' (kind='file'" in caplog.text assert "password =" not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text def test_hdfs_file_connection_check_with_password(mocker, hdfs_server, caplog): @@ -81,7 +81,7 @@ def test_hdfs_file_connection_check_with_password(mocker, hdfs_server, caplog): assert "password = SecretStr('**********')" in caplog.text assert "somepass" not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text def test_hdfs_file_connection_check_failed(): diff --git a/tests/tests_integration/tests_file_connection_integration/test_s3_file_connection_integration.py b/tests/tests_integration/tests_file_connection_integration/test_s3_file_connection_integration.py index e6eceeb70..1d6c2a407 100644 --- a/tests/tests_integration/tests_file_connection_integration/test_s3_file_connection_integration.py +++ b/tests/tests_integration/tests_file_connection_integration/test_s3_file_connection_integration.py @@ -21,7 +21,7 @@ def test_s3_file_connection_check_success(caplog, s3_file_connection): assert s3.secret_key.get_secret_value() not in caplog.text assert "session_token =" not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text def test_s3_file_connection_check_failed(s3_server): diff --git a/tests/tests_integration/tests_file_connection_integration/test_sftp_file_connection_integration.py b/tests/tests_integration/tests_file_connection_integration/test_sftp_file_connection_integration.py index c14f7fddd..8c95659ba 100644 --- a/tests/tests_integration/tests_file_connection_integration/test_sftp_file_connection_integration.py +++ b/tests/tests_integration/tests_file_connection_integration/test_sftp_file_connection_integration.py @@ -21,7 +21,7 @@ def test_sftp_file_connection_check_success(sftp_file_connection, caplog): assert "password = SecretStr('**********')" in caplog.text assert sftp.password.get_secret_value() not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." 
in caplog.text def test_sftp_file_connection_check_failed(sftp_server): diff --git a/tests/tests_integration/tests_file_connection_integration/test_webdav_file_connection_integration.py b/tests/tests_integration/tests_file_connection_integration/test_webdav_file_connection_integration.py index 781621349..435d424aa 100644 --- a/tests/tests_integration/tests_file_connection_integration/test_webdav_file_connection_integration.py +++ b/tests/tests_integration/tests_file_connection_integration/test_webdav_file_connection_integration.py @@ -19,7 +19,7 @@ def test_webdav_file_connection_check_success(webdav_file_connection, caplog): assert "password = SecretStr('**********')" in caplog.text assert webdav.password.get_secret_value() not in caplog.text - assert "Connection is available" in caplog.text + assert "Connection is available." in caplog.text def test_webdav_file_connection_check_failed(webdav_server): From a6dc39801c49b210ce6bc93ab834babf5d258ba9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Wed, 6 Sep 2023 12:34:49 +0000 Subject: [PATCH 30/30] [DOP-8511] Update CHANGELOG --- docs/changelog/0.9.2.rst | 28 +++++++++++++++++++ docs/changelog/next_release/+.improvement.rst | 4 --- docs/changelog/next_release/131.bugfix.rst | 2 -- .../next_release/131.improvement.rst | 1 - docs/changelog/next_release/133.bugfix.rst | 1 - docs/changelog/next_release/136.bugfix.rst | 1 - docs/changelog/next_release/138.bugfix.rst | 1 - docs/changelog/next_release/139.bugfix.rst | 1 - docs/changelog/next_release/142.feature.rst | 1 - 9 files changed, 28 insertions(+), 12 deletions(-) create mode 100644 docs/changelog/0.9.2.rst delete mode 100644 docs/changelog/next_release/+.improvement.rst delete mode 100644 docs/changelog/next_release/131.bugfix.rst delete mode 100644 docs/changelog/next_release/131.improvement.rst delete mode 100644 docs/changelog/next_release/133.bugfix.rst delete mode 100644 docs/changelog/next_release/136.bugfix.rst delete mode 100644 docs/changelog/next_release/138.bugfix.rst delete mode 100644 docs/changelog/next_release/139.bugfix.rst delete mode 100644 docs/changelog/next_release/142.feature.rst diff --git a/docs/changelog/0.9.2.rst b/docs/changelog/0.9.2.rst new file mode 100644 index 000000000..4a865da81 --- /dev/null +++ b/docs/changelog/0.9.2.rst @@ -0,0 +1,28 @@ +0.9.2 (2023-09-06) +================== + +Features +-------- + +- Add ``if_exists="ignore"`` and ``error`` to ``Greenplum.WriteOptions`` (:github:pull:`142`) + + +Improvements +------------ + +- Improve validation messages while writing dataframe to Kafka. (:github:pull:`131`) +- Improve documentation: + + * Add notes about reading and writing to database connections documentation + * Add notes about executing statements in JDBC and Greenplum connections + + +Bug Fixes +--------- + +- Fixed validation of ``headers`` column is written to Kafka with default ``Kafka.WriteOptions()`` - default value was ``False``, + but instead of raising an exception, column value was just ignored. (:github:pull:`131`) +- Fix reading data from Oracle with ``partitioningMode="range"`` without explicitly set ``lowerBound`` / ``upperBound``. (:github:pull:`133`) +- Update Kafka documentation with SSLProtocol usage. (:github:pull:`136`) +- Raise exception if someone tries to read data from Kafka topic which does not exist. 
(:github:pull:`138`) +- Allow to pass Kafka topics with name like ``some.topic.name`` to DBReader. Same for MongoDB collections. (:github:pull:`139`) diff --git a/docs/changelog/next_release/+.improvement.rst b/docs/changelog/next_release/+.improvement.rst deleted file mode 100644 index 864a91694..000000000 --- a/docs/changelog/next_release/+.improvement.rst +++ /dev/null @@ -1,4 +0,0 @@ -Improve documentation: - -* Add notes about reading and writing to database connections documentation -* Add notes about executing statements in JDBC and Greenplum connections diff --git a/docs/changelog/next_release/131.bugfix.rst b/docs/changelog/next_release/131.bugfix.rst deleted file mode 100644 index 6b70fc3e5..000000000 --- a/docs/changelog/next_release/131.bugfix.rst +++ /dev/null @@ -1,2 +0,0 @@ -Fixed validation of ``headers`` column is written to Kafka with default ``Kafka.WriteOptions()`` - default value was ``False``, -but instead of raising an exception, column value was just ignored. diff --git a/docs/changelog/next_release/131.improvement.rst b/docs/changelog/next_release/131.improvement.rst deleted file mode 100644 index 5bea833a5..000000000 --- a/docs/changelog/next_release/131.improvement.rst +++ /dev/null @@ -1 +0,0 @@ -Improve validation messages while writing dataframe to Kafka. diff --git a/docs/changelog/next_release/133.bugfix.rst b/docs/changelog/next_release/133.bugfix.rst deleted file mode 100644 index 37068bc5b..000000000 --- a/docs/changelog/next_release/133.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Fix reading data from Oracle with ``partitioningMode="range"`` without explicitly set ``lowerBound`` / ``upperBound``. diff --git a/docs/changelog/next_release/136.bugfix.rst b/docs/changelog/next_release/136.bugfix.rst deleted file mode 100644 index 04d880e5e..000000000 --- a/docs/changelog/next_release/136.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Update Kafka documentation with SSLProtocol usage. diff --git a/docs/changelog/next_release/138.bugfix.rst b/docs/changelog/next_release/138.bugfix.rst deleted file mode 100644 index 4ea6c3a5c..000000000 --- a/docs/changelog/next_release/138.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Raise exception if someone tries to read data from Kafka topic which does not exist. diff --git a/docs/changelog/next_release/139.bugfix.rst b/docs/changelog/next_release/139.bugfix.rst deleted file mode 100644 index a29b5f44d..000000000 --- a/docs/changelog/next_release/139.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Allow to pass Kafka topics with name like ``some.topic.name`` to DBReader. Same for MongoDB collections. diff --git a/docs/changelog/next_release/142.feature.rst b/docs/changelog/next_release/142.feature.rst deleted file mode 100644 index 453fb814a..000000000 --- a/docs/changelog/next_release/142.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Add ``ignore`` and ``error`` writing modes in ``Greenplum.WriteOptions``
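The ``Greenplum.WriteOptions`` entry above is exercised the same way as other ``if_exists`` modes. A hypothetical sketch, with the ``greenplum`` connection object and the dataframe ``df`` as placeholders and the DBWriter import path assumed from onetl 0.9.x; see :github:pull:`142` for the exact semantics of each mode:

.. code:: python

    from onetl.db import DBWriter

    # greenplum is a placeholder for an already configured Greenplum connection;
    # "ignore" and "error" are the if_exists modes added in 0.9.2
    writer = DBWriter(
        connection=greenplum,
        table="schema.table",  # placeholder table name
        options=greenplum.WriteOptions(if_exists="ignore"),
    )
    writer.run(df)  # df is a placeholder Spark dataframe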