From 2a280e9597960c5688f40b6ce2ff8f5c6587ce7a Mon Sep 17 00:00:00 2001 From: Dang Ly Date: Mon, 11 Nov 2024 02:18:26 +0700 Subject: [PATCH 1/9] Migrate CE --- .coveragerc | 13 + .github/workflows/main.yml | 2 +- .gitignore | 5 +- datamimic_ce/cli.py | 2 +- datamimic_ce/clients/rdbms_client.py | 12 +- datamimic_ce/config.py | 6 +- datamimic_ce/contexts/setup_context.py | 4 + datamimic_ce/exporters/exporter_util.py | 110 ++- datamimic_ce/exporters/mongodb_exporter.py | 26 +- datamimic_ce/logger/__init__.py | 4 +- datamimic_ce/parsers/parser_util.py | 83 +- datamimic_ce/statements/statement_util.py | 16 +- datamimic_ce/tasks/generate_task.py | 878 +++++++++--------- datamimic_ce/tasks/task_util.py | 219 ++--- tests_ce/conftest.py | 4 +- .../consumer_csv/test_csv_consumer.xml | 2 +- .../datamimic_demo/j-json/datamimic.xml | 2 +- .../datamimic_demo/p-xml/datamimic.xml | 2 +- .../test_exporters/data/people.json | 20 + .../test_exporters/data/products.ent.csv | 4 + .../test_exporters/multi_json.xml | 56 ++ .../test_exporters/multi_opensearch_bulk.xml | 46 + .../test_exporters/script/template_xyz.json | 32 + .../test_exporters/single_cascaded_cases.xml | 30 + .../test_exporters/single_combine_all.xml | 63 ++ .../test_exporters/single_csv.xml | 77 ++ .../test_exporters/single_json.xml | 63 ++ .../single_json_single_cascaded_cases.xml | 56 ++ .../test_exporters/single_opensearch_bulk.xml | 60 ++ .../test_exporters/single_txt.xml | 63 ++ .../test_exporters/single_xml.xml | 63 ++ .../test_exporters/test_exporters.py | 56 ++ .../test_mongodb/test_mongodb_intergration.py | 6 +- .../test_rdbms/test_rdbms.py | 4 +- tests_ce/test_exporter_util.py | 117 +++ tests_ce/unit_tests/exporter/__init__.py | 7 + .../unit_tests/exporter/test_csv_exporter.py | 456 +++++++++ .../unit_tests/exporter/test_json_exporter.py | 428 +++++++++ .../exporter/test_opeansearch_bulk.py | 371 ++++++++ .../unit_tests/exporter/test_txt_exporter.py | 452 +++++++++ .../unit_tests/exporter/test_xml_exporter.py | 286 ++++++ update_copyright.py | 141 +++ 42 files changed, 3684 insertions(+), 663 deletions(-) create mode 100644 .coveragerc create mode 100644 tests_ce/integration_tests/test_exporters/data/people.json create mode 100644 tests_ce/integration_tests/test_exporters/data/products.ent.csv create mode 100644 tests_ce/integration_tests/test_exporters/multi_json.xml create mode 100644 tests_ce/integration_tests/test_exporters/multi_opensearch_bulk.xml create mode 100644 tests_ce/integration_tests/test_exporters/script/template_xyz.json create mode 100644 tests_ce/integration_tests/test_exporters/single_cascaded_cases.xml create mode 100644 tests_ce/integration_tests/test_exporters/single_combine_all.xml create mode 100644 tests_ce/integration_tests/test_exporters/single_csv.xml create mode 100644 tests_ce/integration_tests/test_exporters/single_json.xml create mode 100644 tests_ce/integration_tests/test_exporters/single_json_single_cascaded_cases.xml create mode 100644 tests_ce/integration_tests/test_exporters/single_opensearch_bulk.xml create mode 100644 tests_ce/integration_tests/test_exporters/single_txt.xml create mode 100644 tests_ce/integration_tests/test_exporters/single_xml.xml create mode 100644 tests_ce/integration_tests/test_exporters/test_exporters.py create mode 100644 tests_ce/test_exporter_util.py create mode 100644 tests_ce/unit_tests/exporter/__init__.py create mode 100644 tests_ce/unit_tests/exporter/test_csv_exporter.py create mode 100644 tests_ce/unit_tests/exporter/test_json_exporter.py create mode 100644 
tests_ce/unit_tests/exporter/test_opeansearch_bulk.py create mode 100644 tests_ce/unit_tests/exporter/test_txt_exporter.py create mode 100644 tests_ce/unit_tests/exporter/test_xml_exporter.py create mode 100644 update_copyright.py diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..328d727 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,13 @@ +[run] +source = + datamimic +parallel = True +[report] +exclude_lines = + pragma: no cover +omit = + docker/* + script/* + target/* + venv/* + tests/* diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 07e1b25..7c9ae25 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -11,7 +11,7 @@ on: - development env: - DATAMIMIC_LIB_ENVIRONMENT: lib_staging + RUNTIME_ENVIRONMENT: production jobs: setup: diff --git a/.gitignore b/.gitignore index 3af70db..dbbdeac 100644 --- a/.gitignore +++ b/.gitignore @@ -59,4 +59,7 @@ uv.lock #datamimic datamimic.log datamimic.log.* -exported_data/ \ No newline at end of file +exported_data/ + +# Temporary result files +**/**/temp_result* \ No newline at end of file diff --git a/datamimic_ce/cli.py b/datamimic_ce/cli.py index d60db19..2f2b6ca 100644 --- a/datamimic_ce/cli.py +++ b/datamimic_ce/cli.py @@ -3,7 +3,7 @@ # Licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0). # For commercial use, please contact Rapiddweller at info@rapiddweller.com to obtain a commercial license. # Full license text available at: http://creativecommons.org/licenses/by-nc-sa/4.0/ - +import argparse import json import os from importlib.resources import files diff --git a/datamimic_ce/clients/rdbms_client.py b/datamimic_ce/clients/rdbms_client.py index 6654223..e689e98 100644 --- a/datamimic_ce/clients/rdbms_client.py +++ b/datamimic_ce/clients/rdbms_client.py @@ -17,6 +17,7 @@ from sqlalchemy.orm import sessionmaker from datamimic_ce.clients.database_client import DatabaseClient +from datamimic_ce.config import settings from datamimic_ce.credentials.rdbms_credential import RdbmsCredential from datamimic_ce.data_sources.data_source_pagination import \ DataSourcePagination @@ -86,8 +87,8 @@ def create_sqlalchemy_engine(driver, user, password, host, port, db): # Match the DBMS type and create the appropriate SQLAlchemy engine match dbms: case "sqlite": - environment = os.getenv("DATAMIMIC_LIB_ENVIRONMENT") - if environment in {"local", "staging", "production"}: + environment = settings.RUNTIME_ENVIRONMENT + if environment in {"development", "production"}: if not self._task_id: raise ValueError( "Task ID is required to create SQLite db in task folder" @@ -95,9 +96,10 @@ def create_sqlalchemy_engine(driver, user, password, host, port, db): # Construct the database path within the task folder db_path = Path("data") / "task" / self._task_id / f"{db}.sqlite" if not db_path.exists(): - logger.info( - f"Creating SQLite db file in task folder: {db_path}" - ) + # Ensure the parent directory exists + logger.info(f"Creating SQLite db file in task folder: {db_path}") + db_path.parent.mkdir(parents=True, exist_ok=True) + else: # Use a simple file-based SQLite database db_path = Path(f"{db}.sqlite") diff --git a/datamimic_ce/config.py b/datamimic_ce/config.py index 50ac073..04104fd 100644 --- a/datamimic_ce/config.py +++ b/datamimic_ce/config.py @@ -12,12 +12,12 @@ # ENV Vars are automatically retrieved from .env or # ENVIRONMENT VARS by using extending Class BaseSettings class Settings(BaseSettings): - # Should be lib_local (for lib 
testing on local) - DATAMIMIC_LIB_ENVIRONMENT: Literal["lib_local", "lib_staging"] = "lib_local" + # Should be development or production + RUNTIME_ENVIRONMENT: Literal["development", "production"] = "production" SC_PAGE_SIZE: int = 1000 - DEFEAULT_LOGGER: str = "DATAMIMIC" + DEFAULT_LOGGER: str = "DATAMIMIC" LIB_EDITION: str = "CE" model_config = SettingsConfigDict( diff --git a/datamimic_ce/contexts/setup_context.py b/datamimic_ce/contexts/setup_context.py index e380665..6be3331 100644 --- a/datamimic_ce/contexts/setup_context.py +++ b/datamimic_ce/contexts/setup_context.py @@ -393,6 +393,10 @@ def report_logging(self) -> bool: def report_logging(self, value) -> None: self._report_logging = value + @property + def default_encoding(self): + return self._default_encoding + def add_client(self, client_id: str, client: Client): """ Add client info to context diff --git a/datamimic_ce/exporters/exporter_util.py b/datamimic_ce/exporters/exporter_util.py index fcf0935..5327134 100644 --- a/datamimic_ce/exporters/exporter_util.py +++ b/datamimic_ce/exporters/exporter_util.py @@ -76,11 +76,21 @@ def create_exporter_list( exporter_str_list = list(stmt.targets) - for exporter_str in exporter_str_list: - # Handle consumers with operation - if "." in exporter_str: - # consumer, operation = ConsumerUtil.get_consumer_with_operation() - consumer_name, operation = exporter_str.split(".") + # Join the list back into a string + target_str = ",".join(exporter_str_list) + + # Parse the target string using the parse_function_string function + try: + parsed_targets = ExporterUtil.parse_function_string(target_str) + except ValueError as e: + raise ValueError(f"Error parsing target string: {e}") + + # Now loop over the parsed functions and create exporters + for target in parsed_targets: + exporter_name = target["function_name"] + params = target["params"] if target["params"] else {} # Handle consumers with operation + if "." 
in exporter_name: + consumer_name, operation = exporter_name.split(".", 1) client = setup_context.get_client_by_id(consumer_name) consumer = ExporterUtil._create_exporter_from_client( client, consumer_name @@ -90,13 +100,97 @@ def create_exporter_list( else: consumer = ExporterUtil.get_exporter_by_name( setup_context=setup_context, - name=exporter_str, + name=exporter_name, + params=params, ) if consumer is not None: consumers_without_operation.append(consumer) return consumers_with_operation, consumers_without_operation + def parse_function_string(function_string): + parsed_functions = [] + # Remove spaces and check if only commas or blank string are provided + if function_string.strip() == "" or all(char in ", " for char in function_string): + return parsed_functions + + # Wrap the function string in a list to make it valid Python code + code_to_parse = f"[{function_string}]" + + try: + # Parse the code into an AST node + module = ast.parse(code_to_parse, mode="eval") + except SyntaxError as e: + raise ValueError(f"Error parsing function string: {e}") + + # Ensure the parsed node is a list + if not isinstance(module.body, ast.List): + raise ValueError("Function string is not a valid list of function calls.") + + # Iterate over each element in the list + for element in module.body.elts: + # Handle function calls with parameters + if isinstance(element, ast.Call): + # Extract function name, including dot notation (e.g., mongodb.upsert) + if isinstance(element.func, ast.Name): + function_name = element.func.id + elif isinstance(element.func, ast.Attribute): + # Capture the full dotted name + parts = [] + current = element.func + while isinstance(current, ast.Attribute): + parts.append(current.attr) + current = current.value + if isinstance(current, ast.Name): + parts.append(current.id) + function_name = ".".join(reversed(parts)) + else: + raise ValueError("Unsupported function type in function call.") + + params = {} + # Extract keyword arguments + for keyword in element.keywords: + key = keyword.arg + try: + # Safely evaluate the value using ast.literal_eval + value = ast.literal_eval(keyword.value) + except (ValueError, SyntaxError): + # If evaluation fails, raise error for non-literal parameters + raise ValueError(f"Non-literal parameter found: {keyword.value}") + params[key] = value + + parsed_functions.append({"function_name": function_name, "params": params}) + # Handle function names without parameters, including dotted names like mongodb.delete + elif isinstance(element, ast.Attribute): + # For dotted names like mongodb.delete + parts = [] + current = element + while isinstance(current, ast.Attribute): + parts.append(current.attr) + current = current.value + if isinstance(current, ast.Name): + parts.append(current.id) + function_name = ".".join(reversed(parts)) + parsed_functions.append({"function_name": function_name, "params": None}) + elif isinstance(element, ast.Name): + # For single names like CSV + function_name = element.id + parsed_functions.append({"function_name": function_name, "params": None}) + elif isinstance(element, ast.Constant): # For Python 3.8+, for older versions use ast.Str or ast.Num + # This handles cases like 'CSV' and 'JSON' if they are given as strings + function_name = element.value + parsed_functions.append({"function_name": function_name, "params": None}) + else: + # Attempt to evaluate other expressions (e.g., strings, numbers) + try: + value = ast.literal_eval(element) + function_name = str(value) + parsed_functions.append({"function_name": 
function_name, "params": None}) + except Exception: + raise ValueError("Unsupported expression in function string.") + + return parsed_functions + @staticmethod def get_exporter_by_name(setup_context: SetupContext, name: str) -> Exporter: """ @@ -146,11 +240,11 @@ def _create_exporter_from_client(client: Client, client_name: str): raise ValueError(f"Cannot create target for client {client_name}") @staticmethod - def json_dumps(data: object) -> str: + def json_dumps(data: object, indent=4) -> str: """ JSON dump with default custom serializer """ - return json.dumps(data, default=custom_serializer, ensure_ascii=False, indent=4) + return json.dumps(data, default=custom_serializer, ensure_ascii=False, indent=indent) @staticmethod def check_path_format(path) -> str: diff --git a/datamimic_ce/exporters/mongodb_exporter.py b/datamimic_ce/exporters/mongodb_exporter.py index 3aad04b..53db3d0 100644 --- a/datamimic_ce/exporters/mongodb_exporter.py +++ b/datamimic_ce/exporters/mongodb_exporter.py @@ -3,7 +3,7 @@ # Licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0). # For commercial use, please contact Rapiddweller at info@rapiddweller.com to obtain a commercial license. # Full license text available at: http://creativecommons.org/licenses/by-nc-sa/4.0/ - +import copy from typing import Tuple from datamimic_ce.clients.mongodb_client import MongoDBClient @@ -16,8 +16,9 @@ def __init__(self, client: MongoDBClient): def consume(self, product) -> None: """Write data into MongoDB database""" - name = product[0] - data = product[1] + temp_product = copy.deepcopy(product) + name = temp_product[0] + data = temp_product[1] self._client.insert(name, data, False) def update(self, product: Tuple) -> int: @@ -25,8 +26,9 @@ def update(self, product: Tuple) -> int: Update data into MongoDB database :return: The number of documents matched for an update. 
""" - if len(product) > 2: - data = product[1] + temp_product = copy.deepcopy(product) + if len(temp_product) > 2: + data = temp_product[1] # product[2] contain "type" or "selector" attribute info target_query = product[2] return self._client.update(target_query, data) @@ -37,17 +39,17 @@ def upsert(self, product: Tuple) -> Tuple: """ Update MongoDB data with upsert {true} """ - name, product_list, selector_dict = product - return name, self._client.upsert( - selector_dict=selector_dict, updated_data=product_list - ) + temp_product = copy.deepcopy(product) + name, product_list, selector_dict = temp_product + return name, self._client.upsert(selector_dict=selector_dict, updated_data=product_list) def delete(self, product: Tuple): """ Delete data from MongoDB database """ - if len(product) > 2: - data = product[1] + temp_product = copy.deepcopy(product) + if len(temp_product) > 2: + data = temp_product[1] # product[2] contain "type" or "selector" attribute info - target_query = product[2] + target_query = temp_product[2] self._client.delete(target_query, data) diff --git a/datamimic_ce/logger/__init__.py b/datamimic_ce/logger/__init__.py index ec93271..eaeff46 100644 --- a/datamimic_ce/logger/__init__.py +++ b/datamimic_ce/logger/__init__.py @@ -43,7 +43,7 @@ def setup_logger(logger_name, log_file, task_id, level=logging.DEBUG): if not l.handlers: # Avoid adding duplicate handlers l.setLevel(level) - if settings.DATAMIMIC_LIB_ENVIRONMENT in ["lib_local", "lib_staging"]: + if settings.RUNTIME_ENVIRONMENT in ["development", "production"]: l.addHandler(stream_handler) l.addHandler(file_handler) else: @@ -52,4 +52,4 @@ def setup_logger(logger_name, log_file, task_id, level=logging.DEBUG): l.propagate = False # Avoid propagation to the parent logger -logger = logging.getLogger(settings.DEFEAULT_LOGGER) +logger = logging.getLogger(settings.DEFAULT_LOGGER) diff --git a/datamimic_ce/parsers/parser_util.py b/datamimic_ce/parsers/parser_util.py index bcbdb74..b6e89dd 100644 --- a/datamimic_ce/parsers/parser_util.py +++ b/datamimic_ce/parsers/parser_util.py @@ -11,20 +11,31 @@ from typing import Dict, List, Set from xml.etree.ElementTree import Element -from datamimic_ce.constants.attribute_constants import (ATTR_ENVIRONMENT, - ATTR_ID, ATTR_SYSTEM) -from datamimic_ce.constants.element_constants import (EL_ARRAY, EL_CONDITION, - EL_DATABASE, EL_ECHO, - EL_ELEMENT, EL_ELSE, - EL_ELSE_IF, EL_EXECUTE, - EL_GENERATE, - EL_GENERATOR, EL_IF, - EL_INCLUDE, EL_ITEM, - EL_KEY, EL_LIST, - EL_MEMSTORE, EL_MONGODB, - EL_NESTED_KEY, - EL_REFERENCE, EL_SETUP, - EL_VARIABLE) +from datamimic_ce.config import settings +from datamimic_ce.constants.attribute_constants import ATTR_ENVIRONMENT, ATTR_ID, ATTR_SYSTEM +from datamimic_ce.constants.element_constants import ( + EL_ARRAY, + EL_CONDITION, + EL_DATABASE, + EL_ECHO, + EL_ELEMENT, + EL_ELSE, + EL_ELSE_IF, + EL_EXECUTE, + EL_GENERATE, + EL_GENERATOR, + EL_IF, + EL_INCLUDE, + EL_ITEM, + EL_KEY, + EL_LIST, + EL_MEMSTORE, + EL_MONGODB, + EL_NESTED_KEY, + EL_REFERENCE, + EL_SETUP, + EL_VARIABLE, +) from datamimic_ce.logger import logger from datamimic_ce.parsers.array_parser import ArrayParser from datamimic_ce.parsers.condition_parser import ConditionParser @@ -220,19 +231,13 @@ def parse_sub_elements( copied_props = copy.deepcopy(properties) or {} for child_ele in element: - parser = self.get_parser_by_element( - class_factory_util, child_ele, copied_props - ) + parser = self.get_parser_by_element(class_factory_util, child_ele, copied_props) # TODO: add more 
child-element-able parsers such as attribute, reference, part,... (i.e. elements which have attribute 'name') - if isinstance( - parser, (VariableParser, GenerateParser, NestedKeyParser, ElementParser) - ): + if isinstance(parser, (VariableParser, GenerateParser, NestedKeyParser, ElementParser)): if isinstance(parser, VariableParser) and element.tag == "setup": stmt = parser.parse(parent_stmt=parent_stmt, has_parent_setup=True) elif isinstance(parser, (GenerateParser, NestedKeyParser)): - stmt = parser.parse( - descriptor_dir=descriptor_dir, parent_stmt=parent_stmt - ) + stmt = parser.parse(descriptor_dir=descriptor_dir, parent_stmt=parent_stmt) else: stmt = parser.parse(parent_stmt=parent_stmt) else: @@ -249,10 +254,7 @@ def parse_sub_elements( ), ): stmt = parser.parse() - elif isinstance( - parser, - (KeyParser, ConditionParser, IfParser, ElseIfParser, ElseParser), - ): + elif isinstance(parser, (KeyParser, ConditionParser, IfParser, ElseIfParser, ElseParser)): stmt = parser.parse(descriptor_dir, parent_stmt) else: stmt = parser.parse(descriptor_dir) @@ -270,9 +272,7 @@ def parse_sub_elements( return result @staticmethod - def retrieve_element_attributes( - attributes: Dict[str, str], properties: Dict[str, str] - ) -> Dict[str, str]: + def retrieve_element_attributes(attributes: Dict[str, str], properties: Dict[str, str]) -> Dict[str, str]: """ Retrieve element's attributes using environment properties :param attributes: @@ -284,11 +284,7 @@ def retrieve_element_attributes( # Look up element's attributes defined as variable then evaluate them for key, value in attributes.items(): - if ( - type(value) is str - and re.match(r"^\{[a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z0-9_]+)*\}$", value) - is not None - ): + if type(value) is str and re.match(r"^\{[a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z0-9_]+)*\}$", value) is not None: prop_key = value[1:-1] if "." not in prop_key: @@ -329,11 +325,7 @@ def fulfill_credentials_v2( environment = ( descriptor_attr.get(ATTR_ENVIRONMENT) - or ( - "local" - if os.environ.get("DATAMIMIC_LIB_ENVIRONMENT") == "lib_local" - else None - ) + or ("local" if os.environ.get("DATAMIMIC_LIB_ENVIRONMENT") == "lib_local" else None) or "environment" ) system = descriptor_attr.get(ATTR_SYSTEM) @@ -359,18 +351,13 @@ def fulfill_credentials_v2( ) # Update env props from env file conf_props.update(env_props_from_env_file) - except FileNotFoundError: - logger.info( - f"Environment file not found {str(descriptor_dir / f'conf/{environment}.env.properties')}" - ) + except FileNotFoundError as e: + logger.info(f"Environment file not found {str(descriptor_dir / f'conf/{environment}.env.properties')}") credentials = copy.deepcopy(descriptor_attr) for attr_key, attr_value in conf_props.items(): - if ( - attr_key.startswith(f"{system}.{system_type}.") - and attr_value is not None - ): + if attr_key.startswith(f"{system}.{system_type}.") and attr_value is not None: attr_name = "".join(attr_key.split(".")[2:]) credentials[attr_name] = attr_value diff --git a/datamimic_ce/statements/statement_util.py b/datamimic_ce/statements/statement_util.py index af336f0..ff28bd9 100644 --- a/datamimic_ce/statements/statement_util.py +++ b/datamimic_ce/statements/statement_util.py @@ -13,16 +13,24 @@ class StatementUtil: @staticmethod def parse_consumer(consumer_string: str) -> Set[str]: """ - Parse attribute "consumer" to list of consumer - :param consumer_string: - :return: + Parse the 'consumer' attribute into a set of consumers. + Splits on commas not enclosed within parentheses. 
""" consumer_list = ( [] if consumer_string is None else list(map(lambda ele: ele.strip(), consumer_string.split(","))) ) - # Avoid duplicated consumer + + # Pattern to split on commas not inside parentheses + pattern = r",\s*(?![^(]*\))" + consumer_list = re.split(pattern, consumer_string) + + # Strip whitespace from each consumer + consumer_list = [consumer.strip() for consumer in consumer_list if consumer.strip()] + + # Avoid duplicated consumers +>>>>>>> e96c3c0 (Migrate CE) consumer_set = set(consumer_list) return consumer_set diff --git a/datamimic_ce/tasks/generate_task.py b/datamimic_ce/tasks/generate_task.py index 4729775..e73ed3b 100644 --- a/datamimic_ce/tasks/generate_task.py +++ b/datamimic_ce/tasks/generate_task.py @@ -25,19 +25,28 @@ from datamimic_ce.clients.database_client import DatabaseClient from datamimic_ce.clients.rdbms_client import RdbmsClient from datamimic_ce.constants.exporter_constants import ( - EXPORTER_CSV, EXPORTER_JSON, EXPORTER_TEST_RESULT_EXPORTER, EXPORTER_TXT, - EXPORTER_XML) + EXPORTER_CSV, + EXPORTER_JSON, + EXPORTER_TEST_RESULT_EXPORTER, + EXPORTER_TXT, + EXPORTER_XML, +) from datamimic_ce.contexts.context import Context from datamimic_ce.contexts.geniter_context import GenIterContext from datamimic_ce.contexts.setup_context import SetupContext -from datamimic_ce.data_sources.data_source_pagination import \ - DataSourcePagination +from datamimic_ce.data_sources.data_source_pagination import DataSourcePagination from datamimic_ce.data_sources.data_source_util import DataSourceUtil from datamimic_ce.exporters.mongodb_exporter import MongoDBExporter from datamimic_ce.logger import logger +from datamimic_ce.statements.array_statement import ArrayStatement from datamimic_ce.statements.composite_statement import CompositeStatement +from datamimic_ce.statements.element_statement import ElementStatement from datamimic_ce.statements.generate_statement import GenerateStatement +from datamimic_ce.statements.if_statement import IfStatement +from datamimic_ce.statements.item_statement import ItemStatement from datamimic_ce.statements.key_statement import KeyStatement +from datamimic_ce.statements.list_statement import ListStatement +from datamimic_ce.statements.nested_key_statement import NestedKeyStatement from datamimic_ce.statements.setup_statement import SetupStatement from datamimic_ce.statements.statement import Statement from datamimic_ce.tasks.task import Task @@ -46,10 +55,10 @@ def _wrapper(args): """ - Wrapper mp function, deserialize args functions + Wrapper multiprocessing function to deserialize args and execute the generate function. - :param args: - :return: + :param args: Tuple containing necessary arguments. + :return: Result from single_process_execute_function. """ ( local_ctx, @@ -62,25 +71,23 @@ def _wrapper(args): mp_chunk_size, ) = args - # Deserialized util functions + # Deserialize utility functions namespace_functions = dill.loads(namespace_functions) local_ctx.namespace.update(namespace_functions) local_ctx.generators = dill.loads(local_ctx.generators) - return single_process_execute_function( - (local_ctx, statement, chunk_data, mp_idx, page_size, mp_chunk_size) - ) + return single_process_execute_function((local_ctx, statement, chunk_data, mp_idx, page_size, mp_chunk_size)) def _geniter_single_process_generate(args: Tuple) -> Dict[str, List]: """ - (IMPORTANT: Only be used as mp function) Generate product in each single process + (IMPORTANT: Only to be used as multiprocessing function) Generate product in each single process. 
- :param args: - :return: + :param args: Tuple containing context, statement, and index range. + :return: Dictionary with generated products. """ - # Parse args list + # Parse args context: Context = args[0] root_context: SetupContext = context.root stmt: GenerateStatement = args[1] @@ -101,16 +108,13 @@ def _geniter_single_process_generate(args: Tuple) -> Dict[str, List]: # 1: Build sub-tasks in GenIterStatement tasks = [ - task_util_cls.get_task_by_statement(root_context, child_stmt, pagination) - for child_stmt in stmt.sub_statements + task_util_cls.get_task_by_statement(root_context, child_stmt, pagination) for child_stmt in stmt.sub_statements ] # 2: Load data source source_str = stmt.source source_scripted = ( - stmt.source_script - if stmt.source_script is not None - else bool(root_context.default_source_scripted) + stmt.source_script if stmt.source_script is not None else bool(root_context.default_source_scripted) ) separator = stmt.separator or root_context.default_separator is_random_distribution = stmt.distribution in ("random", None) @@ -120,31 +124,25 @@ def _geniter_single_process_generate(args: Tuple) -> Dict[str, List]: load_start_idx = None load_end_idx = None load_pagination = None - source_data, build_from_source = ( - context.root.class_factory_util.get_task_util_cls().gen_task_load_data_from_source( - context, - stmt, - source_str, - separator, - source_scripted, - processed_data_count, - load_start_idx, - load_end_idx, - load_pagination, - ) + source_data, build_from_source = context.root.class_factory_util.get_task_util_cls().gen_task_load_data_from_source( + context, + stmt, + source_str, + separator, + source_scripted, + processed_data_count, + load_start_idx, + load_end_idx, + load_pagination, ) if is_random_distribution: seed = root_context.get_distribution_seed() # Use original pagination for shuffling - source_data = DataSourceUtil.get_shuffled_data_with_cyclic( - source_data, pagination, stmt.cyclic, seed - ) - - # TODO: split loading data and generating/modifying data + source_data = DataSourceUtil.get_shuffled_data_with_cyclic(source_data, pagination, stmt.cyclic, seed) # Keep current product and sub product in product_holder on non low memory mode - product_holder: Dict[str, List] = dict() + product_holder: Dict[str, List] = {} # Store temp result result = [] @@ -203,11 +201,11 @@ def _geniter_single_process_generate(args: Tuple) -> Dict[str, List]: def _geniter_single_process_generate_and_consume_by_page(args: Tuple) -> Dict: """ - IMPORTANT: Used as mp page process function only - Generate then consume product in each single process by page + IMPORTANT: Used as multiprocessing page process function only. + Generate then consume product in each single process by page. - :param args: - :return: + :param args: Tuple containing necessary arguments. + :return: Dictionary with generated products if needed. 
""" context: SetupContext = args[0] stmt: GenerateStatement = args[1] @@ -217,37 +215,39 @@ def _geniter_single_process_generate_and_consume_by_page(args: Tuple) -> Dict: mp_chunk_size = args[5] # Calculate page chunk - index_chunk = [ - (i, min(i + page_size, end_idx)) for i in range(start_idx, end_idx, page_size) - ] + index_chunk = [(i, min(i + page_size, end_idx)) for i in range(start_idx, end_idx, page_size)] - # Check if product result should be returned on mp process + # Check if product result should be returned on multiprocessing process return_product_result = context.test_mode or any( - [ - context.memstore_manager.contain(consumer_str) - for consumer_str in stmt.targets - ] + [context.memstore_manager.contain(consumer_str) for consumer_str in stmt.targets] ) result = {} # Generate and consume product by page args_list = list(args) for page_idx, index_tuple in enumerate(index_chunk): + # Index tuple for each page args_list[2] = index_tuple updated_args = tuple(args_list) result_dict = _geniter_single_process_generate(updated_args) _consume_by_page( - stmt, context, result_dict, page_idx, page_size, mp_idx, mp_chunk_size + stmt, + context, + result_dict, + page_idx, + page_size, + mp_idx, + mp_chunk_size, + page_idx == len(index_chunk) - 1, ) - if return_product_result: for key, value in result_dict.items(): result[key] = result.get(key, []) + value # Manual garbage collection del result_dict - gc.collect() + # gc.collect() return result @@ -260,56 +260,31 @@ def _consume_by_page( page_size: int, mp_idx: int, mp_chunk_size: int, + is_last_page: bool, ) -> None: """ - Consume product by page. Only write temp data for MinioConsumer for later consume - :param stmt: - :param context: - :param xml_result: - :param page_idx: - :param mp_idx: - :return: + Consume product by page. + + :param stmt: GenerateStatement instance. + :param context: Context instance. + :param xml_result: Generated product data. + :param page_idx: Current page index. + :param page_size: Page size for processing. + :param mp_idx: Multiprocessing index. + :param mp_chunk_size: Chunk size for multiprocessing. + :param preview_record_length: Length for preview records. 
+ :return: None """ root_ctx = context.root - # Consume MinioConsumer by writing temp file + # Consume specific exporters by writing temp files if necessary for stmt_name, xml_value in xml_result.items(): # Load current gen_stmt with corresponding targets current_stmt = stmt.retrieve_sub_statement_by_fullname(stmt_name) if current_stmt is None: raise ValueError(f"Cannot find element '{stmt_name}'") - # Write temp file for later MinioConsumer - if ( - len( - {EXPORTER_CSV, EXPORTER_JSON, EXPORTER_TXT, EXPORTER_XML} - & current_stmt.targets - ) - > 0 - ): - mp_idx_path = f"mp_{mp_idx}_" if mp_idx is not None else "" - page_idx_path = f"page_{page_idx}_" - - # Init temp file path - result_temp_file = ( - root_ctx.descriptor_dir - / f"temp_result_{root_ctx.task_id}" - / f"{mp_idx_path + page_idx_path + current_stmt.full_name}.json" - ) - - # Prepare temp directory - result_temp_file.parent.mkdir(parents=True, exist_ok=True) - - # Write to temp file - with result_temp_file.open("a") as file: - file.write( - json.dumps( - xml_value, indent=4, default=minio_serialization_function - )[1:-1] - + "," - ) - - # Consume non MinioConsumer + # Consume non-specific exporters _consume_outermost_gen_stmt_by_page( stmt, context, @@ -320,57 +295,40 @@ def _consume_by_page( mp_chunk_size, page_size, ), + is_last_page ) -def minio_serialization_function(obj): - """ - Serialization function for MinioConsumer - :param obj: - :return: +def _pre_consume_product(stmt: GenerateStatement, dict_result: List[Dict]) -> Tuple: """ - if isinstance(obj, datetime): - return obj.isoformat() - raise TypeError( - f"Object of type '{type(obj).__name__}' is not JSON serializable on MinioConsumer" - ) - + Preprocess consumer data to adapt some special consumer (e.g., MongoDB upsert). -def _load_temp_result_file(result_temp_dir: Path) -> Dict: - """ - Load temp result file - :return: + :param stmt: GenerateStatement instance. + :param dict_result: Generated data. + :return: Preprocessed product tuple. """ - result_dict = {} - - # Gather file path by product name - file_dict = defaultdict(list) - for file_path in result_temp_dir.glob("*.json"): - product_name = re.sub(r"^page_\d+_|^mp_\d+_page_\d+_", "", file_path.stem) - file_dict[product_name].append(file_path) - - # Load data from each file - for product_name, file_paths in file_dict.items(): - sorted_file_paths = sorted(file_paths) - file_data = "" - for file_path in sorted_file_paths: - with file_path.open("r") as file: - file_data += file.read()[:-1] + "," - result_dict[product_name] = json.loads("[" + file_data[:-1] + "]") - - return result_dict + if getattr(stmt, "selector", False): + packed_result = (stmt.name, dict_result, {"selector": stmt.selector}) + elif getattr(stmt, "type", False): + packed_result = (stmt.name, dict_result, {"type": stmt.type}) + else: + packed_result = (stmt.name, dict_result) + return packed_result def _consume_outermost_gen_stmt_by_page( - stmt: GenerateStatement, context: Context, result_dict: Dict, page_info: Tuple + stmt: GenerateStatement, context: Context, result_dict: Dict, page_info: Tuple, is_last_page: bool, + ) -> None: """ - Consume result_dict returned by outermost gen_stmt - - :param stmt: - :param context: - :param result_dict: - :return: + Consume result_dict returned by outermost gen_stmt. + + :param stmt: GenerateStatement instance. + :param context: Context instance. + :param result_dict: Generated product data. + :param page_info: Tuple containing page information. + :param preview_record_length: Length for preview records. 
+ :return: None """ report_logging = isinstance(context, SetupContext) and context.report_logging @@ -390,23 +348,45 @@ def _consume_outermost_gen_stmt_by_page( xml_result=result, page_info=page_info, ) + if is_last_page: + _finalize_and_export_consumers(context, sub_stmt) -def _pre_consume_product(stmt, dict_result): +def _finalize_and_export_consumers(context: Context, stmt: GenerateStatement): """ - Preprocess consumer data to adapt some special consumer (e.g. MongoDB upsert) - :param stmt: - :param dict_result: - :return: + Finalize chunks and export data for all consumers that require it. + + :param context: Context instance. + :param stmt: GenerateStatement instance. + :return: None """ - if getattr(stmt, "selector", False): - packed_result = (stmt.name, dict_result, {"selector": stmt.selector}) - elif getattr(stmt, "type", False): - packed_result = (stmt.name, dict_result, {"type": stmt.type}) - else: - packed_result = (stmt.name, dict_result) - return packed_result + # Create list of consumers that need to finalize and export + consumers_with_operation, consumers_without_operation = ExporterUtil.create_exporter_list( + setup_context=context.root, + product_name=stmt.name, + consumer_str_list=list(stmt.targets), + storage_type=stmt.storage_id, + target_uri=stmt.export_uri, + ) + # Combine all consumers + all_consumers = [consumer for consumer, _ in consumers_with_operation] + consumers_without_operation + + for consumer in all_consumers: + # Only finalize and export if the exporter has these methods + try: + # Construct the export name to include task_id + export_name = f"{context.root.task_id}/{stmt.name}" + + if hasattr(consumer, "finalize_chunks"): + consumer.finalize_chunks() + if hasattr(consumer, "upload_to_storage"): + consumer.upload_to_storage(bucket=stmt.bucket or stmt.container, name=export_name) + # Only clean up on outermost generate task + if isinstance(context, SetupContext) and hasattr(consumer, "cleanup"): + consumer.cleanup() + except Exception as e: + logger.error(f"Error finalizing and exporting data for {consumer}: {e}") def _load_csv_file( ctx: SetupContext, @@ -420,77 +400,65 @@ def _load_csv_file( suffix: str, ) -> List[Dict]: """ - Load csv content from file with skip and limit - :param separator: - :param file_path: - :param start_idx: - :param end_idx: - :param prefix: - :param suffix: - :return: + Load CSV content from file with skip and limit. + + :param file_path: Path to the CSV file. + :param separator: CSV delimiter. + :param cyclic: Whether to cycle through data. + :param start_idx: Starting index. + :param end_idx: Ending index. + :return: List of dictionaries representing CSV rows. 
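For illustration only: a simplified, standalone sketch of the skip/limit and cyclic behaviour described for _load_csv_file; the real loader delegates to DataSourceUtil.get_cyclic_data_list and may additionally evaluate script templates, and the file path and delimiter below are assumptions.

import csv
from itertools import cycle, islice
from pathlib import Path
from typing import Dict, List

def load_csv_window(path: Path, separator: str, start_idx: int, end_idx: int, cyclic: bool = False) -> List[Dict]:
    # Read every row, then apply the skip/limit window; with cyclic=True the data
    # wraps around instead of being truncated at the end of the file.
    with path.open(newline="") as csvfile:
        rows = list(csv.DictReader(csvfile, delimiter=separator))
    if cyclic:
        return list(islice(cycle(rows), start_idx, end_idx))
    return rows[start_idx:end_idx]

# e.g. load_csv_window(Path("tests_ce/integration_tests/test_exporters/data/products.ent.csv"), ",", 0, 100)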
""" from datamimic_ce.tasks.task_util import TaskUtil - with file_path.open( - newline="", - ) as csvfile: + with file_path.open(newline="") as csvfile: reader = csv.DictReader(csvfile, delimiter=separator) pagination = ( DataSourcePagination(start_idx, end_idx - start_idx) if (start_idx is not None and end_idx is not None) else None ) - result = DataSourceUtil.get_cyclic_data_list( - data=list(reader), cyclic=cyclic, pagination=pagination - ) + result = DataSourceUtil.get_cyclic_data_list(data=list(reader), cyclic=cyclic, pagination=pagination) # if sourceScripted then evaluate python expression in csv if source_scripted: - return TaskUtil.evaluate_file_script_template( - ctx=ctx, datas=result, prefix=prefix, suffix=suffix - ) + return TaskUtil.evaluate_file_script_template(ctx=ctx, datas=result, prefix=prefix, suffix=suffix) return result -def _load_json_file( - file_path: Path, cyclic: bool, start_idx: int, end_idx: int -) -> List[Dict]: +def _load_json_file(file_path: Path, cyclic: bool, start_idx: int, end_idx: int) -> List[Dict]: """ - Load json content from file using skip and limit - :param cyclic: - :param file_path: - :param start_idx: - :param end_idx: - :return: + Load JSON content from file using skip and limit. + + :param file_path: Path to the JSON file. + :param cyclic: Whether to cycle through data. + :param start_idx: Starting index. + :param end_idx: Ending index. + :return: List of dictionaries representing JSON objects. """ # Read the JSON data from a file with file_path.open("r") as file: data = json.load(file) if not isinstance(data, list): - raise ValueError( - f"JSON file '{file_path.name}' must contain a list of objects" - ) + raise ValueError(f"JSON file '{file_path.name}' must contain a list of objects") pagination = ( DataSourcePagination(start_idx, end_idx - start_idx) if (start_idx is not None and end_idx is not None) else None ) - return DataSourceUtil.get_cyclic_data_list( - data=data, cyclic=cyclic, pagination=pagination - ) + return DataSourceUtil.get_cyclic_data_list(data=data, cyclic=cyclic, pagination=pagination) -def _load_xml_file( - file_path: Path, cyclic: bool, start_idx: int, end_idx: int -) -> List[Dict]: +def _load_xml_file(file_path: Path, cyclic: bool, start_idx: int, end_idx: int) -> List[Dict]: """ - Load xml content from file using skip and limit - :param cyclic: - :param file_path: - :param start_idx: - :param end_idx: - :return: + Load XML content from file using skip and limit. + + :param file_path: Path to the XML file. + :param cyclic: Whether to cycle through data. + :param start_idx: Starting index. + :param end_idx: Ending index. + :return: List of dictionaries representing XML items. """ # Read the XML data from a file with file_path.open("r") as file: @@ -502,39 +470,36 @@ def _load_xml_file( if (start_idx is not None and end_idx is not None) else None ) - return DataSourceUtil.get_cyclic_data_list( - data=data, cyclic=cyclic, pagination=pagination - ) + return DataSourceUtil.get_cyclic_data_list(data=data, cyclic=cyclic, pagination=pagination) def _evaluate_selector_script(context: Context, stmt: GenerateStatement): """ - Evaluate script selector + Evaluate script selector. + + :param context: Context instance. + :param stmt: GenerateStatement instance. + :return: Evaluated selector. 
""" from datamimic_ce.tasks.task_util import TaskUtil selector = stmt.selector prefix = stmt.variable_prefix or context.root.default_variable_prefix suffix = stmt.variable_suffix or context.root.default_variable_suffix - return TaskUtil.evaluate_variable_concat_prefix_suffix( - context, selector, prefix=prefix, suffix=suffix - ) + return TaskUtil.evaluate_variable_concat_prefix_suffix(context, selector, prefix=prefix, suffix=suffix) @contextmanager -def gen_timer( - process: Literal["generate", "consume", "process"], - report_logging: bool, - product_name: str, -): +def gen_timer(process: Literal["generate", "consume", "process"], report_logging: bool, product_name: str): """ - Timer for generate and consume process - :param process: - :param report_logging: - :param product_name: - :return: + Timer for generate and consume process. + + :param process: Type of process ('generate', 'consume', 'process'). + :param report_logging: Whether to log the timing information. + :param product_name: Name of the product being processed. + :return: Context manager. """ - timer_result: Dict = dict() + timer_result: Dict = {} # Ignore timer if report_logging is False if not report_logging: @@ -548,17 +513,17 @@ def gen_timer( finally: end_time = time.perf_counter() elapsed_time = end_time - start_time - records_count = timer_result.get("records_count", 0) + records_count = timer_result.get('records_count', 0) process_name = { - "generate": "Generating", - "consume": "Consuming", - "generate_consume": "Generating and consuming", - }.get(process, "Processing") + 'generate': 'Generating', + 'consume': 'Consuming', + 'generate_consume': 'Generating and consuming' + }.get(process, 'Processing') if elapsed_time > 0: records_per_second = int(records_count / elapsed_time) else: - records_per_second = "N/A" + records_per_second = 'N/A' logger.info( f"{process_name} {records_count} records '{product_name}' takes {round(elapsed_time, 5)} seconds " @@ -567,9 +532,11 @@ def gen_timer( class GenerateTask(Task): - def __init__( - self, statement: GenerateStatement, class_factory_util: BaseClassFactoryUtil - ): + """ + Task class for generating data based on the GenerateStatement. + + """ + def __init__(self, statement: GenerateStatement, class_factory_util: BaseClassFactoryUtil): self._statement = statement self._class_factory_util = class_factory_util @@ -579,8 +546,10 @@ def statement(self) -> GenerateStatement: def _determine_count(self, context: Context) -> int: """ - Determine count of generate task - :return: + Determine the count of records to generate. + + :param context: Context instance. + :return: Number of records to generate. 
""" root_context: SetupContext = context.root @@ -594,20 +563,18 @@ def _determine_count(self, context: Context) -> int: # Check if "selector" is defined with "source" if self.statement.selector: # Evaluate script selector - selector = _evaluate_selector_script( - context=context, stmt=self._statement - ) + selector = _evaluate_selector_script(context=context, stmt=self._statement) client = root_context.get_client_by_id(self.statement.source) if isinstance(client, DatabaseClient): count = client.count_query_length(selector) else: raise ValueError( - "using selector without count only support for DatabaseClient (MongoDB, Relational Database)" + "Using selector without count only supports DatabaseClient (MongoDB, Relational Database)" ) else: count = root_context.data_source_len[self.statement.full_name] - # Check if there is (special) consumer mongodb_upsert + # Check if there is a special consumer (e.g., mongodb_upsert) if count == 0 and self.statement.contain_mongodb_upsert(root_context): # Upsert one collection when no record found by query count = 1 @@ -623,27 +590,29 @@ def _prepare_mp_generate_args( page_size: int, ) -> List[Tuple]: """ - Prepare args of multiprocess function - - :param setup_ctx: - :param count: - :return: + Prepare arguments for multiprocessing function. + + :param setup_ctx: SetupContext instance. + :param single_process_execute_function: Function to execute in single process. + :param count: Total number of records. + :param num_processes: Number of processes. + :param page_size: Page size for processing. + :return: List of argument tuples. """ # Determine chunk size mp_chunk_size = math.ceil(count / num_processes) # Log processing info logger.info( - f"Run {type(self.statement).__name__} task for entity {self.statement.name} with {num_processes} processes in parallel and chunk size: {mp_chunk_size}" + f"Run {type(self.statement).__name__} task for entity {self.statement.name} with " + f"{num_processes} processes in parallel and chunk size: {mp_chunk_size}" ) # Determine chunk indices chunk_data_list = self._get_chunk_indices(mp_chunk_size, count) # Split namespace functions from current namespace - namespace_functions = { - k: v for k, v in setup_ctx.namespace.items() if callable(v) - } + namespace_functions = {k: v for k, v in setup_ctx.namespace.items() if callable(v)} for func in namespace_functions: setup_ctx.namespace.pop(func) @@ -676,8 +645,12 @@ def _prepare_mp_generate_args( def _sp_generate(self, context: Context, start: int, end: int) -> Dict[str, List]: """ - Single-process generate product - :return: + Single-process generate product. + + :param context: Context instance. + :param start: Start index. + :param end: End index. + :return: Generated product data. 
""" if end - start == 0: return {} @@ -685,241 +658,228 @@ def _sp_generate(self, context: Context, start: int, end: int) -> Dict[str, List report_logging = isinstance(context, SetupContext) and context.report_logging if report_logging: logger.info(f"Process product '{self.statement.name}' with single process") - with gen_timer( - "generate", report_logging, self.statement.full_name - ) as timer_result: + with gen_timer("generate", report_logging, self.statement.full_name) as timer_result: # Generate product - result = _geniter_single_process_generate( - (context, self._statement, (start, end)) - ) - timer_result["records_count"] = ( - len(result[self._statement.full_name]) if len(result) > 0 else 0 - ) + result = _geniter_single_process_generate((context, self._statement, (start, end))) + timer_result["records_count"] = len(result.get(self._statement.full_name, [])) return result def _mp_page_process( - self, - setup_ctx: SetupContext, - page_size: int, - single_process_execute_function: Callable[[Tuple], None], + self, setup_ctx: SetupContext, page_size: int, single_process_execute_function: Callable[[Tuple], None] ): """ - Multi-process generate and consume product by page - :return: + Multi-process page generation and consumption of products. + + This method divides the work across multiple processes, each of which generates and consumes + products in chunks. After multiprocessing, a post-processing step applies any necessary + consumer/exporter operations on the merged results from all processes. + + :param setup_ctx: The setup context instance containing configurations and resources. + :param page_size: The page size for each process to handle per batch. + :param single_process_execute_function: The function each process will execute. """ exporter_util = setup_ctx.root.class_factory_util.get_exporter_util() - with gen_timer( - "process", setup_ctx.report_logging, self.statement.full_name - ) as timer_result: - # Determine count of generate process + + # Start timer to measure entire process duration + with gen_timer("process", setup_ctx.report_logging, self.statement.full_name) as timer_result: + # 1. Determine the total record count and number of processes count = self._determine_count(setup_ctx) + num_processes = setup_ctx.num_process or multiprocessing.cpu_count() timer_result["records_count"] = count - # Determine number of processes - num_processes = setup_ctx.num_process or multiprocessing.cpu_count() - - # Prepare args list of mp function + # 2. Prepare arguments for each process based on count, process count, and page size arg_list = self._prepare_mp_generate_args( - setup_ctx, - single_process_execute_function, - count, - num_processes, - page_size, + setup_ctx, single_process_execute_function, count, num_processes, page_size ) - logger.info( - f"Start processing {count} products with {num_processes} processes, chunks: {[args[2] for args in arg_list]}" - ) - - # Create list of post-consumer after mp process, like MemstoreConsumer, TestResultExporterConsumer - post_exporter_list_str = [] + # Debug log the chunks each process will handle + chunk_info = [args[2] for args in arg_list] + logger.debug(f"Prepared argument list for multiprocessing with chunks: {chunk_info}") + # 3. 
Initialize any required post-process consumers, e.g., for testing or memory storage + post_consumer_list = [] if setup_ctx.test_mode: - post_exporter_list_str.append(EXPORTER_TEST_RESULT_EXPORTER) - post_exporter_list_str.extend( - list( - filter( - lambda consumer_str: setup_ctx.memstore_manager.contain( - consumer_str - ), - self.statement.targets, - ) + post_consumer_list.append(EXPORTER_TEST_RESULT_EXPORTER) + + post_consumer_list.extend( + filter(lambda consumer_str: setup_ctx.memstore_manager.contain(consumer_str), + self.statement.targets) ) - ) - post_exporter_list = [] - for exporter_str in post_exporter_list_str: - exporter = exporter_util.get_exporter_by_name(setup_ctx, exporter_str) - if exporter is not None: - post_exporter_list.append(exporter) - # Apply the wrapper function to each item in arg_list using the Pool + # Initialize exporters for each post-process consumer + _, post_consumer_list_instances = exporter_util.create_exporter_list( + setup_ctx, post_consumer_list, self.statement.storage_id, self.statement.export_uri) + logger.debug( + f"Post-consumer exporters initialized: {[consumer.__class__.__name__ for consumer in post_consumer_list_instances]}" + ) + + # 4. Run multiprocessing Pool to handle the generation/consumption function for each chunk with multiprocessing.Pool(processes=num_processes) as pool: # Collect then merge result mp_result_list = pool.map(_wrapper, arg_list) - # Consume post consumer - if len(post_exporter_list) > 0: + # 5. Post-processing with consumer consumption for merged results across processes + if post_consumer_list_instances: + logger.debug("Processing merged results with post-consumers.") + for mp_result in mp_result_list: - for key, value in mp_result.items(): - for consumer in post_exporter_list: - consumer.consume((key, value)) + for key, value in mp_result.items(): + for consumer in post_consumer_list_instances: + logger.debug(f"Consuming result for {key} with {consumer.__class__.__name__}") + consumer.consume((key, value)) - del mp_result_list - gc.collect() + # 6. Clean up and finalize + del mp_result_list # Free up memory from the merged results + # gc.collect() def _calculate_default_page_size(self, entity_count: int) -> int: """ - Calculate default page size for processing by page - :param entity_count: - :return: + Calculate default page size for processing by page. + + :param entity_count: Total number of entities. + :return: Page size. 
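For illustration only: the default page-size rule implemented below, condensed into a small function; the sample counts are arbitrary.

def default_page_size(entity_count: int, column_count: int) -> int:
    # Cap at 10,000, fall back to the entity count for small runs, and shrink
    # proportionally once a statement has more than 25 generated fields.
    size = 10_000 if entity_count > 10_000 else entity_count
    if column_count > 25:
        size = int(size / (column_count / 25))
    return max(1, size)

print(default_page_size(1_000_000, 10))  # 10000
print(default_page_size(1_000_000, 50))  # 5000
print(default_page_size(300, 10))        # 300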
""" stmt_page_size = self.statement.page_size # Return page size if defined in statement explicitly if stmt_page_size: - logger.info(f"Using page size {stmt_page_size} defined in statement") + logger.warning(f"Using low page size {stmt_page_size} (< 100) may cause performance issues") if stmt_page_size < 100: - logger.warn( - f"Using low page size {stmt_page_size} (< 100) may cause performance issue" - ) + logger.warn(f"Using low page size {stmt_page_size} (< 100) may cause performance issue") return stmt_page_size if entity_count > 10000: - # Set default page size as 10.000 if entity count > 10.000 + # Set default page size as 10,000 if entity count > 10,000 default_page_size = 10000 else: - # Set default page size as entity count if entity count <= 10.000 + # Set default page size as entity count if entity count <= 10,000 default_page_size = entity_count # Reduce default page size if column count > 25 - col_count = len(self.statement.sub_statements) + col_count = len( + [ + stmt + for stmt in self.statement.sub_statements + if isinstance( + stmt, + ( + ArrayStatement, + ElementStatement, + ItemStatement, + IfStatement, + KeyStatement, + ListStatement, + NestedKeyStatement, + ), + ) + ] + ) if col_count > 25: reduction_factor = col_count / 25 default_page_size = int(default_page_size / reduction_factor) # Log calculated default page size default_page_size = max(1, default_page_size) - logger.info( - f"Using default page size {default_page_size} for processing by page" - ) + logger.info(f"Using default page size {default_page_size} for processing by page") return default_page_size - def execute(self, context: Context) -> Dict[str, List] | None: + @staticmethod + def _scan_data_source(ctx: SetupContext, statement: Statement) -> None: """ - Execute generate task. If gen_stmt is inner, return generated product, otherwise consume them. - :param context: - :return: + Scan data source and set data source length. + + :param ctx: SetupContext instance. + :param statement: Statement instance. + :return: None """ - try: - self.pre_execute(context) + # 1. Scan statement + DataSourceUtil.set_data_source_length(ctx, statement) + # 2. Scan sub-statement + if isinstance(statement, CompositeStatement): + for child_stmt in statement.sub_statements: + GenerateTask._scan_data_source(ctx, child_stmt) - # Determine count of generate process - count = self._determine_count(context) + def execute(self, context: SetupContext) -> Dict[str, List] | None: + """ + Execute generate task. If gen_stmt is inner, return generated product; otherwise, consume them. - if count == 0: - return {self.statement.full_name: []} + :param context: Context instance. + :return: Generated product data or None. 
+ """ + self.pre_execute(context) - page_size = self._calculate_default_page_size(count) + # Determine count of generate process + count = self._determine_count(context) - task_util_cls = context.root.class_factory_util.get_task_util_cls() + if count == 0: + return {self.statement.full_name: []} - # Generate and consume if gen_stmt is outermost (which has context as SetupContext) - if isinstance(context, SetupContext): - exporter_util = context.root.class_factory_util.get_exporter_util() - consumer_with_operations, _ = exporter_util.create_exporter_list( - context, self.statement - ) + page_size = self._calculate_default_page_size(count) - # Switch to non mp if there is MongoDB delete operation - has_mongodb_delete = any( - [ - operation == "delete" and isinstance(consumer, MongoDBExporter) - for consumer, operation in consumer_with_operations - ] - ) - match self.statement.multiprocessing: - case None: - use_mp = (not has_mongodb_delete) and context.use_mp - case _: - use_mp = bool(self.statement.multiprocessing) - - # Generate in mp - if use_mp: - # IMPORTANT: always use deep copied setup_ctx for mp to avoid modify original setup_ctx accidentally - copied_ctx = copy.deepcopy(context) - # Process data by page - logger.info( - f"Processing by page with size {page_size} for '{self.statement.name}'" - ) - self._mp_page_process( - copied_ctx, - page_size, - _geniter_single_process_generate_and_consume_by_page, - ) - task_util_cls.consume_minio_after_page_processing( - self.statement, context - ) - # Generate and consume in sp - else: - # Process data by page in single process - index_chunk = self._get_chunk_indices(page_size, count) + task_util_cls = context.root.class_factory_util.get_task_util_cls() + + # Generate and consume if gen_stmt is outermost (which has context as SetupContext) + if isinstance(context, SetupContext): + exporter_util = context.root.class_factory_util.get_exporter_util() + consumer_with_operations, consumer_without_operations = exporter_util.create_exporter_list( + setup_context=context, + consumer_str_list=list(self.statement.targets), + storage_type=self.statement.storage_id, + target_uri=self.statement.export_uri, + ) + # Check for conditions to use multiprocessing + has_mongodb_delete = any( + [ + operation == "delete" and isinstance(consumer, MongoDBExporter) + for consumer, operation in consumer_with_operations + ] + ) + match self.statement.multiprocessing: + case None: + use_mp = (not has_mongodb_delete) and context.use_mp + case _: + use_mp = bool(self.statement.multiprocessing) + + # Generate in multiprocessing + if use_mp: + # IMPORTANT: always use deep copied setup_ctx for mp to avoid modify original setup_ctx accidentally + copied_ctx = copy.deepcopy(context) + # Process data by page + logger.info(f"Processing by page with size {page_size} for '{self.statement.name}'") + self._mp_page_process(copied_ctx, page_size, _geniter_single_process_generate_and_consume_by_page) + task_util_cls.consume_minio_after_page_processing(self.statement, context) + # Generate and consume in single process + else: + # Process data by page in single process + index_chunk = self._get_chunk_indices(page_size, count) + logger.info(f"Processing {len(index_chunk)} pages for {count} products of '{self.statement.name}'") + for page_index, page_tuple in enumerate(index_chunk): + start, end = page_tuple logger.info( - f"Processing {len(index_chunk)} pages for {count} products of '{self.statement.name}'" - ) - for page_index, page_tuple in enumerate(index_chunk): - start, end = 
page_tuple - logger.info( - f"Processing {end - start} product '{self.statement.name}' on page {page_index + 1}/{len(index_chunk)} in a single process" - ) - # Generate product - result = self._sp_generate(context, start, end) - # Consume by page - _consume_by_page( - self.statement, - context, - result, - page_index, - page_size, - None, - None, - ) - - # Manual garbage collection to free memory - del result - gc.collect() - # Consume MinioConsumer - task_util_cls.consume_minio_after_page_processing( - self.statement, context + f"Processing {end - start} product '{self.statement.name}' on page {page_index + 1}/{len(index_chunk)} in a single process" ) + # Generate product + result = self._sp_generate(context, start, end) + # Consume by page + _consume_by_page(self.statement, context, result, page_index, page_size, None, None) - # Just return product generated in sp if gen_stmt is inner one - else: - # Do not apply process by page for inner gen_stmt - return self._sp_generate(context, 0, count) - finally: - # Attempt to delete the temporary result directory - try: - result_temp_dir = ( - context.root.descriptor_dir / f"temp_result_{context.root.task_id}" - ) - shutil.rmtree(result_temp_dir) - logger.debug( - f"Processing temp result directory '{result_temp_dir}' has been removed successfully." - ) - except FileNotFoundError: - logger.debug( - f"Processing temp result directory has been removed successfully." - ) - except Exception as e: - logger.error(f"Failed to remove Processing temp result directory : {e}") + # Manual garbage collection to free memory + del result + # gc.collect() + + # Just return product generated in single process if gen_stmt is inner one + else: + # Do not apply process by page for inner gen_stmt + return self._sp_generate(context, 0, count) @staticmethod def convert_xml_dict_to_json_dict(xml_dict: Dict): """ - Convert XML dict with #text and @attribute to pure JSON dict - :param xml_dict: - :return: + Convert XML dict with #text and @attribute to pure JSON dict. + + :param xml_dict: XML dictionary. + :return: JSON dictionary. """ if "#text" in xml_dict: return xml_dict["#text"] @@ -930,10 +890,7 @@ def convert_xml_dict_to_json_dict(xml_dict: Dict): res[key] = GenerateTask.convert_xml_dict_to_json_dict(value) elif isinstance(value, list): res[key] = [ - GenerateTask.convert_xml_dict_to_json_dict(v) - if isinstance(v, dict) - else v - for v in value + GenerateTask.convert_xml_dict_to_json_dict(v) if isinstance(v, dict) else v for v in value ] else: res[key] = value @@ -942,48 +899,71 @@ def convert_xml_dict_to_json_dict(xml_dict: Dict): @staticmethod def _get_chunk_indices(chunk_size: int, data_count: int) -> List: """ - Create list of chunk indices based on chunk size and required data count - :param chunk_size: - :param data_count: - :return: - """ - return [ - (i, min(i + chunk_size, data_count)) - for i in range(0, data_count, chunk_size) - ] + Create list of chunk indices based on chunk size and required data count. - def _scan_data_source(self, ctx: SetupContext, statement: Statement) -> None: - # 1. Scan statement - self._class_factory_util.get_datasource_util_cls().set_data_source_length( - ctx, statement - ) - # 2. Scan sub-statement - if isinstance(statement, CompositeStatement): - for child_stmt in statement.sub_statements: - self._scan_data_source(ctx, child_stmt) + :param chunk_size: Size of each chunk. + :param data_count: Total data count. + :return: List of tuples representing chunk indices. 
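On the `execute()` hunk above: whether a generate statement runs with multiprocessing is decided from three inputs — an explicit `multiprocessing` attribute on the statement, the setup-level `use_mp` flag, and whether any MongoDB exporter is configured with a `delete` operation (which forces single-process execution). A hedged, plain-bool sketch of that decision (stand-in names, not the real objects):

```python
def should_use_multiprocessing(
    stmt_multiprocessing: bool | None,
    setup_use_mp: bool,
    has_mongodb_delete: bool,
) -> bool:
    """Mirror of the match/case in execute() above, with plain booleans."""
    if stmt_multiprocessing is None:
        # No explicit setting: follow the setup, but never parallelize MongoDB deletes.
        return setup_use_mp and not has_mongodb_delete
    # An explicit multiprocessing setting on the statement always wins.
    return bool(stmt_multiprocessing)

assert should_use_multiprocessing(None, True, True) is False
assert should_use_multiprocessing(True, False, True) is True
```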
+ """ + return [(i, min(i + chunk_size, data_count)) for i in range(0, data_count, chunk_size)] def pre_execute(self, context: Context): """ - Pre-execute task in sp before mp execution - :param context: - :return: + Pre-execute task in single process before multiprocessing execution. + + :param context: Context instance. + :return: None """ root_context = context.root + from datamimic.tasks.task_util import TaskUtil - task_util_cls = root_context.class_factory_util.get_task_util_cls() - # Execute pre-tasks pre_tasks = [ - task_util_cls.get_task_by_statement(root_context, child_stmt, None) + TaskUtil.get_task_by_statement(root_context, child_stmt, None) for child_stmt in self.statement.sub_statements if isinstance(child_stmt, KeyStatement) ] for task in pre_tasks: task.pre_execute(context) + # def _determine_preview_record_len(self) -> int: + # """ + # Determine preview record length. + # + # :return: Preview record length. + # """ + # num_of_field = len( + # [ + # stmt + # for stmt in self.statement.sub_statements + # if isinstance( + # stmt, + # ( + # ArrayStatement, + # ElementStatement, + # ItemStatement, + # IfStatement, + # KeyStatement, + # ListStatement, + # NestedKeyStatement, + # ), + # ) + # ] + # ) + # + # # Determine max_data_len based on max_preview_data and num_of_fields + # default_max_preview_data = settings.LOGGER_REDIS_MAX_DATA + # max_data_len = default_max_preview_data + # if default_max_preview_data >= 500 and num_of_field >= 20: + # max_data_len = 400 + # if default_max_preview_data >= 100 and num_of_field >= 40: + # max_data_len = 100 + # if default_max_preview_data >= 10 and num_of_field >= 100: + # max_data_len = 10 + # + # return max_data_len + @staticmethod - def execute_include( - setup_stmt: SetupStatement, parent_context: GenIterContext - ) -> None: + def execute_include(setup_stmt: SetupStatement, parent_context: GenIterContext) -> None: """ Execute include XML model inside :param setup_stmt: @@ -1003,3 +983,39 @@ def execute_include( for stmt in setup_stmt.sub_statements: task = task_util_cls.get_task_by_statement(root_context, stmt) task.execute(root_context) + + @staticmethod + def _finalize_and_export_consumers(context: Context, stmt: GenerateStatement): + """ + Finalize chunks and export data for all consumers that require it. + + :param context: Context instance. + :param stmt: GenerateStatement instance. 
+ :return: None + """ + # Create list of consumers that need to finalize and export + consumers_with_operation, consumers_without_operation = ExporterUtil.create_exporter_list( + setup_context=context.root, + consumer_str_list=list(stmt.targets), + storage_type=stmt.storage_id, + target_uri=stmt.export_uri, + ) + + # Combine all consumers + all_consumers = [consumer for consumer, _ in consumers_with_operation] + consumers_without_operation + + for consumer in all_consumers: + # Only finalize and export if the exporter has these methods + try: + # Construct the export name to include task_id + export_name = f"{context.root.task_id}/{stmt.name}" + + consumer.finalize_chunks() if hasattr(consumer, "finalize_chunks") else None + ( + consumer.upload_to_storage(bucket=stmt.bucket or stmt.container, name=export_name) + if hasattr(consumer, "upload_to_storage") + else None + ) + except Exception as e: + logger.error(f"Error finalizing and exporting data for {consumer}: {e}") + diff --git a/datamimic_ce/tasks/task_util.py b/datamimic_ce/tasks/task_util.py index 1160cf4..e1c2262 100644 --- a/datamimic_ce/tasks/task_util.py +++ b/datamimic_ce/tasks/task_util.py @@ -9,28 +9,23 @@ from datamimic_ce.clients.mongodb_client import MongoDBClient from datamimic_ce.clients.rdbms_client import RdbmsClient -from datamimic_ce.constants.exporter_constants import \ - EXPORTER_TEST_RESULT_EXPORTER +from datamimic_ce.constants.exporter_constants import EXPORTER_TEST_RESULT_EXPORTER from datamimic_ce.contexts.context import Context from datamimic_ce.contexts.setup_context import SetupContext from datamimic_ce.converter.append_converter import AppendConverter from datamimic_ce.converter.converter import Converter from datamimic_ce.converter.cut_length_converter import CutLengthConverter -from datamimic_ce.converter.date2timestamp_converter import \ - Date2TimestampConverter +from datamimic_ce.converter.date2timestamp_converter import Date2TimestampConverter from datamimic_ce.converter.date_format_converter import DateFormatConverter from datamimic_ce.converter.hash_converter import HashConverter from datamimic_ce.converter.java_hash_converter import JavaHashConverter from datamimic_ce.converter.lower_case_converter import LowerCaseConverter from datamimic_ce.converter.mask_converter import MaskConverter from datamimic_ce.converter.middle_mask_converter import MiddleMaskConverter -from datamimic_ce.converter.remove_none_or_empty_element_converter import \ - RemoveNoneOrEmptyElementConverter -from datamimic_ce.converter.timestamp2date_converter import \ - Timestamp2DateConverter +from datamimic_ce.converter.remove_none_or_empty_element_converter import RemoveNoneOrEmptyElementConverter +from datamimic_ce.converter.timestamp2date_converter import Timestamp2DateConverter from datamimic_ce.converter.upper_case_converter import UpperCaseConverter -from datamimic_ce.data_sources.data_source_pagination import \ - DataSourcePagination +from datamimic_ce.data_sources.data_source_pagination import DataSourcePagination from datamimic_ce.enums.converter_enums import ConverterEnum from datamimic_ce.exporters.csv_exporter import CSVExporter from datamimic_ce.exporters.exporter_util import ExporterUtil @@ -69,12 +64,15 @@ from datamimic_ce.tasks.else_if_task import ElseIfTask from datamimic_ce.tasks.else_task import ElseTask from datamimic_ce.tasks.execute_task import ExecuteTask -from datamimic_ce.tasks.generate_task import (GenerateTask, - _evaluate_selector_script, - _load_csv_file, _load_json_file, - _load_temp_result_file, - 
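On `_finalize_and_export_consumers` above: the added method probes each exporter with `hasattr` and only calls `finalize_chunks()` / `upload_to_storage()` when they exist, logging rather than raising per-exporter failures. The conditional-expression-as-statement style is hard to read; an equivalent, more explicit sketch (hypothetical helper with stand-in arguments, not the patched method itself):

```python
import logging

logger = logging.getLogger(__name__)

def finalize_and_upload(exporters, bucket: str, export_name: str) -> None:
    """Duck-typed finalize/upload pass, equivalent to the hunk above but written as plain ifs."""
    for exporter in exporters:
        try:
            if hasattr(exporter, "finalize_chunks"):
                exporter.finalize_chunks()
            if hasattr(exporter, "upload_to_storage"):
                exporter.upload_to_storage(bucket=bucket, name=export_name)
        except Exception as e:
            # One failing exporter should not abort the others.
            logger.error(f"Error finalizing and exporting data for {exporter}: {e}")
```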
_load_xml_file, - _pre_consume_product) +from datamimic_ce.tasks.generate_task import ( + GenerateTask, + _evaluate_selector_script, + _load_csv_file, + _load_json_file, + _load_temp_result_file, + _load_xml_file, + _pre_consume_product, +) from datamimic_ce.tasks.generator_task import GeneratorTask from datamimic_ce.tasks.if_task import IfTask from datamimic_ce.tasks.include_task import IncludeTask @@ -122,9 +120,7 @@ def get_task_by_statement( elif isinstance(stmt, ReferenceStatement): return ReferenceTask(stmt, pagination) elif isinstance(stmt, ListStatement): - return ListTask( - ctx=ctx, statement=stmt, class_factory_util=class_factory_util - ) + return ListTask(ctx=ctx, statement=stmt, class_factory_util=class_factory_util) elif isinstance(stmt, ItemStatement): return ItemTask(ctx, stmt, class_factory_util) elif isinstance(stmt, IfStatement): @@ -142,14 +138,10 @@ def get_task_by_statement( elif isinstance(stmt, GeneratorStatement): return GeneratorTask(stmt) else: - raise ValueError( - f"Cannot created task for statement {stmt.__class__.__name__}" - ) + raise ValueError(f"Cannot created task for statement {stmt.__class__.__name__}") @staticmethod - def evaluate_file_script_template( - ctx: Context, datas: Union[Dict, List], prefix: str, suffix: str - ): + def evaluate_file_script_template(ctx: Context, datas: Union[Dict, List], prefix: str, suffix: str): """ Check value in csv or json file that contain python expression then evaluate variables and functions @@ -159,13 +151,9 @@ def evaluate_file_script_template( result = {} for key, json_value in datas.items(): if isinstance(json_value, Union[Dict, List]): - value = TaskUtil.evaluate_file_script_template( - ctx, json_value, prefix, suffix - ) + value = TaskUtil.evaluate_file_script_template(ctx, json_value, prefix, suffix) elif isinstance(json_value, str): - value = TaskUtil._evaluate_script_value( - ctx, json_value, prefix, suffix - ) + value = TaskUtil._evaluate_script_value(ctx, json_value, prefix, suffix) else: value = json_value result.update({key: value}) @@ -174,21 +162,11 @@ def evaluate_file_script_template( result = [] for value in datas: if isinstance(value, List): - result.extend( - TaskUtil.evaluate_file_script_template( - ctx, value, prefix, suffix - ) - ) + result.extend(TaskUtil.evaluate_file_script_template(ctx, value, prefix, suffix)) elif isinstance(value, Dict): - result.append( - TaskUtil.evaluate_file_script_template( - ctx, value, prefix, suffix - ) - ) + result.append(TaskUtil.evaluate_file_script_template(ctx, value, prefix, suffix)) elif isinstance(value, str): - result.append( - TaskUtil._evaluate_script_value(ctx, value, prefix, suffix) - ) + result.append(TaskUtil._evaluate_script_value(ctx, value, prefix, suffix)) else: result.append(value) return result @@ -214,9 +192,7 @@ def _evaluate_script_value(ctx: Context, data: str, prefix: str, suffix: str): match = re.search(r"^{(.*)}$", data) return ctx.evaluate_python_expression(match.group(1)) - return TaskUtil.evaluate_variable_concat_prefix_suffix( - ctx, data, prefix, suffix - ) + return TaskUtil.evaluate_variable_concat_prefix_suffix(ctx, data, prefix, suffix) except Exception as e: logger.error(f"Error evaluating script '{data}': {e}") @@ -279,9 +255,7 @@ def create_converter_list(context: Context, converter_str: str) -> List[Converte ) @staticmethod - def evaluate_variable_concat_prefix_suffix( - context: Context, expr: str, prefix: str, suffix: str - ) -> str: + def evaluate_variable_concat_prefix_suffix(context: Context, expr: str, prefix: str, 
suffix: str) -> str: """ Evaluate expression data, replace dynamic variables have prefix and suffix with value :param context: @@ -300,13 +274,7 @@ def evaluate_variable_concat_prefix_suffix( return expr # Evaluate all dynamic variables (this return only string value), e.g. '{my_name} is {my_age} years old' - return re.sub( - pattern, - lambda matched_var: str( - context.evaluate_python_expression(matched_var.group(1)) - ), - expr, - ) + return re.sub(pattern, lambda matched_var: str(context.evaluate_python_expression(matched_var.group(1))), expr) @staticmethod def gen_task_load_data_from_source( @@ -350,10 +318,7 @@ def gen_task_load_data_from_source( # Load data from JSON elif source_str.endswith(".json"): source_data = _load_json_file( - root_context.descriptor_dir / source_str, - stmt.cyclic, - load_start_idx, - load_end_idx, + root_context.descriptor_dir / source_str, stmt.cyclic, load_start_idx, load_end_idx ) # if sourceScripted then evaluate python expression in json if source_scripted: @@ -362,16 +327,11 @@ def gen_task_load_data_from_source( ctx=context, datas=source_data, prefix=prefix, suffix=suffix ) except Exception as e: - logger.debug( - f"Failed to pre-evaluate source script for {stmt.full_name}: {e}" - ) + logger.debug(f"Failed to pre-evaluate source script for {stmt.full_name}: {e}") # Load data from XML elif source_str.endswith(".template.xml"): source_data = _load_xml_file( - root_context.descriptor_dir / source_str, - stmt.cyclic, - load_start_idx, - load_end_idx, + root_context.descriptor_dir / source_str, stmt.cyclic, load_start_idx, load_end_idx ) # if sourceScripted then evaluate python expression in json if source_scripted: @@ -380,9 +340,9 @@ def gen_task_load_data_from_source( ) # Load data from in-memory memstore elif root_context.memstore_manager.contain(source_str): - source_data = root_context.memstore_manager.get_memstore( - source_str - ).get_data_by_type(stmt.type or stmt.name, load_pagination, stmt.cyclic) + source_data = root_context.memstore_manager.get_memstore(source_str).get_data_by_type( + stmt.type or stmt.name, load_pagination, stmt.cyclic + ) # Load data from client (MongoDB, RDBMS,...) 
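On the `evaluate_variable_concat_prefix_suffix` hunk above: the method substitutes prefix/suffix-delimited placeholders in a string by evaluating each enclosed expression against the context. A minimal sketch with a plain dict standing in for the DATAMIMIC context (hypothetical helper; the real code calls `context.evaluate_python_expression`):

```python
import re

def substitute_placeholders(expr: str, variables: dict, prefix: str = "{", suffix: str = "}") -> str:
    """Replace prefix...suffix placeholders with evaluated values, as in the hunk above."""
    pattern = re.escape(prefix) + r"(.*?)" + re.escape(suffix)
    return re.sub(pattern, lambda m: str(eval(m.group(1), {}, dict(variables))), expr)

# "{my_name} is {my_age} years old" -> "Ada is 36 years old"
print(substitute_placeholders("{my_name} is {my_age} years old", {"my_name": "Ada", "my_age": 36}))
```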
elif root_context.clients.get(source_str) is not None: client = root_context.clients.get(source_str) @@ -390,13 +350,9 @@ def gen_task_load_data_from_source( if isinstance(client, MongoDBClient): if stmt.selector: selector = _evaluate_selector_script(context, stmt) - source_data = client.get_by_page_with_query( - query=selector, pagination=load_pagination - ) + source_data = client.get_by_page_with_query(query=selector, pagination=load_pagination) elif stmt.type: - source_data = client.get_by_page_with_type( - collection_name=stmt.type, pagination=load_pagination - ) + source_data = client.get_by_page_with_type(collection_name=stmt.type, pagination=load_pagination) else: raise ValueError( "MongoDB source requires at least attribute 'type', 'selector' or 'iterationSelector'" @@ -412,52 +368,42 @@ def gen_task_load_data_from_source( elif isinstance(client, RdbmsClient): if stmt.selector: selector = _evaluate_selector_script(context, stmt) - source_data = client.get_by_page_with_query( - original_query=selector, pagination=load_pagination - ) + source_data = client.get_by_page_with_query(original_query=selector, pagination=load_pagination) else: - source_data = client.get_by_page_with_type( - type=stmt.type or stmt.name, pagination=load_pagination - ) + source_data = client.get_by_page_with_type(type=stmt.type or stmt.name, pagination=load_pagination) else: - raise ValueError( - f"Cannot load data from client: {type(client).__name__}" - ) + raise ValueError(f"Cannot load data from client: {type(client).__name__}") else: raise ValueError(f"cannot find data source {source_str} for iterate task") return source_data, build_from_source - @staticmethod - def consume_minio_after_page_processing(stmt, context: Context) -> None: - """ - Load all temp files and consume MinioConsumer - :param stmt: - :param context: - :return: - """ - # Load temp result file - result_temp_dir = ( - context.root.descriptor_dir / f"temp_result_{context.root.task_id}" - ) - consumed_result = _load_temp_result_file(result_temp_dir) - - for stmt_name, product_result in consumed_result.items(): - # Load current gen_stmt with corresponding targets - current_stmt = stmt.retrieve_sub_statement_by_fullname(stmt_name) - - # Get list of MinioConsumer - _, consumers_without_operation = ExporterUtil.create_exporter_list( - setup_context=context.root, stmt=current_stmt - ) - - # Preprocess and consume data - consumed_result = _pre_consume_product(current_stmt, product_result) - for consumer in consumers_without_operation: - if isinstance( - consumer, (XMLExporter, JsonExporter, TXTExporter, CSVExporter) - ): - consumer.consume(consumed_result) + # @staticmethod + # def consume_minio_after_page_processing(stmt, context: Context) -> None: + # """ + # Load all temp files and consume MinioConsumer + # :param stmt: + # :param context: + # :return: + # """ + # # Load temp result file + # result_temp_dir = context.root.descriptor_dir / f"temp_result_{context.root.task_id}" + # consumed_result = _load_temp_result_file(result_temp_dir) + # + # for stmt_name, product_result in consumed_result.items(): + # # Load current gen_stmt with corresponding targets + # current_stmt = stmt.retrieve_sub_statement_by_fullname(stmt_name) + # + # # Get list of MinioConsumer + # _, consumers_without_operation = ExporterUtil.create_exporter_list( + # setup_context=context.root, stmt=current_stmt + # ) + # + # # Preprocess and consume data + # consumed_result = _pre_consume_product(current_stmt, product_result) + # for consumer in consumers_without_operation: + 
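Summarizing the `gen_task_load_data_from_source` hunks above: the `source` string is dispatched by suffix or by name — flat files (`.json`, `.template.xml`, with CSV handled analogously), an in-memory memstore, or a registered client such as MongoDB or an RDBMS, with a `ValueError` for anything unknown. A condensed sketch of that dispatch (placeholder names, not the real signature):

```python
def resolve_source_kind(source_str: str, memstores: set[str], clients: set[str]) -> str:
    """Classify a source string the way the loader above does."""
    if source_str.endswith(".json"):
        return "json file"
    if source_str.endswith(".template.xml"):
        return "xml template"
    if source_str.endswith(".csv"):  # assumed: the CSV branch sits just above the JSON one
        return "csv file"
    if source_str in memstores:
        return "memstore"
    if source_str in clients:
        return "client (MongoDB, RDBMS, ...)"
    raise ValueError(f"cannot find data source {source_str} for iterate task")

assert resolve_source_kind("people.json", set(), set()) == "json file"
```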
# if isinstance(consumer, (XMLExporter, JsonExporter, TXTExporter, CSVExporter)): + # consumer.consume(consumed_result) @staticmethod def consume_product_by_page( @@ -467,17 +413,16 @@ def consume_product_by_page( page_info: Tuple, ) -> None: """ - Consume single list of product in generate statement + Consume single list of product in generate statement. - :param root_context: - :param stmt: - :param xml_result: - :return: + :param root_context: SetupContext instance. + :param stmt: GenerateStatement instance. + :param xml_result: List of generated product data. + :param page_info: Tuple containing page information. + :return: None """ # Convert XML result into JSON result - json_result = [ - GenerateTask.convert_xml_dict_to_json_dict(res) for res in xml_result - ] + json_result = [GenerateTask.convert_xml_dict_to_json_dict(res) for res in xml_result] # Wrap product key and value into a tuple # for iterate database may have key, value, and other statement attribute info @@ -488,33 +433,31 @@ def consume_product_by_page( consumer_set: Set[str] = stmt.targets # Add TestResultConsumer if process is in testing mode if root_context.test_mode: - # Only add TestResultExporterConsumer if not using multiprocess, otherwise collect all result in the end + # Only add TestResultExporterConsumer if not using multiprocessing, otherwise collect all result in the end if not root_context.use_mp: consumer_set.add(EXPORTER_TEST_RESULT_EXPORTER) # 3.2: Consume data # dbms consumer can have operation (e.g. mongodb.update), if so consumer is tuple[Consumer, operation] exporter_util = root_context.class_factory_util.get_exporter_util() - consumers_with_operation, consumers_without_operation = ( - exporter_util.create_exporter_list(setup_context=root_context, stmt=stmt) + consumers_with_operation, consumers_without_operation = exporter_util.create_exporter_list( + setup_context=root_context, stmt=stmt ) # run consumer with operation first, because some operation may change the product result for consumer in consumers_with_operation: - temp_consumer, operation = consumer - if isinstance(temp_consumer, MongoDBExporter) and operation == "upsert": - json_product = temp_consumer.upsert(product=json_product) - elif hasattr(temp_consumer, operation): - getattr(temp_consumer, operation)(json_product) + consumer_obj, operation = consumer + if isinstance(consumer_obj, MongoDBExporter) and operation == "upsert": + json_product = consumer_obj.upsert(product=json_product) + elif hasattr(consumer_obj, operation): + getattr(consumer_obj, operation)(json_product) else: - raise ValueError(f"Consumer not found: {consumer}.{operation}") + raise ValueError(f"Consumer does not support operation: {consumer_obj}.{operation}") for consumer in consumers_without_operation: - if isinstance( - consumer, (XMLExporter, JsonExporter, TXTExporter, CSVExporter) - ): - continue - # TODO: re-check XMLExporter with page processing + if isinstance(consumer, (XMLExporter, JsonExporter, TXTExporter, CSVExporter)): + # Specific exporters handle their own consumption + consumer.consume(json_product) # TODO: re-check XMLExporter with page processing if isinstance(consumer, XMLExporter): xml_product = _pre_consume_product(stmt, xml_result) consumer.consume(xml_product) diff --git a/tests_ce/conftest.py b/tests_ce/conftest.py index 2f99418..06ac491 100644 --- a/tests_ce/conftest.py +++ b/tests_ce/conftest.py @@ -18,8 +18,8 @@ @pytest.fixture def mysql_services(): try: - if settings.DATAMIMIC_LIB_ENVIRONMENT == "lib_staging": - logger.info("Staging 
Environment detected, no need to manually activate my sql services")
+        if settings.RUNTIME_ENVIRONMENT == "production":
+            logger.info("Production environment detected, no need to manually activate MySQL services")
             yield None
             return
         else:
diff --git a/tests_ce/integration_tests/consumer_csv/test_csv_consumer.xml b/tests_ce/integration_tests/consumer_csv/test_csv_consumer.xml
index f040b59..4e55161 100644
--- a/tests_ce/integration_tests/consumer_csv/test_csv_consumer.xml
+++ b/tests_ce/integration_tests/consumer_csv/test_csv_consumer.xml
@@ -1,5 +1,5 @@
-
+
diff --git a/tests_ce/integration_tests/datamimic_demo/j-json/datamimic.xml b/tests_ce/integration_tests/datamimic_demo/j-json/datamimic.xml
index 5fae004..4232aba 100644
--- a/tests_ce/integration_tests/datamimic_demo/j-json/datamimic.xml
+++ b/tests_ce/integration_tests/datamimic_demo/j-json/datamimic.xml
@@ -1,4 +1,4 @@
-
+
diff --git a/tests_ce/integration_tests/datamimic_demo/p-xml/datamimic.xml b/tests_ce/integration_tests/datamimic_demo/p-xml/datamimic.xml
index d3b9b72..162c541 100644
--- a/tests_ce/integration_tests/datamimic_demo/p-xml/datamimic.xml
+++ b/tests_ce/integration_tests/datamimic_demo/p-xml/datamimic.xml
@@ -1,4 +1,4 @@
-
+
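Closing note on the `tests_ce/conftest.py` hunk: the `mysql_services` fixture now keys off `RUNTIME_ENVIRONMENT` and skips manual service management in production, where MySQL is assumed to be provided externally. A generic sketch of that gating pattern (module-level constant standing in for `datamimic_ce.config.settings`):

```python
import logging
import os

import pytest

logger = logging.getLogger(__name__)

# Stand-in for settings.RUNTIME_ENVIRONMENT from datamimic_ce.config
RUNTIME_ENVIRONMENT = os.getenv("RUNTIME_ENVIRONMENT", "development")

@pytest.fixture
def mysql_services():
    if RUNTIME_ENVIRONMENT == "production":
        logger.info("Production environment detected, no need to manually activate MySQL services")
        yield None
        return
    # Development: start/stop a local MySQL service here (omitted in this sketch).
    yield None
```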