From 4c315c04edcd66fd2cd6f5757c7bb71da6445613 Mon Sep 17 00:00:00 2001
From: Richard Taylor
Date: Wed, 24 Jan 2024 18:28:41 +0000
Subject: [PATCH 1/3] #1622: Fix issues with nested context manager calls

# Summary

## Problem statement

The `Resource` class is also a [Context Manager](https://docs.python.org/3/reference/datamodel.html#context-managers). That is, it implements the `__enter__()` and `__exit__()` methods to allow the use of `with Resource(...)` statements.

Prior to this PR, there was no limit on nesting `with` statements on the same `Resource`. This caused problems because, while the second `__enter__()` allowed the `Resource` to already be open, the first `__exit__()` would `close()` the Resource while the higher-level context still expected it to be open. This would cause errors like "ValueError: I/O operation on closed file", or the iterator would appear to start from partway through a file rather than at the start, and other similar behaviour depending on the exact locations of the nested calls.

This was made more complex because these `with` statements were often far removed from each other in the code, hidden behind iterators driven by generators, etc. They could also behave differently depending on the number of rows read, the type of Resource (local file vs inline, etc.), the different steps in a pipeline, and so on. All of this meant that the problem was rare, hard to reduce to an obvious reproduction case, and not realistic to expect developers to understand while developing new functionality.

## Solution

This PR prevents nested contexts from being created by raising an exception when the second, nested, `with` is attempted. This means that code that risks these issues can be quickly identified and resolved during development. The best way to resolve it is to use `Resource.to_copy()` so that the nested `with` acts on an independent view of the same Resource, which is likely what is intended in most cases anyway (a minimal sketch is included under Tests below).

This PR also updates a number of the internal uses of `with` to work on a copy of the Resource they are passed, so that they are independent of any external code and whatever it might have done with the Resource before the library methods were called.

## Breaking Change

This is technically a breaking change, as any external code that was developed using nested `with` statements - possibly deliberately, but more likely unknowingly not falling into the error cases - will have to be updated to use `to_copy()` or similar. However, the library functions have all been updated in a way that doesn't change their signature or their expected behaviour as documented by the unit tests. All pre-existing unit tests pass with no changes, and the unit tests added for the specific updated behaviour do not require any unusual constructs.

It is still possible that some undocumented and untested side-effect behaviours are different than before, and any code relying on those may also be affected (e.g. `to_petl()` iterators are now independent rather than causing changes in each other). So it is likely that very few actual impacts will occur in real-world code, and the exception raised does its best to explain the issue and suggest resolutions.

# Tests

- All existing unit tests run and pass unchanged
- New unit tests were added to cover the updated behaviour
- These unit tests were confirmed to fail without the updates in this PR (where appropriate).
- These unit tests now pass with the updated code.
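- For illustration, a minimal sketch of the behaviour this PR enforces (the path is a placeholder; the assertions mirror the new unit tests in `tests/resource/test_context_manager.py`):

  ```python
  from frictionless import FrictionlessException, Resource

  with Resource("data/table.csv") as resource:
      # A nested `with` on the same instance now raises FrictionlessException
      # instead of silently closing the file for the outer context.
      try:
          with resource:
              pass
      except FrictionlessException:
          pass

      # The supported pattern: nest on an independent copy.
      with resource.to_copy() as copy:
          assert copy.closed is False
      assert resource.closed is False  # the outer resource stays open
  ```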
- The original script that identified the issue in #1622 was run and now gives the correct result (all rows appropriately converted and saved to file) --- frictionless/formats/csv/parser.py | 3 +- frictionless/formats/excel/parsers/xls.py | 3 +- frictionless/formats/excel/parsers/xlsx.py | 3 +- frictionless/formats/inline/parser.py | 3 +- frictionless/formats/json/parsers/json.py | 3 +- frictionless/formats/json/parsers/jsonl.py | 3 +- frictionless/formats/ods/parser.py | 7 +- frictionless/formats/yaml/parser.py | 3 +- frictionless/resource/resource.py | 52 +++++++++- frictionless/resources/table.py | 10 +- frictionless/steps/table/table_normalize.py | 5 +- frictionless/validator/validator.py | 32 +++--- tests/resource/test_context_manager.py | 102 ++++++++++++++++++++ tests/table/test_to_petl.py | 74 ++++++++++++++ 14 files changed, 274 insertions(+), 29 deletions(-) create mode 100644 tests/resource/test_context_manager.py create mode 100644 tests/table/test_to_petl.py diff --git a/frictionless/formats/csv/parser.py b/frictionless/formats/csv/parser.py index 898b4cc18a..79d16fcec1 100644 --- a/frictionless/formats/csv/parser.py +++ b/frictionless/formats/csv/parser.py @@ -63,7 +63,8 @@ def write_row_stream(self, source: TableResource): "wt", delete=False, encoding=self.resource.encoding, newline="" ) as file: writer = csv.writer(file, **options) # type: ignore - with source: + # Use a copy of the source to avoid side effects (see #1622) + with source.to_copy() as source: if self.resource.dialect.header: writer.writerow(source.schema.field_names) for row in source.row_stream: diff --git a/frictionless/formats/excel/parsers/xls.py b/frictionless/formats/excel/parsers/xls.py index 48de1cde18..7818e0f80e 100644 --- a/frictionless/formats/excel/parsers/xls.py +++ b/frictionless/formats/excel/parsers/xls.py @@ -109,7 +109,8 @@ def write_row_stream(self, source: TableResource): if isinstance(title, int): title = f"Sheet {control.sheet}" sheet = book.add_sheet(title) - with source: + # Write from a copy of the source to avoid side effects (see #1622) + with source.to_copy() as source: if self.resource.dialect.header: for field_index, name in enumerate(source.schema.field_names): sheet.write(0, field_index, name) diff --git a/frictionless/formats/excel/parsers/xlsx.py b/frictionless/formats/excel/parsers/xlsx.py index b402fdc6d5..f9810070d3 100644 --- a/frictionless/formats/excel/parsers/xlsx.py +++ b/frictionless/formats/excel/parsers/xlsx.py @@ -148,7 +148,8 @@ def write_row_stream(self, source: TableResource): if isinstance(title, int): title = f"Sheet {control.sheet}" sheet = book.create_sheet(title) - with source: + # Write from a copy of the source to avoid side effects (see #1622) + with source.to_copy() as source: if self.resource.dialect.header: sheet.append(source.schema.field_names) for row in source.row_stream: diff --git a/frictionless/formats/inline/parser.py b/frictionless/formats/inline/parser.py index 48d61c8d11..e22cc04ea1 100644 --- a/frictionless/formats/inline/parser.py +++ b/frictionless/formats/inline/parser.py @@ -91,7 +91,8 @@ def read_cell_stream_create(self): # type: ignore def write_row_stream(self, source: TableResource): data: List[Any] = [] control = InlineControl.from_dialect(self.resource.dialect) - with source: + # Write from a copy of the source to avoid side effects (see #1622) + with source.to_copy() as source: if self.resource.dialect.header and not control.keyed: data.append(source.schema.field_names) for row in source.row_stream: diff --git 
a/frictionless/formats/json/parsers/json.py b/frictionless/formats/json/parsers/json.py index 42f22f4fc1..d88e83d903 100644 --- a/frictionless/formats/json/parsers/json.py +++ b/frictionless/formats/json/parsers/json.py @@ -54,7 +54,8 @@ def read_cell_stream_create(self) -> types.ICellStream: def write_row_stream(self, source: TableResource): data: List[Any] = [] control = JsonControl.from_dialect(self.resource.dialect) - with source: + # Write from a copy of the source to avoid side effects (see #1622) + with source.to_copy() as source: if self.resource.dialect.header and not control.keyed: data.append(source.schema.field_names) for row in source.row_stream: diff --git a/frictionless/formats/json/parsers/jsonl.py b/frictionless/formats/json/parsers/jsonl.py index 62e35e6081..3ed2cc0035 100644 --- a/frictionless/formats/json/parsers/jsonl.py +++ b/frictionless/formats/json/parsers/jsonl.py @@ -46,7 +46,8 @@ def write_row_stream(self, source: TableResource): control = JsonControl.from_dialect(self.resource.dialect) with tempfile.NamedTemporaryFile(delete=False) as file: writer = platform.jsonlines.Writer(file) - with source: + # Write from a copy of the source to avoid side effects (see #1622) + with source.to_copy() as source: if self.resource.dialect.header and not control.keyed: writer.write(source.schema.field_names) for row in source.row_stream: diff --git a/frictionless/formats/ods/parser.py b/frictionless/formats/ods/parser.py index 96dcaaac63..bd878cf009 100644 --- a/frictionless/formats/ods/parser.py +++ b/frictionless/formats/ods/parser.py @@ -82,15 +82,16 @@ def write_row_stream(self, source: TableResource): file.close() book = platform.ezodf.newdoc(doctype="ods", filename=file.name) title = f"Sheet {control.sheet}" - # Get size - with source: + # Get size. 
Use a copy of the source to avoid side effects (see #1622)
+        with source.to_copy() as source:
             row_size = 1
             col_size = len(source.schema.fields)
             for _ in source.row_stream:
                 row_size += 1
         book.sheets += platform.ezodf.Sheet(title, size=(row_size, col_size))
         sheet = book.sheets[title]
-        with source:
+        # Write from a copy of the source to avoid side effects (see #1622)
+        with source.to_copy() as source:
             if self.resource.dialect.header:
                 for field_index, name in enumerate(source.schema.field_names):
                     sheet[(0, field_index)].set_value(name)
diff --git a/frictionless/formats/yaml/parser.py b/frictionless/formats/yaml/parser.py
index 7d2e3016c5..6d8da920ae 100644
--- a/frictionless/formats/yaml/parser.py
+++ b/frictionless/formats/yaml/parser.py
@@ -52,7 +52,8 @@ def read_cell_stream_create(self) -> types.ICellStream:
     def write_row_stream(self, source: TableResource):
         data: List[Any] = []
         control = YamlControl.from_dialect(self.resource.dialect)
-        with source:
+        # Write from a copy of the source to avoid side effects (see #1622)
+        with source.to_copy() as source:
             if self.resource.dialect.header and not control.keyed:
                 data.append(source.schema.field_names)
             for row in source.row_stream:
diff --git a/frictionless/resource/resource.py b/frictionless/resource/resource.py
index 3d93d85c95..3eb462244b 100644
--- a/frictionless/resource/resource.py
+++ b/frictionless/resource/resource.py
@@ -238,6 +238,7 @@ def __attrs_post_init__(self):
         # Internal
         self.__loader: Optional[Loader] = None
         self.__buffer: Optional[types.IBuffer] = None
+        self.__context_manager_entered: bool = False

         # Detect resource
         system.detect_resource(self)
@@ -257,11 +258,58 @@ def __attrs_post_init__(self):
     # TODO: shall we guarantee here that it's at the beginning for the file?
     # TODO: maybe it's possible to do type narrowing here?
     def __enter__(self):
-        if self.closed:
-            self.open()
+        """
+        Enters a context manager for the resource.
+        We need to be careful with contexts because they open and close the Resource
+        (and thus any underlying files) and we don't want to close a file that is
+        being used somewhere higher up the call stack.
+
+        e.g. if nested contexts were allowed then:
+
+            with Resource("in.csv") as resource:
+                with resource:
+                    # use resource
+                resource.write("out.csv")
+
+        would result in errors because the second context would close the file
+        before the write happened. While the above code is obvious, similar
+        things can happen when composing steps in pipelines, calling petl code etc.
+        where the various functions may have no knowledge of each other.
+        See #1622 for more details.
+
+        So we only allow a single context to be open at a time, and raise an
+        exception if nested context is attempted. For similar reasons, we
+        also raise an exception if a context is attempted on an open resource.
+
+        The above code can be successfully written as:
+
+            with Resource("in.csv") as resource:
+                with resource.to_copy() as resource2:
+                    # use resource2
+                resource.write("out.csv")
+
+        which keeps resource and resource2 as independent views on the same file.
+
+        Note that if you absolutely need to use a resource in a manner where you
+        don't care if it is "opened" multiple times and closed once, then you
+        can directly use `open()` and `close()`, but you also become responsible
+        for ensuring the file is closed at the correct time.
+        """
+        if self.__context_manager_entered:
+            note = "Resource has previously entered a context manager (`with` statement) and does not support nested contexts. 
To use in a nested context, use `to_copy()` then use the copy in the `with`."
+            raise FrictionlessException(note)
+        if self.closed == False:
+            note = "Resource is currently open, and cannot be used in a `with` statement (which would reopen the file). To use `with` on an open Resource, use `to_copy()` then use the copy in the `with`."
+            raise FrictionlessException(note)
+
+        self.__context_manager_entered = True
+
+        self.open()
         return self

     def __exit__(self, type, value, traceback):  # type: ignore
+        # Mark the context manager as exited so that sequential contexts are allowed.
+        self.__context_manager_entered = False
         self.close()

     @property
diff --git a/frictionless/resources/table.py b/frictionless/resources/table.py
index 98a76f3473..0c2f00c38a 100644
--- a/frictionless/resources/table.py
+++ b/frictionless/resources/table.py
@@ -254,7 +254,8 @@ def __open_lookup(self):
                 self.__lookup[source_name][source_key] = set()
                 if not source_res:
                     continue
-                with source_res:
+                # Iterate on a copy to avoid side effects (see #1622)
+                with source_res.to_copy() as source_res:
                     for row in source_res.row_stream:  # type: ignore
                         cells = tuple(row.get(field_name) for field_name in source_key)  # type: ignore
                         if set(cells) == {None}:  # type: ignore
@@ -641,12 +642,15 @@ def from_petl(view: Any, **options: Any):

     def to_petl(self, normalize: bool = False):
         """Export resource as a PETL table"""
-        resource = self.to_copy()
+        # Store a copy of self to avoid side effects (see #1622)
+        self_copy = self.to_copy()

         # Define view
         class ResourceView(platform.petl.Table):  # type: ignore
             def __iter__(self):  # type: ignore
-                with resource:
+                # Iterate over a copy of the resource so that each instance of the iterator is independent (see #1622)
+                # If we didn't do this, then different iterators on the same table would interfere with each other.
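+                # For example, `it1 = iter(table)` and `it2 = iter(table)` each open
+                # their own copy, so the two iterators can be advanced independently
+                # (see tests/table/test_to_petl.py).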
+ with self_copy.to_copy() as resource: if normalize: yield resource.schema.field_names yield from (row.to_list() for row in resource.row_stream) diff --git a/frictionless/steps/table/table_normalize.py b/frictionless/steps/table/table_normalize.py index 409d2a90ab..cd5bbb2fb5 100644 --- a/frictionless/steps/table/table_normalize.py +++ b/frictionless/steps/table/table_normalize.py @@ -24,11 +24,12 @@ class table_normalize(Step): # Transform def transform_resource(self, resource: Resource): - current = resource.to_copy() + resource_copy = resource.to_copy() # Data def data(): # type: ignore - with current: + # Yield from a copy to avoid side effects (see #1622) + with resource_copy.to_copy() as current: yield current.header.to_list() # type: ignore for row in current.row_stream: # type: ignore yield row.to_list() # type: ignore diff --git a/frictionless/validator/validator.py b/frictionless/validator/validator.py index 4657c9f758..28088325d9 100644 --- a/frictionless/validator/validator.py +++ b/frictionless/validator/validator.py @@ -94,10 +94,6 @@ def validate_resource( errors: List[Error] = [] warnings: List[str] = [] - # Prepare checklist - checklist = checklist or Checklist() - checks = checklist.connect(resource) - # Validate metadata try: resource.to_descriptor(validate=True) @@ -119,13 +115,20 @@ def validate_resource( try: resource.open() except FrictionlessException as exception: - resource.close() return Report.from_validation_task( resource, time=timer.time, errors=exception.to_errors() ) + finally: + # Always close the resource if we opened it to avoid side effects + resource.close() + + # Validate row data + # Run the per-row validation against a copy of the resource to avoid side effects (see #1622) + with resource.to_copy() as resource_copy: + # Prepare checklist, and connect it to the resource copy + checklist = checklist or Checklist() + checks = checklist.connect(resource_copy) - # Validate data - with resource: # Validate start for index, check in enumerate(checks): for error in check.validate_start(): @@ -135,20 +138,20 @@ def validate_resource( errors.append(error) # Validate file - if not isinstance(resource, platform.frictionless_resources.TableResource): - if resource.hash is not None or resource.bytes is not None: - helpers.pass_through(resource.byte_stream) + if not isinstance(resource_copy, platform.frictionless_resources.TableResource): + if resource_copy.hash is not None or resource_copy.bytes is not None: + helpers.pass_through(resource_copy.byte_stream) # Validate table else: row_count = 0 - labels = resource.labels + labels = resource_copy.labels while True: row_count += 1 # Emit row try: - row = next(resource.row_stream) # type: ignore + row = next(resource_copy.row_stream) # type: ignore except FrictionlessException as exception: errors.append(exception.error) continue @@ -189,6 +192,11 @@ def validate_resource( if checklist.match(error): errors.append(error) + # Update the stats in the base resource with those from the copy + # Note that this mutation of the base resource is an expected result of the validation, + # but depending on what other code does with the resource, they may be overwritten. 
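+    # (the stats were accumulated on the copy while its row stream was consumed)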
+ resource.stats = resource_copy.stats + # Return report return Report.from_validation_task( resource, time=timer.time, labels=labels, errors=errors, warnings=warnings diff --git a/tests/resource/test_context_manager.py b/tests/resource/test_context_manager.py new file mode 100644 index 0000000000..4e50008e04 --- /dev/null +++ b/tests/resource/test_context_manager.py @@ -0,0 +1,102 @@ +from frictionless import Resource, FrictionlessException +import pytest + +# Test that the context manager implementation works correctly + +# As per PEP-343, the context manager should be a single-use object (like files) +# See https://peps.python.org/pep-0343/#caching-context-managers + + +def test_context_manager_opens_resource(): + with Resource("data/table.csv") as resource: + assert resource.closed is False + + +def test_context_manager_closes_resource(): + with Resource("data/table.csv") as resource: + pass + assert resource.closed is True + + +def test_context_manager_returns_same_resource(): + resource = Resource("data/table.csv") + with resource as context_manager_return_value: + assert resource == context_manager_return_value + + +def test_nested_context_causes_exception(): + with pytest.raises(FrictionlessException): + # Create nested with statements to test that we can't open + # the same resource twice via context managers + with Resource("data/table.csv") as resource: + with resource: + pass + + +def test_resource_copy_can_use_nested_context(): + # Create nested with statements to test that we can still open + # the same resource twice via context if we copy the resource + # before the second `with` + with Resource("data/table.csv") as resource: + copy = resource.to_copy() + with copy: + assert (copy.closed is False) + assert (resource.closed is False) + + # Check that the original resource is still open + assert (copy.closed is True) + assert (resource.closed is False) + + +def test_resource_can_use_repeated_non_nested_contexts(): + # Repeat context allowed + resource = Resource("data/table.csv") + with resource: + assert (resource.closed is False) + + assert (resource.closed is True) + + with resource: + assert (resource.closed is False) + assert (resource.closed is True) + + +def test_resource_copy_can_use_repeated_context(): + # Repeated context with a copy is allowed + resource = Resource("data/table.csv") + copy = resource.to_copy() + with resource: + assert (resource.closed is False) + assert (copy.closed is True) + + with copy: + assert (resource.closed is True) + assert (copy.closed is False) + + +def test_context_manager_on_open_resource_throw_exception(): + """ + Using the Resource in a `with` statement after it has been opened will unexpectedly close the resource + at the end of the context. So this is prevented by throwing an exception. + """ + resource = Resource("data/table.csv") + resource.open() + assert (resource.closed is False) + with pytest.raises(FrictionlessException): + with resource: + pass + + +def test_explicit_open_can_be_repeated(): + # Explicit open can be nested + # Note that the first close() call will close the resource, so anyone + # using explicit open() calls must be aware of that. 
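+    # (i.e. open() and close() are not reference-counted: a single close() closes
+    # the resource regardless of how many open() calls preceded it)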
+    resource = Resource("data/table.csv")
+    resource.open()
+    assert (resource.closed is False)
+    resource.open()
+    assert (resource.closed is False)
+    resource.close()
+    assert (resource.closed is True)
+    resource.close()
+    assert (resource.closed is True)
diff --git a/tests/table/test_to_petl.py b/tests/table/test_to_petl.py
new file mode 100644
index 0000000000..d4a89f1e40
--- /dev/null
+++ b/tests/table/test_to_petl.py
@@ -0,0 +1,74 @@
+from frictionless import Resource, FrictionlessException
+from petl import util
+
+
+def __assert_nth_row(it, n, expected):
+    """
+    A helper function to assert that the nth row of an iterator is as expected.
+    """
+    for _ in range(n-1):
+        next(it)
+    assert next(it) == expected
+
+
+def test_to_petl_gives_valid_table():
+    resource = Resource("data/table.csv")
+    table = resource.to_petl()
+    assert util.header(table) == ("id", "name")
+
+
+def test_to_petl_is_iterable():
+    resource = Resource("data/table.csv")
+    table = resource.to_petl()
+    it = iter(table)
+    assert next(it) == ["id", "name"]
+    assert next(it) == ["1", "english"]
+    assert next(it) == ["2", "中国人"]
+
+
+def test_to_petl_iterators_are_independent():
+    resource = Resource("data/table.csv")
+    table = resource.to_petl()
+    it1 = iter(table)
+    it2 = iter(table)
+
+    # Start reading from it1
+    assert next(it1) == ["id", "name"]
+    assert next(it1) == ["1", "english"]
+
+    # Check it2 now reads from the start again
+    assert next(it2) == ["id", "name"]
+    assert next(it2) == ["1", "english"]
+    assert next(it2) == ["2", "中国人"]
+
+    # Check it1 is still reading from where it left off
+    assert next(it1) == ["2", "中国人"]
+
+
+def test_to_petl_iterators_have_independent_lifetime():
+    resource = Resource("data/table-1MB.csv")
+    table = resource.to_petl()
+    it1 = iter(table)
+
+    # Assert the 101st row is as expected.
+    # Need to go that far to get past the buffer that is loaded on open()/__enter__
+    # and start reading from the file (as the file is closed by close()/__exit__,
+    # but the buffer is not, so you would get away with incorrectly closing the
+    # resource if you remain within the buffer).
+    # See #1622 for more.
+    __assert_nth_row(it1, 101, [
+        "ahltic", "22354", "428.17", "382.54", "false", "1926-09-15T01:15:27Z", "1956-04-14", "08:20:13", "4,5", "{\"x\":1,\"y\":7}"])
+
+    # Make a local function to give it2 a different scope
+    def read_from_it2():
+        it2 = iter(table)
+        __assert_nth_row(it2, 101, [
+            "ahltic", "22354", "428.17", "382.54", "false", "1926-09-15T01:15:27Z", "1956-04-14", "08:20:13", "4,5", "{\"x\":1,\"y\":7}"])
+
+    # Read from it2 within the nested function scope
+    read_from_it2()
+
+    # Check we can still read from it1 from where we left off
+    # Prior to the fix for #1622 this would throw an exception: "ValueError: I/O operation on closed file."
+    __assert_nth_row(it1, 101, [
+        "tlbmv8", "91378", "101.19", "832.96", "false", "1983-02-26T12:44:52Z", "1960-08-28", "04:44:23", "5,6", "{\"x\":9,\"y\":4}"])
From 599d8a0e57c92719096ec55d2f1c9d04fbf27678 Mon Sep 17 00:00:00 2001
From: Richard Taylor
Date: Thu, 25 Jan 2024 14:32:13 +0000
Subject: [PATCH 2/3] Fix linting issues

# Summary

This fixes formatting issues identified by the linter. No functional changes are included.
# Tests

- hatch run lint passes
- hatch run test passes (and coverage is sufficient)
---
 frictionless/resource/resource.py      |  6 +--
 frictionless/validator/validator.py    |  4 +-
 tests/resource/test_context_manager.py | 37 +++++++-------
 tests/table/test_to_petl.py            | 67 +++++++++++++++++++-----
 4 files changed, 80 insertions(+), 34 deletions(-)

diff --git a/frictionless/resource/resource.py b/frictionless/resource/resource.py
index 3eb462244b..b8f7d11fae 100644
--- a/frictionless/resource/resource.py
+++ b/frictionless/resource/resource.py
@@ -273,11 +273,11 @@ def __enter__(self):
         would result in errors because the second context would close the file
         before the write happened. While the above code is obvious, similar
-        things can happen when composing steps in pipelines, calling petl code etc. 
+        things can happen when composing steps in pipelines, calling petl code etc.
         where the various functions may have no knowledge of each other.
         See #1622 for more details.

-        So we only allow a single context to be open at a time, and raise an 
+        So we only allow a single context to be open at a time, and raise an
         exception if nested context is attempted. For similar reasons, we
         also raise an exception if a context is attempted on an open resource.

@@ -298,7 +298,7 @@ def __enter__(self):
         if self.__context_manager_entered:
             note = "Resource has previously entered a context manager (`with` statement) and does not support nested contexts. To use in a nested context, use `to_copy()` then use the copy in the `with`."
             raise FrictionlessException(note)
-        if self.closed == False:
+        if not self.closed:
             note = "Resource is currently open, and cannot be used in a `with` statement (which would reopen the file). To use `with` on an open Resource, use `to_copy()` then use the copy in the `with`."
raise FrictionlessException(note) diff --git a/frictionless/validator/validator.py b/frictionless/validator/validator.py index 28088325d9..3674fcc89f 100644 --- a/frictionless/validator/validator.py +++ b/frictionless/validator/validator.py @@ -138,7 +138,9 @@ def validate_resource( errors.append(error) # Validate file - if not isinstance(resource_copy, platform.frictionless_resources.TableResource): + if not isinstance( + resource_copy, platform.frictionless_resources.TableResource + ): if resource_copy.hash is not None or resource_copy.bytes is not None: helpers.pass_through(resource_copy.byte_stream) diff --git a/tests/resource/test_context_manager.py b/tests/resource/test_context_manager.py index 4e50008e04..0170001fe5 100644 --- a/tests/resource/test_context_manager.py +++ b/tests/resource/test_context_manager.py @@ -1,6 +1,7 @@ -from frictionless import Resource, FrictionlessException import pytest +from frictionless import FrictionlessException, Resource + # Test that the context manager implementation works correctly # As per PEP-343, the context manager should be a single-use object (like files) @@ -40,25 +41,25 @@ def test_resource_copy_can_use_nested_context(): with Resource("data/table.csv") as resource: copy = resource.to_copy() with copy: - assert (copy.closed is False) - assert (resource.closed is False) + assert copy.closed is False + assert resource.closed is False # Check that the original resource is still open - assert (copy.closed is True) - assert (resource.closed is False) + assert copy.closed is True + assert resource.closed is False def test_resource_can_use_repeated_non_nested_contexts(): # Repeat context allowed resource = Resource("data/table.csv") with resource: - assert (resource.closed is False) + assert resource.closed is False - assert (resource.closed is True) + assert resource.closed is True with resource: - assert (resource.closed is False) - assert (resource.closed is True) + assert resource.closed is False + assert resource.closed is True def test_resource_copy_can_use_repeated_context(): @@ -66,12 +67,12 @@ def test_resource_copy_can_use_repeated_context(): resource = Resource("data/table.csv") copy = resource.to_copy() with resource: - assert (resource.closed is False) - assert (copy.closed is True) + assert resource.closed is False + assert copy.closed is True with copy: - assert (resource.closed is True) - assert (copy.closed is False) + assert resource.closed is True + assert copy.closed is False def test_context_manager_on_open_resource_throw_exception(): @@ -81,7 +82,7 @@ def test_context_manager_on_open_resource_throw_exception(): """ resource = Resource("data/table.csv") resource.open() - assert (resource.closed is False) + assert resource.closed is False with pytest.raises(FrictionlessException): with resource: pass @@ -93,10 +94,10 @@ def test_explicit_open_can_be_repeated(): # using explicit open() calls must be aware of that. 
    resource = Resource("data/table.csv")
    resource.open()
-    assert (resource.closed is False)
+    assert resource.closed is False
    resource.open()
-    assert (resource.closed is False)
+    assert resource.closed is False
    resource.close()
-    assert (resource.closed is True)
+    assert resource.closed is True
    resource.close()
-    assert (resource.closed is True)
+    assert resource.closed is True
diff --git a/tests/table/test_to_petl.py b/tests/table/test_to_petl.py
index d4a89f1e40..d56ba70ab3 100644
--- a/tests/table/test_to_petl.py
+++ b/tests/table/test_to_petl.py
@@ -1,24 +1,25 @@
-from frictionless import Resource, FrictionlessException
 from petl import util

+from frictionless.resources import TableResource
+

 def __assert_nth_row(it, n, expected):
     """
     A helper function to assert that the nth row of an iterator is as expected.
     """
-    for _ in range(n-1):
+    for _ in range(n - 1):
         next(it)
     assert next(it) == expected


 def test_to_petl_gives_valid_table():
-    resource = Resource("data/table.csv")
+    resource = TableResource("data/table.csv")
     table = resource.to_petl()
     assert util.header(table) == ("id", "name")


 def test_to_petl_is_iterable():
-    resource = Resource("data/table.csv")
+    resource = TableResource("data/table.csv")
     table = resource.to_petl()
     it = iter(table)
     assert next(it) == ["id", "name"]
@@ -27,7 +28,7 @@ def test_to_petl_is_iterable():


 def test_to_petl_iterators_are_independent():
-    resource = Resource("data/table.csv")
+    resource = TableResource("data/table.csv")
     table = resource.to_petl()
     it1 = iter(table)
     it2 = iter(table)
@@ -46,7 +47,7 @@ def test_to_petl_iterators_are_independent():


 def test_to_petl_iterators_have_independent_lifetime():
-    resource = Resource("data/table-1MB.csv")
+    resource = TableResource("data/table-1MB.csv")
     table = resource.to_petl()
     it1 = iter(table)

@@ -56,19 +57,61 @@ def test_to_petl_iterators_have_independent_lifetime():
     # but the buffer is not, so you would get away with incorrectly closing the
     # resource if you remain within the buffer).
     # See #1622 for more.
-    __assert_nth_row(it1, 101, [
-        "ahltic", "22354", "428.17", "382.54", "false", "1926-09-15T01:15:27Z", "1956-04-14", "08:20:13", "4,5", "{\"x\":1,\"y\":7}"])
+    __assert_nth_row(
+        it1,
+        101,
+        [
+            "ahltic",
+            "22354",
+            "428.17",
+            "382.54",
+            "false",
+            "1926-09-15T01:15:27Z",
+            "1956-04-14",
+            "08:20:13",
+            "4,5",
+            '{"x":1,"y":7}',
+        ],
+    )

     # Make a local function to give it2 a different scope
     def read_from_it2():
         it2 = iter(table)
-        __assert_nth_row(it2, 101, [
-            "ahltic", "22354", "428.17", "382.54", "false", "1926-09-15T01:15:27Z", "1956-04-14", "08:20:13", "4,5", "{\"x\":1,\"y\":7}"])
+        __assert_nth_row(
+            it2,
+            101,
+            [
+                "ahltic",
+                "22354",
+                "428.17",
+                "382.54",
+                "false",
+                "1926-09-15T01:15:27Z",
+                "1956-04-14",
+                "08:20:13",
+                "4,5",
+                '{"x":1,"y":7}',
+            ],
+        )

     # Read from it2 within the nested function scope
     read_from_it2()

     # Check we can still read from it1 from where we left off
     # Prior to the fix for #1622 this would throw an exception: "ValueError: I/O operation on closed file."
-    __assert_nth_row(it1, 101, [
-        "tlbmv8", "91378", "101.19", "832.96", "false", "1983-02-26T12:44:52Z", "1960-08-28", "04:44:23", "5,6", "{\"x\":9,\"y\":4}"])
+    __assert_nth_row(
+        it1,
+        101,
+        [
+            "tlbmv8",
+            "91378",
+            "101.19",
+            "832.96",
+            "false",
+            "1983-02-26T12:44:52Z",
+            "1960-08-28",
+            "04:44:23",
+            "5,6",
+            '{"x":9,"y":4}',
+        ],
+    )
From 2d0103693b71650fcfb7e7c18eb29c013d7d5097 Mon Sep 17 00:00:00 2001
From: Richard Taylor
Date: Thu, 25 Jan 2024 18:11:31 +0000
Subject: [PATCH 3/3] #1622: Fix more cases (including CI)

# Summary

The CI tests identified some issues that don't show up on a normal test run. This commit fixes those issues.

It also highlighted that there were numerous areas that didn't have sufficient test coverage for the case where the caller had already opened the resource. The indexer has some notable changes, but the biggest area affected is the parsers when writing from an already opened source. This commit adds unit tests for the indexer and all the parser formats for this case, and fixes the code to support the lack of nested contexts.

# Tests

- Set up the required databases for CI by copying the commands in the GitHub Actions
- Run `hatch run +py=3.11 ci:test` and ensure all tests pass and coverage remains sufficient
- Run `hatch run test` in case it is different and ensure all tests pass and coverage remains sufficient

This also means that all linting etc. has been run too.
---
 frictionless/analyzer/analyzer.py           |  3 +-
 frictionless/formats/gsheets/parser.py      |  3 +-
 frictionless/formats/html/parser.py         |  3 +-
 frictionless/formats/pandas/parser.py       |  3 +-
 frictionless/formats/qsv/adapter.py         |  3 +-
 frictionless/formats/spss/parser.py         |  6 +-
 frictionless/formats/sql/adapter.py         |  3 +-
 frictionless/formats/sql/parser.py          |  3 +-
 frictionless/indexer/indexer.py             | 60 +++++++++++--------
 frictionless/steps/table/table_debug.py     |  5 +-
 frictionless/steps/table/table_validate.py  | 13 ++--
 tests/analyzer/test_resource.py             | 26 ++++++++
 tests/formats/csv/test_parser.py            | 14 +++++
 tests/formats/excel/parsers/test_xls.py     | 13 ++++
 tests/formats/excel/parsers/test_xlsx.py    | 13 ++++
 tests/formats/gsheets/test_parser.py        | 15 ++---
 tests/formats/html/test_parser.py           | 14 +++++
 tests/formats/inline/test_parser.py         | 12 ++++
 tests/formats/json/parsers/test_json.py     | 17 ++++++
 tests/formats/json/parsers/test_jsonl.py    | 15 +++++
 tests/formats/ods/test_parser.py            | 16 +++++
 tests/formats/pandas/test_parser.py         | 13 ++++
 tests/formats/parquet/test_parser.py        | 17 ++++++
 tests/formats/spss/test_parser.py           | 15 +++++
 .../sql/databases/duckdb/test_parser.py     | 13 ++++
 .../sql/databases/mysql/test_parser.py      | 29 +++++++++
 .../sql/databases/postgresql/test_parser.py | 29 +++++++++
 tests/formats/sql/test_parser.py            | 13 ++++
 tests/formats/yaml/test_parser.py           | 17 ++++++
 tests/indexer/test_resource.py              | 15 +++++
 tests/steps/table/test_table_debug.py       | 33 ++++++++++
 31 files changed, 405 insertions(+), 49 deletions(-)
 create mode 100644 tests/steps/table/test_table_debug.py

diff --git a/frictionless/analyzer/analyzer.py b/frictionless/analyzer/analyzer.py
index f015e70cd8..61eeb5aeea 100644
--- a/frictionless/analyzer/analyzer.py
+++ b/frictionless/analyzer/analyzer.py
@@ -34,7 +34,8 @@ def analyze_table_resource(
     # Iterate rows
     columns_data: Dict[str, List[Any]] = {}
     numeric = ["integer", "numeric", "number"]
-    with resource:
+    # Use a copy of the resource to avoid side effects (see #1622)
+    with resource.to_copy() as resource:
         for row in resource.row_stream:
             null_columns = 0
             for field_name in row:
diff
--git a/frictionless/formats/gsheets/parser.py b/frictionless/formats/gsheets/parser.py index 7cf81b2777..523118132f 100644 --- a/frictionless/formats/gsheets/parser.py +++ b/frictionless/formats/gsheets/parser.py @@ -53,7 +53,8 @@ def write_row_stream(self, source: TableResource): sh = gc.open_by_key(key) wks = sh.worksheet_by_id(gid) if gid else sh[0] # type: ignore data: List[Any] = [] - with source: + # Use a copy of the source to avoid side effects (see #1622) + with source.to_copy() as source: data.append(source.schema.field_names) for row in source.row_stream: data.append(row.to_list()) diff --git a/frictionless/formats/html/parser.py b/frictionless/formats/html/parser.py index a304685ef3..a3d0a5934c 100644 --- a/frictionless/formats/html/parser.py +++ b/frictionless/formats/html/parser.py @@ -57,7 +57,8 @@ def read_cell_stream_create(self) -> types.ICellStream: # It will give us an ability to support HtmlDialect def write_row_stream(self, source: TableResource): html = "\n" - with source: + # Use a copy of the source to avoid side effects (see #1622) + with source.to_copy() as source: html += "" for name in source.schema.field_names: html += f"" diff --git a/frictionless/formats/pandas/parser.py b/frictionless/formats/pandas/parser.py index ab7b3389a0..28c2bd4f6c 100644 --- a/frictionless/formats/pandas/parser.py +++ b/frictionless/formats/pandas/parser.py @@ -128,7 +128,8 @@ def write_row_stream(self, source: TableResource): data_rows: List[Tuple[Any]] = [] index_rows: List[Tuple[Any]] = [] fixed_types = {} - with source: + # Use a copy of the source to avoid side effects (see #1622) + with source.to_copy() as source: for row in source.row_stream: data_values: List[Any] = [] index_values: List[Any] = [] diff --git a/frictionless/formats/qsv/adapter.py b/frictionless/formats/qsv/adapter.py index eae77976f6..2b18a7b371 100644 --- a/frictionless/formats/qsv/adapter.py +++ b/frictionless/formats/qsv/adapter.py @@ -27,7 +27,8 @@ def read_schema(self, resource: Resource) -> Schema: command = [self.qsv_path, "stats", "--infer-dates", "--dates-whitelist", "all"] process = sp.Popen(command, stdout=sp.PIPE, stdin=sp.PIPE) # TODO: Use FileResource here (or future resource.stream_bytes()) - with resource: + # Use a copy of the resource to avoid side effects (see #1622) + with resource.to_copy() as resource: while True: chunk = resource.read_bytes(size=BLOCK_SIZE) if not chunk: diff --git a/frictionless/formats/spss/parser.py b/frictionless/formats/spss/parser.py index 0b706bdf9f..9a40054fd0 100644 --- a/frictionless/formats/spss/parser.py +++ b/frictionless/formats/spss/parser.py @@ -99,7 +99,8 @@ def write_row_stream(self, source: TableResource): # Write rows with sav.SavWriter(self.resource.normpath, ioUtf8=True, **spss_schema) as writer: # type: ignore - with source: + # Use a copy of the source to avoid side effects (see #1622) + with source.to_copy() as source: for row in source.row_stream: # type: ignore cells: List[Any] = [] for field in source.schema.fields: # type: ignore @@ -130,7 +131,8 @@ def __write_convert_schema(self, source: TableResource): "varTypes": {}, "formats": {}, } - with source: + # Use a copy of the source to avoid side effects (see #1622) + with source.to_copy() as source: # Add fields sizes: Dict[str, int] = {} mapping = self.__write_convert_type() diff --git a/frictionless/formats/sql/adapter.py b/frictionless/formats/sql/adapter.py index 5f49b7b4b5..554798a358 100644 --- a/frictionless/formats/sql/adapter.py +++ b/frictionless/formats/sql/adapter.py @@ -109,7 
+109,8 @@ def write_package(self, package: Package):
         for table in self.metadata.sorted_tables:
             if package.has_table_resource(table.name):
                 resource = package.get_table_resource(table.name)
-                with resource:
+                # Use a copy of the resource to avoid side effects (see #1622)
+                with resource.to_copy() as resource:
                     self.write_row_stream(resource.row_stream, table_name=table.name)
         return models.PublishResult(
             url=self.engine.url.render_as_string(hide_password=True),
diff --git a/frictionless/formats/sql/parser.py b/frictionless/formats/sql/parser.py
index d9475e53fc..3e1d68883e 100644
--- a/frictionless/formats/sql/parser.py
+++ b/frictionless/formats/sql/parser.py
@@ -51,6 +51,7 @@ def write_row_stream(self, source: TableResource):
         adapter = SqlAdapter(engine, control=control)
         if not adapter:
             raise FrictionlessException(f"Not supported source: {self.resource.normpath}")
-        with source:
+        # Write from a copy to prevent side effects (see #1622)
+        with source.to_copy() as source:
             adapter.write_schema(source.schema, table_name=control.table)
             adapter.write_row_stream(source.row_stream, table_name=control.table)
diff --git a/frictionless/indexer/indexer.py b/frictionless/indexer/indexer.py
index e689315d41..8277987ba8 100644
--- a/frictionless/indexer/indexer.py
+++ b/frictionless/indexer/indexer.py
@@ -45,20 +45,24 @@ def __attrs_post_init__(self):

     def index(self) -> Optional[Report]:
         self.prepare_resource()
-        with self.resource:
-            # Index is resouce-based operation not supporting FKs
-            if self.resource.schema.foreign_keys:
-                self.resource.schema.foreign_keys = []
-            self.create_table()
-            while True:
-                try:
-                    return self.populate_table()
-                except Exception:
-                    if self.fast and self.use_fallback:
-                        self.fast = False
-                        continue
-                    self.delete_table()
-                    raise
+
+        # Infer resource if needed
+        if self.resource.closed:
+            self.resource.infer()
+
+        # Index is a resource-based operation not supporting FKs
+        if self.resource.schema.foreign_keys:
+            self.resource.schema.foreign_keys = []
+        self.create_table()
+        while True:
+            try:
+                return self.populate_table()
+            except Exception:
+                if self.fast and self.use_fallback:
+                    self.fast = False
+                    continue
+                self.delete_table()
+                raise

     def prepare_resource(self):
         if self.qsv_path:
@@ -108,10 +112,12 @@ def populate_table_fast_sqlite(self):
         sql_command = f".import '|cat -' \"{self.table_name}\""
         command = ["sqlite3", "-csv", self.adapter.engine.url.database, sql_command]
         process = subprocess.Popen(command, stdin=PIPE, stdout=PIPE)
-        for line_number, line in enumerate(self.resource.byte_stream, start=1):
-            if line_number > 1:
-                process.stdin.write(line)  # type: ignore
-            self.report_progress(f"{self.resource.stats.bytes} bytes")
+        # Iterate over a copy of the resource to avoid side effects (see #1622)
+        with self.resource.to_copy() as resource:
+            for line_number, line in enumerate(resource.byte_stream, start=1):
+                if line_number > 1:
+                    process.stdin.write(line)  # type: ignore
+                self.report_progress(f"{self.resource.stats.bytes} bytes")
         process.stdin.close()  # type: ignore
         process.wait()

     def populate_table_fast_postgresql(self):
         database_url = self.adapter.engine.url.render_as_string(hide_password=False)
         with platform.psycopg.connect(database_url) as connection:
             with connection.cursor() as cursor:
-                query = 'COPY "%s" FROM STDIN CSV HEADER' % self.table_name
-                with cursor.copy(query) as copy:  # type: ignore
-                    while True:
-                        chunk = self.resource.read_bytes(size=settings.BLOCK_SIZE)
-                        if not chunk:
-                            break
-                        copy.write(chunk)
-                        self.report_progress(f"{self.resource.stats.bytes} bytes")
+                # Iterate over a copy of the resource to avoid side effects (see #1622)
+                with self.resource.to_copy() as resource:
+                    query = 'COPY "%s" FROM STDIN CSV HEADER' % self.table_name
+                    with cursor.copy(query) as copy:  # type: ignore
+                        while True:
+                            chunk = resource.read_bytes(size=settings.BLOCK_SIZE)
+                            if not chunk:
+                                break
+                            copy.write(chunk)
+                            self.report_progress(f"{self.resource.stats.bytes} bytes")

     def delete_table(self):
         self.adapter.delete_resource(self.table_name)
diff --git a/frictionless/steps/table/table_debug.py b/frictionless/steps/table/table_debug.py
index b5175bfd9b..1810785368 100644
--- a/frictionless/steps/table/table_debug.py
+++ b/frictionless/steps/table/table_debug.py
@@ -33,8 +33,9 @@ def transform_resource(self, resource: Resource):

         # Data
         def data():  # type: ignore
-            with current:
-                for row in current.row_stream:  # type: ignore
+            # Use a copy of the source to avoid side effects (see #1622)
+            with current.to_copy() as current_copy:
+                for row in current_copy.row_stream:  # type: ignore
                     self.function(row)  # type: ignore
                     yield row
diff --git a/frictionless/steps/table/table_validate.py b/frictionless/steps/table/table_validate.py
index 1d17bd1afd..dba4d2ff92 100644
--- a/frictionless/steps/table/table_validate.py
+++ b/frictionless/steps/table/table_validate.py
@@ -29,11 +29,14 @@ def transform_resource(self, resource: Resource):

         # Data
         def data():  # type: ignore
-            with current:
-                if not current.header.valid:  # type: ignore
-                    raise FrictionlessException(error=current.header.errors[0])  # type: ignore
-                yield current.header  # type: ignore
-                for row in current.row_stream:  # type: ignore
+            # Use a copy of the source to avoid side effects (see #1622)
+            with current.to_copy() as current_copy:  # type: ignore
+                if not current_copy.header.valid:  # type: ignore
+                    raise FrictionlessException(
+                        error=current_copy.header.errors[0]  # type: ignore
+                    )  # type: ignore
+                yield current_copy.header  # type: ignore
+                for row in current_copy.row_stream:  # type: ignore
                     if not row.valid:  # type: ignore
                         raise FrictionlessException(error=row.errors[0])  # type: ignore
                     yield row
diff --git a/tests/analyzer/test_resource.py b/tests/analyzer/test_resource.py
index 53da72cc12..f572afb9e7 100644
--- a/tests/analyzer/test_resource.py
+++ b/tests/analyzer/test_resource.py
@@ -241,3 +241,29 @@ def test_analyze_resource_detailed_with_invalid_data():
     assert analysis["rowsWithNullValues"] == 3
     assert analysis["notNullRows"] == 1
     assert analysis["variableTypes"] == {"integer": 3, "string": 1}
+
+
+def test_analyze_resource_is_independent_bug_1622():
+    # Test that we can analyze a resource without side effects
+    resource = TableResource(path="data/analysis-data.csv")
+    with resource:
+        analysis = resource.analyze()
+        assert list(analysis.keys()) == [
+            "variableTypes",
+            "notNullRows",
+            "rowsWithNullValues",
+            "fieldStats",
+            "averageRecordSizeInBytes",
+            "timeTaken",
+            "md5",
+            "sha256",
+            "bytes",
+            "fields",
+            "rows",
+        ]
+        assert round(analysis["averageRecordSizeInBytes"]) == 85
+        assert analysis["fields"] == 11
+        assert analysis["rows"] == 9
+        assert analysis["rowsWithNullValues"] == 2
+        assert analysis["notNullRows"] == 7
+        assert analysis["variableTypes"] == {}
diff --git a/tests/formats/csv/test_parser.py b/tests/formats/csv/test_parser.py
index 2bf4bca368..6978352174 100644
--- a/tests/formats/csv/test_parser.py
+++ b/tests/formats/csv/test_parser.py
@@ -344,3 +344,17 @@ def test_csv_parser_proper_quote_issue_493():
     resource.infer()
     assert
resource.dialect.to_descriptor() == {} assert len(resource.schema.fields) == 126 + + +@pytest.mark.skipif(platform.type == "windows", reason="Fix on Windows") +def test_csv_parser_write_independent_issue_1622(tmpdir): + source = TableResource(path="data/table.csv") + with source: + target = TableResource(path=str(tmpdir.join("table.csv"))) + source.write(target) + with target: + assert target.header == ["id", "name"] + assert target.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/formats/excel/parsers/test_xls.py b/tests/formats/excel/parsers/test_xls.py index 73e5a02213..9668a32ee1 100644 --- a/tests/formats/excel/parsers/test_xls.py +++ b/tests/formats/excel/parsers/test_xls.py @@ -169,3 +169,16 @@ def test_xls_parser_cast_int_to_string_1251(): {"A": "001", "B": "b", "C": "1", "D": "a", "E": 1}, {"A": "002", "B": "c", "C": "1", "D": "1", "E": 1}, ] + + +def test_xls_parser_write_independent_bug_1622(tmpdir): + source = TableResource(path="data/table.csv") + with source: + target = TableResource(path=str(tmpdir.join("table.xls"))) + source.write(target) + with target: + assert target.header == ["id", "name"] + assert target.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/formats/excel/parsers/test_xlsx.py b/tests/formats/excel/parsers/test_xlsx.py index 2deb051e7f..61f2b520ed 100644 --- a/tests/formats/excel/parsers/test_xlsx.py +++ b/tests/formats/excel/parsers/test_xlsx.py @@ -307,3 +307,16 @@ def test_xlsx_parser_cannot_read_resource_from_remote_package_issue_1504(): resource = package.get_table_resource("excel") table = resource.read_table() assert len(table.rows) == 4 + + +def test_xlsx_parser_write_independent_1622(tmpdir): + source = TableResource(path="data/table.csv") + with source: + target = TableResource(path=str(tmpdir.join("table.xlsx"))) + source.write(target) + with target: + assert target.header == ["id", "name"] + assert target.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/formats/gsheets/test_parser.py b/tests/formats/gsheets/test_parser.py index 815167fb45..22f1dfbbff 100644 --- a/tests/formats/gsheets/test_parser.py +++ b/tests/formats/gsheets/test_parser.py @@ -52,10 +52,11 @@ def test_gsheets_parser_write(google_credentials_path): path = "https://docs.google.com/spreadsheets/d/1F2OiYmaf8e3x7jSc95_uNgfUyBlSXrcRg-4K_MFNZQI/edit" control = formats.GsheetsControl(credentials=google_credentials_path) source = TableResource(path="data/table.csv") - target = source.write(path=path, control=control) - with target: - assert target.header == ["id", "name"] - assert target.read_rows() == [ - {"id": 1, "name": "english"}, - {"id": 2, "name": "中国人"}, - ] + with source: + target = source.write(path=path, control=control) + with target: + assert target.header == ["id", "name"] + assert target.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/formats/html/test_parser.py b/tests/formats/html/test_parser.py index 225cd22180..382cf325f6 100644 --- a/tests/formats/html/test_parser.py +++ b/tests/formats/html/test_parser.py @@ -62,3 +62,17 @@ def test_html_parser_newline_in_cell_construction_file_issue_865(tmpdir): target = source.write(str(tmpdir.join("table.csv"))) target.infer(stats=True) assert target.stats.rows == 226 + + +@pytest.mark.skipif(platform.type == "windows", reason="Fix on Windows") +def test_html_parser_write_independent_bug_1622(tmpdir): + source = 
TableResource(path="data/table.csv") + with source: + target = TableResource(path=str(tmpdir.join("table.html"))) + source.write(target) + with target: + assert target.header == ["id", "name"] + assert target.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/formats/inline/test_parser.py b/tests/formats/inline/test_parser.py index 829c695b2a..a5a060b920 100644 --- a/tests/formats/inline/test_parser.py +++ b/tests/formats/inline/test_parser.py @@ -139,3 +139,15 @@ def test_inline_parser_write_skip_header(): with TableResource(path="data/table.csv") as resource: resource.write(target) assert target.data == [[1, "english"], [2, "中国人"]] + + +@pytest.mark.skip +def test_inline_parser_write_keyed_independent_bug_1622(tmpdir): + control = formats.InlineControl(keyed=True) + source = TableResource(path="data/table.csv") + with source: + target = source.write(format="inline", control=control) + assert target.data == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/formats/json/parsers/test_json.py b/tests/formats/json/parsers/test_json.py index 12a49af9a6..386b8df9b7 100644 --- a/tests/formats/json/parsers/test_json.py +++ b/tests/formats/json/parsers/test_json.py @@ -135,3 +135,20 @@ def test_json_parser_write_skip_header(tmpdir): with TableResource(path="data/table.csv") as resource: target = resource.write(target) assert target.read_data() == [[1, "english"], [2, "中国人"]] + + +# Bugs + + +def test_json_parser_write_independent_bug_1622(tmpdir): + source = TableResource(path="data/table.csv") + with source: + target = TableResource(path=str(tmpdir.join("table.json"))) + target = source.write(target) + assert target.normpath + with open(target.normpath) as file: + assert json.load(file) == [ + ["id", "name"], + [1, "english"], + [2, "中国人"], + ] diff --git a/tests/formats/json/parsers/test_jsonl.py b/tests/formats/json/parsers/test_jsonl.py index b29cb9339d..6c55799a38 100644 --- a/tests/formats/json/parsers/test_jsonl.py +++ b/tests/formats/json/parsers/test_jsonl.py @@ -59,3 +59,18 @@ def test_jsonl_parser_write_skip_header(tmpdir): {"field1": 1, "field2": "english"}, {"field1": 2, "field2": "中国人"}, ] + + +# Bugs + + +def test_jsonl_parser_write_independent_bug_1622(tmpdir): + source = TableResource(path="data/table.csv") + with source: + target = source.write(path=str(tmpdir.join("table.jsonl"))) + with target: + assert target.header == ["id", "name"] + assert target.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/formats/ods/test_parser.py b/tests/formats/ods/test_parser.py index 1ab6d564a8..c8a491aa3d 100644 --- a/tests/formats/ods/test_parser.py +++ b/tests/formats/ods/test_parser.py @@ -139,3 +139,19 @@ def test_ods_parser_write_skip_header(tmpdir): resource.write_table(target) table = target.read_table() assert table.header == ["field1", "field2"] + + +# Bugs + + +def test_ods_parser_write_independent_bug_1622(tmpdir): + source = TableResource(path="data/table.csv") + with source: + target = TableResource(path=str(tmpdir.join("table.ods"))) + source.write(target) + with target: + assert target.header == ["id", "name"] + assert target.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/formats/pandas/test_parser.py b/tests/formats/pandas/test_parser.py index cb60d791da..ce22960a13 100644 --- a/tests/formats/pandas/test_parser.py +++ b/tests/formats/pandas/test_parser.py @@ -324,3 +324,16 @@ def 
test_validate_package_with_in_code_resources_1245(): datapackage.add_resource(resource) report = validate(datapackage) assert len(report.errors) == 0 + + +# Bugs + + +def test_pandas_parser_write_independent_bug_1622(): + source = TableResource(path="data/table.csv") + with source: + target = source.write(format="pandas") + assert target.data.to_dict("records") == [ # type: ignore + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/formats/parquet/test_parser.py b/tests/formats/parquet/test_parser.py index 76b39efda0..142f257989 100644 --- a/tests/formats/parquet/test_parser.py +++ b/tests/formats/parquet/test_parser.py @@ -77,3 +77,20 @@ def test_parquet_parser_write_datetime_field_with_timezone(tmpdir): ) } ] + + +# Bugs + + +def test_parquet_parser_write_independent_bug_1622(tmpdir): + source = TableResource(path="data/table.csv") + with source: + target = TableResource(path=str(tmpdir.join("table.parq"))) + source.write(target) + with target: + assert target.format == "parq" + assert target.header == ["id", "name"] + assert target.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/formats/spss/test_parser.py b/tests/formats/spss/test_parser.py index 4824056920..f79447cf96 100644 --- a/tests/formats/spss/test_parser.py +++ b/tests/formats/spss/test_parser.py @@ -128,3 +128,18 @@ def test_spss_parser_write_timezone(tmpdir): "time": time(18), }, ] + + +# Bugs + + +def test_spss_parser_write_independent_bug_1622(tmpdir): + source = TableResource(path="data/table.csv") + with source: + target = source.write(str(tmpdir.join("table.sav"))) + with target: + assert target.header == ["id", "name"] + assert target.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/formats/sql/databases/duckdb/test_parser.py b/tests/formats/sql/databases/duckdb/test_parser.py index 1393408e90..ce0c268cb2 100644 --- a/tests/formats/sql/databases/duckdb/test_parser.py +++ b/tests/formats/sql/databases/duckdb/test_parser.py @@ -160,3 +160,16 @@ def test_sql_parser_describe_to_yaml_failing_issue_821(duckdb_url_data): resource = TableResource(path=duckdb_url_data, control=control) resource.infer() assert resource.to_yaml() + + +def test_sql_parser_write_independent_issue_1622(duckdb_url_data): + source = TableResource(path="data/table.csv") + with source: + control = formats.SqlControl(table="name", order_by="id") + target = source.write(path=duckdb_url_data, control=control) + with target: + assert target.header == ["id", "name"] + assert target.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/formats/sql/databases/mysql/test_parser.py b/tests/formats/sql/databases/mysql/test_parser.py index c95b61b2fb..efd2d70c7b 100644 --- a/tests/formats/sql/databases/mysql/test_parser.py +++ b/tests/formats/sql/databases/mysql/test_parser.py @@ -55,3 +55,32 @@ def test_sql_parser_write_string_pk_issue_777_mysql(mysql_url): {"id": 1, "name": "english"}, {"id": 2, "name": "中国人"}, ] + + +@pytest.mark.skipif(platform.type == "darwin", reason="Skip SQL test in MacOS") +@pytest.mark.skipif(platform.type == "windows", reason="Skip SQL test in Windows") +def test_sql_parser_write_independent_bug_1622(mysql_url): + source = TableResource(path="data/timezone.csv") + with source: + control = formats.SqlControl(table="timezone") + target = source.write(path=mysql_url, control=control) + with target: + assert target.header == ["datetime", "time"] + assert 
target.read_rows() == [ + { + "datetime": datetime(2020, 1, 1, 15), + "time": time(15), + }, + { + "datetime": datetime(2020, 1, 1, 15), + "time": time(15), + }, + { + "datetime": datetime(2020, 1, 1, 12), + "time": time(12), + }, + { + "datetime": datetime(2020, 1, 1, 18), + "time": time(18), + }, + ] diff --git a/tests/formats/sql/databases/postgresql/test_parser.py b/tests/formats/sql/databases/postgresql/test_parser.py index 6e8f7acc33..94d43378c2 100644 --- a/tests/formats/sql/databases/postgresql/test_parser.py +++ b/tests/formats/sql/databases/postgresql/test_parser.py @@ -62,3 +62,32 @@ def test_sql_parser_write_string_pk_issue_777_postgresql(postgresql_url): {"id": 1, "name": "english"}, {"id": 2, "name": "中国人"}, ] + + +@pytest.mark.skipif(platform.type == "darwin", reason="Skip SQL test in MacOS") +@pytest.mark.skipif(platform.type == "windows", reason="Skip SQL test in Windows") +def test_sql_parser_write_independent_bug_1622(postgresql_url): + source = TableResource(path="data/timezone.csv") + with source: + control = formats.SqlControl(table="timezone") + target = source.write(postgresql_url, control=control) + with target: + assert target.header == ["datetime", "time"] + assert target.read_rows() == [ + { + "datetime": datetime(2020, 1, 1, 15), + "time": time(15), + }, + { + "datetime": datetime(2020, 1, 1, 15), + "time": time(15), + }, + { + "datetime": datetime(2020, 1, 1, 12), + "time": time(12), + }, + { + "datetime": datetime(2020, 1, 1, 18), + "time": time(18), + }, + ] diff --git a/tests/formats/sql/test_parser.py b/tests/formats/sql/test_parser.py index 996fee9ffc..beef5df76c 100644 --- a/tests/formats/sql/test_parser.py +++ b/tests/formats/sql/test_parser.py @@ -151,3 +151,16 @@ def test_sql_parser_describe_to_yaml_failing_issue_821(sqlite_url_data): resource = TableResource(path=sqlite_url_data, control=control) resource.infer() assert resource.to_yaml() + + +def test_sql_parser_write_independent_bug_1622(sqlite_url_data): + source = TableResource(path="data/table.csv") + with source: + control = formats.SqlControl(table="name", order_by="id") + target = source.write(path=sqlite_url_data, control=control) + with target: + assert target.header == ["id", "name"] + assert target.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/formats/yaml/test_parser.py b/tests/formats/yaml/test_parser.py index 186eab9423..69bc7362fd 100644 --- a/tests/formats/yaml/test_parser.py +++ b/tests/formats/yaml/test_parser.py @@ -48,3 +48,20 @@ def test_yaml_parser_write_skip_header(tmpdir): {"field1": 1, "field2": "english"}, {"field1": 2, "field2": "中国人"}, ] + + +# Bugs + + +def test_yaml_parser_write_independent_bug_1622(tmpdir): + source = TableResource(path="data/table.csv") + with source: + target = TableResource(path=str(tmpdir.join("table.yaml"))) + source.write(target) + with target: + assert target.format == "yaml" + assert target.header == ["id", "name"] + assert target.read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/indexer/test_resource.py b/tests/indexer/test_resource.py index 5df10bb027..561cb7d0b6 100644 --- a/tests/indexer/test_resource.py +++ b/tests/indexer/test_resource.py @@ -94,3 +94,18 @@ def test_resource_index_sqlite_on_progress(database_url, mocker): assert on_progress.call_count == 2 on_progress.assert_any_call(control.table, "2 rows") on_progress.assert_any_call(control.table, "3 rows") + + +# Bugs + + +@pytest.mark.parametrize("database_url", database_urls) +def 
test_resource_index_sqlite_independent_bug_1622(database_url): + assert control.table + resource = TableResource(path="data/table.csv") + with resource: + resource.index(database_url, name=control.table) + assert TableResource(path=database_url, control=control).read_rows() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中国人"}, + ] diff --git a/tests/steps/table/test_table_debug.py b/tests/steps/table/test_table_debug.py new file mode 100644 index 0000000000..48019a3bc0 --- /dev/null +++ b/tests/steps/table/test_table_debug.py @@ -0,0 +1,33 @@ +from frictionless import Pipeline, steps +from frictionless.resources import TableResource + + +class Counter: + count = 0 + + def __call__(self, row): + self.count += 1 + + +def test_step_table_debug(): + source = TableResource(path="data/transform.csv") + counter = Counter() + + pipeline = Pipeline( + steps=[steps.table_debug(function=counter)], + ) + target = source.transform(pipeline) + assert target.schema.to_descriptor() == { + "fields": [ + {"name": "id", "type": "integer"}, + {"name": "name", "type": "string"}, + {"name": "population", "type": "integer"}, + ] + } + assert target.read_rows() == [ + {"id": 1, "name": "germany", "population": 83}, + {"id": 2, "name": "france", "population": 66}, + {"id": 3, "name": "spain", "population": 47}, + ] + + assert counter.count == 3