Skip to content

Commit

Permalink
Add gluonts.util.safe_extract (#2606)
Browse files Browse the repository at this point in the history
Co-authored-by: Jasper <[email protected]>
Co-authored-by: Lorenzo Stella <[email protected]>
  • Loading branch information
3 people authored Jan 30, 2023
1 parent 682e1cc commit 0554230
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 5 deletions.
3 changes: 2 additions & 1 deletion src/gluonts/dataset/repository/_gp_copula_2019.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from gluonts.dataset.common import MetaData, TrainDatasets
from gluonts.dataset.field_names import FieldName
from gluonts.dataset.repository._util import metadata
from gluonts.util import safe_extractall


class GPCopulaDataset(NamedTuple):
Expand Down Expand Up @@ -140,7 +141,7 @@ def download_dataset(dataset_path: Path, ds_info: GPCopulaDataset):
request.urlretrieve(ds_info.url, dataset_path / f"{ds_info.name}.tar.gz")

with tarfile.open(dataset_path / f"{ds_info.name}.tar.gz") as tar:
tar.extractall(path=dataset_path)
safe_extractall(tar, path=dataset_path)


def get_data(dataset_path: Path, ds_info: GPCopulaDataset):
Expand Down
3 changes: 2 additions & 1 deletion src/gluonts/nursery/sagemaker_sdk/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from gluonts.dataset.repository import datasets
from gluonts.model.estimator import Estimator
from gluonts.model.predictor import Predictor
from gluonts.util import safe_extractall

from .defaults import (
ENTRY_POINTS_FOLDER,
Expand Down Expand Up @@ -503,7 +504,7 @@ def _retrieve_model(self, locations):
with self._s3fs.open(locations.model_archive, "rb") as stream:
with tarfile.open(mode="r:gz", fileobj=stream) as archive:
with TemporaryDirectory() as temp_dir:
archive.extractall(temp_dir)
safe_extractall(archive, temp_dir)
predictor = Predictor.deserialize(Path(temp_dir))

return predictor
Expand Down
3 changes: 2 additions & 1 deletion src/gluonts/nursery/tsbench/src/cli/evaluations/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from typing import Any, cast, Dict, List, Optional
import botocore
import click
from gluonts.util import safe_extract
from tqdm.auto import tqdm
from tqdm.contrib.concurrent import process_map
from tsbench.analysis.utils import run_parallel
Expand Down Expand Up @@ -104,7 +105,7 @@ def _download_public_evaluations(
file = Path(tmp) / "metrics.tar.gz"
client.download_file(public_bucket, "metrics.tar.gz", str(file))
with tarfile.open(file, mode="r:gz") as tar:
tar.extractall(evaluations_path)
safe_extractall(tar, evaluations_path)

# Then, optionally download the forecasts
if include_forecasts:
Expand Down
3 changes: 2 additions & 1 deletion src/gluonts/shell/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from gluonts.dataset.common import Dataset, FileDataset, MetaData
from gluonts.model import Predictor
from gluonts.util import safe_extractall

from . import sagemaker

Expand All @@ -41,7 +42,7 @@ def _load(self) -> Dict[str, Union[Dataset, Predictor]]:
if "model" in self.channels:
path = self.channels.pop("model")
with tarfile.open(path / "model.tar.gz") as targz:
targz.extractall(path=path)
safe_extractall(targz, path)
model = Predictor.deserialize(path)

file_dataset = partial(FileDataset, freq=self.hyperparameters["freq"])
Expand Down
35 changes: 35 additions & 0 deletions src/gluonts/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
# permissions and limitations under the License.

import copy
import tarfile
from functools import lru_cache
from pathlib import Path
from typing import TYPE_CHECKING, TypeVar

T = TypeVar("T")
Expand Down Expand Up @@ -66,3 +68,36 @@ def my_property(self):
introduced in Python 3.8.
"""
return property(lru_cache(1)(method))


def will_extractall_into(tar: tarfile.TarFile, path: Path) -> None:
"""
Check that the content of ``tar`` will be extracted within ``path``
upon calling ``extractall``.
Raise a ``PermissionError`` if not.
"""
path = Path(path).resolve()

for member in tar.getmembers():
member_path = (path / member.name).resolve()

try:
member_path.relative_to(path)
except ValueError:
raise PermissionError(f"'{member.name}' extracts out of target.")


def safe_extractall(
tar: tarfile.TarFile,
path: Path = Path("."),
members=None,
*,
numeric_owner=False,
):
"""
Safe wrapper around ``TarFile.extractall`` that checks all destination
files to be strictly within the given ``path``.
"""
will_extractall_into(tar, path)
tar.extractall(path, members, numeric_owner=numeric_owner)
39 changes: 38 additions & 1 deletion test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,14 @@
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

from gluonts.util import copy_with
import tempfile
import tarfile
from pathlib import Path
from typing import Optional

import pytest

from gluonts.util import copy_with, will_extractall_into


def test_copy_with():
Expand All @@ -24,3 +31,33 @@ def __init__(self, value):

assert a.value == 42
assert b.value == 99


@pytest.mark.parametrize(
"arcname, expect_failure",
[
(None, False),
("./file.txt", False),
("/a/../file.txt", False),
("/a/../../file.txt", True),
("../file.txt", True),
],
)
def test_will_extractall_into(arcname: Optional[str], expect_failure: bool):
with tempfile.TemporaryDirectory() as tempdir:
file_path = Path(tempdir) / "a" / "file.txt"
file_path.parent.mkdir(parents=True)
file_path.touch()

with tarfile.open(Path(tempdir) / "archive.tar.gz", "w:gz") as tar:
tar.add(file_path, arcname=arcname)

if expect_failure:
with pytest.raises(PermissionError):
with tarfile.open(
Path(tempdir) / "archive.tar.gz", "r:gz"
) as tar:
will_extractall_into(tar, Path(tempdir) / "b")
else:
with tarfile.open(Path(tempdir) / "archive.tar.gz", "r:gz") as tar:
will_extractall_into(tar, Path(tempdir) / "b")

0 comments on commit 0554230

Please sign in to comment.